# Unit2-Example7: Use of the Multilayer Perceptron Classifier
## Subject: Machine Learning - MSc. Computational Biology 
Author: Esteban García-Cuesta, Departamento de Inteligencia Artificial, UPM (License CC-BY-NC)

This code has been developed to be used exclusively for educational purposes.

## Objectives: 
  - Learn how the Multilayer Perceptron Neural Networks Classifier works (Part I)
  - Learn how to interpret confusion matrix results (Part II)
  - Learn how to apply Multilayer Perceptron Neural Network Classifiers (Parts I, II)

## TO-DO as homework
  - Try different parameters of the model.

In [18]:
#Part I Learn how the Multilayer Perceptron Classifier model works

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score


# Read the zoo dataset from the CSV file
path = 'zoo.csv'
data = pd.read_csv(path)

# Positional columns 1..16 are the input data X; column 17 holds the labels y
y = data.iloc[:, 17]
X = data.iloc[:, 1:17]

# These counters are used to obtain the accuracy "by hand"
contTrain, contTest = 0, 0

# Hold out 10% of the examples for testing. The split seed is drawn at random
# from [0, 100), so every run produces a different train/test partition.
XTrain, XTest, yTrain, yTest = train_test_split(
    X,
    y,
    test_size=0.10,
    random_state=np.random.randint(100, size=1)[0],
)

# Replace each pandas object with its underlying numerical array
XTrain, XTest = XTrain.values, XTest.values
yTrain, yTest = yTrain.values, yTest.values

# Candidate MLP parameters to be compared with cross-validation
param_grid = dict(
    max_iter=[500, 1000, 2000],
    hidden_layer_sizes=[5, 10, 100],
    solver=['lbfgs'],
)

# Wrap the MLP Classifier in a 3-fold grid search over those candidates
clf = GridSearchCV(MLPClassifier(), param_grid, cv=3)

# Fit the grid search using the training dataset
clf.fit(XTrain, yTrain)

# Keep the best MLP classifier in the variable model and display it
model = clf.best_estimator_
print("Best estimator found by grid search:")
print(model)

# Calculate the 3-fold cross-validation accuracy of the selected model on the
# training data (scoring='accuracy', so higher is better; this is not an error rate)
scores = cross_val_score(model, XTrain, yTrain, cv=3, scoring='accuracy')

# print() separates its arguments with a space, so the values are passed as
# plain arguments; no string concatenation or "+" operator is needed
print('Validation classification Accuracies: ', scores)
print('Mean Validation Classification Accuracy: ', np.mean(scores))


# Compute the train and test predictions with the fitted grid search object
yhatTrain = clf.predict(XTrain)
yhatTest = clf.predict(XTest)

# Count "by hand" how many predicted values match the real targets.
# Iterating the paired (predicted, real) values avoids manual indexing.
contTrain = sum(1 for predicted, real in zip(yhatTrain, yTrain) if predicted == real)
contTest = sum(1 for predicted, real in zip(yhatTest, yTest) if predicted == real)

# Return the metric of accuracy: correctly classified examples / total examples
print(f'The train accuracy is: {contTrain / len(yTrain)}')
print(f'The test accuracy is: {contTest / len(yTest)}')


Best estimator found by grid search:
MLPClassifier(hidden_layer_sizes=10, max_iter=500, solver='lbfgs')
Validation classification Accuracies:  [0.93333333 0.96666667 0.8       ]
Mean Validation Classification Accuracy:  0.9
The train accuracy is: 1.0
The test accuracy is: 0.9090909090909091


In [19]:
#Part II Learn how to interpret confusion matrix results

from sklearn.metrics import classification_report,confusion_matrix

# Show, one per line, the banner, the train confusion matrix (real targets
# against predictions) and the per-class classification report
print(
    '----------------Train Confusion Matrix------------------',
    confusion_matrix(yTrain, yhatTrain),
    classification_report(yTrain, yhatTrain),
    sep='\n',
)


----------------Train Confusion Matrix------------------
[[38  0  0  0  0  0  0]
 [ 0 17  0  0  0  0  0]
 [ 0  0  5  0  0  0  0]
 [ 0  0  0 10  0  0  0]
 [ 0  0  0  0  4  0  0]
 [ 0  0  0  0  0  6  0]
 [ 0  0  0  0  0  0 10]]
              precision    recall  f1-score   support

           1       1.00      1.00      1.00        38
           2       1.00      1.00      1.00        17
           3       1.00      1.00      1.00         5
           4       1.00      1.00      1.00        10
           5       1.00      1.00      1.00         4
           6       1.00      1.00      1.00         6
           7       1.00      1.00      1.00        10

    accuracy                           1.00        90
   macro avg       1.00      1.00      1.00        90
weighted avg       1.00      1.00      1.00        90



In [21]:
#Part II Learn how to interpret confusion matrix results

#Print test confusion matrix
print('----------------Test Confusion Matrix------------------')
print(confusion_matrix(yTest, yhatTest))

# The test split is small, so a class can be predicted without appearing in
# yTest (support 0), which leaves its recall undefined. zero_division=0 reports
# such metrics as 0.00 explicitly instead of emitting UndefinedMetricWarning.
print(classification_report(yTest, yhatTest, zero_division=0))


----------------Test Confusion Matrix------------------
[[3 0 0 0 0]
 [0 3 0 0 0]
 [0 0 3 0 0]
 [0 0 0 1 1]
 [0 0 0 0 0]]
              precision    recall  f1-score   support

           1       1.00      1.00      1.00         3
           2       1.00      1.00      1.00         3
           4       1.00      1.00      1.00         3
           6       1.00      0.50      0.67         2
           7       0.00      0.00      0.00         0

    accuracy                           0.91        11
   macro avg       0.80      0.70      0.73        11
weighted avg       1.00      0.91      0.94        11



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
