## Redes Neurais - Multilayer Perceptron 

#### Aluno: Altiéris Marcelino Peixoto

##### Carregamento das bibliotecas utilizadas

In [40]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier

In [3]:
# Column names in file order: four float-typed features followed by the
# integer `class` label.
columns = ["wti_variance", "wti_skewness", "wti_curtosis", "image_entropy", "class"]

# Every column is parsed as float except the trailing label column.
data_types = {name: "float" for name in columns[:-1]}
data_types[columns[-1]] = "int"

# Passing `names` makes pandas treat the first line of the file as data
# rather than as a header row.
dataset = pd.read_csv(
    'dados_autent_bancaria.txt',
    names=columns,
    dtype=data_types,
)

##### 1 - Definição do conjunto de treinamento e teste - (90,10)

In [4]:
# Separate the label vector from the feature matrix; `dataset` itself is
# left unmodified because `drop` returns a new frame.
y = dataset['class']
X = dataset.drop(columns=['class'])

In [29]:
# Hold out a stratified 10% test set (90% train) so that both partitions keep
# the class proportions of the full data set. `random_state` fixes the
# shuffle, which makes the split reproducible across runs.
sss = StratifiedShuffleSplit(train_size=0.90, n_splits=1, test_size=0.10, random_state=0)

# With n_splits=1 the generator yields exactly one (train, test) pair of
# index arrays, so take it directly instead of looping.
train_index, test_index = next(sss.split(X, y))

# `split` returns *positional* indices, so both X and y must be selected with
# `.iloc`. Plain `y[train_index]` is a label-based lookup and only returns the
# right rows while the index happens to be the default 0..n-1 range.
X_train, X_test = X.iloc[train_index], X.iloc[test_index]
y_train, y_test = y.iloc[train_index], y.iloc[test_index]

In [28]:
# Number of non-null values in each feature column, per class label
# (shows how balanced the two classes are in the full data set).
dataset.groupby('class').count()

Unnamed: 0_level_0,wti_variance,wti_skewness,wti_curtosis,image_entropy
class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,762,762,762,762
1,610,610,610,610


In [32]:
# Number of test-set labels per class, for comparison with the class
# proportions of the full data set.
y_test.groupby(y_test).count()

class
0    77
1    61
Name: class, dtype: int64

##### 2 - Treinamento e otimização de parâmetros de uma rede Neural utilizando GridSearchCV

In [43]:
#  activation : {'identity', 'logistic', 'tanh', 'relu'}, default 'relu'
#        Activation function for the hidden layer.
#
#        - 'identity', no-op activation, useful to implement linear bottleneck,
#          returns f(x) = x
#
#        - 'logistic', the logistic sigmoid function,
#          returns f(x) = 1 / (1 + exp(-x)).
#
#        - 'tanh', the hyperbolic tan function,
#          returns f(x) = tanh(x).
#
#        - 'relu', the rectified linear unit function,
#          returns f(x) = max(0, x)

#  solver : {'lbfgs', 'sgd', 'adam'}, default 'adam'
#        The solver for weight optimization.
#
#        - 'lbfgs' is an optimizer in the family of quasi-Newton methods.
#
#        - 'sgd' refers to stochastic gradient descent.
#
#        - 'adam' refers to a stochastic gradient-based optimizer proposed
#          by Kingma, Diederik, and Jimmy Ba
#
#        Note: The default solver 'adam' works pretty well on relatively
#        large datasets (with thousands of training samples or more) in terms of
#        both training time and validation score.
#        For small datasets, however, 'lbfgs' can converge faster and perform
#        better.          
#

#  hidden_layer_sizes : tuple, length = n_layers - 2, default (100,)
#        The ith element represents the number of neurons in the ith
#        hidden layer.
#

#  momentum : float, default 0.9
#        Momentum for gradient descent update. Should be between 0 and 1. Only
#        used when solver='sgd'.

#  batch_size : int, optional, default 'auto'
#       Size of minibatches for stochastic optimizers.
#        If the solver is 'lbfgs', the classifier will not use minibatch.
#        When set to "auto", `batch_size=min(200, n_samples)`
    
#     learning_rate : {'constant', 'invscaling', 'adaptive'}, default 'constant'
#        Learning rate schedule for weight updates.
#
#        - 'constant' is a constant learning rate given by
#          'learning_rate_init'.
#
#        - 'invscaling' gradually decreases the learning rate ``learning_rate_``
#          at each time step 't' using an inverse scaling exponent of 'power_t'.
#          effective_learning_rate = learning_rate_init / pow(t, power_t)
#
#        - 'adaptive' keeps the learning rate constant to
#          'learning_rate_init' as long as training loss keeps decreasing.
#          Each time two consecutive epochs fail to decrease training loss by at
#          least tol, or fail to increase validation score by at least tol if
#          'early_stopping' is on, the current learning rate is divided by 5.
#
#        Only used when ``solver='sgd'``.
#
#    learning_rate_init : double, optional, default 0.001
#        The initial learning rate used. It controls the step-size
#        in updating the weights. Only used when solver='sgd' or 'adam'.

#    shuffle : bool, optional, default True
#        Whether to shuffle samples in each iteration. Only used when
#        solver='sgd' or 'adam'.

        
# Search space: 4 activations x 3 solvers x 6 single-hidden-layer widths
# (1 to 6 neurons) = 72 candidate configurations.
param_grid = [
    {
        'activation': ['identity', 'logistic', 'tanh', 'relu'],
        'solver': ['lbfgs', 'sgd', 'adam'],
        'hidden_layer_sizes': [(n_neurons,) for n_neurons in range(1, 7)],
    }
]

# Settings shared by every candidate; random_state pins the weight
# initialisation so the search is repeatable.
# Per the parameter notes above, `learning_rate` only matters for 'sgd' and
# `learning_rate_init` / `shuffle` only for 'sgd' and 'adam'.
# NOTE(review): learning_rate_init=1. is far above the documented default of
# 0.001 -- confirm this is intentional and not penalising 'sgd' / 'adam'.
base_mlp = MLPClassifier(
    learning_rate='adaptive',
    learning_rate_init=1.,
    early_stopping=True,
    shuffle=True,
    random_state=42,
)

# 3-fold cross-validated grid search scored by accuracy; n_jobs=-1 runs the
# fits in parallel on all available processors.
clf = GridSearchCV(base_mlp, param_grid, scoring='accuracy', cv=3, n_jobs=-1)

clf.fit(X_train, y_train)

GridSearchCV(cv=3, error_score='raise',
       estimator=MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=True, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='adaptive',
       learning_rate_init=1.0, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=42, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False),
       fit_params=None, iid=True, n_jobs=-1,
       param_grid=[{'activation': ['identity', 'logistic', 'tanh', 'relu'], 'solver': ['lbfgs', 'sgd', 'adam'], 'hidden_layer_sizes': [(1,), (2,), (3,), (4,), (5,), (6,)]}],
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='accuracy', verbose=0)

In [38]:
# Report the hyper-parameter combination with the best mean CV accuracy.
print("Best parameters set found on development set:", clf.best_params_, sep="\n")

Best parameters set found on development set:
{'activation': 'logistic', 'hidden_layer_sizes': (4,), 'solver': 'lbfgs'}


##### 3 - Aplicação da Rede Neural treinada para predição do conjunto de teste

In [49]:
# Predict the held-out test set with the fitted search object `clf`.
y_pred = clf.predict(X_test)

##### 4 - Acurácia do classificador

In [51]:
# Fraction of test samples whose predicted label matches the true label.
accuracy_score(y_test, y_pred)

1.0

##### 5 - Matriz de confusão

In [44]:
# Confusion matrix: rows are the true classes, columns the predicted classes.
confusion_matrix(y_test, y_pred)

array([[77,  0],
       [ 0, 61]])

In [45]:
# True-vs-predicted counts as a labelled table; margins=True appends the
# row and column totals ("All").
pd.crosstab(y_test, y_pred, rownames=['True'], colnames=['Predicted'], margins=True)

Predicted,0,1,All
True,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,77,0,77
1,0,61,61
All,77,61,138
