In [3]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.colors import Normalize

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split

In [9]:
# Carrega os dados
data = pd.read_csv("DSL-StrongPasswordData.csv", header = 0)
# Reinicia o index
data = data.reset_index()
# Pega os IDs unicos campo 'subject'
unisub = list(data['subject'].unique())
# Cria ID numerico sequencial
mlist = [int(x) for x in range(len(unisub))]
# Vincula o Id numerico com o campo 'subject'
newvalue = dict(zip(unisub, mlist))
data['subject'] = data['subject'].map(newvalue)

# Divide o conjunto de dados utilizando a proporção 80:20
train, test = train_test_split(data, test_size = 0.2, random_state=42)

features = list(data.columns[2:])

X = train[features]
y = train['subject'].values

X_test = test[features]
y_test = test['subject']

#Pre processa os dados
scaler = StandardScaler()
scaler.fit(X)

scaler.transform(X)
scaler.transform(X_test)

array([[ 0.65629302,  0.52267595,  1.75794114, ..., -0.21079328,
        -0.50536119,  1.8930323 ],
       [-0.21580746,  0.87016701, -0.09628472, ..., -0.36634048,
        -0.3541073 , -0.3925109 ],
       [ 0.65629302, -0.3808008 , -0.88272055, ..., -0.60100994,
        -0.56104016, -0.58632496],
       ..., 
       [ 0.22024278, -0.86728828,  0.18167967, ..., -0.33307316,
        -0.23485785,  0.30960797],
       [-0.21580746, -0.03330974, -0.33018158, ...,  1.44088411,
         1.4837371 , -0.67408982],
       [ 1.52839349, -1.28427755, -0.04882738, ..., -0.51469473,
        -0.51939054, -0.28646169]])

In [10]:
param_grid={
    'activation': ["identity", "logistic","tanh", "relu"],
    'solver': ["lbfgs", "sgd", "adam"],
    'learning_rate' : ["constant", "invscaling", "adaptive"],
    'hidden_layer_sizes': np.random.randint(5,100, size=(1, 15))[0] }

In [14]:
param_grid2={
    'alpha': 10.0 ** -np.arange(1, 7)
}    

In [12]:
net = MLPClassifier(random_state=42,hidden_layer_sizes=(84, ),max_iter=600,activation= 'relu', learning_rate= 'invscaling', solver='adam')

cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)

In [15]:
grid = GridSearchCV(net, param_grid=param_grid2, cv=cv,n_jobs=1)
grid.fit(X, y)

print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))

The best parameters are {'alpha': 9.9999999999999995e-07} with a score of 0.90
