In [2]:
print(__doc__)

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.colors import Normalize

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_iris
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import GridSearchCV


Automatically created module for IPython interactive environment


In [3]:
# Utility function to move the midpoint of a colormap to be around
# the values of interest.

class MidpointNormalize(Normalize):

    def __init__(self, vmin=None, vmax=None, midpoint=None, clip=False):
        self.midpoint = midpoint
        Normalize.__init__(self, vmin, vmax, clip)

    def __call__(self, value, clip=None):
        x, y = [self.vmin, self.midpoint, self.vmax], [0, 0.5, 1]
        return np.ma.masked_array(np.interp(value, x, y))

In [4]:
# Carrega os dados
data = pd.read_csv("DSL-StrongPasswordData.csv", header = 0)
# Reinicia o index
data = data.reset_index()
# Pega os IDs unicos campo 'subject'
unisub = list(data['subject'].unique())
# Cria ID numerico sequencial
mlist = [int(x) for x in range(len(unisub))]
# Vincula o Id numerico com o campo 'subject'
newvalue = dict(zip(unisub, mlist))
# Exibe dados tratados
data['subject'] = data['subject'].map(newvalue)


# import some data to play with



features = list(data.columns[2:])

X = data[features].values
y = data['subject'].values


In [9]:
# It is usually a good idea to scale the data for SVM training.
# We are cheating a bit in this example in scaling all of the data,
# instead of fitting the transformation on the training set and
# just applying it on the test set.

scaler = StandardScaler()
X = scaler.fit_transform(X)

# #############################################################################
# Train classifiers
#
# For an initial search, a logarithmic grid with basis
# 10 is often helpful. Using a basis of 2, a finer
# tuning can be achieved but at a much higher cost.

param_grid={
    'activation': ["identity", "logistic","tanh", "relu"],
    'solver': ["lbfgs", "sgd", "adam"],
    'learning_rate' : ["constant", "invscaling", "adaptive"],
    'hidden_layer_sizes': np.random.randint(5,100, size=(1, 15))[0] }
    #[(x,) for x in list(range(1,len(features)))]#}

cv = StratifiedShuffleSplit(n_splits=5, test_size=0.2, random_state=42)


grid = GridSearchCV(MLPClassifier(hidden_layer_sizes=(200, ),max_iter=600), param_grid=param_grid, cv=cv,n_jobs=1)
grid.fit(X, y)

print("The best parameters are %s with a score of %0.2f"
      % (grid.best_params_, grid.best_score_))







































The best parameters are {'activation': 'relu', 'hidden_layer_sizes': 84, 'learning_rate': 'invscaling', 'solver': 'adam'} with a score of 0.92



The best parameters are {'activation': 'tanh', 'learning_rate': 'constant', 'solver': 'adam'} with a score of 0.94

The best parameters are {'activation': 'relu', 'hidden_layer_sizes': 84, 'learning_rate': 'invscaling', 'solver': 'adam'} with a score of 0.92

In [8]:
np.random.randint(5,100, size=(1, 15))[0]

array([94, 78, 28, 67, 53, 45, 24, 20, 75, 15, 47, 84, 61, 78, 56])