In [1]:
import numpy as np
import scipy

from scipy.stats import randint, uniform
from sklearn import datasets
from sklearn.model_selection  import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.neural_network import MLPClassifier

In [15]:
import warnings
warnings.filterwarnings('ignore')

## Load Dataset
- Load 'digits' dataset in Scikit-learn
- http://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html

In [2]:
data = datasets.load_digits()

In [3]:
X_data = data.images   # load X_data
y_data = data.target   # load y_data

In [4]:
X_data = X_data.reshape(X_data.shape[0], X_data.shape[1] * X_data.shape[2])    # flatten X_data

In [5]:
print(X_data.shape)
print(y_data.shape)

(1797, 64)
(1797,)


In [6]:
X_train, X_test, y_train, y_test = train_test_split(X_data, y_data, test_size = 0.2, random_state = 7)    # split data into train & test set

In [7]:
clf = MLPClassifier(hidden_layer_sizes = (5, 5))   # create a MLP with two hidden layers with five neurons
clf

MLPClassifier(hidden_layer_sizes=(5, 5))

## Grid Search

In [8]:
# assign parameters for grid search
activation = ['tanh', 'relu']
solver = ['sgd', 'adam']
batch_size = [50, 100]
learning_rate_init = [0.1, 0.01, 0.001]
max_iter = [200, 300, 400, 500]

In [9]:
# create a dictionary to carry parameter settings
parameters = dict(activation = activation, solver = solver, batch_size = batch_size, learning_rate_init = learning_rate_init, max_iter = max_iter)

In [10]:
searcher = GridSearchCV(estimator = clf, param_grid = parameters, cv = 5, scoring = 'accuracy')

In [11]:
grid_result = searcher.fit(X_train, y_train)









In [42]:
# print out accuracy results and model setting
print("Best model: %s with accuracy of %f" % (grid_result.best_params_, grid_result.best_score_))
# for params, mean_score, scores in grid_result.cv_results_:
#     print("MEAN: %f (STD: %f) with: %r" % (scores.mean(), scores.std(), params))
for score,param in zip(grid_result.cv_results_['mean_test_score'], grid_result.cv_results_['params']):
    print("MEAN: %f with: %r" % (score, param))

Best model: {'activation': 'relu', 'batch_size': 50, 'learning_rate_init': 0.001, 'max_iter': 300, 'solver': 'adam'} with accuracy of 0.888686
MEAN: 0.247171 with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 200, 'solver': 'sgd'}
MEAN: 0.178119 with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 200, 'solver': 'adam'}
MEAN: 0.290222 with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 300, 'solver': 'sgd'}
MEAN: 0.211593 with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 300, 'solver': 'adam'}
MEAN: 0.270739 with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 400, 'solver': 'sgd'}
MEAN: 0.203811 with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 400, 'solver': 'adam'}
MEAN: 0.195490 with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 500, 'solver': 

In [30]:
grid_result.cv_results_.keys()

dict_keys(['mean_fit_time', 'std_fit_time', 'mean_score_time', 'std_score_time', 'param_activation', 'param_batch_size', 'param_learning_rate_init', 'param_max_iter', 'param_solver', 'params', 'split0_test_score', 'split1_test_score', 'split2_test_score', 'split3_test_score', 'split4_test_score', 'mean_test_score', 'std_test_score', 'rank_test_score'])

In [38]:
len(grid_result.cv_results_['mean_test_score'])

96

## Random Search

In [43]:
# assign parameters for random search
activation = ['tanh', 'relu']
solver = ['sgd', 'adam']
batch_size = randint(50, 100)
learning_rate_init = uniform(0.001, 0.1)
max_iter = randint(200, 500)

# Note the randint from scipy

In [44]:
# create a dictionary to carry parameter settings
parameters = dict(activation = activation, solver = solver, 
                  batch_size = batch_size, 
                  learning_rate_init = learning_rate_init, 
                  max_iter = max_iter)

In [45]:
searcher = RandomizedSearchCV(estimator = clf, param_distributions = parameters, 
                              cv = 5,
                              n_iter = 96, 
                              scoring = 'accuracy')
# Note n_iter = 96. It will do 96 parameter search.

In [46]:
random_result = searcher.fit(X_train, y_train)

In [55]:
# print out accuracy results and model setting
print("Best model: %s with accuracy of %f" % (random_result.best_params_, random_result.best_score_))
for score,param in zip(random_result.cv_results_['mean_test_score'], random_result.cv_results_['params']):
    print("MEAN: %f with: %r" % (score, param))

Best model: {'activation': 'relu', 'batch_size': 90, 'learning_rate_init': 0.004367532000464535, 'max_iter': 354, 'solver': 'adam'} with accuracy of 0.803044
MEAN: 0.128697 with: {'activation': 'relu', 'batch_size': 86, 'learning_rate_init': 0.10079093664347084, 'max_iter': 407, 'solver': 'sgd'}
MEAN: 0.103688 with: {'activation': 'relu', 'batch_size': 82, 'learning_rate_init': 0.09420728678303941, 'max_iter': 234, 'solver': 'adam'}
MEAN: 0.482910 with: {'activation': 'tanh', 'batch_size': 84, 'learning_rate_init': 0.046480291586405244, 'max_iter': 385, 'solver': 'adam'}
MEAN: 0.729249 with: {'activation': 'tanh', 'batch_size': 74, 'learning_rate_init': 0.012083563507862105, 'max_iter': 293, 'solver': 'adam'}
MEAN: 0.552901 with: {'activation': 'relu', 'batch_size': 93, 'learning_rate_init': 0.006953728391856195, 'max_iter': 208, 'solver': 'sgd'}
MEAN: 0.105081 with: {'activation': 'relu', 'batch_size': 67, 'learning_rate_init': 0.0374567978831606, 'max_iter': 481, 'solver': 'sgd'}
MEA

In [54]:
len(random_result.cv_results_['mean_test_score'])

96