In [21]:
import numpy as np
import scipy

from scipy.stats import randint, uniform
from sklearn import datasets
from sklearn.model_selection  import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.neural_network import MLPClassifier

## Load Dataset
- Load the 'digits' dataset bundled with scikit-learn
- https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html

In [3]:
# Load scikit-learn's bundled 'digits' dataset; later cells read its .images and .target fields
data = datasets.load_digits()

In [4]:
# Pull the image arrays (inputs) and the targets (labels) out of the loaded dataset
X_data, y_data = data.images, data.target

In [5]:
# Flatten every image into a single feature row (one row per sample).
# Built from data.images rather than from X_data itself so the cell is idempotent:
# re-running it no longer fails on an already-flattened 2-D X_data.
X_data = data.images.reshape(len(data.images), -1)

In [6]:
# Sanity check: show the feature-matrix shape and the label-vector shape, one per line
print(X_data.shape, y_data.shape, sep="\n")

(1797, 64)
(1797,)


In [7]:
# Hold out 20% of the samples as a test set; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X_data,
    y_data,
    test_size=0.2,
    random_state=7,
)

In [8]:
# Base estimator for both searches: an MLP with two hidden layers of five neurons each.
# Only hidden_layer_sizes is passed here; the searches below vary the other hyperparameters.
clf = MLPClassifier(hidden_layer_sizes=(5, 5))
clf

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(5, 5), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)

## Grid Search

In [11]:
# Candidate values for the exhaustive grid search (2 * 2 * 2 * 3 * 4 = 96 combinations)
activation = ['tanh', 'relu']
solver = ['sgd', 'adam']
batch_size = [50, 100]
learning_rate_init = [0.1, 0.01, 0.001]
max_iter = list(range(200, 501, 100))   # 200, 300, 400, 500

In [12]:
# Map each MLPClassifier keyword to the candidate values assigned above
parameters = {
    'activation': activation,
    'solver': solver,
    'batch_size': batch_size,
    'learning_rate_init': learning_rate_init,
    'max_iter': max_iter,
}

In [13]:
# Exhaustive search over every combination in `parameters`, scored by 5-fold CV accuracy
searcher = GridSearchCV(
    estimator=clf,
    param_grid=parameters,
    scoring='accuracy',
    cv=5,
)

In [14]:
# Run the grid search on the training split only (the test split is not used here).
# The next cell reads best_params_ / best_score_ and the per-candidate scores from grid_result.
grid_result = searcher.fit(X_train, y_train)





In [15]:
# Print the best setting, then the cross-validated accuracy of every candidate.
# cv_results_ replaces grid_scores_, which was removed from scikit-learn in 0.20;
# it exposes the per-candidate mean and standard deviation of the test score as parallel arrays.
print("Best model: %s with accuracy of %f" % (grid_result.best_params_, grid_result.best_score_))
grid_cv_results = grid_result.cv_results_
for mean_score, std_score, params in zip(grid_cv_results['mean_test_score'],
                                         grid_cv_results['std_test_score'],
                                         grid_cv_results['params']):
    print("MEAN: %f (STD: %f) with: %r" % (mean_score, std_score, params))

Best model: {'activation': 'relu', 'batch_size': 100, 'learning_rate_init': 0.001, 'max_iter': 500, 'solver': 'adam'} with accuracy of 0.857342
MEAN: 0.346619 (STD: 0.107287) with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 200, 'solver': 'sgd'}
MEAN: 0.166152 (STD: 0.061252) with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 200, 'solver': 'adam'}
MEAN: 0.260556 (STD: 0.081210) with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 300, 'solver': 'sgd'}
MEAN: 0.242276 (STD: 0.113562) with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 300, 'solver': 'adam'}
MEAN: 0.300940 (STD: 0.118937) with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 400, 'solver': 'sgd'}
MEAN: 0.221964 (STD: 0.119108) with: {'activation': 'tanh', 'batch_size': 50, 'learning_rate_init': 0.1, 'max_iter': 400, 'solver': 'adam'}
MEAN: 0.277233 (STD



## Random Search

In [19]:
# Distributions for the random search, covering the same ranges as the grid search above.
activation = ['tanh', 'relu']
solver = ['sgd', 'adam']
# scipy's randint(low, high) excludes `high`, so use high + 1 to keep 100 and 500 reachable.
batch_size = randint(50, 101)                 # integers 50..100 inclusive
# scipy's uniform(loc, scale) spans [loc, loc + scale]; scale = 0.099 gives [0.001, 0.1].
learning_rate_init = uniform(0.001, 0.099)
max_iter = randint(200, 501)                  # integers 200..500 inclusive

In [20]:
# Map each MLPClassifier keyword to the list or distribution assigned above
parameters = {
    'activation': activation,
    'solver': solver,
    'batch_size': batch_size,
    'learning_rate_init': learning_rate_init,
    'max_iter': max_iter,
}

In [23]:
# Sample 96 candidates (as many as the grid search enumerates) and score each by 5-fold CV accuracy.
# random_state pins which candidates are drawn from the distributions, so a re-run evaluates the
# same settings. clf itself is created without a random_state, so the scores can still vary between runs.
searcher = RandomizedSearchCV(estimator = clf, param_distributions = parameters, cv = 5, n_iter = 96, scoring = 'accuracy', random_state = 7)

In [25]:
# Run the randomized search on the training split only (the test split is not used here).
# The next cell reads best_params_ / best_score_ and the per-candidate scores from random_result.
random_result = searcher.fit(X_train, y_train)

In [26]:
# Print the best setting, then the cross-validated accuracy of every sampled candidate.
# cv_results_ replaces grid_scores_, which was removed from scikit-learn in 0.20;
# it exposes the per-candidate mean and standard deviation of the test score as parallel arrays.
print("Best model: %s with accuracy of %f" % (random_result.best_params_, random_result.best_score_))
random_cv_results = random_result.cv_results_
for mean_score, std_score, params in zip(random_cv_results['mean_test_score'],
                                         random_cv_results['std_test_score'],
                                         random_cv_results['params']):
    print("MEAN: %f (STD: %f) with: %r" % (mean_score, std_score, params))

Best model: {'activation': 'relu', 'batch_size': 79, 'learning_rate_init': 0.0019578074034639174, 'max_iter': 495, 'solver': 'adam'} with accuracy of 0.868476
MEAN: 0.650903 (STD: 0.119895) with: {'activation': 'tanh', 'batch_size': 90, 'learning_rate_init': 0.0091287060757332707, 'max_iter': 352, 'solver': 'sgd'}
MEAN: 0.365655 (STD: 0.103631) with: {'activation': 'tanh', 'batch_size': 52, 'learning_rate_init': 0.05074528454631725, 'max_iter': 277, 'solver': 'sgd'}
MEAN: 0.228467 (STD: 0.161759) with: {'activation': 'relu', 'batch_size': 86, 'learning_rate_init': 0.033906002127962354, 'max_iter': 410, 'solver': 'adam'}
MEAN: 0.382917 (STD: 0.162967) with: {'activation': 'tanh', 'batch_size': 53, 'learning_rate_init': 0.081074768375678077, 'max_iter': 352, 'solver': 'sgd'}
MEAN: 0.105076 (STD: 0.002222) with: {'activation': 'relu', 'batch_size': 50, 'learning_rate_init': 0.060448530462735961, 'max_iter': 388, 'solver': 'sgd'}
MEAN: 0.263180 (STD: 0.072728) with: {'activation': 'tanh', 

