## Hyperparameter tuning using RandomSearchCV

In [8]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

from sklearn.metrics import accuracy_score
from sklearn.model_selection import RandomizedSearchCV
from keras.datasets import mnist
from keras.wrappers.scikit_learn import KerasClassifier
from tensorflow.keras.callbacks import EarlyStopping
from keras.utils import to_categorical
from tensorflow.keras import backend as K
from tensorflow.keras import Model, Sequential, models, layers

np.random.seed(6)

In [9]:
(x_train, y_train), (x_test, y_test) = mnist.load_data()

img_rows, img_cols = 28, 28
num_classes = 10

if K.image_data_format() == 'channels_first':
    x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
    x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
    input_shape = (1, img_rows, img_cols)
else:
    x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
    x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
    input_shape = (img_rows, img_cols, 1)
    
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

In [10]:
max_int = np.random.randint(5,6)
min_int = np.random.randint(2,4)

hidden_layers = []

for rep in range(min_int, max_int):
    layer = np.repeat(np.random.randint(32, 64),rep).tolist()
    hidden_layers.append(layer)

dropout_rates = np.round(np.arange(0.1,0.5,0.1),1).tolist()
epochs = list(range(1,10))
batches = list(range(50,200))
# l2_penalty_opts = np.arange(0.1,0.5,0.1).tolist()
optimizers = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
callbacks = [EarlyStopping(monitor = 'val_loss', patience = 1)]

fit_params = {   
    'callbacks': callbacks,
    'validation_data': (x_test, y_test),
    'verbose': 0,
    'epochs': epochs,
    'batch_size': batches
}

fit_params = dict(callbacks = callbacks,)

param_options = {
    'hidden_layers': hidden_layers,
    'dropout_rate': dropout_rates,
    'optimizer': optimizers
}

In [11]:
hidden_layers

[[41, 41], [35, 35, 35], [52, 52, 52, 52]]

In [14]:
def cnn_model(hidden_layers = [32, 64], activation = 'relu', dropout_rate = 0, optimizer = 'SGD',
                     input_shape = input_shape, num_classes = num_classes):
  
    model = Sequential()
    
    for index, units in enumerate(hidden_layers):
        if index == 0:
            model.add(layers.Conv2D(units, (3,3), input_shape = input_shape, activation = activation))
        else:
            model.add(layers.Conv2D(units, (3,3), activation = activation))

    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    
    if dropout_rate:
        model.add(layers.Dropout(dropout_rate))

    model.add(layers.Flatten())
    neurons = max(hidden_layers)*2
    model.add(layers.Dense(neurons, activation='relu'))
    
    if dropout_rate:
        model.add(layers.Dropout(dropout_rate))
    
    model.add(layers.Dense(num_classes, activation = "softmax"))
    model.compile(loss = 'categorical_crossentropy', optimizer = optimizer, metrics = ['accuracy'])   
    return model

In [18]:
kc_model = KerasClassifier(build_fn = cnn_model)

kc_model_cv = RandomizedSearchCV( 
    kc_model, 
    param_distributions = param_options,
    n_iter = 3,
    cv = 3,
    scoring="neg_log_loss",
#     n_jobs = -1,
    verbose=10
)

kc_model_cv.fit(x_train, y_train)

print('Best score obtained: {0}'.format(kc_model_cv.best_score_))
print('Parameters:')
for param, value in kc_model_cv.best_params_.items():
    print('\t{}: {}'.format(param, value))
    
# Show the results
# print("Best: %f using %s" % (random_search.best_score_, random_search.best_params_))
# means = random_search.cv_results_['mean_test_score']
# stds = random_search.cv_results_['std_test_score']
# params = random_search.cv_results_['params']
# for mean, stdev, param in zip(means, stds, params):
#     print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 3 folds for each of 3 candidates, totalling 9 fits
[CV] optimizer=RMSprop, hidden_layers=[52, 52, 52, 52], dropout_rate=0.2 


[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


Train on 40000 samples
[CV]  optimizer=RMSprop, hidden_layers=[52, 52, 52, 52], dropout_rate=0.2, score=-0.081, total= 2.7min
[CV] optimizer=RMSprop, hidden_layers=[52, 52, 52, 52], dropout_rate=0.2 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  2.7min remaining:    0.0s


Train on 40000 samples
[CV]  optimizer=RMSprop, hidden_layers=[52, 52, 52, 52], dropout_rate=0.2, score=-0.102, total= 2.6min
[CV] optimizer=RMSprop, hidden_layers=[52, 52, 52, 52], dropout_rate=0.2 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  5.3min remaining:    0.0s


Train on 40000 samples
[CV]  optimizer=RMSprop, hidden_layers=[52, 52, 52, 52], dropout_rate=0.2, score=-0.080, total= 2.5min
[CV] optimizer=Adam, hidden_layers=[35, 35, 35], dropout_rate=0.2 ....


[Parallel(n_jobs=1)]: Done   3 out of   3 | elapsed:  7.8min remaining:    0.0s


Train on 40000 samples
[CV]  optimizer=Adam, hidden_layers=[35, 35, 35], dropout_rate=0.2, score=-31.473, total= 1.1min
[CV] optimizer=Adam, hidden_layers=[35, 35, 35], dropout_rate=0.2 ....


[Parallel(n_jobs=1)]: Done   4 out of   4 | elapsed:  8.9min remaining:    0.0s


Train on 40000 samples
[CV]  optimizer=Adam, hidden_layers=[35, 35, 35], dropout_rate=0.2, score=-0.078, total= 1.2min
[CV] optimizer=Adam, hidden_layers=[35, 35, 35], dropout_rate=0.2 ....


[Parallel(n_jobs=1)]: Done   5 out of   5 | elapsed: 10.1min remaining:    0.0s


Train on 40000 samples
[CV]  optimizer=Adam, hidden_layers=[35, 35, 35], dropout_rate=0.2, score=-31.087, total= 1.2min
[CV] optimizer=Nadam, hidden_layers=[52, 52, 52, 52], dropout_rate=0.2 


[Parallel(n_jobs=1)]: Done   6 out of   6 | elapsed: 11.2min remaining:    0.0s


Train on 40000 samples
[CV]  optimizer=Nadam, hidden_layers=[52, 52, 52, 52], dropout_rate=0.2, score=-0.067, total= 2.5min
[CV] optimizer=Nadam, hidden_layers=[52, 52, 52, 52], dropout_rate=0.2 


[Parallel(n_jobs=1)]: Done   7 out of   7 | elapsed: 13.8min remaining:    0.0s


Train on 40000 samples
[CV]  optimizer=Nadam, hidden_layers=[52, 52, 52, 52], dropout_rate=0.2, score=-0.063, total= 2.5min
[CV] optimizer=Nadam, hidden_layers=[52, 52, 52, 52], dropout_rate=0.2 


[Parallel(n_jobs=1)]: Done   8 out of   8 | elapsed: 16.3min remaining:    0.0s


Train on 40000 samples
[CV]  optimizer=Nadam, hidden_layers=[52, 52, 52, 52], dropout_rate=0.2, score=-0.068, total= 2.4min


[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed: 18.6min remaining:    0.0s
[Parallel(n_jobs=1)]: Done   9 out of   9 | elapsed: 18.6min finished


Train on 60000 samples
Best score obtained: -0.06588739765515375
Parameters:
	optimizer: Nadam
	hidden_layers: [52, 52, 52, 52]
	dropout_rate: 0.2
