# CNN Hyperparameters Optimization - Keras - MNIST

In [1]:
import tensorflow as tf
import numpy as np

from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import InputLayer, Input
from tensorflow.python.keras.layers import Reshape, MaxPooling2D
from tensorflow.python.keras.layers import Conv2D, Dense, Flatten
from tensorflow.python.keras.optimizers import sgd


In [2]:
from keras.datasets import mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

Using TensorFlow backend.


In [3]:
print("Size of:")
print("- Training-set:\t\t{}".format(x_train.shape[0]))
print("- Test-set:\t\t{}".format(x_test.shape[0]))
print(" Shape of train target set:{}".format(y_train.shape))

Size of:
- Training-set:		60000
- Test-set:		10000
 Shape of train target set:(60000,)


Let's perform a one hot encoding on _y_train_ so that it have the sahpe (60000,10):

In [4]:
from sklearn.preprocessing import OneHotEncoder
enc = OneHotEncoder()
enc.fit(y_train.reshape(-1, 1))
y_train_onehot = enc.transform(y_train.reshape(-1, 1)).toarray()
y_train_onehot.shape

(60000, 10)

Same for _y_test_

In [5]:
enc = OneHotEncoder()
enc.fit(y_test.reshape(-1, 1))
y_test_onehot = enc.transform(y_test.reshape(-1, 1)).toarray()
y_test_onehot.shape

(10000, 10)

### Grid search for hyperparameters

In [6]:
# define the grid search parameters
batch_size = [128,256]
learning_rate = [0.01,0.001]
output_channels = [[512,128],[128,512],[256,256]]
activation_fct = ['sigmoid','tanh','relu']

In [8]:
# %%time
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV


def create_model(activation_fct='relu',learning_rate=0.001,output_channels = [512,128]):
    # Start construction of the Keras Sequential model.
    model = Sequential()

    # Add an input layer 
    model.add(InputLayer(input_shape=((28, 28))))

    # Convolutional layers expect images with shape (28, 28, 1)
    model.add(Reshape((28, 28, 1)))

    # First convolutional layer with ReLU-activation and max-pooling.
    model.add(Conv2D(kernel_size=3, strides=2, filters=output_channels[0], padding='same',
                     activation=activation_fct, name='layer_conv1'))
    model.add(MaxPooling2D(pool_size=2, strides=2))

    # Second convolutional layer with ReLU-activation and max-pooling.
    model.add(Conv2D(kernel_size=3, strides=2, filters=output_channels[1], padding='same',
                     activation=activation_fct, name='layer_conv2'))
    model.add(MaxPooling2D(pool_size=2, strides=2))

    # Flatten the 4-rank output of the convolutional layers to 2-rank that can be input to a fully-connected / dense layer.
    model.add(Flatten())

    # Last fully-connected / dense layer with softmax-activation for use in classification.
    model.add(Dense(10, activation='softmax'))
    
    optimizer = sgd(lr=learning_rate)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

# create model
model = KerasClassifier(build_fn=create_model, epochs=10, verbose=1)
# parameters dictionary
param_grid = dict(batch_size=batch_size,
                  learning_rate=learning_rate,
                  output_channels=output_channels)
# perform grid search
grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(x_train, y_train_onehot)


All results:

In [12]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.968217 using {'batch_size': 128, 'learning_rate': 0.001, 'output_channels': [512, 128]}
0.104100 (0.007320) with: {'batch_size': 128, 'learning_rate': 0.01, 'output_channels': [512, 128]}
0.095050 (0.003091) with: {'batch_size': 128, 'learning_rate': 0.01, 'output_channels': [128, 512]}
0.099983 (0.004962) with: {'batch_size': 128, 'learning_rate': 0.01, 'output_channels': [256, 256]}
0.968217 (0.001370) with: {'batch_size': 128, 'learning_rate': 0.001, 'output_channels': [512, 128]}
0.719600 (0.161870) with: {'batch_size': 128, 'learning_rate': 0.001, 'output_channels': [128, 512]}
0.936750 (0.042957) with: {'batch_size': 128, 'learning_rate': 0.001, 'output_channels': [256, 256]}
0.101133 (0.003080) with: {'batch_size': 256, 'learning_rate': 0.01, 'output_channels': [512, 128]}
0.102283 (0.004754) with: {'batch_size': 256, 'learning_rate': 0.01, 'output_channels': [128, 512]}
0.095833 (0.003475) with: {'batch_size': 256, 'learning_rate': 0.01, 'output_channels': [256, 256]}
0