# Keras, GridSearchCV

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense , Dropout
from keras.optimizers import RMSprop

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

Using TensorFlow backend.


## Data load & transform

In [2]:
# Read the training CSV and cast every column (including 'label') to float32.
train = pd.read_csv('./input/train.csv').astype('float32')
# %-formatting emits the same text on Python 2 and Python 3, unlike the
# Python-2-only print statement.
print("train shape: %s" % (train.shape,))

train shape: (42000, 785)


In [3]:
# Separate the feature matrix (every column except 'label') from the target
# vector (the 'label' column); both are plain NumPy arrays.
X = train.drop('label', axis=1).values
Y = train['label'].values

# %-formatting emits the same text on Python 2 and Python 3.
print("X shape: %s" % (X.shape,))
print("Y shape: %s" % (Y.shape,))

X shape: (42000, 784)
Y shape: (42000,)


In [4]:
# Reduce datapoints: keep only the first N_SAMPLES rows of X and Y
# (presumably to keep the grid search runtime manageable).
N_SAMPLES = 5000
X = X[:N_SAMPLES, :]
Y = Y[:N_SAMPLES]

# %-formatting emits the same text on Python 2 and Python 3.
print("X shape: %s" % (X.shape,))
print("Y shape: %s" % (Y.shape,))

X shape: (5000, 784)
Y shape: (5000,)


In [5]:
# Normalization: divide every entry of X by 255 (presumably the maximum
# 8-bit pixel intensity -- confirm against the dataset).
# NOTE: this rebinds X, so executing the cell a second time divides by 255
# again; run it once per kernel session.
X = np.divide(X, 255)

In [6]:
from keras.utils.np_utils import to_categorical

# One-hot encode the labels into 10 columns.
# NOTE: this rebinds Y, so re-running the cell would encode the
# already-encoded array again; run it once per kernel session.
Y = to_categorical(Y, 10)

# %-formatting emits the same text on Python 2 and Python 3.
print("Y shape: %s" % (Y.shape,))

Y shape: (5000, 10)


## Neural network (NN)

In [7]:
# fix random seed for reproducibility
seed = 9
np.random.seed(seed)
# NumPy's generator alone does not cover random ops created by the
# TensorFlow backend, so seed TensorFlow as well. TF 1.x exposes
# tf.set_random_seed; TF 2.x moved it to tf.random.set_seed.
if hasattr(tf, 'set_random_seed'):
    tf.set_random_seed(seed)
else:
    tf.random.set_seed(seed)

In [8]:
def create_model(optimizer='adam', input_dim=28 * 28, num_classes=10,
                 dropout_rate=0.15):
    """Build and compile a dense feed-forward classifier.

    The network stacks Dense layers of 64, 128 and 64 ReLU units, with a
    Dropout layer after the second and after the third Dense layer, and
    ends in a softmax output layer.

    Parameters
    ----------
    optimizer : str or keras optimizer, default 'adam'
        Passed unchanged to ``model.compile``.
    input_dim : int, default 784 (28 * 28)
        Number of input features per sample.
    num_classes : int, default 10
        Number of units in the softmax output layer.
    dropout_rate : float, default 0.15
        Rate shared by both Dropout layers.

    Returns
    -------
    Sequential
        The model, compiled with categorical cross-entropy loss and the
        'accuracy' metric.
    """
    # create model
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=input_dim))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(num_classes, activation='softmax'))

    # Compile model
    model.compile(
        optimizer=optimizer,
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

In [9]:
# Wrap the model factory in the Keras scikit-learn adapter so it can be
# handed to GridSearchCV as an estimator; verbose=0 is forwarded to the
# wrapper (expected to silence Keras training logs).
keras_classifier = KerasClassifier(build_fn=create_model, verbose=0)

In [10]:
# Grid of 2 epoch counts x 2 batch sizes x 3 optimizers (12 candidates),
# each evaluated with 10-fold cross-validation in a single process.
# Alternative grids kept for reference (disabled):
#   epochs = [5, 10, 15]
#   batch_size = [5, 10, 20]
#   optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
#   optimizer = ['SGD', 'Adagrad', 'Adam', 'Nadam']
grid = GridSearchCV(
    estimator=keras_classifier,
    param_grid={
        'epochs': [5, 15],
        'batch_size': [10, 20],
        'optimizer': ['RMSprop', 'Adadelta', 'Adamax'],
    },
    cv=10,
    n_jobs=1,
    verbose=2
)

In [11]:
# Run the full grid search on the prepared data.
grid_result = grid.fit(X, Y)

# summarize results: best configuration first ...
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

# ... then mean score, standard deviation and parameters of every candidate.
means, stds, params = (
    grid_result.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
for row in zip(means, stds, params):
    # row is a (mean, std, params) tuple matching the three placeholders
    print("%f (%f) with: %r" % row)

Fitting 10 folds for each of 12 candidates, totalling 120 fits
[CV] epochs=5, optimizer=RMSprop, batch_size=10 ......................
[CV] ....... epochs=5, optimizer=RMSprop, batch_size=10, total=   5.0s
[CV] epochs=5, optimizer=RMSprop, batch_size=10 ......................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    5.2s remaining:    0.0s


[CV] ....... epochs=5, optimizer=RMSprop, batch_size=10, total=   4.8s
[CV] epochs=5, optimizer=RMSprop, batch_size=10 ......................
[CV] ....... epochs=5, optimizer=RMSprop, batch_size=10, total=   4.7s
[CV] epochs=5, optimizer=RMSprop, batch_size=10 ......................
[CV] ....... epochs=5, optimizer=RMSprop, batch_size=10, total=   5.5s
[CV] epochs=5, optimizer=RMSprop, batch_size=10 ......................
[CV] ....... epochs=5, optimizer=RMSprop, batch_size=10, total=   6.0s
[CV] epochs=5, optimizer=RMSprop, batch_size=10 ......................
[CV] ....... epochs=5, optimizer=RMSprop, batch_size=10, total=   5.6s
[CV] epochs=5, optimizer=RMSprop, batch_size=10 ......................
[CV] ....... epochs=5, optimizer=RMSprop, batch_size=10, total=   5.0s
[CV] epochs=5, optimizer=RMSprop, batch_size=10 ......................
[CV] ....... epochs=5, optimizer=RMSprop, batch_size=10, total=   5.0s
[CV] epochs=5, optimizer=RMSprop, batch_size=10 ......................
[CV] .

[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed: 21.6min finished


Best: 0.944400 using {'epochs': 15, 'optimizer': 'RMSprop', 'batch_size': 10}
0.930400 (0.011377) with: {'epochs': 5, 'optimizer': 'RMSprop', 'batch_size': 10}
0.911000 (0.013921) with: {'epochs': 5, 'optimizer': 'Adadelta', 'batch_size': 10}
0.921800 (0.009271) with: {'epochs': 5, 'optimizer': 'Adamax', 'batch_size': 10}
0.944400 (0.008890) with: {'epochs': 15, 'optimizer': 'RMSprop', 'batch_size': 10}
0.933800 (0.010898) with: {'epochs': 15, 'optimizer': 'Adadelta', 'batch_size': 10}
0.941600 (0.011552) with: {'epochs': 15, 'optimizer': 'Adamax', 'batch_size': 10}
0.928200 (0.012147) with: {'epochs': 5, 'optimizer': 'RMSprop', 'batch_size': 20}
0.909600 (0.010500) with: {'epochs': 5, 'optimizer': 'Adadelta', 'batch_size': 20}
0.920200 (0.010371) with: {'epochs': 5, 'optimizer': 'Adamax', 'batch_size': 20}
0.937000 (0.010440) with: {'epochs': 15, 'optimizer': 'RMSprop', 'batch_size': 20}
0.932600 (0.008857) with: {'epochs': 15, 'optimizer': 'Adadelta', 'batch_size': 20}
0.940600 (0.01