# Keras, GridSearchCV

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf

from keras.models import Sequential
from keras.layers import Dense , Dropout
from keras.optimizers import RMSprop

from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV

Using TensorFlow backend.


## Data load & transform

In [2]:
train = (pd.read_csv('./input/train.csv')).astype('float32')
print "train shape:", train.shape

train shape: (42000, 785)


In [3]:
X, Y = train.drop('label', axis=1).values, train['label'].values

print "X shape:", X.shape
print "Y shape:", Y.shape

X shape: (42000, 784)
Y shape: (42000,)


In [4]:
# Reduce datapoints
X = X[:5000,:]
Y = Y[:5000]

print "X shape:", X.shape
print "Y shape:", Y.shape

X shape: (500, 784)
Y shape: (500,)


In [5]:
# Normalization
X = X / 255

In [6]:
from keras.utils.np_utils import to_categorical
Y = to_categorical(Y, 10)

print "Y shape:", Y.shape

Y shape: (500, 10)


## NN

In [7]:
# fix random seed for reproducibility
seed = 9
np.random.seed(seed)

In [8]:
def create_model(lr, rho, epsilon, decay):
    
    # create model
    model = Sequential()
    model.add(Dense(64, activation='relu', input_dim=(28 * 28)))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.15))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.15))
    model.add(Dense(10, activation='softmax'))
    
    
    
    # Compile model
    model.compile(
        optimizer=RMSprop(lr=lr, rho=rho, epsilon=epsilon, decay=decay),
        loss = 'categorical_crossentropy',
        metrics = ['accuracy']
    )    

    return model

In [9]:
# create model
keras_classifier = KerasClassifier(
    build_fn = create_model, 
    verbose = 0
)

In [10]:
grid = GridSearchCV(
    estimator = keras_classifier, 
    param_grid = dict(
#         epochs = [5, 10, 15],
#         batch_size = [5, 10, 20],
#         optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
        epochs = [15],
        batch_size = [10],
#         optimizer = ['SGD', 'Adagrad', 'Adam', 'Nadam']
#         optimizer = ['RMSprop']
        lr = [0.00001, 0.001, 0.1],
        rho = [0.5, 0.9, 1.3],
        epsilon = [1e-07, 1e-08, 1e-09],
        decay = [0.0, 0.2, 0.5]
    ),
    cv = 10,
    n_jobs=1, 
    verbose=2
)

In [11]:
grid_result = grid.fit(X, Y)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 10 folds for each of 81 candidates, totalling 810 fits
[CV] decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5 
[CV]  decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5, total=   1.9s
[CV] decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.9s remaining:    0.0s


[CV]  decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5, total=   1.5s
[CV] decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5 
[CV]  decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5, total=   1.5s
[CV] decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5 
[CV]  decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5, total=   1.5s
[CV] decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5 
[CV]  decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5, total=   1.5s
[CV] decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5 
[CV]  decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5, total=   1.5s
[CV] decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5 
[CV]  decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5, total=   1.6s
[CV] decay=0.0, epsilon=1e-07, batch_size=10, epochs=15, lr=1e-05, rho=0.5

[Parallel(n_jobs=1)]: Done 810 out of 810 | elapsed: 184.0min finished


Best: 0.868000 using {'decay': 0.0, 'epsilon': 1e-09, 'batch_size': 10, 'epochs': 15, 'lr': 0.001, 'rho': 0.9}
0.206000 (0.088566) with: {'decay': 0.0, 'epsilon': 1e-07, 'batch_size': 10, 'epochs': 15, 'lr': 1e-05, 'rho': 0.5}
0.198000 (0.070114) with: {'decay': 0.0, 'epsilon': 1e-07, 'batch_size': 10, 'epochs': 15, 'lr': 1e-05, 'rho': 0.9}
0.122000 (0.056178) with: {'decay': 0.0, 'epsilon': 1e-07, 'batch_size': 10, 'epochs': 15, 'lr': 1e-05, 'rho': 1.3}
0.840000 (0.044721) with: {'decay': 0.0, 'epsilon': 1e-07, 'batch_size': 10, 'epochs': 15, 'lr': 0.001, 'rho': 0.5}
0.848000 (0.070541) with: {'decay': 0.0, 'epsilon': 1e-07, 'batch_size': 10, 'epochs': 15, 'lr': 0.001, 'rho': 0.9}
0.082000 (0.034000) with: {'decay': 0.0, 'epsilon': 1e-07, 'batch_size': 10, 'epochs': 15, 'lr': 0.001, 'rho': 1.3}
0.106000 (0.031048) with: {'decay': 0.0, 'epsilon': 1e-07, 'batch_size': 10, 'epochs': 15, 'lr': 0.1, 'rho': 0.5}
0.092000 (0.039192) with: {'decay': 0.0, 'epsilon': 1e-07, 'batch_size': 10, 'e