In [1]:
import numpy as np
import matplotlib.pyplot as plt
import time
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation
from keras.optimizers import SGD, Adam, RMSprop
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from tensorflow import set_random_seed
from sklearn.grid_search import GridSearchCV

Using TensorFlow backend.


In [2]:
np.random.seed(2015)
set_random_seed(2015)

In [3]:
# Function to create model used by KerasClassifier
def create_model(activation="relu",
                 width_hidden=128,
                 dropout=0.3):
    # Define model
    model = Sequential([
        Dense(width_hidden, input_dim=784), Activation(activation), Dropout(dropout),
        Dense(width_hidden), Activation(activation), Dropout(dropout),
        Dense(10), Activation('softmax')
    ])
    # Compile model
    model.compile(loss='categorical_crossentropy',
              optimizer=SGD(),
              metrics=['accuracy'])
    return model

In [4]:
# Load data
(X_train, y_train), (X_test, y_test) = mnist.load_data()

In [5]:
# Preprocessing
X_train = X_train.reshape(60000, 784)
X_test  = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test  = X_test.astype('float32')
X_train /= 255
X_test  /= 255
print(X_train.shape[0], "training samples")
print(X_test.shape[0], "testing samples")

60000 training samples
10000 testing samples


In [6]:
# Convert labels to one-hot coding
# e.g. 1 -> array([ 0.,  1.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.])
y_train = np_utils.to_categorical(y_train, 10)
y_test = np_utils.to_categorical(y_test, 10)

In [7]:
time_start = time.clock()

In [8]:
# Default hyperparameters
batch_size = 128
nb_epoch = 200

In [9]:
# Create model
model = KerasClassifier(build_fn=create_model,
                        nb_epoch=nb_epoch,
                        batch_size=batch_size,
                        verbose=0)
# Grid search hyperparameters
activations = ["softplus", "softsign", "relu", "tanh", "sigmoid", "linear"]
dropouts = [0.1, 0.2, 0.3, 0.4]
param_grid = dict(activation=activations, dropout=dropouts)

In [10]:
print("Performing grid search ...")
grid = GridSearchCV(estimator=model,
                    param_grid=param_grid,
                    verbose=10)
grid_result = grid.fit(X_train, y_train,)
print("... Done!")

Performing grid search ...
Fitting 3 folds for each of 24 candidates, totalling 72 fits
[CV] activation=softplus, dropout=0.1 ................................
[CV] ....... activation=softplus, dropout=0.1, score=0.891650 -  17.5s
[CV] activation=softplus, dropout=0.1 ................................


[Parallel(n_jobs=1)]: Done   1 tasks       | elapsed:   17.5s


[CV] ....... activation=softplus, dropout=0.1, score=0.887350 -  17.7s
[CV] activation=softplus, dropout=0.1 ................................
[CV] ....... activation=softplus, dropout=0.1, score=0.895000 -  19.8s
[CV] activation=softplus, dropout=0.2 ................................
[CV] ....... activation=softplus, dropout=0.2, score=0.879150 -  23.3s
[CV] activation=softplus, dropout=0.2 ................................


[Parallel(n_jobs=1)]: Done   4 tasks       | elapsed:  1.3min


[CV] ....... activation=softplus, dropout=0.2, score=0.877900 -  21.7s
[CV] activation=softplus, dropout=0.2 ................................
[CV] ....... activation=softplus, dropout=0.2, score=0.885300 -  21.8s
[CV] activation=softplus, dropout=0.3 ................................
[CV] ....... activation=softplus, dropout=0.3, score=0.865050 -  21.6s
[CV] activation=softplus, dropout=0.3 ................................


[Parallel(n_jobs=1)]: Done   7 tasks       | elapsed:  2.4min


[CV] ....... activation=softplus, dropout=0.3, score=0.864300 -  19.9s
[CV] activation=softplus, dropout=0.3 ................................
[CV] ....... activation=softplus, dropout=0.3, score=0.875450 -  19.6s
[CV] activation=softplus, dropout=0.4 ................................
[CV] ....... activation=softplus, dropout=0.4, score=0.852800 -  19.6s
[CV] activation=softplus, dropout=0.4 ................................
[CV] ....... activation=softplus, dropout=0.4, score=0.847500 -  19.3s
[CV] activation=softplus, dropout=0.4 ................................
[CV] ....... activation=softplus, dropout=0.4, score=0.854550 -  18.7s
[CV] activation=softsign, dropout=0.1 ................................


[Parallel(n_jobs=1)]: Done  12 tasks       | elapsed:  4.0min


[CV] ....... activation=softsign, dropout=0.1, score=0.903850 -  17.1s
[CV] activation=softsign, dropout=0.1 ................................
[CV] ....... activation=softsign, dropout=0.1, score=0.897950 -  17.3s
[CV] activation=softsign, dropout=0.1 ................................
[CV] ....... activation=softsign, dropout=0.1, score=0.903100 -  17.3s
[CV] activation=softsign, dropout=0.2 ................................
[CV] ....... activation=softsign, dropout=0.2, score=0.901100 -  17.3s
[CV] activation=softsign, dropout=0.2 ................................
[CV] ....... activation=softsign, dropout=0.2, score=0.892800 -  17.4s
[CV] activation=softsign, dropout=0.2 ................................


[Parallel(n_jobs=1)]: Done  17 tasks       | elapsed:  5.5min


[CV] ....... activation=softsign, dropout=0.2, score=0.902050 -  17.8s
[CV] activation=softsign, dropout=0.3 ................................
[CV] ....... activation=softsign, dropout=0.3, score=0.900750 -  18.3s
[CV] activation=softsign, dropout=0.3 ................................
[CV] ....... activation=softsign, dropout=0.3, score=0.895400 -  21.1s
[CV] activation=softsign, dropout=0.3 ................................
[CV] ....... activation=softsign, dropout=0.3, score=0.902000 -  18.1s
[CV] activation=softsign, dropout=0.4 ................................
[CV] ....... activation=softsign, dropout=0.4, score=0.896600 -  18.4s
[CV] activation=softsign, dropout=0.4 ................................
[CV] ....... activation=softsign, dropout=0.4, score=0.892900 -  18.7s
[CV] activation=softsign, dropout=0.4 ................................
[CV] ....... activation=softsign, dropout=0.4, score=0.897750 -  18.1s
[CV] activation=relu, dropout=0.1 ....................................


[Parallel(n_jobs=1)]: Done  24 tasks       | elapsed:  7.7min


[CV] ........... activation=relu, dropout=0.1, score=0.917650 -  17.9s
[CV] activation=relu, dropout=0.1 ....................................
[CV] ........... activation=relu, dropout=0.1, score=0.916650 -  17.9s
[CV] activation=relu, dropout=0.1 ....................................
[CV] ........... activation=relu, dropout=0.1, score=0.922100 -  20.9s
[CV] activation=relu, dropout=0.2 ....................................
[CV] ........... activation=relu, dropout=0.2, score=0.915000 -  20.5s
[CV] activation=relu, dropout=0.2 ....................................
[CV] ........... activation=relu, dropout=0.2, score=0.911800 -  22.4s
[CV] activation=relu, dropout=0.2 ....................................
[CV] ........... activation=relu, dropout=0.2, score=0.918000 -  21.2s
[CV] activation=relu, dropout=0.3 ....................................
[CV] ........... activation=relu, dropout=0.3, score=0.913200 -  18.8s
[CV] activation=relu, dropout=0.3 ....................................


[Parallel(n_jobs=1)]: Done  31 tasks       | elapsed: 10.0min


[CV] ........... activation=relu, dropout=0.3, score=0.907700 -  18.9s
[CV] activation=relu, dropout=0.3 ....................................
[CV] ........... activation=relu, dropout=0.3, score=0.914250 -  20.5s
[CV] activation=relu, dropout=0.4 ....................................
[CV] ........... activation=relu, dropout=0.4, score=0.908200 -  22.3s
[CV] activation=relu, dropout=0.4 ....................................
[CV] ........... activation=relu, dropout=0.4, score=0.904800 -  21.3s
[CV] activation=relu, dropout=0.4 ....................................
[CV] ........... activation=relu, dropout=0.4, score=0.911750 -  21.2s
[CV] activation=tanh, dropout=0.1 ....................................
[CV] ........... activation=tanh, dropout=0.1, score=0.911200 -  20.9s
[CV] activation=tanh, dropout=0.1 ....................................
[CV] ........... activation=tanh, dropout=0.1, score=0.906150 -  21.1s
[CV] activation=tanh, dropout=0.1 ....................................
[CV] .

[Parallel(n_jobs=1)]: Done  40 tasks       | elapsed: 13.2min


[CV] ........... activation=tanh, dropout=0.2, score=0.903400 -  19.7s
[CV] activation=tanh, dropout=0.2 ....................................
[CV] ........... activation=tanh, dropout=0.2, score=0.910100 -  19.8s
[CV] activation=tanh, dropout=0.3 ....................................
[CV] ........... activation=tanh, dropout=0.3, score=0.908400 -  19.8s
[CV] activation=tanh, dropout=0.3 ....................................
[CV] ........... activation=tanh, dropout=0.3, score=0.902000 -  19.9s
[CV] activation=tanh, dropout=0.3 ....................................
[CV] ........... activation=tanh, dropout=0.3, score=0.909100 -  20.2s
[CV] activation=tanh, dropout=0.4 ....................................
[CV] ........... activation=tanh, dropout=0.4, score=0.905350 -  20.1s
[CV] activation=tanh, dropout=0.4 ....................................
[CV] ........... activation=tanh, dropout=0.4, score=0.900750 -  20.3s
[CV] activation=tanh, dropout=0.4 ....................................
[CV] .

[Parallel(n_jobs=1)]: Done  49 tasks       | elapsed: 16.2min


[CV] ........ activation=sigmoid, dropout=0.1, score=0.635950 -  20.2s
[CV] activation=sigmoid, dropout=0.1 .................................
[CV] ........ activation=sigmoid, dropout=0.1, score=0.632050 -  21.0s
[CV] activation=sigmoid, dropout=0.2 .................................
[CV] ........ activation=sigmoid, dropout=0.2, score=0.669350 -  21.2s
[CV] activation=sigmoid, dropout=0.2 .................................
[CV] ........ activation=sigmoid, dropout=0.2, score=0.635450 -  20.6s
[CV] activation=sigmoid, dropout=0.2 .................................
[CV] ........ activation=sigmoid, dropout=0.2, score=0.625500 -  20.8s
[CV] activation=sigmoid, dropout=0.3 .................................
[CV] ........ activation=sigmoid, dropout=0.3, score=0.607500 -  22.6s
[CV] activation=sigmoid, dropout=0.3 .................................
[CV] ........ activation=sigmoid, dropout=0.3, score=0.610800 -  22.2s
[CV] activation=sigmoid, dropout=0.3 .................................
[CV] .

[Parallel(n_jobs=1)]: Done  60 tasks       | elapsed: 20.4min


[CV] ......... activation=linear, dropout=0.1, score=0.908500 -  26.9s
[CV] activation=linear, dropout=0.1 ..................................
[CV] ......... activation=linear, dropout=0.1, score=0.904400 -  23.3s
[CV] activation=linear, dropout=0.1 ..................................
[CV] ......... activation=linear, dropout=0.1, score=0.908550 -  25.5s
[CV] activation=linear, dropout=0.2 ..................................
[CV] ......... activation=linear, dropout=0.2, score=0.905950 -  26.2s
[CV] activation=linear, dropout=0.2 ..................................
[CV] ......... activation=linear, dropout=0.2, score=0.901100 -  27.3s
[CV] activation=linear, dropout=0.2 ..................................
[CV] ......... activation=linear, dropout=0.2, score=0.908150 -  28.0s
[CV] activation=linear, dropout=0.3 ..................................
[CV] ......... activation=linear, dropout=0.3, score=0.907200 -  27.4s
[CV] activation=linear, dropout=0.3 ..................................
[CV] .

[Parallel(n_jobs=1)]: Done  71 tasks       | elapsed: 25.1min


[CV] ......... activation=linear, dropout=0.4, score=0.908350 -  24.8s


[Parallel(n_jobs=1)]: Done  72 out of  72 | elapsed: 25.6min finished


... Done!


In [11]:
time_end = time.clock()

In [12]:
duration = time_end - time_start
print("Time to train")
print("Time elapsed in seconds : %.0f" % duration)
print("Time elapsed in minutes : %.1f" % (duration/60))
print("Time elapsed in hours   : %.1f" % (duration/3600))

Time to train
Time elapsed in seconds : 1570
Time elapsed in minutes : 26.2
Time elapsed in hours   : 0.4


In [13]:
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
grid_result.grid_scores_

Best: 0.918800 using {'activation': 'relu', 'dropout': 0.1}


[mean: 0.89133, std: 0.00313, params: {'activation': 'softplus', 'dropout': 0.1},
 mean: 0.88078, std: 0.00323, params: {'activation': 'softplus', 'dropout': 0.2},
 mean: 0.86827, std: 0.00509, params: {'activation': 'softplus', 'dropout': 0.3},
 mean: 0.85162, std: 0.00300, params: {'activation': 'softplus', 'dropout': 0.4},
 mean: 0.90163, std: 0.00262, params: {'activation': 'softsign', 'dropout': 0.1},
 mean: 0.89865, std: 0.00415, params: {'activation': 'softsign', 'dropout': 0.2},
 mean: 0.89938, std: 0.00286, params: {'activation': 'softsign', 'dropout': 0.3},
 mean: 0.89575, std: 0.00207, params: {'activation': 'softsign', 'dropout': 0.4},
 mean: 0.91880, std: 0.00237, params: {'activation': 'relu', 'dropout': 0.1},
 mean: 0.91493, std: 0.00253, params: {'activation': 'relu', 'dropout': 0.2},
 mean: 0.91172, std: 0.00287, params: {'activation': 'relu', 'dropout': 0.3},
 mean: 0.90825, std: 0.00284, params: {'activation': 'relu', 'dropout': 0.4},
 mean: 0.90902, std: 0.00212, pa