### Hyperparameter searching with GridSearch

In [1]:
import pandas as pd
from numpy import random
from keras.activations import relu, elu
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

Using TensorFlow backend.


The dataset includes data from 768 women with 8 characteristics, in particular:

    Number of times pregnant
    Plasma glucose concentration a 2 hours in an oral glucose tolerance test
    Diastolic blood pressure (mm Hg)
    Triceps skin fold thickness (mm)
    2-Hour serum insulin (mu U/ml)
    Body mass index (weight in kg/(height in m)^2)
    Diabetes pedigree function
    Age (years)

The last column of the dataset indicates if the person has been diagnosed with diabetes (1) or not (0)

In [2]:
df  = pd.read_csv("e:/$Notebooks/pima-indians-diabetes.csv", header = 0)

X = df.iloc[:,0:8]
y = df.iloc[:,8]

scaler = StandardScaler()
X = scaler.fit_transform(X)

In [3]:
def model_diabetes(optimizer='Adam', init='uniform', activation='relu', dropout = 0.5, layer_size=16):
    
    model = Sequential()
    model.add(Dense(12, input_dim=8, activation=activation,kernel_initializer=init))
    model.add(Dropout(dropout))
    model.add(Dense(layer_size, activation=activation))
    model.add(Dropout(dropout))
    model.add(Dense(1, activation=activation))
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [8]:
param_grid = {
    'layer_size': [64],
    'activation': ['relu', 'elu'],
    'optimizer' : ['Adam'],
    'dropout'   : [0.5,0.4,0.3]
}
batch_size = 10

In [9]:
# fix random seed for reproducibility
seed = 7
random.seed(seed)
# possible optimizers = ['SGD','RMSprop','Adagrad','Adadelta','Adam','Adamax','Nadam']     
# possible activations 'softmax','softplus','softsign','relu','tanh','sigmoid','hard_sigmoid','linear'
# init = ['uniform','lecun_uniform','normal','identity','orthogonal','zero','one','glorot_normal','glorot_uniform', 'he_normal', 'he_uniform']
epochs = [100]
init = ['normal']

In [10]:
m = KerasClassifier(build_fn=model_diabetes, verbose=0)  # to use it with scikit

grid = GridSearchCV(cv=3, estimator=m, param_grid=param_grid, n_jobs=1, verbose=0)
grid_result = grid.fit(X, y,epochs = 50)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

results = pd.DataFrame({'Mean':means[:],'std':stds[:],'param':params[:]})
results = results.sort_values(by=['Mean'],ascending=[0])
print(tabulate(results, headers='keys', tablefmt='psql'))

Best: 0.777344 using {'activation': 'elu', 'dropout': 0.5, 'layer_size': 64, 'optimizer': 'Adam'}
+----+----------+------------+-------------------------------------------------------------------------------+
|    |     Mean |        std | param                                                                         |
|----+----------+------------+-------------------------------------------------------------------------------|
|  3 | 0.777344 | 0.017758   | {'activation': 'elu', 'dropout': 0.5, 'layer_size': 64, 'optimizer': 'Adam'}  |
|  5 | 0.764323 | 0.0217101  | {'activation': 'elu', 'dropout': 0.3, 'layer_size': 64, 'optimizer': 'Adam'}  |
|  1 | 0.761719 | 0.019918   | {'activation': 'relu', 'dropout': 0.4, 'layer_size': 64, 'optimizer': 'Adam'} |
|  2 | 0.753906 | 0.00956832 | {'activation': 'relu', 'dropout': 0.3, 'layer_size': 64, 'optimizer': 'Adam'} |
|  0 | 0.744792 | 0.0102526  | {'activation': 'relu', 'dropout': 0.5, 'layer_size': 64, 'optimizer': 'Adam'} |
|  4 | 0.74088

2nd Round use best parameters from 1st round and try some new ones

In [19]:
param_grid = {
    'layer_size':[64,32],
    'activation': ['elu'],
    'optimizer' : ['Adam'],
    'dropout': [0.5]
}

In [20]:
grid = GridSearchCV(cv=3, estimator=m, param_grid=param_grid, n_jobs=1, verbose=0)
grid_result = grid.fit(X, y,epochs = 50)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

results = pd.DataFrame({'Mean':means[:],'std':stds[:],'parameters':params[:]})
results = results.sort_values(by=['Mean'],ascending=[0])
print(tabulate(results, headers='keys', tablefmt='psql'))

Best: 0.760417 using {'activation': 'elu', 'dropout': 0.5, 'layer_size': 64, 'optimizer': 'Adam'}
+----+----------+-----------+------------------------------------------------------------------------------+
|    |     Mean |       std | parameters                                                                   |
|----+----------+-----------+------------------------------------------------------------------------------|
|  0 | 0.760417 | 0.0243597 | {'activation': 'elu', 'dropout': 0.5, 'layer_size': 64, 'optimizer': 'Adam'} |
|  1 | 0.739583 | 0.0415036 | {'activation': 'elu', 'dropout': 0.5, 'layer_size': 32, 'optimizer': 'Adam'} |
+----+----------+-----------+------------------------------------------------------------------------------+
