### Hyperparameter searching with GridSearch
## Example application of GridSearch with Keras

In [50]:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "-1" # to disable GPU

import pandas as pd
from numpy import random
from keras.activations import relu, elu
from tensorflow.keras.optimizers import RMSprop,SGD
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from scikeras.wrappers import KerasClassifier, KerasRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from tabulate import tabulate
import tensorflow as tf


The dataset includes data from 768 women with 8 characteristics, in particular:

    Number of times pregnant
    Plasma glucose concentration a 2 hours in an oral glucose tolerance test
    Diastolic blood pressure (mm Hg)
    Triceps skin fold thickness (mm)
    2-Hour serum insulin (mu U/ml)
    Body mass index (weight in kg/(height in m)^2)
    Diabetes pedigree function
    Age (years)

The last column of the dataset indicates if the person has been diagnosed with diabetes (1) or not (0)

In [30]:
df  = pd.read_csv("./DAL_workshop_datasets/pima-indians-diabetes.csv", header = 0)

X = df.iloc[:,0:8]
y = df.iloc[:,8]

scaler = StandardScaler()
X = scaler.fit_transform(X)

In [31]:
def model_diabetes(optimizer='Adam', init='uniform', activation='relu', dropout = 0.5, layers=16):
    model = Sequential()
    model.add(Dense(12, input_dim=8, activation=activation,kernel_initializer=init))
    model.add(Dropout(dropout))
    model.add(Dense(layers, activation=activation))
    model.add(Dropout(dropout))
    model.add(Dense(1, activation=activation))
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return model

In [32]:
param_grid = {
    'layers': [64,16],
    'activation': ['relu', 'elu'],
    'optimizer' : ['Adam'],
    'dropout'   : [0.5,0.4,0.3]
}
batch_size = 10

In [33]:
# fix random seed for reproducibility
seed = 7
random.seed(seed)
# possible optimizers = ['SGD','RMSprop','Adagrad','Adadelta','Adam','Adamax','Nadam']     
# possible activations 'softmax','softplus','softsign','relu','tanh','sigmoid','hard_sigmoid','linear'
# init = ['uniform','lecun_uniform','normal','identity','orthogonal','zero','one','glorot_normal','glorot_uniform', 'he_normal', 'he_uniform']
epochs = [100]
init = ['normal']

In [34]:
model = KerasClassifier(model =model_diabetes, verbose=0, activation='relu', layers=64, 
                        dropout = 0.1, optimizer='Adam')  # 

grid = GridSearchCV(cv=3, estimator=model, param_grid=param_grid, n_jobs=1, verbose=1)
grid_result = grid.fit(X, y,epochs = 50)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

results = pd.DataFrame({'Mean':means[:],'std':stds[:],'param':params[:]})
results = results.sort_values(by=['Mean'],ascending=[0])
print(tabulate(results, headers='keys', tablefmt='psql'))


Fitting 3 folds for each of 12 candidates, totalling 36 fits
Best: 0.772135 using {'activation': 'relu', 'dropout': 0.4, 'layers': 64, 'optimizer': 'Adam'}
+----+----------+------------+---------------------------------------------------------------------------+
|    |     Mean |        std | param                                                                     |
|----+----------+------------+---------------------------------------------------------------------------|
|  2 | 0.772135 | 0.0202557  | {'activation': 'relu', 'dropout': 0.4, 'layers': 64, 'optimizer': 'Adam'} |
|  4 | 0.770833 | 0.0217101  | {'activation': 'relu', 'dropout': 0.3, 'layers': 64, 'optimizer': 'Adam'} |
|  3 | 0.761719 | 0.00552427 | {'activation': 'relu', 'dropout': 0.4, 'layers': 16, 'optimizer': 'Adam'} |
| 10 | 0.761719 | 0.0146158  | {'activation': 'elu', 'dropout': 0.3, 'layers': 64, 'optimizer': 'Adam'}  |
| 11 | 0.761719 | 0.0223261  | {'activation': 'elu', 'dropout': 0.3, 'layers': 16, 'optimizer':

2nd Round use best parameters from 1st round and try some new ones

In [53]:

param_grid = {
    'layers':[256,128,64,32,16,8],
    'activation': ['elu','relu'],
    'optimizer' : ['Adam','RMSprop','SGD'],
    'dropout': [0.5,0.4,0.3,0.2,0.1]
}

In [54]:
model = KerasClassifier(model =model_diabetes, verbose=0, activation='elu', layers=64, 
                        dropout = 0.1, optimizer='Adam')  # 

grid = GridSearchCV(cv=3, estimator=model, param_grid=param_grid, n_jobs=1, verbose=0)
grid_result = grid.fit(X, y,epochs = 50)

# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

results = pd.DataFrame({'Mean':means[:],'std':stds[:],'parameters':params[:]})
results = results.sort_values(by=['Mean'],ascending=[0])
print(tabulate(results, headers='keys', tablefmt='psql'))

Best: 0.783854 using {'activation': 'relu', 'dropout': 0.2, 'layers': 64, 'optimizer': 'RMSprop'}
+-----+----------+------------+-------------------------------------------------------------------------------+
|     |     Mean |        std | parameters                                                                    |
|-----+----------+------------+-------------------------------------------------------------------------------|
| 151 | 0.783854 | 0.00663935 | {'activation': 'relu', 'dropout': 0.2, 'layers': 64, 'optimizer': 'RMSprop'}  |
|  99 | 0.782552 | 0.0157331  | {'activation': 'relu', 'dropout': 0.5, 'layers': 32, 'optimizer': 'Adam'}     |
| 153 | 0.777344 | 0.00843846 | {'activation': 'relu', 'dropout': 0.2, 'layers': 32, 'optimizer': 'Adam'}     |
|  60 | 0.777344 | 0.0127578  | {'activation': 'elu', 'dropout': 0.2, 'layers': 64, 'optimizer': 'Adam'}      |
| 172 | 0.776042 | 0.014382   | {'activation': 'relu', 'dropout': 0.1, 'layers': 32, 'optimizer': 'RMSprop'}  |
| 167 

In [None]:
#EOF