# Tune the hyperparameters of a basic neural net.  We'll benchmark a standard grid search against a Bayesian approach.

In [1]:
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import numpy as np
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasClassifier
from keras.constraints import maxnorm
from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax, Nadam
from keras import backend as K
from bayes_opt import BayesianOptimization

Using TensorFlow backend.


In [2]:
# Set pandas preferences
pd.options.display.max_columns=500
pd.options.display.max_colwidth=1000

In [3]:
# Set plot preferences
plt.style.use('dark_background')

In [4]:
# Fix the random seed for reproducability
seed = 0
np.random.seed(seed)

In [5]:
# Set the number of cross validation folds
cv = 3

In [6]:
# Load the data
df = pd.read_csv('data/pima-indians-diabetes.data.csv', header=None)

In [7]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [8]:
# Get the target and feature values
X = df.drop(8, axis=1).values

In [9]:
Y = df[8].values

In [10]:
# Function to create the model, as required by KerasClassifier
def create_model(learn_rate=0.01, init_mode='uniform', activation='relu', dropout_rate=0.0, weight_constraint=0, neurons=1, optimizer=SGD()):
    # Create the model
    model = Sequential()
    model.add(Dense(neurons, input_dim=8, kernel_initializer=init_mode, activation=activation))
    model.add(Dense(1, kernel_initializer=init_mode, activation='sigmoid'))
    # compile the model.
    K.set_value(optimizer.lr, learn_rate)
    model.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    
    return model

In [11]:
# Define the parameter grid for basic grid search cross validation
param_grid = {'batch_size': [10, 20, 40, 60, 80, 100],
              'epochs': [10, 50, 100],
              'optimizer': [SGD(), RMSprop(), Adagrad(), Adadelta(), Adam(), Adamax(), Nadam()],
              'learn_rate': [0.001, 0.01, 0.1, 0.2, 0.3],
              'init_mode': ['uniform', 'lecun_uniform', 'normal', 'zero', 'glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform'],
              'activation': ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear'],
              'dropout_rate': [0.0, 0.1, 0.2, 0.3, 0.4],
              'neurons': [1, 5, 10, 15, 20, 25, 30]}

In [21]:
# Test this in a grid search.  We'll tackle a Bayesian approach afterwards.
# Define the model
model = KerasClassifier(build_fn=create_model, verbose=0)

In [22]:
# Setup the grid
keras_grid_search_opt = GridSearchCV(estimator=model, param_grid=param_grid, verbose=1)

In [23]:
# Let's see how long this takes...

In [24]:
# Obviously, we need to restrict the space here!
keras_grid_search_opt = keras_grid_search_opt.fit(X, Y)

Fitting 3 folds for each of 1411200 candidates, totalling 4233600 fits


KeyboardInterrupt: 

In [78]:
# Now this is an interesting problem. How do I handle the categorical variables for Bayesian hyperparameter optimization? We'll have to encode them, I think.

In [50]:
optimizer = SGD()

In [44]:
optimizer

<keras.optimizers.SGD at 0x7f940a42a550>

In [45]:
optimizer = optimizer(lr=0.01)

TypeError: 'SGD' object is not callable

In [51]:
optimizer.lr

<tf.Variable 'SGD_1/lr:0' shape=() dtype=float32_ref>

In [48]:
optimizer.

<keras.optimizers.SGD at 0x7f940a42a550>

In [49]:
optimizer.lr

0.01

In [53]:
optimizer.lr.read_value()

<tf.Tensor 'read:0' shape=() dtype=float32>

In [57]:
optimizer.lr.assign(0.01)

<tf.Tensor 'Assign:0' shape=() dtype=float32_ref>

In [60]:
optimizer.lr.get_shape()

TensorShape([])

In [63]:
optimizer.lr.value()

<tf.Tensor 'SGD_1/lr/read:0' shape=() dtype=float32>

In [64]:
from keras import backend as K

In [65]:
K.get_value(optimizer.lr)

0.0099999998

In [66]:
K.set_value(optimizer.lr, 0.1)

In [67]:
K.get_value(optimizer.lr)

0.1