In [67]:
import numpy
import pandas as pd
from keras.callbacks import ModelCheckpoint
from keras.callbacks import ReduceLROnPlateau
from keras.layers import Dense, Dropout, BatchNormalization
from keras.models import Sequential
from keras.optimizers import SGD, Adamax
from keras.utils import np_utils
from keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.preprocessing import LabelEncoder

In [174]:
# Function to create model, required for KerasClassifier
#def create_model(learn_rate=0.01, momentum=0):# include this parameter for optimizer tuning
#def create_model(activation='relu'):
#def create_model(init_mode='uniform'):
#def create_model(dropout_rate=0.0, weight_constraint=0):
def create_model(neurons=500, neurons2=100, neurons3=50,
                 input_dim=103, n_classes=9, learn_rate=0.001,
                 dropout_rates=(0.45, 0.4, 0.35)):
    """Build and compile the three-hidden-layer softmax classifier.

    Used as the ``build_fn`` of ``KerasClassifier``, so every keyword
    argument here can be tuned from a ``GridSearchCV`` parameter grid.

    Args:
        neurons: Units in the first hidden layer.
        neurons2: Units in the second hidden layer.
        neurons3: Units in the third hidden layer.
        input_dim: Number of input features fed to the first layer.
        n_classes: Number of units in the softmax output layer.
        learn_rate: Learning rate passed to the Adamax optimizer.
        dropout_rates: Dropout rate applied after each hidden layer,
            one value per hidden layer (three in total).

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy
        loss and reporting accuracy.

    Raises:
        ValueError: If ``dropout_rates`` does not contain exactly one
            rate per hidden layer.
    """
    hidden_units = (neurons, neurons2, neurons3)
    if len(dropout_rates) != len(hidden_units):
        raise ValueError(
            "dropout_rates must contain %d values, got %d"
            % (len(hidden_units), len(dropout_rates)))

    model = Sequential()
    for position, (units, rate) in enumerate(zip(hidden_units, dropout_rates)):
        # Only the first layer declares the input width; later layers infer it.
        first_layer_kwargs = {'input_dim': input_dim} if position == 0 else {}
        model.add(Dense(units, kernel_initializer='uniform', activation='relu',
                        **first_layer_kwargs))
        # Same ordering as before: dropout first, then batch normalisation.
        model.add(Dropout(rate))
        model.add(BatchNormalization())
    model.add(Dense(n_classes, kernel_initializer='uniform', activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer=Adamax(lr=learn_rate),
                  metrics=['accuracy'])
    return model

# Batch and Epochs Tuning

In [3]:
# Seed NumPy's global random generator for reproducibility.
seed = 7
numpy.random.seed(seed)

# Read the training and test tables from the task directory.
datadir = './task/'
df_train = pd.read_csv(datadir + 'trainData.csv')
df_test = pd.read_csv(datadir + 'testData.csv')

# Every column except the last is a feature; the last column is the target.
feature_cols = df_train.columns[:-1]
target_col = df_train.columns[-1]
X = df_train[feature_cols].values

# Encode the raw labels as integers, then expand them to one-hot (dummy) vectors.
encoder = LabelEncoder()
y = np_utils.to_categorical(encoder.fit_transform(df_train[target_col].values))


In [3]:
# Wrap the Keras builder so it can be used as a scikit-learn estimator.
model = KerasClassifier(build_fn=create_model, verbose=0)

# Grid search parameters; if the best value is not inside these intervals,
# continue the grid search with new ranges.
batch_size = [16, 32, 64]
epochs = [10, 20, 30]
param_grid = {'batch_size': batch_size, 'epochs': epochs}

grid = GridSearchCV(estimator=model, param_grid=param_grid)
grid_result = grid.fit(X, y)

# Summarize: best combination first, then mean (std) test score per candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.801109 using {'batch_size': 64, 'epochs': 20}
0.800430 (0.004486) with: {'batch_size': 16, 'epochs': 10}
0.796325 (0.006982) with: {'batch_size': 16, 'epochs': 20}
0.793384 (0.002247) with: {'batch_size': 16, 'epochs': 30}
0.799363 (0.001789) with: {'batch_size': 32, 'epochs': 10}
0.798442 (0.003209) with: {'batch_size': 32, 'epochs': 20}
0.794887 (0.002170) with: {'batch_size': 32, 'epochs': 30}
0.799008 (0.002625) with: {'batch_size': 64, 'epochs': 10}
0.801109 (0.001831) with: {'batch_size': 64, 'epochs': 20}
0.794046 (0.002026) with: {'batch_size': 64, 'epochs': 30}


# Optimizer Gridsearch

The `build_fn` (`create_model`) is called with an `optimizer` parameter, so the build function must accept `optimizer` as a keyword argument.

In [24]:
def _create_model_with_optimizer(optimizer='Adamax'):
    """Build the network with create_model() and recompile it with `optimizer`.

    KerasClassifier only accepts grid parameters that appear in the build
    function's signature. create_model() has no `optimizer` argument, so this
    thin wrapper exposes one for the search below.
    """
    net = create_model()
    net.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return net


# Dedicated estimator for this search; the shared `model` wrapper is left untouched.
optimizer_model = KerasClassifier(build_fn=_create_model_with_optimizer, verbose=0)
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = dict(optimizer=optimizer)
grid = GridSearchCV(estimator=optimizer_model, param_grid=param_grid, scoring='neg_log_loss')
grid_result = grid.fit(X, y)

# Summarize: best optimizer first, then mean (std) test score per candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: -0.524376 using {'optimizer': 'Adamax'}
-0.638321 (0.005728) with: {'optimizer': 'SGD'}
-0.541467 (0.019526) with: {'optimizer': 'RMSprop'}
-0.545167 (0.002207) with: {'optimizer': 'Adagrad'}
-0.537486 (0.001481) with: {'optimizer': 'Adadelta'}
-0.531548 (0.000412) with: {'optimizer': 'Adam'}
-0.524376 (0.000285) with: {'optimizer': 'Adamax'}
-0.577695 (0.008125) with: {'optimizer': 'Nadam'}


SGD

In [32]:
def _create_sgd_model(learn_rate=0.01, momentum=0.0):
    """Build the network with create_model() and recompile it with SGD.

    KerasClassifier only accepts grid parameters that appear in the build
    function's signature. create_model() has neither `learn_rate` nor
    `momentum`, so this wrapper exposes both for the search below.
    """
    net = create_model()
    net.compile(loss='categorical_crossentropy',
                optimizer=SGD(lr=learn_rate, momentum=momentum),
                metrics=['accuracy'])
    return net


# Dedicated estimator for this search; the shared `model` wrapper is left untouched.
sgd_model = KerasClassifier(build_fn=_create_sgd_model, verbose=0)
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]  # 0.9 is a popular value in practice
param_grid = dict(learn_rate=learn_rate, momentum=momentum)
grid = GridSearchCV(estimator=sgd_model, param_grid=param_grid, scoring='neg_log_loss')
grid_result = grid.fit(X, y)

# Summarize: best combination first, then mean (std) test score per candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: -0.531190 using {'learn_rate': 0.01, 'momentum': 0.9}
-0.945211 (0.012189) with: {'learn_rate': 0.001, 'momentum': 0.0}
-0.867184 (0.008023) with: {'learn_rate': 0.001, 'momentum': 0.2}
-0.806572 (0.004293) with: {'learn_rate': 0.001, 'momentum': 0.4}
-0.748517 (0.005261) with: {'learn_rate': 0.001, 'momentum': 0.6}
-0.688189 (0.003425) with: {'learn_rate': 0.001, 'momentum': 0.8}
-0.637929 (0.004332) with: {'learn_rate': 0.001, 'momentum': 0.9}
-0.637241 (0.003389) with: {'learn_rate': 0.01, 'momentum': 0.0}
-0.621973 (0.002175) with: {'learn_rate': 0.01, 'momentum': 0.2}
-0.609467 (0.003353) with: {'learn_rate': 0.01, 'momentum': 0.4}
-0.577011 (0.001034) with: {'learn_rate': 0.01, 'momentum': 0.6}
-0.560955 (0.004908) with: {'learn_rate': 0.01, 'momentum': 0.8}
-0.531190 (0.000946) with: {'learn_rate': 0.01, 'momentum': 0.9}
-0.539675 (0.003205) with: {'learn_rate': 0.1, 'momentum': 0.0}
-0.541772 (0.008990) with: {'learn_rate': 0.1, 'momentum': 0.2}
-0.553682 (0.015555) with:

# Tune the Number of Neurons in the Hidden Layer

In [25]:
# The epochs value given here is only a fallback: the grid below sets epochs per candidate.
model = KerasClassifier(build_fn=create_model, epochs=20, batch_size=100, verbose=0)

# Candidate widths for the first two hidden layers, plus training lengths.
neurons = [600, 800]
neurons2 = [300, 200]
epochs = [8, 17, 23]
param_grid = {'neurons': neurons, 'neurons2': neurons2, 'epochs': epochs}

grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='neg_log_loss')
grid_result = grid.fit(X, y)

# Summarize: best combination first, then mean (std) test score per candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means, stds, params = (
    grid_result.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: -0.508739 using {'epochs': 17, 'neurons': 600, 'neurons2': 200}
-0.522193 (0.006182) with: {'epochs': 8, 'neurons': 600, 'neurons2': 300}
-0.523965 (0.001720) with: {'epochs': 8, 'neurons': 600, 'neurons2': 200}
-0.513871 (0.001651) with: {'epochs': 8, 'neurons': 800, 'neurons2': 300}
-0.517763 (0.001434) with: {'epochs': 8, 'neurons': 800, 'neurons2': 200}
-0.516376 (0.007269) with: {'epochs': 17, 'neurons': 600, 'neurons2': 300}
-0.508739 (0.003769) with: {'epochs': 17, 'neurons': 600, 'neurons2': 200}
-0.525197 (0.001602) with: {'epochs': 17, 'neurons': 800, 'neurons2': 300}
-0.510729 (0.001342) with: {'epochs': 17, 'neurons': 800, 'neurons2': 200}
-0.550526 (0.008768) with: {'epochs': 23, 'neurons': 600, 'neurons2': 300}
-0.539632 (0.011313) with: {'epochs': 23, 'neurons': 600, 'neurons2': 200}
-0.566808 (0.011942) with: {'epochs': 23, 'neurons': 800, 'neurons2': 300}
-0.545097 (0.005798) with: {'epochs': 23, 'neurons': 800, 'neurons2': 200}


In [5]:
# Narrower follow-up search; epochs is again overridden by the grid below.
model = KerasClassifier(
    build_fn=create_model,
    epochs=20,
    batch_size=96,
    verbose=0,
)

neurons = [600, 650]
neurons2 = [200, 100, 75]
epochs = [14, 17, 20]
param_grid = dict(
    neurons=neurons,
    neurons2=neurons2,
    epochs=epochs,
)

grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_log_loss',
)
grid_result = grid.fit(X, y)

# Summarize: best combination first, then one line per candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
search_table = grid_result.cv_results_
means = search_table['mean_test_score']
stds = search_table['std_test_score']
params = search_table['params']
for row in range(len(params)):
    print("%f (%f) with: %r" % (means[row], stds[row], params[row]))

Best: -0.505109 using {'epochs': 17, 'neurons': 650, 'neurons2': 75}
-0.510174 (0.003086) with: {'epochs': 14, 'neurons': 600, 'neurons2': 200}
-0.509185 (0.004402) with: {'epochs': 14, 'neurons': 600, 'neurons2': 100}
-0.509345 (0.001641) with: {'epochs': 14, 'neurons': 600, 'neurons2': 75}
-0.507119 (0.003324) with: {'epochs': 14, 'neurons': 650, 'neurons2': 200}
-0.510551 (0.002601) with: {'epochs': 14, 'neurons': 650, 'neurons2': 100}
-0.508348 (0.001361) with: {'epochs': 14, 'neurons': 650, 'neurons2': 75}
-0.522969 (0.007784) with: {'epochs': 17, 'neurons': 600, 'neurons2': 200}
-0.508614 (0.001468) with: {'epochs': 17, 'neurons': 600, 'neurons2': 100}
-0.508832 (0.003991) with: {'epochs': 17, 'neurons': 600, 'neurons2': 75}
-0.512417 (0.009104) with: {'epochs': 17, 'neurons': 650, 'neurons2': 200}
-0.506792 (0.004205) with: {'epochs': 17, 'neurons': 650, 'neurons2': 100}
-0.505109 (0.003746) with: {'epochs': 17, 'neurons': 650, 'neurons2': 75}
-0.526323 (0.013259) with: {'epochs

# Tune the Neuron Activation Function (relu generally)

In [None]:
#activation = ['softmax', 'softplus', 'softsign', 'relu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear']
#param_grid = dict(activation=activation)

# Tune Dropout Regularization

# Candidate grid for dropout tuning.
# NOTE(review): create_model must accept `dropout_rate` and `weight_constraint` before this grid can be searched.
weight_constraint = list(range(1, 6))
dropout_rate = [tenths / 10 for tenths in range(10)]
param_grid = {'dropout_rate': dropout_rate, 'weight_constraint': weight_constraint}

In [6]:
# Joint search over all three hidden-layer widths, epochs and batch size.
# The epochs/batch_size given to the wrapper are only fallbacks; the grid overrides both.
model = KerasClassifier(build_fn=create_model, epochs=17, batch_size=96, verbose=0)
neurons = [1000, 1200]
neurons2 = [100, 200]
neurons3 = [50, 75]
epochs = [19, 22]  # fixed: a stray ';' inside the list was a syntax error
batch_size = [128, 256]
param_grid = dict(neurons=neurons, neurons2=neurons2, neurons3=neurons3,
                  epochs=epochs, batch_size=batch_size)
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='neg_log_loss')
grid_result = grid.fit(X, y)  # fixed: stray '""' between `fit` and its arguments

# Summarize: best combination first, then mean (std) test score per candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: -0.503397 using {'batch_size': 256, 'epochs': 19, 'neurons': 1000, 'neurons2': 100}
-0.504018 (0.002077) with: {'batch_size': 128, 'epochs': 19, 'neurons': 900, 'neurons2': 80}
-0.511930 (0.006733) with: {'batch_size': 128, 'epochs': 19, 'neurons': 900, 'neurons2': 100}
-0.504600 (0.001919) with: {'batch_size': 128, 'epochs': 19, 'neurons': 1000, 'neurons2': 80}
-0.506462 (0.003359) with: {'batch_size': 128, 'epochs': 19, 'neurons': 1000, 'neurons2': 100}
-0.511937 (0.001237) with: {'batch_size': 128, 'epochs': 22, 'neurons': 900, 'neurons2': 80}
-0.514222 (0.006187) with: {'batch_size': 128, 'epochs': 22, 'neurons': 900, 'neurons2': 100}
-0.511364 (0.004483) with: {'batch_size': 128, 'epochs': 22, 'neurons': 1000, 'neurons2': 80}
-0.520178 (0.007607) with: {'batch_size': 128, 'epochs': 22, 'neurons': 1000, 'neurons2': 100}
-0.505698 (0.001082) with: {'batch_size': 256, 'epochs': 19, 'neurons': 900, 'neurons2': 80}
-0.512080 (0.003767) with: {'batch_size': 256, 'epochs': 19, 'neu

After you have discovered / tuned the hyperparameters of your deep learning network,

create the model, train it while validating on held-out validation data, and save the best weights
by monitoring the validation logarithmic loss.

In [175]:
# Build the final network directly; `model` is now a plain Keras model, not the scikit-learn wrapper.
model = create_model(neurons=1000, neurons2=100, neurons3=30)

In [176]:
# Write the model to 'best.hd5' only when the validation loss reaches a new minimum.
# (ReduceLROnPlateau is imported with the other dependencies in the top import cell.)
check = ModelCheckpoint(
    filepath='best.hd5',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)

In [177]:
# Scale the learning rate by 0.97 after one epoch without improvement in the
# monitored quantity (not set here, so the Keras default applies).
reducelr = ReduceLROnPlateau(
    factor=0.97,
    patience=1,
    mode='min',
    verbose=1,
)

In [None]:
callbacks_list = [check, reducelr]

# Keras takes the validation_split fraction from the END of the arrays, before
# any shuffling. Permute the rows first so the held-out 33% is a random sample
# rather than whatever rows happen to come last in the CSV.
shuffled_rows = numpy.random.permutation(len(X))
history = model.fit(
    X[shuffled_rows],
    y[shuffled_rows],
    validation_split=0.33,
    epochs=150,
    batch_size=256,
    callbacks=callbacks_list,
)

Train on 41458 samples, validate on 20420 samples
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150