In [2]:
# Use scikit-learn to grid search the batch size and epochs
import random

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import GridSearchCV
from keras.models import Sequential
from keras.layers import Dense
from scikeras.wrappers import KerasClassifier
from keras.metrics import BinaryAccuracy, AUC
# Function to create model, required for KerasClassifier

## DL Model Creation

In [3]:
def create_model():
    """Build and compile the small binary classifier used by this notebook.

    The network is a `Sequential` model with two `Dense` layers: a 12-unit
    ReLU layer declared with an 8-feature input shape, followed by a single
    sigmoid output unit. It is compiled with binary cross-entropy loss, the
    Adam optimizer and the accuracy metric.

    Returns:
        The compiled Keras model.
    """
    net = Sequential()
    # Hidden layer: 8 inputs -> 12 ReLU units.
    net.add(Dense(units=12, activation='relu', input_shape=(8,)))
    # Output layer: one sigmoid unit.
    net.add(Dense(units=1, activation='sigmoid'))
    net.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return net

## Data Preprocessing

In [4]:
# Fix the random seeds for reproducibility.
# Seeding TensorFlow alone leaves Python's `random` module and NumPy's global
# generator unseeded, so all three are pinned to the same value here.
seed = 7
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)
# Load the dataset from a comma-separated text file.
dataset = np.loadtxt("data.csv", delimiter=",")
# Split into input (X) and output (Y) variables: columns 0-7 are the inputs,
# column 8 is the output.
X = dataset[:, 0:8]
Y = dataset[:, 8]

## Model Instantiation

In [5]:
from keras import backend as K


def recall_m(y_true, y_pred):
    """Recall computed with Keras backend ops.

    The numerator is the sum of the element-wise product ``y_true * y_pred``
    after clipping to [0, 1] and rounding; the denominator is the sum of
    ``y_true`` after the same clip-and-round, plus ``K.epsilon()`` so that it
    is never exactly zero.

    Args:
        y_true: Ground-truth labels.
        y_pred: Model predictions.

    Returns:
        The ratio described above.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    actual_positives = K.round(K.clip(y_true, 0, 1))
    return K.sum(hits) / (K.sum(actual_positives) + K.epsilon())


def precision_m(y_true, y_pred):
    """Precision computed with Keras backend ops.

    The numerator is the sum of the element-wise product ``y_true * y_pred``
    after clipping to [0, 1] and rounding; the denominator is the sum of
    ``y_pred`` after the same clip-and-round, plus ``K.epsilon()`` so that it
    is never exactly zero.

    Args:
        y_true: Ground-truth labels.
        y_pred: Model predictions.

    Returns:
        The ratio described above.
    """
    hits = K.round(K.clip(y_true * y_pred, 0, 1))
    flagged_positives = K.round(K.clip(y_pred, 0, 1))
    return K.sum(hits) / (K.sum(flagged_positives) + K.epsilon())


def f1_m(y_true, y_pred):
    """F1 score: twice ``p * r / (p + r + K.epsilon())``.

    ``p`` comes from `precision_m` and ``r`` from `recall_m`, both evaluated on
    the same arguments.

    Args:
        y_true: Ground-truth labels.
        y_pred: Model predictions.

    Returns:
        The F1 value described above.
    """
    p = precision_m(y_true, y_pred)
    r = recall_m(y_true, y_pred)
    # Epsilon keeps the denominator non-zero when both p and r are zero.
    half_f1 = (p * r) / (p + r + K.epsilon())
    return 2 * half_f1

In [6]:
# Wrap the model-building function in a scikit-learn compatible classifier.
# NOTE(review): create_model() already compiles the network with
# metrics=['accuracy']; confirm that the metrics / metrics__threshold passed
# here are actually applied rather than ignored.
model = KerasClassifier(
    model=create_model,
    metrics=['acc', f1_m, precision_m, recall_m],
    metrics__threshold=0.65,
    verbose=0,
)


### Results without using Cross Validated Hyperparameter Tuning

In [7]:
# Baseline fit with hand-picked batch size and epoch count (no tuning),
# passing validation_split=0.3 through to the fit call.
result = model.fit(
    X,
    Y,
    batch_size=40,
    epochs=10,
    validation_split=0.3,
    verbose=0,
)

In [8]:
# Score the baseline on the same X, Y it was fitted on (not a held-out estimate).
predict = model.score(X, Y)
print(predict)

0.5416666666666666


## Hyperparameter tuning using GridSearchCV

In [9]:
# Candidate values for the two hyperparameters being searched.
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = {'batch_size': batch_size, 'epochs': epochs}
# Try every batch_size/epochs pair with cv=3, spreading the work over all
# available cores (n_jobs=-1).
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=3,
    n_jobs=-1,
)
grid_result = grid.fit(X, Y)

In [10]:
# Report the best configuration, then mean (std) test score per candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means, stds, params = (
    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# Scored on the same X, Y the search was fitted on, so this is an in-sample figure.
f"The final score is: {grid_result.score(X,Y)}"

Best: 0.704427 using {'batch_size': 40, 'epochs': 100}
0.537760 (0.021710) with: {'batch_size': 10, 'epochs': 10}
0.656250 (0.005524) with: {'batch_size': 10, 'epochs': 50}
0.696615 (0.003683) with: {'batch_size': 10, 'epochs': 100}
0.604167 (0.019488) with: {'batch_size': 20, 'epochs': 10}
0.651042 (0.029463) with: {'batch_size': 20, 'epochs': 50}
0.678385 (0.008027) with: {'batch_size': 20, 'epochs': 100}
0.606771 (0.016367) with: {'batch_size': 40, 'epochs': 10}
0.661458 (0.045143) with: {'batch_size': 40, 'epochs': 50}
0.704427 (0.022402) with: {'batch_size': 40, 'epochs': 100}
0.584635 (0.046256) with: {'batch_size': 60, 'epochs': 10}
0.638021 (0.020505) with: {'batch_size': 60, 'epochs': 50}
0.643229 (0.011201) with: {'batch_size': 60, 'epochs': 100}
0.533854 (0.126148) with: {'batch_size': 80, 'epochs': 10}
0.645833 (0.025780) with: {'batch_size': 80, 'epochs': 50}
0.622396 (0.074822) with: {'batch_size': 80, 'epochs': 100}
0.565104 (0.046146) with: {'batch_size': 100, 'epochs':

'The final score is: 0.7278645833333334'

### Hyperparameter tuning using RandomizedSearchCV

In [11]:
from sklearn.model_selection import RandomizedSearchCV

# Randomized search: evaluate n_iter=10 sampled batch_size/epochs combinations
# with cv=2 instead of the full grid.
parameters = {'batch_size': batch_size, 'epochs': epochs}
randm_src = RandomizedSearchCV(
    estimator=model,
    param_distributions=parameters,
    n_iter=10,
    cv=2,
    n_jobs=-1,
    # Without a fixed random_state the sampled candidates change on every run;
    # reuse the notebook-wide seed so the search is repeatable.
    random_state=seed,
)
rand_result = randm_src.fit(X, Y)

In [12]:
# Report the best configuration, then mean (std) test score per sampled candidate.
print("Best: %f using %s" % (rand_result.best_score_, rand_result.best_params_))
cv_results = rand_result.cv_results_
means, stds, params = (
    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# Scored on the same X, Y the search was fitted on, so this is an in-sample figure.
f"The final score is: {rand_result.score(X,Y)}"

Best: 0.703125 using {'epochs': 100, 'batch_size': 20}
0.703125 (0.015625) with: {'epochs': 100, 'batch_size': 20}
0.541667 (0.104167) with: {'epochs': 10, 'batch_size': 100}
0.589844 (0.069010) with: {'epochs': 50, 'batch_size': 100}
0.592448 (0.048177) with: {'epochs': 50, 'batch_size': 80}
0.619792 (0.026042) with: {'epochs': 100, 'batch_size': 100}
0.570312 (0.080729) with: {'epochs': 10, 'batch_size': 80}
0.677083 (0.002604) with: {'epochs': 50, 'batch_size': 60}
0.644531 (0.003906) with: {'epochs': 50, 'batch_size': 40}
0.674479 (0.018229) with: {'epochs': 50, 'batch_size': 20}
0.587240 (0.066406) with: {'epochs': 10, 'batch_size': 60}


'The final score is: 0.7513020833333334'

### Hyperparameter tuning using Nested CV --> KFold

In [15]:
from sklearn.model_selection import KFold

# Cross-validation splitter: 3 shuffled folds with a fixed random_state.
cv = KFold(n_splits=3, shuffle=True, random_state=1)
# Search space for the two tuned hyperparameters.
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
space = {'batch_size': batch_size, 'epochs': epochs}

# Grid search scored by accuracy over the folds produced by `cv`.
search = GridSearchCV(
    estimator=model,
    param_grid=space,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
)
# Run the search.
result = search.fit(X, Y)

In [16]:
# Report the best configuration, then mean (std) test score per candidate.
print("Best: %f using %s" % (result.best_score_, result.best_params_))
cv_results = result.cv_results_
means, stds, params = (
    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# Scored on the same X, Y the search was fitted on, so this is an in-sample figure.
f"The final score is: {result.score(X,Y)}"

Best: 0.690104 using {'batch_size': 10, 'epochs': 100}
0.640625 (0.009568) with: {'batch_size': 10, 'epochs': 10}
0.653646 (0.023073) with: {'batch_size': 10, 'epochs': 50}
0.690104 (0.004872) with: {'batch_size': 10, 'epochs': 100}
0.506510 (0.103070) with: {'batch_size': 20, 'epochs': 10}
0.660156 (0.052698) with: {'batch_size': 20, 'epochs': 50}
0.688802 (0.004872) with: {'batch_size': 20, 'epochs': 100}
0.578125 (0.053274) with: {'batch_size': 40, 'epochs': 10}
0.651042 (0.012075) with: {'batch_size': 40, 'epochs': 50}
0.664062 (0.031412) with: {'batch_size': 40, 'epochs': 100}
0.532552 (0.027498) with: {'batch_size': 60, 'epochs': 10}
0.595052 (0.051560) with: {'batch_size': 60, 'epochs': 50}
0.644531 (0.019137) with: {'batch_size': 60, 'epochs': 100}
0.527344 (0.049101) with: {'batch_size': 80, 'epochs': 10}
0.669271 (0.011201) with: {'batch_size': 80, 'epochs': 50}
0.664062 (0.019401) with: {'batch_size': 80, 'epochs': 100}
0.511719 (0.075003) with: {'batch_size': 100, 'epochs':

'The final score is: 0.7408854166666666'

### Hyperparameter Tuning Using Nested CV --> Stratified KFold

In [20]:
from sklearn.model_selection import StratifiedKFold

# Cross-validation splitter: 2 stratified folds, no shuffling.
scv = StratifiedKFold(n_splits=2, shuffle=False, random_state=None)
# Search space for the two tuned hyperparameters.
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
space = {'batch_size': batch_size, 'epochs': epochs}

# Grid search scored by accuracy over the folds produced by `scv`.
ssearch = GridSearchCV(
    estimator=model,
    param_grid=space,
    scoring='accuracy',
    cv=scv,
    n_jobs=-1,
)
# Run the search.
hresult = ssearch.fit(X, Y)

In [21]:
# Report the best configuration, then mean (std) test score per candidate.
print("Best: %f using %s" % (hresult.best_score_, hresult.best_params_))
cv_results = hresult.cv_results_
means, stds, params = (
    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# Scored on the same X, Y the search was fitted on, so this is an in-sample figure.
f"The final score is: {hresult.score(X,Y)}"

Best: 0.690104 using {'batch_size': 40, 'epochs': 100}
0.569010 (0.037760) with: {'batch_size': 10, 'epochs': 10}
0.644531 (0.024740) with: {'batch_size': 10, 'epochs': 50}
0.657552 (0.014323) with: {'batch_size': 10, 'epochs': 100}
0.506510 (0.001302) with: {'batch_size': 20, 'epochs': 10}
0.675781 (0.037760) with: {'batch_size': 20, 'epochs': 50}
0.618490 (0.061198) with: {'batch_size': 20, 'epochs': 100}
0.553385 (0.032552) with: {'batch_size': 40, 'epochs': 10}
0.617188 (0.052083) with: {'batch_size': 40, 'epochs': 50}
0.690104 (0.002604) with: {'batch_size': 40, 'epochs': 100}
0.549479 (0.041667) with: {'batch_size': 60, 'epochs': 10}
0.613281 (0.006510) with: {'batch_size': 60, 'epochs': 50}
0.661458 (0.013021) with: {'batch_size': 60, 'epochs': 100}
0.634115 (0.006510) with: {'batch_size': 80, 'epochs': 10}
0.571615 (0.022135) with: {'batch_size': 80, 'epochs': 50}
0.639323 (0.022135) with: {'batch_size': 80, 'epochs': 100}
0.617188 (0.033854) with: {'batch_size': 100, 'epochs':

'The final score is: 0.71875'

### Hyperparameter Tuning Using Nested CV --> Repeated KFolds / Repeated Random Sampling

In [22]:
from sklearn.model_selection import RepeatedKFold

# Cross-validation splitter: 2 folds repeated 2 times with a fixed random_state.
rcv = RepeatedKFold(n_splits=2, n_repeats=2, random_state=2652124)
# Search space for the two tuned hyperparameters.
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
space = {'batch_size': batch_size, 'epochs': epochs}

# Grid search scored by accuracy over the splits produced by `rcv`.
rsearch = GridSearchCV(
    estimator=model,
    param_grid=space,
    scoring='accuracy',
    cv=rcv,
    n_jobs=-1,
)
# Run the search.
rresult = rsearch.fit(X, Y)

In [23]:
# Report the best configuration, then mean (std) test score per candidate.
print("Best: %f using %s" % (rresult.best_score_, rresult.best_params_))
cv_results = rresult.cv_results_
means, stds, params = (
    cv_results[key] for key in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

# Scored on the same X, Y the search was fitted on, so this is an in-sample figure.
f"The final score is: {rresult.score(X,Y)}"

Best: 0.694010 using {'batch_size': 10, 'epochs': 100}
0.600260 (0.048107) with: {'batch_size': 10, 'epochs': 10}
0.642578 (0.025936) with: {'batch_size': 10, 'epochs': 50}
0.694010 (0.033171) with: {'batch_size': 10, 'epochs': 100}
0.593099 (0.033521) with: {'batch_size': 20, 'epochs': 10}
0.662760 (0.015461) with: {'batch_size': 20, 'epochs': 50}
0.662760 (0.021904) with: {'batch_size': 20, 'epochs': 100}
0.528646 (0.049066) with: {'batch_size': 40, 'epochs': 10}
0.598307 (0.042924) with: {'batch_size': 40, 'epochs': 50}
0.674479 (0.008831) with: {'batch_size': 40, 'epochs': 100}
0.525391 (0.072271) with: {'batch_size': 60, 'epochs': 10}
0.635417 (0.048265) with: {'batch_size': 60, 'epochs': 50}
0.666016 (0.012939) with: {'batch_size': 60, 'epochs': 100}
0.523438 (0.113289) with: {'batch_size': 80, 'epochs': 10}
0.585938 (0.042112) with: {'batch_size': 80, 'epochs': 50}
0.613281 (0.041524) with: {'batch_size': 80, 'epochs': 100}
0.541016 (0.106904) with: {'batch_size': 100, 'epochs':

'The final score is: 0.6783854166666666'