In [None]:
import pandas as pd
import numpy as np
import keras
from sklearn.preprocessing import StandardScaler

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the comma-separated Pima Indians diabetes file into a 2-D NumPy array
dataset = np.loadtxt("/content/pima-indians-diabetes.data.csv", delimiter=",")
# Columns 0-7 are the model inputs; column 8 is the label used for training
X, y = dataset[:, :8], dataset[:, 8]

In [None]:
# Standardization: rescale every feature column to zero mean and unit variance
# NOTE(review): the scaler is fit on all rows before cross-validation, so each
# validation fold's statistics influence the scaling - confirm this is acceptable
a = StandardScaler()
X_standardized = a.fit_transform(X)

In [None]:
# Sanity check on the scaling: per-column summary statistics (mean should be ~0, std ~1)
pd.DataFrame(data=X_standardized).describe()

Unnamed: 0,0,1,2,3,4,5,6,7
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,-8.789266000000001e-17,-9.251859000000001e-18,1.5034270000000003e-17,8.211024000000001e-17,-4.972874e-17,2.960595e-15,1.905883e-15,2.081668e-16
std,1.000652,1.000652,1.000652,1.000652,1.000652,1.000652,1.000652,1.000652
min,-1.141852,-3.783654,-3.572597,-1.288212,-0.6928906,-4.060474,-1.189553,-1.041549
25%,-0.8448851,-0.6852363,-0.3673367,-1.288212,-0.6928906,-0.5955785,-0.6889685,-0.7862862
50%,-0.2509521,-0.1218877,0.1496408,0.1545332,-0.4280622,0.0009419788,-0.3001282,-0.3608474
75%,0.6399473,0.6057709,0.5632228,0.7190857,0.4120079,0.5847705,0.4662269,0.6602056
max,3.906578,2.444478,2.734528,4.921866,6.652839,4.455807,5.883565,4.063716


**Tuning of Hyperparameters: Batch Size and Epochs**

In [None]:
# Build the neural network and search for the best batch size and number of epochs
# Importing the necessary packages
from sklearn.model_selection import GridSearchCV, KFold
from keras.models import Sequential # linear stack of layers: components (i.e. hidden layers) are added one by one
from keras.layers import Dense # fully connected layer; its first argument is the number of neurons
from keras.wrappers.scikit_learn import KerasClassifier

from tensorflow.keras.optimizers import Adam # Adam (Adaptive Moment Estimation) - a gradient-descent optimizer

In [None]:
# create model
def create_model():
    """Return a compiled 8 -> 12 -> 8 -> 1 feed-forward network for binary classification.

    All layers draw their initial weights from the 'uniform' initializer. The
    model is compiled with binary cross-entropy loss, an Adam optimizer with
    learning rate 0.01, and accuracy as the reported metric.
    """
    layers = [
        # first hidden layer: 12 ReLU neurons fed by the 8 input columns
        Dense(12, input_dim=8, kernel_initializer='uniform', activation='relu'),
        # second hidden layer: 8 ReLU neurons
        Dense(8, kernel_initializer='uniform', activation='relu'),
        # output layer: one sigmoid neuron
        Dense(1, kernel_initializer='uniform', activation='sigmoid'),
    ]
    model = Sequential(layers)

    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(learning_rate=0.01),
        metrics=['accuracy'],
    )
    return model

In [None]:
# Wrap the model builder so scikit-learn can treat it as an estimator
# (verbose=0 turns off Keras' own training output)
model = KerasClassifier(build_fn=create_model, verbose=0)

# Candidate values for the two hyperparameters under test
batch_size = [10, 20, 40]
epochs = [10, 50, 100, 150]

# Every (batch_size, epochs) pair is tried: 3 * 4 = 12 candidates
param_grid = {'batch_size': batch_size, 'epochs': epochs}

# KFold() uses its default of 5 folds, so 12 candidates * 5 folds = 60 fits;
# verbose=10 makes GridSearchCV log the start/end, score and time of each fit
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=KFold(),
    verbose=10,
)
grid_result = grid.fit(X_standardized, y)

Fitting 5 folds for each of 12 candidates, totalling 60 fits
[CV 1/5; 1/12] START batch_size=10, epochs=10...................................
[CV 1/5; 1/12] END ....batch_size=10, epochs=10;, score=0.727 total time=   2.9s
[CV 2/5; 1/12] START batch_size=10, epochs=10...................................
[CV 2/5; 1/12] END ....batch_size=10, epochs=10;, score=0.714 total time=   2.1s
[CV 3/5; 1/12] START batch_size=10, epochs=10...................................
[CV 3/5; 1/12] END ....batch_size=10, epochs=10;, score=0.766 total time=   2.1s
[CV 4/5; 1/12] START batch_size=10, epochs=10...................................
[CV 4/5; 1/12] END ....batch_size=10, epochs=10;, score=0.837 total time=   2.8s
[CV 5/5; 1/12] START batch_size=10, epochs=10...................................
[CV 5/5; 1/12] END ....batch_size=10, epochs=10;, score=0.778 total time=   2.6s
[CV 1/5; 2/12] START batch_size=10, epochs=50...................................
[CV 1/5; 2/12] END ....batch_size=10, epochs=50;

In [None]:
# Report the best mean CV score and the parameters that produced it
print('Best : {}, using {}'.format(grid_result.best_score_, grid_result.best_params_))

# Per-candidate mean score, score std-dev across folds, and parameter set
means, stds, params = (
    grid_result.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))

Best : 0.7670401573181153, using {'batch_size': 40, 'epochs': 10}
0.7644342660903931,0.043093444733982876 with: {'batch_size': 10, 'epochs': 10}
0.7487310171127319,0.02866399623253584 with: {'batch_size': 10, 'epochs': 50}
0.7539597749710083,0.04682025592049717 with: {'batch_size': 10, 'epochs': 100}
0.7461760520935059,0.03331552464513433 with: {'batch_size': 10, 'epochs': 150}
0.7644342660903931,0.050227400030165785 with: {'batch_size': 20, 'epochs': 10}
0.7578983187675477,0.0401807340668268 with: {'batch_size': 20, 'epochs': 50}
0.7370172381401062,0.020721491550990796 with: {'batch_size': 20, 'epochs': 100}
0.7383329153060914,0.04230203038651627 with: {'batch_size': 20, 'epochs': 150}
0.7670401573181153,0.04523432690167328 with: {'batch_size': 40, 'epochs': 10}
0.7514133095741272,0.040014368148913036 with: {'batch_size': 40, 'epochs': 50}
0.7591291069984436,0.012734609095455809 with: {'batch_size': 40, 'epochs': 100}
0.764383339881897,0.025979159725513203 with: {'batch_size': 40, 'ep

In [None]:
# Display the list of parameter combinations taken from grid_result.cv_results_['params']
params

[{'batch_size': 10, 'epochs': 10},
 {'batch_size': 10, 'epochs': 50},
 {'batch_size': 10, 'epochs': 100},
 {'batch_size': 10, 'epochs': 150},
 {'batch_size': 20, 'epochs': 10},
 {'batch_size': 20, 'epochs': 50},
 {'batch_size': 20, 'epochs': 100},
 {'batch_size': 20, 'epochs': 150},
 {'batch_size': 40, 'epochs': 10},
 {'batch_size': 40, 'epochs': 50},
 {'batch_size': 40, 'epochs': 100},
 {'batch_size': 40, 'epochs': 150}]

## Tuning of Hyperparameters: Learning Rate and Dropout Rate

In [None]:
from keras.layers import Dropout
# Dropout rate: the fraction of a layer's units that are randomly dropped during training, used to reduce overfitting
# Defining the model

def create_model(learning_rate,dropout_rate):
    """Build and compile a two-hidden-layer binary classifier.

    Architecture: Dense(8, relu) -> Dropout -> Dense(12, relu) -> Dropout
    -> Dense(1, sigmoid), hidden weights initialised with 'normal'.

    Parameters
    ----------
    learning_rate : float
        Learning rate handed to the Adam optimizer.
    dropout_rate : float
        Rate passed to both Dropout layers.

    Returns
    -------
    The Sequential model, compiled with binary cross-entropy loss and an
    accuracy metric.
    """
    model = Sequential()
    # Only the first layer declares the input width (8 feature columns)
    model.add(Dense(8,input_dim = 8,kernel_initializer = 'normal',activation = 'relu'))
    model.add(Dropout(dropout_rate))
    # Later layers take their input size from the layer before them, so no
    # input_dim is given here (the previous value merely duplicated layer 1's width)
    model.add(Dense(12,kernel_initializer = 'normal',activation = 'relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1,activation = 'sigmoid'))

    adam = Adam(learning_rate = learning_rate)
    model.compile(loss = 'binary_crossentropy',optimizer = adam,metrics = ['accuracy'])
    return model

# Wrap the model builder for scikit-learn; batch size and epochs stay fixed
# at 40 and 10 while the other hyperparameters are searched
model = KerasClassifier(build_fn=create_model, verbose=0, batch_size=40, epochs=10)

# Hyperparameters under test
learning_rate = [0.001, 0.01, 0.1]
dropout_rate = [0.0, 0.1, 0.2]  # drop 0%, 10%, 20% of units

# 3 learning rates * 3 dropout rates = 9 candidates
param_grids = {'learning_rate': learning_rate, 'dropout_rate': dropout_rate}

# 9 candidates * 5 folds (the KFold default) = 45 fits
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grids,
    cv=KFold(),
    verbose=10,
)
grid_result = grid.fit(X_standardized, y)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 1/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=0.760 total time=   1.8s
[CV 2/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 2/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=0.714 total time=   1.4s
[CV 3/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 3/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=0.760 total time=   1.4s
[CV 4/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 4/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=0.824 total time=   2.0s
[CV 5/5; 1/9] START dropout_rate=0.0, learning_rate=0.001.......................
[CV 5/5; 1/9] END dropout_rate=0.0, learning_rate=0.001;, score=0.765 total time=   1.5s
[CV 1/5; 2/9] START dropout_rate=0.0, learning_rate=0.01........................
[CV 1/5; 

In [None]:
# Best mean CV score and the learning rate / dropout rate that achieved it
print('Best : {}, using {}'.format(grid_result.best_score_, grid_result.best_params_))

# One line per candidate: mean score, std-dev across folds, parameter set
means, stds, params = (
    grid_result.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))

## Tuning of Hyperparameters: Activation Function and Kernel Initializer

In [None]:
# Defining the model

def create_model(activation_function,init):
    """Build and compile a two-hidden-layer binary classifier.

    Architecture: Dense(8) -> Dropout(0.2) -> Dense(12) -> Dropout(0.2)
    -> Dense(1, sigmoid).

    Parameters
    ----------
    activation_function : str
        Activation name used by both hidden layers.
    init : str
        Kernel initializer name used by both hidden layers.

    Returns
    -------
    The Sequential model, compiled with Adam (learning rate 0.01), binary
    cross-entropy loss and an accuracy metric.

    Notes
    -----
    In machine learning, the loss function measures the error or deviation in
    the learning process. Keras requires a loss function when compiling a model:
    https://www.tutorialspoint.com/keras/keras_model_compilation.htm
    """
    model = Sequential()
    # Only the first layer declares the input width (8 feature columns)
    model.add(Dense(8,input_dim = 8,kernel_initializer = init,activation = activation_function))
    model.add(Dropout(0.2))
    # Later layers take their input size from the layer before them, so no
    # input_dim is given here (the previous value merely duplicated layer 1's width)
    model.add(Dense(12,kernel_initializer = init,activation = activation_function))
    model.add(Dropout(0.2))
    model.add(Dense(1,activation = 'sigmoid'))

    adam = Adam(learning_rate = 0.01)
    model.compile(loss = 'binary_crossentropy',optimizer = adam,metrics = ['accuracy'])
    return model

# Wrap the model builder for scikit-learn; batch size and epochs stay fixed
# at 40 and 50 while the other hyperparameters are searched
model = KerasClassifier(build_fn=create_model, verbose=0, batch_size=40, epochs=50)

# Hyperparameters under test
activation_function = ['softmax', 'relu', 'tanh', 'linear']  # hidden-layer activation candidates
init = ['uniform', 'normal', 'zero']  # kernel initializer candidates for the hidden layers

# 4 activations * 3 initializers = 12 candidates
param_grids = {'activation_function': activation_function, 'init': init}

# 12 candidates * 5 folds (the KFold default) = 60 fits
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grids,
    cv=KFold(),
    verbose=10,
)
grid_result = grid.fit(X_standardized, y)

In [None]:
# Best mean CV score and the activation / initializer that achieved it
print('Best : {}, using {}'.format(grid_result.best_score_, grid_result.best_params_))

# One line per candidate: mean score, std-dev across folds, parameter set
means, stds, params = (
    grid_result.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))

## Tuning of Hyperparameters: Number of Neurons in the Hidden Layers

In [None]:
# Defining the model

def create_model(neuron1,neuron2):
    """Build and compile a two-hidden-layer binary classifier.

    Architecture: Dense(neuron1, linear) -> Dropout(0.2)
    -> Dense(neuron2, linear) -> Dropout(0.2) -> Dense(1, sigmoid),
    hidden weights initialised with 'uniform'.

    Parameters
    ----------
    neuron1 : int
        Number of units in the first hidden layer.
    neuron2 : int
        Number of units in the second hidden layer.

    Returns
    -------
    The Sequential model, compiled with Adam (learning rate 0.01), binary
    cross-entropy loss and an accuracy metric.
    """
    model = Sequential()
    # Only the first layer declares the input width (8 feature columns)
    model.add(Dense(neuron1,input_dim = 8,kernel_initializer = 'uniform',activation = 'linear'))
    model.add(Dropout(0.2))
    # Later layers take their input size from the layer before them, so the
    # redundant input_dim argument is omitted here
    model.add(Dense(neuron2,kernel_initializer = 'uniform',activation = 'linear'))
    model.add(Dropout(0.2))
    model.add(Dense(1,activation = 'sigmoid'))

    adam = Adam(learning_rate = 0.01)
    model.compile(loss = 'binary_crossentropy',optimizer = adam,metrics = ['accuracy'])
    return model

# Wrap the model builder for scikit-learn; batch size and epochs stay fixed
# at 40 and 50 while the layer widths are searched
model = KerasClassifier(build_fn=create_model, verbose=0, batch_size=40, epochs=50)

# Hyperparameters under test
neuron1 = [4, 8, 16]  # candidate widths for the first hidden layer
neuron2 = [2, 4, 8]   # candidate widths for the second hidden layer

# 3 * 3 = 9 candidates
param_grids = {'neuron1': neuron1, 'neuron2': neuron2}

# 9 candidates * 5 folds (the KFold default) = 45 fits
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grids,
    cv=KFold(),
    verbose=10,
)
grid_result = grid.fit(X_standardized, y)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV 1/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 1/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.760 total time=   3.0s
[CV 2/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 2/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.740 total time=   2.7s
[CV 3/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 3/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.766 total time=   2.6s
[CV 4/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 4/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.830 total time=   3.8s
[CV 5/5; 1/9] START neuron1=4, neuron2=2........................................
[CV 5/5; 1/9] END .........neuron1=4, neuron2=2;, score=0.739 total time=   3.1s
[CV 1/5; 2/9] START neuron1=4, neuron2=4........................................
[CV 1/5; 2/9] END .........neuron1=4, neuron2=4;,

In [None]:
# Best mean CV score and the layer widths that achieved it
print('Best : {}, using {}'.format(grid_result.best_score_, grid_result.best_params_))

# One line per candidate: mean score, std-dev across folds, parameter set
means, stds, params = (
    grid_result.cv_results_[column]
    for column in ('mean_test_score', 'std_test_score', 'params')
)
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))

Best : 0.7773958086967468, using {'neuron1': 8, 'neuron2': 2}
0.7669722437858582,0.033329513017886676 with: {'neuron1': 4, 'neuron2': 2}
0.7734912157058715,0.030823893138293236 with: {'neuron1': 4, 'neuron2': 4}
0.7656735420227051,0.0341649544092364 with: {'neuron1': 4, 'neuron2': 8}
0.7773958086967468,0.03082541136700629 with: {'neuron1': 8, 'neuron2': 2}
0.7708938121795654,0.03293139218349116 with: {'neuron1': 8, 'neuron2': 4}
0.7722179770469666,0.035609128049830796 with: {'neuron1': 8, 'neuron2': 8}
0.764391815662384,0.034847145479469106 with: {'neuron1': 16, 'neuron2': 2}
0.7657074928283691,0.03881495371444925 with: {'neuron1': 16, 'neuron2': 4}
0.7709107875823975,0.03669044960874866 with: {'neuron1': 16, 'neuron2': 8}


# Tuning All Hyperparameters at Once

In [None]:
def create_model(learning_rate,dropout_rate,activation_function,init,neuron1,neuron2):
    """Build and compile a fully parameterised two-hidden-layer binary classifier.

    Architecture: Dense(neuron1) -> Dropout -> Dense(neuron2) -> Dropout
    -> Dense(1, sigmoid).

    Parameters
    ----------
    learning_rate : float
        Learning rate handed to the Adam optimizer.
    dropout_rate : float
        Rate passed to both Dropout layers.
    activation_function : str
        Activation name used by both hidden layers.
    init : str
        Kernel initializer name used by both hidden layers.
    neuron1 : int
        Number of units in the first hidden layer.
    neuron2 : int
        Number of units in the second hidden layer.

    Returns
    -------
    The Sequential model, compiled with binary cross-entropy loss and an
    accuracy metric.
    """
    model = Sequential()
    # Only the first layer declares the input width (8 feature columns)
    model.add(Dense(neuron1,input_dim = 8,kernel_initializer = init,activation = activation_function))
    model.add(Dropout(dropout_rate))
    # Later layers take their input size from the layer before them, so the
    # redundant input_dim argument is omitted here
    model.add(Dense(neuron2,kernel_initializer = init,activation = activation_function))
    model.add(Dropout(dropout_rate))
    model.add(Dense(1,activation = 'sigmoid'))

    adam = Adam(learning_rate = learning_rate)
    model.compile(loss = 'binary_crossentropy',optimizer = adam,metrics = ['accuracy'])
    return model

# Randomized search is imported here because only this cell uses it
from sklearn.model_selection import RandomizedSearchCV

# Create the model

model = KerasClassifier(build_fn = create_model,verbose = 0)

# Define the search space

batch_size = [10,20,40]
epochs = [10,50,100]
learning_rate = [0.001,0.01,0.1]
dropout_rate = [0.0,0.1,0.2]
activation_function = ['softmax','relu','tanh','linear']
init = ['uniform','normal','zero']
neuron1 = [4,8,16]
neuron2 = [2,4,8]

# Make a dictionary of the search parameters

param_grids = dict(batch_size = batch_size,epochs = epochs,learning_rate = learning_rate,dropout_rate = dropout_rate,
                   activation_function = activation_function,init = init,neuron1 = neuron1,neuron2 = neuron2)

# Build and fit the search.
# An exhaustive grid over this space is 3*3*3*3*4*3*3*3 = 8748 candidates, i.e.
# 43740 model fits with 5-fold CV, which cannot realistically run to completion.
# Instead, evaluate a fixed number of randomly sampled combinations.
N_SEARCH_CANDIDATES = 50  # raise for a more thorough (and slower) search

grid = RandomizedSearchCV(
    estimator = model,
    param_distributions = param_grids,
    n_iter = N_SEARCH_CANDIDATES,
    cv = KFold(),
    random_state = 42,  # fixes which combinations are sampled; it does not seed Keras training
    verbose = 10,
)
grid_result = grid.fit(X_standardized,y)

# Summarize the results
print('Best : {}, using {}'.format(grid_result.best_score_,grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print('{},{} with: {}'.format(mean, stdev, param))

Fitting 5 folds for each of 8748 candidates, totalling 43740 fits
[CV 1/5; 1/8748] START activation_function=softmax, batch_size=10, dropout_rate=0.0, epochs=10, init=uniform, learning_rate=0.001, neuron1=4, neuron2=2


KeyboardInterrupt: ignored