# Neural Network Tuning

In [11]:
# Importing necessary packages
import numpy
import pandas as pd
from sklearn.model_selection import GridSearchCV 
from keras.models import Sequential
from keras.layers import Dense
import tensorflow as tf
import warnings
warnings.filterwarnings('ignore')
from keras.wrappers.scikit_learn import KerasClassifier
from tensorflow import keras

# Part 1 - Tuning batch size and epochs

In [12]:
# Load the diabetes dataset from 'diabetes.csv' (path is relative to the working directory).
# NOTE(review): the column layout shown by data.head() looks like the Pima Indians
# Diabetes dataset rather than an "India" dataset — confirm the provenance.
data = pd.read_csv('diabetes.csv')

In [13]:
# Preview the first rows to check the column names and typical values
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [14]:
# Summary statistics for every column. In the output below the minimum is 0.0 for
# Glucose, BloodPressure, SkinThickness, Insulin and BMI.
# NOTE(review): presumably those zeros stand in for missing measurements — confirm.
data.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0,768.0
mean,3.845052,120.894531,69.105469,20.536458,79.799479,31.992578,0.471876,33.240885,0.348958
std,3.369578,31.972618,19.355807,15.952218,115.244002,7.88416,0.331329,11.760232,0.476951
min,0.0,0.0,0.0,0.0,0.0,0.0,0.078,21.0,0.0
25%,1.0,99.0,62.0,0.0,0.0,27.3,0.24375,24.0,0.0
50%,3.0,117.0,72.0,23.0,30.5,32.0,0.3725,29.0,0.0
75%,6.0,140.25,80.0,32.0,127.25,36.6,0.62625,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [15]:
# Keep only the rows whose BMI is non-zero.
# A boolean filter plus rebinding `data` replaces the previous in-place drop: the
# surviving rows (and their index labels) are the same, but the frame that was
# loaded earlier is no longer mutated behind the reader's back.
data = data[data['BMI'] != 0]
data.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,757.0,757.0,757.0,757.0,757.0,757.0,757.0,757.0,757.0
mean,3.844122,121.136063,69.690885,20.774108,80.84148,32.457464,0.472437,33.281374,0.351387
std,3.369959,32.061428,18.283325,15.908567,115.710818,6.924988,0.329474,11.697752,0.477719
min,0.0,0.0,0.0,0.0,0.0,18.2,0.078,21.0,0.0
25%,1.0,99.0,64.0,0.0,0.0,27.5,0.244,24.0,0.0
50%,3.0,117.0,72.0,23.0,37.0,32.3,0.376,29.0,0.0
75%,6.0,141.0,80.0,32.0,130.0,36.6,0.627,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [16]:
# Remove the rows where BloodPressure and Insulin are BOTH 0; rows where only one
# of the two is 0 are kept.
# The condition is held as a real boolean mask and `data` is rebound instead of
# being modified with `inplace=True`.
both_zero = (data['BloodPressure'] == 0) & (data['Insulin'] == 0)
data = data[~both_zero]
data.describe()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
count,729.0,729.0,729.0,729.0,729.0,729.0,729.0,729.0,729.0
mean,3.858711,121.046639,72.367627,21.499314,83.946502,32.469959,0.474117,33.318244,0.344307
std,3.357468,32.255215,12.375838,15.708376,116.803,6.885098,0.331649,11.753078,0.475468
min,0.0,0.0,24.0,0.0,0.0,18.2,0.078,21.0,0.0
25%,1.0,99.0,64.0,0.0,0.0,27.5,0.245,24.0,0.0
50%,3.0,117.0,72.0,24.0,46.0,32.4,0.378,29.0,0.0
75%,6.0,141.0,80.0,33.0,130.0,36.6,0.627,41.0,1.0
max,17.0,199.0,122.0,99.0,846.0,67.1,2.42,81.0,1.0


In [17]:
# Separate predictors from the response by column position:
#   X -> the first eight columns (positions 0..7)
#   Y -> the column at position 8
X, Y = data.iloc[:, :8], data.iloc[:, 8]

# Part 2 - Optimizing Activation Function

In [18]:
# Function to create model
def create_my_model(optimizer='Adam', input_dim=8):
    """Build and compile a small feed-forward binary classifier.

    Architecture: Dense(12, relu) -> Dense(8, relu) -> Dense(1, sigmoid),
    compiled with binary cross-entropy loss and an accuracy metric.

    Parameters
    ----------
    optimizer : default 'Adam'
        Passed unchanged to ``compile``. The default allows the factory to be
        called without arguments.
    input_dim : int, default 8
        Number of input features expected by the first layer.

    Returns
    -------
    The compiled ``Sequential`` model.
    """
    mymodel = Sequential()
    mymodel.add(Dense(12, input_dim=input_dim, activation='relu'))
    mymodel.add(Dense(8, activation='relu'))
    # Single sigmoid unit: one probability-like output for the binary target
    mymodel.add(Dense(1, activation='sigmoid'))

    mymodel.compile(loss='binary_crossentropy', optimizer=optimizer, metrics=['accuracy'])
    return mymodel

# How can you improve the accuracy of the model?

In [19]:
# Wrap the model factory in the Keras scikit-learn classifier wrapper.
# Settings used for this run: Adam optimizer, batches of 10 rows, 500 epochs.
model_improved = KerasClassifier(
    build_fn=create_my_model,
    optimizer='Adam',
    batch_size=10,
    epochs=500,
)

In [20]:
# Train on every row of X / Y. No validation data is passed here, so the accuracy
# stored in the returned history is measured on the training rows themselves.
# NOTE(review): no random seed is set in the cells above, so the reported accuracy
# will differ from run to run.
result = model_improved.fit(X, Y)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

In [21]:
# Report the highest per-epoch accuracy recorded during training, as a percentage
best_accuracy = max(result.history["accuracy"])
print(f"{best_accuracy * 100}%")

81.34430646896362%


In [22]:
# batchSize = [5,10, 20, 30]
# epochs = [50, 100, 150, 200, 300]
# optimizer = ['SGD','Adadelta', 'RMSprop', 'Adagrad','Adam']

In [23]:
# model_improved = KerasClassifier(build_fn=create_my_model)

In [24]:
# parameter_grid = dict(batch_size=batchSize, epochs=epochs, optimizer=optimizer)

# mygrid = GridSearchCV(estimator=model_improved, param_grid=parameter_grid, n_jobs=-1, cv=3)
# grid_result = mygrid.fit(X, Y)

In [None]:
# grid_df = pd.DataFrame(grid_result.cv_results_)
# grid_df

NameError: name 'grid_result' is not defined

In [None]:
# print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best: 0.676269 using {'batch_size': 100, 'epochs': 50, 'optimizer': 'SGD'}


## 82.6%
Parameters: 500 epochs, batch size 10, Adam optimizer, `loss='binary_crossentropy'`.

Layers: Dense(12, ReLU), Dense(8, ReLU), Dense(1, sigmoid).

In [None]:
# Print the layer-by-layer architecture and parameter counts of the trained model
result.model.summary()

Model: "sequential_17"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_42 (Dense)            (None, 12)                108       
                                                                 
 dense_43 (Dense)            (None, 8)                 104       
                                                                 
 dense_44 (Dense)            (None, 1)                 9         
                                                                 
Total params: 221
Trainable params: 221
Non-trainable params: 0
_________________________________________________________________
