In [1]:
import pandas as pd
import numpy as np
import sklearn
from tensorflow import keras

In [2]:
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv'
names = ['n_pregnant','glucose_concentration','blood_pressure (mm Hg)','skin_thickness (mm)',
        'serum_insulin (mu U/ml)','BMI','pedigree_function','age','class']
df = pd.read_csv(url,names=names)

In [3]:
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
n_pregnant,768.0,3.845052,3.369578,0.0,1.0,3.0,6.0,17.0
glucose_concentration,768.0,120.894531,31.972618,0.0,99.0,117.0,140.25,199.0
blood_pressure (mm Hg),768.0,69.105469,19.355807,0.0,62.0,72.0,80.0,122.0
skin_thickness (mm),768.0,20.536458,15.952218,0.0,0.0,23.0,32.0,99.0
serum_insulin (mu U/ml),768.0,79.799479,115.244002,0.0,0.0,30.5,127.25,846.0
BMI,768.0,31.992578,7.88416,0.0,27.3,32.0,36.6,67.1
pedigree_function,768.0,0.471876,0.331329,0.078,0.24375,0.3725,0.62625,2.42
age,768.0,33.240885,11.760232,21.0,24.0,29.0,41.0,81.0
class,768.0,0.348958,0.476951,0.0,0.0,0.0,1.0,1.0


In [4]:
cols = ['glucose_concentration','blood_pressure (mm Hg)','skin_thickness (mm)',
        'serum_insulin (mu U/ml)','BMI']
for col in cols:
  df[col].replace(0,np.nan,inplace=True)
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
n_pregnant,768.0,3.845052,3.369578,0.0,1.0,3.0,6.0,17.0
glucose_concentration,763.0,121.686763,30.535641,44.0,99.0,117.0,141.0,199.0
blood_pressure (mm Hg),733.0,72.405184,12.382158,24.0,64.0,72.0,80.0,122.0
skin_thickness (mm),541.0,29.15342,10.476982,7.0,22.0,29.0,36.0,99.0
serum_insulin (mu U/ml),394.0,155.548223,118.775855,14.0,76.25,125.0,190.0,846.0
BMI,757.0,32.457464,6.924988,18.2,27.5,32.3,36.6,67.1
pedigree_function,768.0,0.471876,0.331329,0.078,0.24375,0.3725,0.62625,2.42
age,768.0,33.240885,11.760232,21.0,24.0,29.0,41.0,81.0
class,768.0,0.348958,0.476951,0.0,0.0,0.0,1.0,1.0


In [5]:
df.dropna(inplace=True)
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
n_pregnant,392.0,3.30102,3.211424,0.0,1.0,2.0,5.0,17.0
glucose_concentration,392.0,122.627551,30.860781,56.0,99.0,119.0,143.0,198.0
blood_pressure (mm Hg),392.0,70.663265,12.496092,24.0,62.0,70.0,78.0,110.0
skin_thickness (mm),392.0,29.145408,10.516424,7.0,21.0,29.0,37.0,63.0
serum_insulin (mu U/ml),392.0,156.056122,118.84169,14.0,76.75,125.5,190.0,846.0
BMI,392.0,33.086224,7.027659,18.2,28.4,33.2,37.1,67.1
pedigree_function,392.0,0.523046,0.345488,0.085,0.26975,0.4495,0.687,2.42
age,392.0,30.864796,10.200777,21.0,23.0,27.0,36.0,81.0
class,392.0,0.331633,0.471401,0.0,0.0,0.0,1.0,1.0


In [6]:
dataset = df.values
print(dataset.shape)

(392, 9)


In [7]:
X = dataset[:,0:8]
Y = dataset[:,8].astype(int)

In [8]:
print(X.shape)
print(Y.shape)
print(X[:5])
print(Y[:5])

(392, 8)
(392,)
[[1.000e+00 8.900e+01 6.600e+01 2.300e+01 9.400e+01 2.810e+01 1.670e-01
  2.100e+01]
 [0.000e+00 1.370e+02 4.000e+01 3.500e+01 1.680e+02 4.310e+01 2.288e+00
  3.300e+01]
 [3.000e+00 7.800e+01 5.000e+01 3.200e+01 8.800e+01 3.100e+01 2.480e-01
  2.600e+01]
 [2.000e+00 1.970e+02 7.000e+01 4.500e+01 5.430e+02 3.050e+01 1.580e-01
  5.300e+01]
 [1.000e+00 1.890e+02 6.000e+01 2.300e+01 8.460e+02 3.010e+01 3.980e-01
  5.900e+01]]
[0 1 1 1 1]


In [9]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler().fit(X)

In [10]:
X_standardized = scaler.transform(X)
data = pd.DataFrame(X_standardized)
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
n_pregnant,392.0,3.30102,3.211424,0.0,1.0,2.0,5.0,17.0
glucose_concentration,392.0,122.627551,30.860781,56.0,99.0,119.0,143.0,198.0
blood_pressure (mm Hg),392.0,70.663265,12.496092,24.0,62.0,70.0,78.0,110.0
skin_thickness (mm),392.0,29.145408,10.516424,7.0,21.0,29.0,37.0,63.0
serum_insulin (mu U/ml),392.0,156.056122,118.84169,14.0,76.75,125.5,190.0,846.0
BMI,392.0,33.086224,7.027659,18.2,28.4,33.2,37.1,67.1
pedigree_function,392.0,0.523046,0.345488,0.085,0.26975,0.4495,0.687,2.42
age,392.0,30.864796,10.200777,21.0,23.0,27.0,36.0,81.0
class,392.0,0.331633,0.471401,0.0,0.0,0.0,1.0,1.0


In [13]:
pip install scikeras

Collecting scikeras
  Downloading scikeras-0.11.0-py3-none-any.whl (27 kB)
Installing collected packages: scikeras
Successfully installed scikeras-0.11.0


In [14]:
import tensorflow as tf
from tensorflow import keras

from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV, KFold

In [18]:
seed=6
np.random.seed(seed)

def create_model():
  model = keras.models.Sequential()
  model.add(keras.layers.Dense(8,input_dim=8,kernel_initializer='normal',activation='relu'))
  model.add(keras.layers.Dense(4,kernel_initializer='normal',activation='relu'))
  model.add(keras.layers.Dense(1,activation='sigmoid'))
  adam = keras.optimizers.Adam(learning_rate=0.01)

  model.compile(optimizer=adam,loss='binary_crossentropy',metrics=['accuracy'])
  return model

model = create_model()
print(model.summary())

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 8)                 72        
                                                                 
 dense_7 (Dense)             (None, 4)                 36        
                                                                 
 dense_8 (Dense)             (None, 1)                 5         
                                                                 
Total params: 113 (452.00 Byte)
Trainable params: 113 (452.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


In [19]:
model = KerasClassifier(model=create_model)

In [20]:
batch_size = [10,20,40]
epochs = [10,50,100]

param_grid = dict(batch_size=batch_size,epochs=epochs)

In [21]:
grid = GridSearchCV(estimator=model,param_grid=param_grid,cv=KFold(
    random_state=seed,shuffle=True),verbose=0)

grid_results=grid.fit(X_standardized,Y)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/5



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 2

In [22]:
print("Best: {0}, **using** {1}".format(grid_results.best_score_,
                                        grid_results.best_params_))
means = grid_results.cv_results_['mean_test_score']
stds=grid_results.cv_results_['std_test_score']
params=grid_results.cv_results_['params']
for mean,stdev,param in zip(means,stds,params):
  print('{0} ({1}) with: {2}'.format(mean,stdev,param))

Best: 0.7833171048360921, **using** {'batch_size': 40, 'epochs': 100}
0.7730282375851996 (0.03755774969235114) with: {'batch_size': 10, 'epochs': 10}
0.7475170399221032 (0.025356454391458855) with: {'batch_size': 10, 'epochs': 50}
0.7576760791950665 (0.03995420426022085) with: {'batch_size': 10, 'epochs': 100}
0.7502434274586174 (0.046703009856836446) with: {'batch_size': 20, 'epochs': 10}
0.7551444336254463 (0.04048746949827606) with: {'batch_size': 20, 'epochs': 50}
0.7630314832846479 (0.04521032150016973) with: {'batch_size': 20, 'epochs': 100}
0.767964946445959 (0.042229580861686905) with: {'batch_size': 40, 'epochs': 10}
0.7399870172022072 (0.052393210102002624) with: {'batch_size': 40, 'epochs': 50}
0.7833171048360921 (0.03318089197696351) with: {'batch_size': 40, 'epochs': 100}


In [23]:
seed = 6
np.random.seed(seed)

def create_model(learn_rate,dropout_rate):
  model=keras.models.Sequential()
  model.add(keras.layers.Dense(8,input_dim=8,kernel_initializer='normal',activation='relu'))
  model.add(keras.layers.Dropout(dropout_rate))
  model.add(keras.layers.Dense(4,kernel_initializer='normal',activation='relu'))
  model.add(keras.layers.Dropout(dropout_rate))
  model.add(keras.layers.Dense(1,activation='sigmoid'))
  adam=keras.optimizers.Adam(learning_rate=learn_rate)

  model.compile(optimizer=adam,loss='binary_crossentropy',metrics=['accuracy'])
  return model



In [24]:
model=KerasClassifier(model=create_model,epochs=50,batch_size=40,verbose=0)

In [25]:
learn_rate=[0.001,0.01,0.1]
dropout_rate=[0.0,0.1,0.2]

param_grid=dict(model__learn_rate=learn_rate,model__dropout_rate=dropout_rate)
grid=GridSearchCV(estimator=model,param_grid=param_grid,
                  cv=KFold(random_state=seed,shuffle=True),verbose=0)

grid_results=grid.fit(X_standardized,Y)

In [26]:
print("Best: {0}, using {1}".format(grid_results.best_score_,
                                        grid_results.best_params_))
means = grid_results.cv_results_['mean_test_score']
stds=grid_results.cv_results_['std_test_score']
params=grid_results.cv_results_['params']
for mean,stdev,param in zip(means,stds,params):
  print('{0} ({1}) with: {2}'.format(mean,stdev,param))

Best: 0.7807530022719897, using {'model__dropout_rate': 0.2, 'model__learn_rate': 0.01}
0.7705615060045439 (0.039252357114530415) with: {'model__dropout_rate': 0.0, 'model__learn_rate': 0.001}
0.7680947744238884 (0.03914345829950368) with: {'model__dropout_rate': 0.0, 'model__learn_rate': 0.01}
0.7422590068159689 (0.04274099413911984) with: {'model__dropout_rate': 0.0, 'model__learn_rate': 0.1}
0.7705939629990262 (0.048680796051818) with: {'model__dropout_rate': 0.1, 'model__learn_rate': 0.001}
0.7654008438818565 (0.05451322042533911) with: {'model__dropout_rate': 0.1, 'model__learn_rate': 0.01}
0.7525478740668614 (0.0392483312014585) with: {'model__dropout_rate': 0.1, 'model__learn_rate': 0.1}
0.7756247971437844 (0.04149504568900226) with: {'model__dropout_rate': 0.2, 'model__learn_rate': 0.001}
0.7807530022719897 (0.05191465575761194) with: {'model__dropout_rate': 0.2, 'model__learn_rate': 0.01}
0.765335929892892 (0.047362862942680724) with: {'model__dropout_rate': 0.2, 'model__learn

In [27]:
seed = 6
np.random.seed(seed)

def create_model(init,activation):
  model=keras.models.Sequential()
  model.add(keras.layers.Dense(8,input_dim=8,kernel_initializer=init,activation=activation))
  model.add(keras.layers.Dropout(0.2))
  model.add(keras.layers.Dense(4,kernel_initializer=init,activation=activation))
  model.add(keras.layers.Dropout(0.2))
  model.add(keras.layers.Dense(1,activation='sigmoid'))
  adam=keras.optimizers.Adam(learning_rate=0.01)

  model.compile(optimizer=adam,loss='binary_crossentropy',metrics=['accuracy'])
  return model



In [28]:
model = KerasClassifier(model=create_model,epochs=50, batch_size=40,verbose=0)

In [29]:
activation = ['softmax','relu','tanh','linear']
init=['uniform','normal','zero']

param_grid=dict(model__activation=activation,model__init=init)

grid=GridSearchCV(estimator=model,param_grid=param_grid,
                  cv=KFold(random_state=seed,shuffle=True),verbose=0)
grid_results = grid.fit(X_standardized,Y)

In [30]:
print("Best: {0}, using {1}".format(grid_results.best_score_,
                                        grid_results.best_params_))
means = grid_results.cv_results_['mean_test_score']
stds=grid_results.cv_results_['std_test_score']
params=grid_results.cv_results_['params']
for mean,stdev,param in zip(means,stds,params):
  print('{0} ({1}) with: {2}'.format(mean,stdev,param))

Best: 0.7884128529698149, using {'model__activation': 'linear', 'model__init': 'normal'}
0.7600778967867574 (0.042887297473601274) with: {'model__activation': 'softmax', 'model__init': 'uniform'}
0.7653683868873742 (0.036204086079335715) with: {'model__activation': 'softmax', 'model__init': 'normal'}
0.7780266147354755 (0.03199844864236677) with: {'model__activation': 'softmax', 'model__init': 'zero'}
0.762934112301201 (0.0539711511171551) with: {'model__activation': 'relu', 'model__init': 'uniform'}
0.762934112301201 (0.03344100703718401) with: {'model__activation': 'relu', 'model__init': 'normal'}
0.6685167153521585 (0.02715733189031009) with: {'model__activation': 'relu', 'model__init': 'zero'}
0.767964946445959 (0.03982585643745321) with: {'model__activation': 'tanh', 'model__init': 'uniform'}
0.7729957805907173 (0.03871446029776078) with: {'model__activation': 'tanh', 'model__init': 'normal'}
0.6685167153521585 (0.02715733189031009) with: {'model__activation': 'tanh', 'model__init

In [31]:
seed = 6
np.random.seed(seed)

def create_model(neuron1,neuron2):
  model=keras.models.Sequential()
  model.add(keras.layers.Dense(neuron1,input_dim=8,kernel_initializer='uniform',activation='linear'))
  model.add(keras.layers.Dropout(0.2))
  model.add(keras.layers.Dense(neuron2,kernel_initializer='uniform',activation='linear'))
  model.add(keras.layers.Dropout(0.2))
  model.add(keras.layers.Dense(1,activation='sigmoid'))
  adam=keras.optimizers.Adam(learning_rate=0.01)

  model.compile(optimizer=adam,loss='binary_crossentropy',metrics=['accuracy'])
  return model



In [32]:
model = KerasClassifier(model=create_model,epochs=50, batch_size=40,verbose=0)

In [33]:
neuron1=[4,8,16]
neuron2=[2,4,8]

param_grid=dict(model__neuron1 = neuron1,model__neuron2= neuron2)

grid=GridSearchCV(estimator=model,param_grid=param_grid,
                  cv=KFold(random_state=seed,shuffle=True),verbose=0)
grid_reusults=grid.fit(X_standardized,Y)


In [34]:
print("Best: {0}, using {1}".format(grid_results.best_score_,
                                        grid_results.best_params_))
means = grid_results.cv_results_['mean_test_score']
stds=grid_results.cv_results_['std_test_score']
params=grid_results.cv_results_['params']
for mean,stdev,param in zip(means,stds,params):
  print('{0} ({1}) with: {2}'.format(mean,stdev,param))

Best: 0.7884128529698149, using {'model__activation': 'linear', 'model__init': 'normal'}
0.7600778967867574 (0.042887297473601274) with: {'model__activation': 'softmax', 'model__init': 'uniform'}
0.7653683868873742 (0.036204086079335715) with: {'model__activation': 'softmax', 'model__init': 'normal'}
0.7780266147354755 (0.03199844864236677) with: {'model__activation': 'softmax', 'model__init': 'zero'}
0.762934112301201 (0.0539711511171551) with: {'model__activation': 'relu', 'model__init': 'uniform'}
0.762934112301201 (0.03344100703718401) with: {'model__activation': 'relu', 'model__init': 'normal'}
0.6685167153521585 (0.02715733189031009) with: {'model__activation': 'relu', 'model__init': 'zero'}
0.767964946445959 (0.03982585643745321) with: {'model__activation': 'tanh', 'model__init': 'uniform'}
0.7729957805907173 (0.03871446029776078) with: {'model__activation': 'tanh', 'model__init': 'normal'}
0.6685167153521585 (0.02715733189031009) with: {'model__activation': 'tanh', 'model__init

In [35]:
import numpy as np
y_pred = grid.predict(X_standardized)

In [36]:
print(y_pred.shape)

(392,)


In [37]:
print(y_pred[:5])

[0 1 0 1 1]


In [38]:
from sklearn.metrics import classification_report, accuracy_score
print(accuracy_score(Y,y_pred))
print(classification_report(Y,y_pred))

0.7882653061224489
              precision    recall  f1-score   support

           0       0.81      0.89      0.85       262
           1       0.73      0.58      0.64       130

    accuracy                           0.79       392
   macro avg       0.77      0.74      0.75       392
weighted avg       0.78      0.79      0.78       392



In [39]:
example = df.iloc[1]
print(example)

n_pregnant                   0.000
glucose_concentration      137.000
blood_pressure (mm Hg)      40.000
skin_thickness (mm)         35.000
serum_insulin (mu U/ml)    168.000
BMI                         43.100
pedigree_function            2.288
age                         33.000
class                        1.000
Name: 4, dtype: float64


In [40]:
prediction = grid.predict(X_standardized[1].reshape(1,-1))
print(prediction)

[1]
