# Load Dataset

In [1]:
import pandas as pd
df = pd.read_csv('diabetics.csv')
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6.0,148.0,72.0,35.0,0.0,33.6,0.627,50.0,1
1,1.0,85.0,66.0,29.0,0.0,26.6,0.351,31.0,0
2,8.0,183.0,64.0,0.0,0.0,23.3,0.672,32.0,1
3,1.0,89.0,66.0,23.0,94.0,28.1,0.167,21.0,0
4,0.0,137.0,40.0,35.0,168.0,43.1,2.288,33.0,1
...,...,...,...,...,...,...,...,...,...
763,10.0,101.0,76.0,48.0,180.0,32.9,0.171,63.0,0
764,2.0,122.0,70.0,27.0,0.0,36.8,0.340,27.0,0
765,5.0,121.0,72.0,23.0,112.0,26.2,0.245,30.0,0
766,1.0,126.0,60.0,0.0,0.0,30.1,0.349,47.0,1


In [2]:
df.dtypes

Pregnancies                 float64
Glucose                     float64
BloodPressure               float64
SkinThickness               float64
Insulin                     float64
BMI                         float64
DiabetesPedigreeFunction    float64
Age                         float64
Outcome                       int64
dtype: object

In [3]:
df.shape

(768, 9)

# Data Preparation

***Find missing values***

In [4]:
df.isna().sum()

Pregnancies                  6
Glucose                     11
BloodPressure                8
SkinThickness               20
Insulin                      6
BMI                         11
DiabetesPedigreeFunction     5
Age                          7
Outcome                      0
dtype: int64

***Handle missing values***

Replace all the missing data with the mean, median, or mode of the column

In [5]:
df['Pregnancies'].fillna( df['Pregnancies'].median(), inplace = True )
df['Glucose'].fillna( df['Glucose'].mean(), inplace = True )
df['BloodPressure'].fillna( df['BloodPressure'].mean(), inplace = True )
df['SkinThickness'].fillna( df['SkinThickness'].mean(), inplace = True)
df['Insulin'].fillna( df['Insulin'].mean(), inplace = True )
df['BMI'].fillna( df['BMI'].mean(), inplace = True)
df['DiabetesPedigreeFunction'].fillna( df['DiabetesPedigreeFunction'].mean(), inplace= True)
df['Age'].fillna( df['Age'].median(), inplace = True )


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Pregnancies'].fillna( df['Pregnancies'].median(), inplace = True )
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Glucose'].fillna( df['Glucose'].mean(), inplace = True )
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate

In [6]:
df.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

# Data Seperation

***Seperate input and output from dataset***

In [7]:
x = df.iloc[:,:-1].values              # [row, col]    : means full row    :-1 means except last col
x            # input

array([[  6.   , 148.   ,  72.   , ...,  33.6  ,   0.627,  50.   ],
       [  1.   ,  85.   ,  66.   , ...,  26.6  ,   0.351,  31.   ],
       [  8.   , 183.   ,  64.   , ...,  23.3  ,   0.672,  32.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,  26.2  ,   0.245,  30.   ],
       [  1.   , 126.   ,  60.   , ...,  30.1  ,   0.349,  47.   ],
       [  1.   ,  93.   ,  70.   , ...,  30.4  ,   0.315,  23.   ]])

In [8]:
y = df.iloc[:,-1].values
y             # output

array([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
       1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,

# Data Splitting

In [9]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split( x , y, test_size=0.30, random_state = 52)     # test_size = .3 ie, 30% data
x_train

array([[ 12.   , 151.   ,  70.   , ...,  41.8  ,   0.742,  38.   ],
       [  3.   , 158.   ,  70.   , ...,  35.5  ,   0.344,  35.   ],
       [  6.   , 154.   ,  74.   , ...,  29.3  ,   0.839,  39.   ],
       ...,
       [  4.   , 114.   ,  65.   , ...,  21.9  ,   0.432,  37.   ],
       [  3.   ,  87.   ,  60.   , ...,  21.8  ,   0.444,  21.   ],
       [  6.   ,  98.   ,  58.   , ...,  34.   ,   0.43 ,  43.   ]])

# Data Normalization

***Normalization technique used - standard scalar***

**z = (x-u)/s ***  

x is mean of training data

u is exact value

s is std deviation of training data

Feature scaling is a common preprocessing step in machine learning to ensure that all features have the same scale.


In [10]:
from sklearn.preprocessing import StandardScaler    # another one - MinMaxScalar
scalar = StandardScaler()
scalar.fit(x_train)
x_train = scalar.transform(x_train)      # fit and transform can be together performed. scalar.fit_transform(x_train)
x_test = scalar.transform(x_test)


# Model creation

In [11]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [12]:
model = Sequential()
model.add(Dense(32, activation = 'relu', input_dim = 8))
model.add(Dropout(0.5))  # Increased from 0.2/0.3

model.add(Dense(1, activation = 'sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [14]:
model.compile(optimizer = 'Adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=100, validation_data=(x_test,y_test))

Epoch 1/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.4014 - loss: 0.8987 - val_accuracy: 0.3550 - val_loss: 0.8404
Epoch 2/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.4329 - loss: 0.8208 - val_accuracy: 0.4589 - val_loss: 0.7727
Epoch 3/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5004 - loss: 0.7420 - val_accuracy: 0.5671 - val_loss: 0.7211
Epoch 4/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5667 - loss: 0.7153 - val_accuracy: 0.6494 - val_loss: 0.6790
Epoch 5/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5675 - loss: 0.7129 - val_accuracy: 0.7013 - val_loss: 0.6438
Epoch 6/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6282 - loss: 0.6666 - val_accuracy: 0.7056 - val_loss: 0.6164
Epoch 7/100
[1m17/17[0m [32m━━

<keras.src.callbacks.history.History at 0x19158736090>

# Select good optimizer

In [15]:
!pip install keras-tuner --upgrade





[notice] A new release of pip available: 22.3 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [16]:
import keras_tuner as kt
import keras

In [17]:
def build_model(hp):

  model = Sequential()
  model.add(Dense(32, activation = 'relu', input_dim = 8))
  model.add(Dense(1, activation = 'sigmoid'))

  optimizer = hp.Choice('optimizer', values=['adam', 'sgd', 'rmsprop', 'adadelta'])
  model.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])

  return model

In [18]:
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5, directory='my_dir', project_name='optimizer')

Reloading Tuner from my_dir\optimizer\tuner0.json


In [19]:
tuner.search(x_train, y_train, epochs=5, validation_data=(x_test, y_test))


In [20]:
best_optimizer = tuner.get_best_hyperparameters()[0].values
best_optimizer

{'optimizer': 'rmsprop'}

In [21]:
model = tuner.get_best_models(num_models=1)[0]





  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


In [22]:
model.summary()

In [23]:
model.fit(x_train, y_train, batch_size=32, epochs=100, validation_data=(x_test,y_test), initial_epoch=5 )

Epoch 6/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.7367 - loss: 0.5552 - val_accuracy: 0.7706 - val_loss: 0.5196
Epoch 7/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7880 - loss: 0.5091 - val_accuracy: 0.7792 - val_loss: 0.5070
Epoch 8/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7847 - loss: 0.4739 - val_accuracy: 0.7792 - val_loss: 0.4983
Epoch 9/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8035 - loss: 0.4655 - val_accuracy: 0.7792 - val_loss: 0.4919
Epoch 10/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8083 - loss: 0.4555 - val_accuracy: 0.7835 - val_loss: 0.4872
Epoch 11/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7655 - loss: 0.4983 - val_accuracy: 0.7835 - val_loss: 0.4845
Epoch 12/100
[1m17/17[0m [32

<keras.src.callbacks.history.History at 0x1915a49f9d0>

# Decide no: of nodes in layer

In [24]:
def build_model(hp):

  model = Sequential()

  units = hp.Int('units', min_value=8, max_value=128, step=8)
  model.add(Dense(units=units, activation = 'relu', input_dim = 8))
  model.add(Dense(1, activation = 'sigmoid'))

  model.compile(optimizer='rmsprop', loss = 'binary_crossentropy', metrics = ['accuracy'])

  return model

In [25]:
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5, directory='my_dir', project_name='num_nodes')

In [28]:
tuner.search(x_train, y_train, epochs=5, validation_data=(x_test, y_test))


Trial 5 Complete [00h 00m 01s]
val_accuracy: 0.7705627679824829

Best val_accuracy So Far: 0.7922077775001526
Total elapsed time: 00h 00m 06s


In [29]:
best_node_num = tuner.get_best_hyperparameters()[0].values
best_node_num

{'units': 64}

In [30]:
model = tuner.get_best_models(num_models=1)[0]

In [31]:
model.fit(x_train, y_train, batch_size=32, epochs=100, validation_data=(x_test,y_test), initial_epoch=5 )

Epoch 6/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.7943 - loss: 0.4881 - val_accuracy: 0.7879 - val_loss: 0.4957
Epoch 7/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7835 - loss: 0.4878 - val_accuracy: 0.7835 - val_loss: 0.4881
Epoch 8/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7761 - loss: 0.4603 - val_accuracy: 0.7879 - val_loss: 0.4840
Epoch 9/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7950 - loss: 0.4539 - val_accuracy: 0.7879 - val_loss: 0.4810
Epoch 10/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7858 - loss: 0.4535 - val_accuracy: 0.7879 - val_loss: 0.4814
Epoch 11/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7693 - loss: 0.4757 - val_accuracy: 0.7792 - val_loss: 0.4806
Epoch 12/100
[1m17/17[0m [32

<keras.src.callbacks.history.History at 0x1915eadf190>

# Decide no: of layers

In [32]:
def build_model(hp):

  model = Sequential()

  model.add(Dense(88, activation = 'relu', input_dim = 8))

  for i in range(hp.Int('num_layers', min_value=1, max_value=10)):
    model.add(Dense(88, activation = 'relu'))

  model.add(Dense(1, activation = 'sigmoid'))

  model.compile(optimizer='rmsprop', loss = 'binary_crossentropy', metrics = ['accuracy'])

  return model

In [33]:
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5, directory='my_dir', project_name='num_layers')

In [34]:
tuner.search(x_train, y_train, epochs=5, validation_data=(x_test, y_test))


Trial 5 Complete [00h 00m 02s]
val_accuracy: 0.7705627679824829

Best val_accuracy So Far: 0.7878788113594055
Total elapsed time: 00h 00m 08s


In [35]:
best_layer_num = tuner.get_best_hyperparameters()[0].values
best_layer_num

{'num_layers': 6}

In [36]:
model = tuner.get_best_models(num_models=1)[0]
model.fit(x_train, y_train, batch_size=32, epochs=100, validation_data=(x_test,y_test), initial_epoch=5 )

Epoch 6/100


  saveable.load_own_variables(weights_store.get(inner_path))


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 13ms/step - accuracy: 0.7720 - loss: 0.4473 - val_accuracy: 0.7662 - val_loss: 0.4805
Epoch 7/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8127 - loss: 0.4206 - val_accuracy: 0.7792 - val_loss: 0.4828
Epoch 8/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7984 - loss: 0.4119 - val_accuracy: 0.7056 - val_loss: 0.5256
Epoch 9/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8173 - loss: 0.3822 - val_accuracy: 0.7403 - val_loss: 0.5291
Epoch 10/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.8444 - loss: 0.3440 - val_accuracy: 0.7879 - val_loss: 0.5180
Epoch 11/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8165 - loss: 0.3778 - val_accuracy: 0.7403 - val_loss: 0.5278
Epoch 12/100
[1m17/17[0m [32m━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x19160fad8d0>

In [37]:
def build_model(hp):
    model = keras.Sequential()
    counter = 0
    for i in range(hp.Int('num_layers', min_value=1, max_value=10)):
        if counter == 0:
            model.add(
                  Dense(
                    units=hp.Int('units'+str(i), min_value=8, max_value=128, step=8),

                    activation=hp.Choice('activation'+str(i), values=['relu', 'tanh', 'sigmoid']),

                    input_dim=8
                      )
                    )
            model.add(Dropout(hp.Choice('dropout' + str(i), values=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])))

        else:
            model.add(
                  Dense(
                    units=hp.Int('units'+str(i), min_value=8, max_value=128, step=8),
                    activation=hp.Choice('activation'+str(i), values=['relu', 'tanh', 'sigmoid'])
                      )
                    )
            model.add(Dropout(hp.Choice('dropout' + str(i), values=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])))


        counter += 1
    model.add(Dense(1, activation='sigmoid'))
    model.compile(
        optimizer=hp.Choice('optimizer', values=['adam', 'sgd', 'rmsprop', 'adadelta', 'nadam']),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model

In [38]:
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5, directory='my_dir', project_name='full1')

In [39]:
tuner.search(x_train, y_train, epochs=5, validation_data=(x_test, y_test))


Trial 5 Complete [00h 00m 02s]
val_accuracy: 0.6709956526756287

Best val_accuracy So Far: 0.761904776096344
Total elapsed time: 00h 00m 12s


In [40]:
best = tuner.get_best_hyperparameters()[0].values
best

{'num_layers': 2,
 'units0': 80,
 'activation0': 'tanh',
 'dropout0': 0.4,
 'optimizer': 'adam',
 'units1': 40,
 'activation1': 'relu',
 'dropout1': 0.3,
 'units2': 56,
 'activation2': 'tanh',
 'dropout2': 0.4,
 'units3': 112,
 'activation3': 'relu',
 'dropout3': 0.8,
 'units4': 32,
 'activation4': 'sigmoid',
 'dropout4': 0.4,
 'units5': 88,
 'activation5': 'sigmoid',
 'dropout5': 0.2,
 'units6': 72,
 'activation6': 'tanh',
 'dropout6': 0.1}

In [41]:
model = tuner.get_best_models(num_models=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [42]:
model.fit(x_train, y_train, batch_size=32, epochs=100, validation_data=(x_test,y_test), initial_epoch=6 )

Epoch 7/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.7259 - loss: 0.5321 - val_accuracy: 0.7576 - val_loss: 0.4939
Epoch 8/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7645 - loss: 0.4915 - val_accuracy: 0.7489 - val_loss: 0.4839
Epoch 9/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7563 - loss: 0.4921 - val_accuracy: 0.7706 - val_loss: 0.4780
Epoch 10/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7249 - loss: 0.5175 - val_accuracy: 0.7662 - val_loss: 0.4759
Epoch 11/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7986 - loss: 0.4399 - val_accuracy: 0.7576 - val_loss: 0.4784
Epoch 12/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7745 - loss: 0.4556 - val_accuracy: 0.7489 - val_loss: 0.4817
Epoch 13/100
[1m17/17[0m [3

<keras.src.callbacks.history.History at 0x191680a8810>