# Load Dataset

In [1]:
import pandas as pd
df = pd.read_csv('diabetics.csv')
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6.0,148.0,72.0,35.0,0.0,33.6,0.627,50.0,1
1,1.0,85.0,66.0,29.0,0.0,26.6,0.351,31.0,0
2,8.0,183.0,64.0,0.0,0.0,23.3,0.672,32.0,1
3,1.0,89.0,66.0,23.0,94.0,28.1,0.167,21.0,0
4,0.0,137.0,40.0,35.0,168.0,43.1,2.288,33.0,1
...,...,...,...,...,...,...,...,...,...
763,10.0,101.0,76.0,48.0,180.0,32.9,0.171,63.0,0
764,2.0,122.0,70.0,27.0,0.0,36.8,0.340,27.0,0
765,5.0,121.0,72.0,23.0,112.0,26.2,0.245,30.0,0
766,1.0,126.0,60.0,0.0,0.0,30.1,0.349,47.0,1


In [2]:
df.dtypes

Pregnancies                 float64
Glucose                     float64
BloodPressure               float64
SkinThickness               float64
Insulin                     float64
BMI                         float64
DiabetesPedigreeFunction    float64
Age                         float64
Outcome                       int64
dtype: object

In [3]:
df.shape

(768, 9)

# Data Preparation

***Find missing values***

In [4]:
df.isna().sum()

Pregnancies                  6
Glucose                     11
BloodPressure                8
SkinThickness               20
Insulin                      6
BMI                         11
DiabetesPedigreeFunction     5
Age                          7
Outcome                      0
dtype: int64

***Handle missing values***

Replace all the missing data with the mean, median, or mode of the column

In [5]:
df['Pregnancies'].fillna( df['Pregnancies'].median(), inplace = True )
df['Glucose'].fillna( df['Glucose'].mean(), inplace = True )
df['BloodPressure'].fillna( df['BloodPressure'].mean(), inplace = True )
df['SkinThickness'].fillna( df['SkinThickness'].mean(), inplace = True)
df['Insulin'].fillna( df['Insulin'].mean(), inplace = True )
df['BMI'].fillna( df['BMI'].mean(), inplace = True)
df['DiabetesPedigreeFunction'].fillna( df['DiabetesPedigreeFunction'].mean(), inplace= True)
df['Age'].fillna( df['Age'].median(), inplace = True )


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Pregnancies'].fillna( df['Pregnancies'].median(), inplace = True )
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['Glucose'].fillna( df['Glucose'].mean(), inplace = True )
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate

In [6]:
df.isna().sum()

Pregnancies                 0
Glucose                     0
BloodPressure               0
SkinThickness               0
Insulin                     0
BMI                         0
DiabetesPedigreeFunction    0
Age                         0
Outcome                     0
dtype: int64

In [7]:
df.corr()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
Pregnancies,1.0,0.125718,0.153817,-0.071225,-0.075511,0.009628,-0.034719,0.540975,0.218446
Glucose,0.125718,1.0,0.14956,0.05851,0.320424,0.217184,0.135424,0.260627,0.46941
BloodPressure,0.153817,0.14956,1.0,0.22251,0.088187,0.280725,0.041373,0.235469,0.060958
SkinThickness,-0.071225,0.05851,0.22251,1.0,0.430496,0.389469,0.184648,-0.10998,0.072988
Insulin,-0.075511,0.320424,0.088187,0.430496,1.0,0.199997,0.143893,-0.049707,0.138975
BMI,0.009628,0.217184,0.280725,0.389469,0.199997,1.0,0.143086,0.032303,0.287702
DiabetesPedigreeFunction,-0.034719,0.135424,0.041373,0.184648,0.143893,0.143086,1.0,0.031051,0.175332
Age,0.540975,0.260627,0.235469,-0.10998,-0.049707,0.032303,0.031051,1.0,0.23733
Outcome,0.218446,0.46941,0.060958,0.072988,0.138975,0.287702,0.175332,0.23733,1.0


In [8]:
import numpy as np

def remove_highly_correlated_features(df, threshold=0.8):
    corr_matrix = df.corr().abs()  # Absolute correlation matrix
    upper = corr_matrix.where(np.triu(np.ones(corr_matrix.shape), k=1).astype(bool))
    to_drop = [column for column in upper.columns if any(upper[column] > threshold)]
    return df.drop(columns=to_drop)

# Remove features with correlation > 0.8
df_filtered = remove_highly_correlated_features(df, threshold=0.8)

In [9]:
df = df_filtered
df

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6.0,148.0,72.0,35.0,0.0,33.6,0.627,50.0,1
1,1.0,85.0,66.0,29.0,0.0,26.6,0.351,31.0,0
2,8.0,183.0,64.0,0.0,0.0,23.3,0.672,32.0,1
3,1.0,89.0,66.0,23.0,94.0,28.1,0.167,21.0,0
4,0.0,137.0,40.0,35.0,168.0,43.1,2.288,33.0,1
...,...,...,...,...,...,...,...,...,...
763,10.0,101.0,76.0,48.0,180.0,32.9,0.171,63.0,0
764,2.0,122.0,70.0,27.0,0.0,36.8,0.340,27.0,0
765,5.0,121.0,72.0,23.0,112.0,26.2,0.245,30.0,0
766,1.0,126.0,60.0,0.0,0.0,30.1,0.349,47.0,1


# Data Seperation

***Seperate input and output from dataset***

In [10]:
x = df.iloc[:,:-1].values              # [row, col]    : means full row    :-1 means except last col
x            # input

array([[  6.   , 148.   ,  72.   , ...,  33.6  ,   0.627,  50.   ],
       [  1.   ,  85.   ,  66.   , ...,  26.6  ,   0.351,  31.   ],
       [  8.   , 183.   ,  64.   , ...,  23.3  ,   0.672,  32.   ],
       ...,
       [  5.   , 121.   ,  72.   , ...,  26.2  ,   0.245,  30.   ],
       [  1.   , 126.   ,  60.   , ...,  30.1  ,   0.349,  47.   ],
       [  1.   ,  93.   ,  70.   , ...,  30.4  ,   0.315,  23.   ]])

In [11]:
y = df.iloc[:,-1].values
y             # output

array([1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1,
       0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0,
       1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0,
       1, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1,

# Data Splitting

In [12]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split( x , y, test_size=0.30, random_state = 52)     # test_size = .3 ie, 30% data
x_train

array([[ 12.   , 151.   ,  70.   , ...,  41.8  ,   0.742,  38.   ],
       [  3.   , 158.   ,  70.   , ...,  35.5  ,   0.344,  35.   ],
       [  6.   , 154.   ,  74.   , ...,  29.3  ,   0.839,  39.   ],
       ...,
       [  4.   , 114.   ,  65.   , ...,  21.9  ,   0.432,  37.   ],
       [  3.   ,  87.   ,  60.   , ...,  21.8  ,   0.444,  21.   ],
       [  6.   ,  98.   ,  58.   , ...,  34.   ,   0.43 ,  43.   ]])

In [13]:
x_test

array([[3.00e+00, 1.80e+02, 6.40e+01, ..., 3.40e+01, 2.71e-01, 2.60e+01],
       [3.00e+00, 9.90e+01, 8.00e+01, ..., 1.93e+01, 2.84e-01, 3.00e+01],
       [4.00e+00, 1.48e+02, 6.00e+01, ..., 3.09e+01, 1.50e-01, 2.90e+01],
       ...,
       [3.00e+00, 1.70e+02, 6.40e+01, ..., 3.45e+01, 3.56e-01, 3.00e+01],
       [2.00e+00, 1.14e+02, 6.80e+01, ..., 2.87e+01, 9.20e-02, 2.50e+01],
       [3.00e+00, 1.07e+02, 6.20e+01, ..., 2.29e+01, 6.78e-01, 2.30e+01]])

In [14]:
y_train

array([1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0,
       1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0,
       0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1,
       0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1,
       1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 0,
       0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1,

In [15]:
y_test

array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1,
       1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1,
       0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1,
       1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1], dtype=int64)

# Data Normalization

***Normalization technique used - standard scalar***

**z = (x-u)/s ***  

x is mean of training data

u is exact value

s is std deviation of training data

Feature scaling is a common preprocessing step in machine learning to ensure that all features have the same scale.


In [16]:
from sklearn.preprocessing import StandardScaler    # another one - MinMaxScalar
scalar = StandardScaler()
scalar.fit(x_train)
x_train = scalar.transform(x_train)      # fit and transform can be together performed. scalar.fit_transform(x_train)
x_test = scalar.transform(x_test)


# Model creation

In [17]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

In [None]:
model = Sequential()
model.add(Dense(32, activation = 'relu', input_dim = 8))
model.add(Dropout(0.5))  # Increased from 0.2/0.3

model.add(Dense(1, activation = 'sigmoid'))
model.compile(optimizer = 'Adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_test,y_test) )

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [53]:
model = Sequential()
model.add(Dense(32, activation = 'relu', input_dim = 8))
model.add(Dropout(0.5))  # Increased from 0.2/0.3

model.add(Dense(1, activation = 'sigmoid'))
model.compile(optimizer = 'Adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
model.fit(x_train, y_train, batch_size=32, epochs=10, validation_data=(x_test,y_test) )

Epoch 1/10


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.6358 - loss: 0.6537 - val_accuracy: 0.6883 - val_loss: 0.5865
Epoch 2/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6445 - loss: 0.6492 - val_accuracy: 0.6883 - val_loss: 0.5639
Epoch 3/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.6863 - loss: 0.6025 - val_accuracy: 0.7056 - val_loss: 0.5477
Epoch 4/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6762 - loss: 0.6058 - val_accuracy: 0.7273 - val_loss: 0.5343
Epoch 5/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7028 - loss: 0.6027 - val_accuracy: 0.7489 - val_loss: 0.5244
Epoch 6/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.7061 - loss: 0.5883 - val_accuracy: 0.7446 - val_loss: 0.5161
Epoch 7/10
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1ceaf71fb50>

# Select good optimizer

In [None]:
!pip install keras-tuner --upgrade


In [22]:
import keras_tuner as kt
import keras

In [92]:
def build_model(hp):

  model = Sequential()
  model.add(Dense(32, activation = 'relu', input_dim = 8))
  model.add(Dense(1, activation = 'sigmoid'))

  optimizer = hp.Choice('optimizer', values=['adam', 'sgd', 'rmsprop', 'adadelta'])
  model.compile(optimizer = optimizer, loss = 'binary_crossentropy', metrics = ['accuracy'])

  return model

In [93]:
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5, directory='my_dir', project_name='optimizer')

Reloading Tuner from my_dir\optimizer\tuner0.json


In [94]:
tuner.search(x_train, y_train, epochs=5, validation_data=(x_test, y_test))


In [95]:
best_optimizer = tuner.get_best_hyperparameters()[0].values
best_optimizer

{'optimizer': 'adam'}

In [96]:
model = tuner.get_best_models(num_models=1)[0]


In [97]:
model.summary()

In [98]:
model.fit(x_train, y_train, batch_size=64, epochs=60, validation_data=(x_test,y_test) )

Epoch 1/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 20ms/step - accuracy: 0.7340 - loss: 0.5462 - val_accuracy: 0.7792 - val_loss: 0.5177
Epoch 2/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7292 - loss: 0.5374 - val_accuracy: 0.7792 - val_loss: 0.5100
Epoch 3/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7639 - loss: 0.5058 - val_accuracy: 0.7879 - val_loss: 0.5038
Epoch 4/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.7539 - loss: 0.5060 - val_accuracy: 0.7965 - val_loss: 0.4987
Epoch 5/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7593 - loss: 0.4977 - val_accuracy: 0.8009 - val_loss: 0.4941
Epoch 6/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7578 - loss: 0.5050 - val_accuracy: 0.8052 - val_loss: 0.4904
Epoch 7/60
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0

<keras.src.callbacks.history.History at 0x1ceb1cb23d0>

In [30]:
from sklearn.metrics import classification_report
y_pred = (model.predict(x_test) > 0.5).astype(int)
print(classification_report(y_test, y_pred))

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
              precision    recall  f1-score   support

           0       0.81      0.85      0.83       155
           1       0.66      0.59      0.62        76

    accuracy                           0.77       231
   macro avg       0.74      0.72      0.73       231
weighted avg       0.76      0.77      0.76       231



# Decide no: of nodes in layer

In [31]:
def build_model(hp):

  model = Sequential()

  units = hp.Int('units', min_value=8, max_value=128, step=8)
  model.add(Dense(units=units, activation = 'relu', input_dim = 8))
  model.add(Dense(1, activation = 'sigmoid'))

  model.compile(optimizer='rmsprop', loss = 'binary_crossentropy', metrics = ['accuracy'])

  return model

In [32]:
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5, directory='my_dir', project_name='num_nodes')

Reloading Tuner from my_dir\num_nodes\tuner0.json


In [33]:
tuner.search(x_train, y_train, epochs=5, validation_data=(x_test, y_test))


In [34]:
best_node_num = tuner.get_best_hyperparameters()[0].values
best_node_num

{'units': 104}

In [35]:
model = tuner.get_best_models(num_models=1)[0]

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))


In [36]:
model.fit(x_train, y_train, batch_size=32, epochs=100, validation_data=(x_test,y_test), initial_epoch=5 )

Epoch 6/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.7556 - loss: 0.4829 - val_accuracy: 0.7792 - val_loss: 0.4776
Epoch 7/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7845 - loss: 0.4417 - val_accuracy: 0.7792 - val_loss: 0.4770
Epoch 8/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7989 - loss: 0.4340 - val_accuracy: 0.7792 - val_loss: 0.4765
Epoch 9/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7836 - loss: 0.4381 - val_accuracy: 0.7749 - val_loss: 0.4766
Epoch 10/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7614 - loss: 0.4897 - val_accuracy: 0.7749 - val_loss: 0.4786
Epoch 11/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7857 - loss: 0.4660 - val_accuracy: 0.7706 - val_loss: 0.4791
Epoch 12/100
[1m17/17[0m [32

<keras.src.callbacks.history.History at 0x1cea80d94d0>

# Decide no: of layers

In [37]:
def build_model(hp):

  model = Sequential()

  model.add(Dense(88, activation = 'relu', input_dim = 8))

  for i in range(hp.Int('num_layers', min_value=1, max_value=10)):
    model.add(Dense(88, activation = 'relu'))

  model.add(Dense(1, activation = 'sigmoid'))

  model.compile(optimizer='rmsprop', loss = 'binary_crossentropy', metrics = ['accuracy'])

  return model

In [38]:
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5, directory='my_dir', project_name='num_layers')

Reloading Tuner from my_dir\num_layers\tuner0.json


In [39]:
tuner.search(x_train, y_train, epochs=5, validation_data=(x_test, y_test))


In [40]:
best_layer_num = tuner.get_best_hyperparameters()[0].values
best_layer_num

{'num_layers': 4}

In [41]:
model = tuner.get_best_models(num_models=1)[0]
model.fit(x_train, y_train, batch_size=32, epochs=100, validation_data=(x_test,y_test), initial_epoch=5 )

Epoch 6/100


  saveable.load_own_variables(weights_store.get(inner_path))


[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.7539 - loss: 0.4960 - val_accuracy: 0.7619 - val_loss: 0.4856
Epoch 7/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7858 - loss: 0.4437 - val_accuracy: 0.7489 - val_loss: 0.4977
Epoch 8/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7892 - loss: 0.4174 - val_accuracy: 0.7446 - val_loss: 0.4961
Epoch 9/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7831 - loss: 0.4248 - val_accuracy: 0.7273 - val_loss: 0.5179
Epoch 10/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8023 - loss: 0.4228 - val_accuracy: 0.7229 - val_loss: 0.5282
Epoch 11/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8120 - loss: 0.4074 - val_accuracy: 0.7706 - val_loss: 0.5388
Epoch 12/100
[1m17/17[0m [32m━━━━━━━━━━━

<keras.src.callbacks.history.History at 0x1cea82157d0>

In [42]:
def build_model(hp):
    model = keras.Sequential()
    counter = 0
    for i in range(hp.Int('num_layers', min_value=1, max_value=10)):
        if counter == 0:
            model.add(
                  Dense(
                    units=hp.Int('units'+str(i), min_value=8, max_value=128, step=8),

                    activation=hp.Choice('activation'+str(i), values=['relu', 'tanh', 'sigmoid']),

                    input_dim=8
                      )
                    )
            model.add(Dropout(hp.Choice('dropout' + str(i), values=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])))

        else:
            model.add(
                  Dense(
                    units=hp.Int('units'+str(i), min_value=8, max_value=128, step=8),
                    activation=hp.Choice('activation'+str(i), values=['relu', 'tanh', 'sigmoid'])
                      )
                    )
            model.add(Dropout(hp.Choice('dropout' + str(i), values=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9])))


        counter += 1
    model.add(Dense(1, activation='sigmoid'))
    model.compile(
        optimizer=hp.Choice('optimizer', values=['adam', 'sgd', 'rmsprop', 'adadelta', 'nadam']),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    return model

In [43]:
tuner = kt.RandomSearch(build_model, objective='val_accuracy', max_trials=5, directory='my_dir', project_name='full1')

Reloading Tuner from my_dir\full1\tuner0.json


In [44]:
tuner.search(x_train, y_train, epochs=5, validation_data=(x_test, y_test))


In [45]:
best = tuner.get_best_hyperparameters()[0].values
best

{'num_layers': 3,
 'units0': 32,
 'activation0': 'sigmoid',
 'dropout0': 0.9,
 'optimizer': 'nadam',
 'units1': 56,
 'activation1': 'sigmoid',
 'dropout1': 0.9,
 'units2': 48,
 'activation2': 'relu',
 'dropout2': 0.7,
 'units3': 120,
 'activation3': 'relu',
 'dropout3': 0.5,
 'units4': 48,
 'activation4': 'sigmoid',
 'dropout4': 0.6,
 'units5': 56,
 'activation5': 'tanh',
 'dropout5': 0.4,
 'units6': 24,
 'activation6': 'relu',
 'dropout6': 0.2,
 'units7': 8,
 'activation7': 'relu',
 'dropout7': 0.4}

In [46]:
model = tuner.get_best_models(num_models=1)[0]

  saveable.load_own_variables(weights_store.get(inner_path))


In [47]:
model.fit(x_train, y_train, batch_size=32, epochs=100, validation_data=(x_test,y_test), initial_epoch=6 )

Epoch 7/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - accuracy: 0.4881 - loss: 1.3341 - val_accuracy: 0.6710 - val_loss: 0.6494
Epoch 8/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.4958 - loss: 1.2307 - val_accuracy: 0.6710 - val_loss: 0.6392
Epoch 9/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5442 - loss: 1.0075 - val_accuracy: 0.6710 - val_loss: 0.6328
Epoch 10/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.5129 - loss: 1.1162 - val_accuracy: 0.6710 - val_loss: 0.6334
Epoch 11/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5553 - loss: 0.9834 - val_accuracy: 0.6710 - val_loss: 0.6366
Epoch 12/100
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5777 - loss: 0.9424 - val_accuracy: 0.6710 - val_loss: 0.6462
Epoch 13/100
[1m17/17[0m [3

<keras.src.callbacks.history.History at 0x1cea9caf050>

In [48]:
df['Outcome'].value_counts()

Outcome
0    500
1    268
Name: count, dtype: int64

In [49]:
!pip install --upgrade scikit-learn imbalanced-learn





[notice] A new release of pip available: 22.3 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [50]:
from imblearn.over_sampling import SMOTE

# Apply SMOTE only to the training data (to avoid data leakage)
smote = SMOTE(sampling_strategy='minority', random_state=42)
x_train_resampled, y_train_resampled = smote.fit_resample(x_train, y_train)

# Verify new class distribution
import numpy as np
unique, counts = np.unique(y_train_resampled, return_counts=True)
print("Resampled class distribution:", dict(zip(unique, counts)))

Resampled class distribution: {0: 345, 1: 345}


In [51]:
model.fit(x_train_resampled, y_train_resampled, batch_size=32, epochs=100, validation_data=(x_test,y_test), initial_epoch=6 )

Epoch 7/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5236 - loss: 0.7136 - val_accuracy: 0.6710 - val_loss: 0.6491
Epoch 8/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4838 - loss: 0.7220 - val_accuracy: 0.6710 - val_loss: 0.6524
Epoch 9/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5143 - loss: 0.7018 - val_accuracy: 0.6710 - val_loss: 0.6556
Epoch 10/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4770 - loss: 0.7220 - val_accuracy: 0.6710 - val_loss: 0.6585
Epoch 11/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5014 - loss: 0.7030 - val_accuracy: 0.6710 - val_loss: 0.6595
Epoch 12/100
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.5016 - loss: 0.7002 - val_accuracy: 0.6710 - val_loss: 0.6609
Epoch 13/100
[1m22/22[0m [3

<keras.src.callbacks.history.History at 0x1cea42824d0>

In [52]:
from sklearn.metrics import classification_report
y_pred = (model.predict(x_test) > 0.5).astype(int)
print(classification_report(y_test, y_pred))

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
              precision    recall  f1-score   support

           0       0.67      1.00      0.80       155
           1       0.00      0.00      0.00        76

    accuracy                           0.67       231
   macro avg       0.34      0.50      0.40       231
weighted avg       0.45      0.67      0.54       231



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
