# Hyperparameter Tuning for Neural Networks - Step by Step

## What we'll learn:
1. **What are hyperparameters?** - Settings we choose before training (like learning rate, number of layers)
2. **Why tune them?** - To get the best performance from our model
3. **How to automate the process** - Using Keras Tuner to try different combinations
4. **Key hyperparameters to tune:**
   - Optimizer (Adam, SGD, RMSprop)
   - Number of hidden layers
   - Number of neurons in each layer
   - Learning rate
   - Batch size

Let's start!

In [2]:
import pandas as pd 
import numpy as np

In [3]:
df=pd.read_csv('diabetes.csv')

In [4]:
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [5]:
df.corr()['Outcome']

Pregnancies                 0.221898
Glucose                     0.466581
BloodPressure               0.065068
SkinThickness               0.074752
Insulin                     0.130548
BMI                         0.292695
DiabetesPedigreeFunction    0.173844
Age                         0.238356
Outcome                     1.000000
Name: Outcome, dtype: float64

In [6]:
X=df.iloc[:,:-1].values
y=df.iloc[:,-1].values
# Splitting the dataset into the Training set and Test set

In [None]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()

In [None]:
X = scaler.fit_transform(X)

In [9]:
X


array([[ 0.63994726,  0.84832379,  0.14964075, ...,  0.20401277,
         0.46849198,  1.4259954 ],
       [-0.84488505, -1.12339636, -0.16054575, ..., -0.68442195,
        -0.36506078, -0.19067191],
       [ 1.23388019,  1.94372388, -0.26394125, ..., -1.10325546,
         0.60439732, -0.10558415],
       ...,
       [ 0.3429808 ,  0.00330087,  0.14964075, ..., -0.73518964,
        -0.68519336, -0.27575966],
       [-0.84488505,  0.1597866 , -0.47073225, ..., -0.24020459,
        -0.37110101,  1.17073215],
       [-0.84488505, -0.8730192 ,  0.04624525, ..., -0.20212881,
        -0.47378505, -0.87137393]])

In [10]:
X.shape

(768, 8)

In [11]:
from sklearn.model_selection import train_test_split

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [13]:
import tensorflow as tf
from tensorflow import keras
from keras import Sequential
from keras.layers import Dense

In [14]:
model = Sequential()

In [15]:
model.add(Dense(32, activation='relu', input_dim=8))
model.add(Dense(1, activation='sigmoid'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [16]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [17]:
model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

Epoch 1/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - accuracy: 0.5804 - loss: 0.6866 - val_accuracy: 0.5854 - val_loss: 0.6923
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6334 - loss: 0.6467 - val_accuracy: 0.6179 - val_loss: 0.6601
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.6701 - loss: 0.6160 - val_accuracy: 0.6098 - val_loss: 0.6346
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.6945 - loss: 0.5923 - val_accuracy: 0.6341 - val_loss: 0.6122
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7006 - loss: 0.5712 - val_accuracy: 0.6423 - val_loss: 0.5927
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7169 - loss: 0.5547 - val_accuracy: 0.6341 - val_loss: 0.5759
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━

<keras.src.callbacks.history.History at 0x265d09fda50>

In [18]:
import kerastuner as kt

  import kerastuner as kt


In [None]:
def build_model(hp):
    model = Sequential()
    
    hidden_nodes = hp.Int('hidden_nodes', min_value=16, max_value=128, step=16)

    model.add(Dense(hidden_nodes, activation='relu', input_dim=8))
    model.add(Dense(1, activation='sigmoid'))

    optimizer = hp.Choice('optimizer', values=['adam', 'sgd', 'rmsprop', 'adadelta'])
    
    learning_rate = hp.Float('learning_rate', min_value=0.0001, max_value=0.01, sampling='LOG')
    
    if optimizer == 'adam':
        opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    elif optimizer == 'sgd':
        opt = tf.keras.optimizers.SGD(learning_rate=learning_rate)
    elif optimizer == 'rmsprop':
        opt = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)
    elif optimizer == 'adadelta':
        opt = tf.keras.optimizers.Adadelta(learning_rate=learning_rate)

    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [21]:
tuner.search(X_train, y_train, epochs=5, validation_data=(X_test, y_test))

In [22]:
best_hyperparameters = tuner.get_best_hyperparameters()[0]

In [None]:
# Create hyperparameter tuner
tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',  
    max_trials=5,
    directory='tuner_results',
    project_name='diabetes_hyperparameter_tuning'
)

print("Tuner created successfully!")

hidden_nodes: 80
optimizer: adam
learning_rate: 0.0006872468161348029


In [24]:
best_model = tuner.get_best_models(num_models=1)[0]




  saveable.load_own_variables(weights_store.get(inner_path))


In [25]:
best_model.summary()

In [26]:
history = best_model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

Epoch 1/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - accuracy: 0.7413 - loss: 0.5438 - val_accuracy: 0.7317 - val_loss: 0.5506
Epoch 2/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7536 - loss: 0.5236 - val_accuracy: 0.7236 - val_loss: 0.5341
Epoch 3/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7617 - loss: 0.5075 - val_accuracy: 0.7236 - val_loss: 0.5226
Epoch 4/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7658 - loss: 0.4957 - val_accuracy: 0.7398 - val_loss: 0.5121
Epoch 5/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7719 - loss: 0.4866 - val_accuracy: 0.7317 - val_loss: 0.5059
Epoch 6/10
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.7739 - loss: 0.4782 - val_accuracy: 0.7317 - val_loss: 0.4987
Epoch 7/10
[1m16/16[0m [32m━━━━━━━━━

In [27]:
test_loss, test_accuracy = best_model.evaluate(X_test, y_test, verbose=0)

In [28]:
print(f"test loss: {test_loss}")
print(f"test accuracy: {test_accuracy}")

test loss: 0.5052015781402588
test accuracy: 0.7792207598686218


In [33]:
predictions=best_model.predict(X_test, verbose=0)
print(f"\n first 10 predictions: {predictions[:10]}")
binary_predictions= (predictions > 0.5).astype(int)
print(f"first 10 binary predictions: {binary_predictions[:10].flatten()}")
print(f"first 10 actual labels: {y_test[:10]}")


 first 10 predictions: [[0.33823118]
 [0.15158452]
 [0.1057574 ]
 [0.24917841]
 [0.46486327]
 [0.46655855]
 [0.01565853]
 [0.42860386]
 [0.6007948 ]
 [0.6136197 ]]
first 10 binary predictions: [0 0 0 0 0 0 0 0 1 1]
first 10 actual labels: [0 0 0 0 0 0 0 0 0 0]
