# 1. Required Packages

In [1]:
import pandas as pd
import keras_tuner as kt
import json
from tensorflow.keras import Input, Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, Concatenate, BatchNormalization
from tensorflow.keras.optimizers import Adam, RMSprop, SGD, Nadam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.metrics import AUC
from tensorflow.keras import backend as K
import tensorflow as tf
import os
from utils import get_data_path, get_save_path, save_json
from tensorflow.keras.metrics import AUC
from tensorflow.keras.regularizers import l2

In [2]:
# Custom Focal Loss
class FocalLoss(tf.keras.losses.Loss):
    def __init__(self, gamma=2., alpha=0.75, **kwargs):
        super().__init__(**kwargs)
        self.gamma = gamma
        self.alpha = alpha

    def call(self, y_true, y_pred):
        y_pred = K.clip(y_pred, K.epsilon(), 1. - K.epsilon())
        pt = tf.where(tf.equal(y_true, 1), y_pred, 1 - y_pred)
        return -K.mean(self.alpha * K.pow(1. - pt, self.gamma) * K.log(pt))

# 2. Load Data

In [3]:
X_train = pd.read_csv(get_data_path("X_train_deep.csv"))
y_train = pd.read_csv(get_data_path("y_train.csv")).squeeze()
X_val = pd.read_csv(get_data_path("X_val_deep.csv"))
y_val = pd.read_csv(get_data_path("y_val.csv")).squeeze()

input_dim = X_train.shape[1]

# 3. Dense Model

In [4]:
# Tunable Feedforward Model
def build_dense_model_hp(hp):
    batch_size = hp.Choice("batch_size", [32, 64, 128])
    model = Sequential()
    model.add(Input(shape=(input_dim,)))

    for i in range(hp.Int("num_layers", 2, 6)):
        units = hp.Choice(f"units_{i}", [32, 64, 128, 256, 512])
        activation = hp.Choice("activation", ["relu", "tanh", "swish"])
        model.add(Dense(units, activation=activation))
        
        # L2 regularization hyperparameter
        l2_reg = hp.Choice(f"l2_{i}", [0.0, 1e-5, 1e-4, 1e-3])

        model.add(Dense(
            units,
            activation=activation,
            kernel_regularizer=l2(l2_reg)
        ))
        
        if hp.Boolean(f"use_bn_{i}"):
            model.add(BatchNormalization())

        dropout_rate = hp.Choice(f"dropout_{i}", [0.3, 0.4, 0.5])
        model.add(Dropout(dropout_rate))

    model.add(Dense(1, activation="sigmoid"))

    optimizer_choice = hp.Choice("optimizer", ["adam", "rmsprop", "sgd", "nadam"])
    lr = hp.Float("learning_rate", 1e-4, 1e-2, sampling="log")

    if optimizer_choice == "adam":
        optimizer = Adam(learning_rate=lr)
    elif optimizer_choice == "sgd":
        optimizer = SGD(learning_rate=lr)
    elif optimizer_choice == "nadam":
        optimizer = Nadam(learning_rate=lr)
    else:
        optimizer = RMSprop(learning_rate=lr)
    model.compile(
        optimizer=optimizer,
        loss=FocalLoss(alpha=0.75),
        metrics=[
            "accuracy",
            tf.keras.metrics.Recall(name="recall"),
            tf.keras.metrics.AUC(name="AUC", curve="ROC"),
            tf.keras.metrics.AUC(name="PR_AUC", curve="PR")
        ]
    )

    return model

# 4. Create Bayesian Optimization Tuner 

In [5]:
bayes_tuner = kt.BayesianOptimization(
    build_dense_model_hp,
    objective="val_AUC",
    max_trials=30,
    directory="tuner_dir",
    project_name="dense_model_focal_tuned"
)


Reloading Tuner from tuner_dir/dense_model_focal_tuned/tuner0.json


# 5. Start Automated Hyperparameter Tuning

In [6]:
# EarlyStopping
early_stop = EarlyStopping(
    monitor="val_AUC", 
    mode="max",
    patience=10,
    restore_best_weights=True
)
# Start tuning
# Here, EarlyStopping is used to find the best epoch
# After saving the model and parameters, we will use the same early stopping or retrain to that epoch in notebook 04

bayes_tuner.search(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    callbacks=[early_stop]
)


# 6. Save Best Model and Hyperparameters

In [7]:
# Save the best model to the specified folder
best_model = bayes_tuner.get_best_models(1)[0]
best_model.save(get_save_path("best_model_dense.keras"))

# Save the best hyperparameters as JSON
best_hp = bayes_tuner.get_best_hyperparameters(1)[0]
save_json(best_hp.values, "best_hyperparameters_dense.json")

bayes_tuner.results_summary()
print("\nBest Hyperparameter Combination:")
print(best_hp.values)

Results summary
Results in tuner_dir/dense_model_focal_tuned
Showing 10 best trials
Objective(name="val_AUC", direction="max")

Trial 09 summary
Hyperparameters:
batch_size: 32
num_layers: 3
units_0: 128
activation: swish
l2_0: 1e-05
use_bn_0: True
dropout_0: 0.3
units_1: 128
l2_1: 0.0001
use_bn_1: True
dropout_1: 0.3
optimizer: adam
learning_rate: 0.0012422772048365925
units_2: 128
l2_2: 0.0
use_bn_2: False
dropout_2: 0.5
units_3: 64
l2_3: 0.001
use_bn_3: False
dropout_3: 0.3
units_4: 32
l2_4: 1e-05
use_bn_4: True
dropout_4: 0.4
Score: 0.6405916810035706

Trial 07 summary
Hyperparameters:
batch_size: 32
num_layers: 3
units_0: 256
activation: relu
l2_0: 1e-05
use_bn_0: True
dropout_0: 0.3
units_1: 128
l2_1: 0.0001
use_bn_1: True
dropout_1: 0.3
optimizer: rmsprop
learning_rate: 0.005808921978016473
units_2: 256
l2_2: 0.001
use_bn_2: False
dropout_2: 0.4
units_3: 128
l2_3: 1e-05
use_bn_3: False
dropout_3: 0.5
units_4: 256
l2_4: 1e-05
use_bn_4: True
dropout_4: 0.5
Score: 0.639795184135437

  saveable.load_own_variables(weights_store.get(inner_path))


# 7.Wide & Deep tuning

In [8]:
def build_wide_and_deep_model_hp(hp):
    batch_size = hp.Choice("batch_size", [32, 64, 128]) 
    
    input_layer = Input(shape=(input_dim,))
    
    # Deep Branch
    deep = input_layer
    for i in range(hp.Int("num_layers", 2, 6)):
        units = hp.Choice(f"units_{i}", [64, 128, 256, 512])
        activation = hp.Choice("activation", ["relu", "tanh", "swish"])
        deep = Dense(units, activation=activation)(deep)
        l2_reg = hp.Choice(f"l2_{i}", [0.0, 1e-5, 1e-4, 1e-3])  # L2
        
        deep = Dense(units, activation=activation, kernel_regularizer=l2(l2_reg))(deep)

        if hp.Boolean(f"use_bn_{i}"):
            deep = BatchNormalization()(deep)

        dropout_rate = hp.Choice(f"dropout_{i}", [0.3, 0.4, 0.5])
        deep = Dropout(dropout_rate)(deep)

    # Wide Branch + Concatenation
    combined = Concatenate()([input_layer, deep])
    output = Dense(1, activation="sigmoid")(combined)

    model = Model(inputs=input_layer, outputs=output)

    # Optimizer Settings
    optimizer_name = hp.Choice("optimizer", ["adam", "rmsprop", "sgd", "nadam"])
    lr = hp.Float("learning_rate", min_value=1e-5, max_value=1e-2, sampling="log")

    if optimizer_name == "adam":
        optimizer = Adam(learning_rate=lr)
    elif optimizer_name == "rmsprop":
        optimizer = RMSprop(learning_rate=lr)
    elif optimizer_name == "sgd":
        optimizer = SGD(learning_rate=lr)
    else:
        optimizer = Nadam(learning_rate=lr)

    # Loss Function (can switch focal loss) 
    loss_type = hp.Choice("loss_fn", ["binary_crossentropy", "focal"])
    loss_fn = "binary_crossentropy" if loss_type == "binary_crossentropy" else FocalLoss()


    model.compile(
        optimizer=optimizer,
        loss=FocalLoss(alpha=0.75),
        metrics=[
            "accuracy",
            tf.keras.metrics.Recall(name="recall"),
            tf.keras.metrics.AUC(name="AUC", curve="ROC"),
            tf.keras.metrics.AUC(name="PR_AUC", curve="PR")
        ]
    )
    return model

# 8. Bayesian Tuner for Wide & Deep

In [9]:
tuner_wd = kt.BayesianOptimization(
    build_wide_and_deep_model_hp,
    objective="val_AUC",
    max_trials=30,
    directory="tuner_dir",
    project_name="wide_deep_model_tuning"
)

Reloading Tuner from tuner_dir/wide_deep_model_tuning/tuner0.json


# 9. Start Search (Wide & Deep)

In [10]:
tuner_wd.search(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    callbacks=[early_stop],
    verbose=1
)

# 10. Save Best Wide & Deep Model and Parameters

In [11]:
best_wd_model = tuner_wd.get_best_models(1)[0]
best_wd_model.save(get_save_path("best_model_widedeep.keras"))

best_wd_hp = tuner_wd.get_best_hyperparameters(1)[0]
save_json(best_wd_hp.values, "best_hyperparameters_widedeep.json")

tuner_wd.results_summary()
print("Best Wide & Deep Hyperparameters:")
print(best_wd_hp.values)

Results summary
Results in tuner_dir/wide_deep_model_tuning
Showing 10 best trials
Objective(name="val_AUC", direction="max")

Trial 21 summary
Hyperparameters:
batch_size: 128
num_layers: 6
units_0: 128
activation: swish
l2_0: 1e-05
use_bn_0: True
dropout_0: 0.3
units_1: 512
l2_1: 1e-05
use_bn_1: True
dropout_1: 0.4
optimizer: nadam
learning_rate: 0.0002537689381953388
loss_fn: binary_crossentropy
units_2: 512
l2_2: 0.0001
use_bn_2: True
dropout_2: 0.5
units_3: 128
l2_3: 0.0
use_bn_3: True
dropout_3: 0.5
units_4: 512
l2_4: 0.001
use_bn_4: False
dropout_4: 0.5
units_5: 64
l2_5: 0.001
use_bn_5: False
dropout_5: 0.5
Score: 0.6407451629638672

Trial 12 summary
Hyperparameters:
batch_size: 32
num_layers: 6
units_0: 128
activation: relu
l2_0: 0.0001
use_bn_0: True
dropout_0: 0.3
units_1: 512
l2_1: 0.001
use_bn_1: True
dropout_1: 0.3
optimizer: adam
learning_rate: 0.00040038051991221
loss_fn: binary_crossentropy
units_2: 256
l2_2: 1e-05
use_bn_2: False
dropout_2: 0.5
units_3: 256
l2_3: 0.0
u

  saveable.load_own_variables(weights_store.get(inner_path))
