In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd
import mlflow
import dagshub
from dagshub import dagshub_logger
import json
import os
import keras
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from keras.callbacks import EarlyStopping, ModelCheckpoint, Callback
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from kerastuner.tuners import BayesianOptimization

2025-09-28 15:19:54.802679: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-09-28 15:19:54.891708: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-09-28 15:19:56.851576: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
  from kerastuner.tuners import BayesianOptimization


In [2]:
# Dagshub and MLflow setup
mlflow.set_tracking_uri("https://dagshub.com/alfoCaiazza/churn_prediction.mlflow")
dagshub.init(repo_owner="alfoCaiazza", repo_name="churn_prediction", mlflow=True)
# The logger is created through the module attribute on purpose: this
# assignment rebinds the imported name `dagshub_logger` to the logger object,
# so calling the bare name would fail the second time this cell is executed.
dagshub_logger = dagshub.dagshub_logger(metrics_path="metrics", hparams_path="params")

In [3]:
# Setting up GPU usage
# Memory growth is requested for every visible GPU so TensorFlow allocates
# device memory on demand instead of reserving it all up front.
gpus = tf.config.list_physical_devices('GPU')
try:
    for gpu in gpus:
        print(f"GPU: {gpu}")
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as err:
    # Memory growth can only be configured before the GPUs are initialised;
    # re-running this cell on a live kernel would otherwise abort here.
    print(f"Could not enable GPU memory growth: {err}")

GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [None]:
# Parametrizing the SVM model
from sklearn.svm import SVC

def build_SVM(hp):
    """Build an unfitted ``SVC`` from hyperparameters sampled through ``hp``.

    Args:
        hp: Hyperparameter container exposing ``Choice``, ``Float`` and
            ``Int`` (the object the tuner passes to its build function).

    Returns:
        An ``SVC`` configured with the sampled ``kernel``, ``C``, ``gamma``
        and ``degree``. ``gamma`` is only sampled for the non-linear kernels
        and ``degree`` only for the polynomial kernel; otherwise the defaults
        ``'scale'`` and ``3`` are passed.
    """
    kernel_name = hp.Choice('kernel', values=['linear', 'poly', 'rbf', 'sigmoid'])
    # The sampling method is `Float` (capitalised); `hp.float` does not exist
    # and raised AttributeError on every build.
    C = hp.Float('C', min_value=1e-3, max_value=1e3, sampling='log')

    # Gamma value only for non-linear kernels
    gamma = 'scale'
    if kernel_name in ['poly', 'rbf', 'sigmoid']:
        gamma = hp.Choice('gamma', values=['scale', 'auto'])

    # Degree value only for polynomial kernel
    degree = 3
    if kernel_name == 'poly':
        degree = hp.Int('degree', min_value=2, max_value=6, step=1)

    svm = SVC(
        kernel=kernel_name, C=C, gamma=gamma, degree=degree
    )

    return svm

In [5]:
# Callbacks shared by every training run: both watch `val_loss`, one to stop
# after 5 epochs without improvement (restoring the best weights), the other
# to keep only the best model in 'best_model.h5'.
base_callbacks = [
    EarlyStopping(
        monitor='val_loss',
        patience=5,
        restore_best_weights=True,
    ),
    ModelCheckpoint(
        filepath='best_model.h5',
        monitor='val_loss',
        save_best_only=True,
    ),
]

# Custom Callback for MLflow logging
class MLflowCallback(Callback):
    """Keras callback that records one training run as a nested MLflow run.

    A nested run named ``trial_<trial_id>`` is opened when training begins,
    the trial's hyperparameters are logged as params, every entry of the
    per-epoch ``logs`` dict is logged as a metric, and the run is closed when
    training ends.

    Args:
        trial_id: Identifier used in the run name and logged as ``trial_id``.
        trial_hyperparameters: Object whose ``values`` mapping holds the
            hyperparameter names and values to log.
    """

    def __init__(self, trial_id, trial_hyperparameters):
        super().__init__()
        self.trial_id = trial_id
        self.trial_hyperparameters = trial_hyperparameters
        # Defined up front so on_train_end is safe even when on_train_begin
        # never ran (previously an AttributeError).
        self.run = None

    def on_train_begin(self, logs=None):
        """Open the nested run and log the trial id and hyperparameters."""
        self.run = mlflow.start_run(run_name=f"trial_{self.trial_id}", nested=True)

        mlflow.log_param('trial_id', self.trial_id)
        # One batched call instead of one request per hyperparameter.
        mlflow.log_params(dict(self.trial_hyperparameters.values))

    def on_epoch_end(self, epoch, logs=None):
        """Log every metric reported for this epoch, using the epoch as step."""
        if logs:
            mlflow.log_metrics(dict(logs), step=epoch)

    def on_train_end(self, logs=None):
        """Close the run opened in on_train_begin, if there is one."""
        if self.run is not None:
            mlflow.end_run()
            # Drop the handle so a repeated on_train_end cannot close an
            # unrelated active run.
            self.run = None

In [None]:
# Subclassing BayesianOptimization to include MLflow tracking callback
class MyTuner(BayesianOptimization):
    """BayesianOptimization tuner that attaches MLflow logging to each trial."""

    def run_trial(self, trial, *args, **kwargs):
        """Run one trial with the shared callbacks plus a per-trial MLflowCallback.

        Args:
            trial: The trial being run; its ``trial_id`` and
                ``hyperparameters`` are handed to the MLflow callback.
            *args, **kwargs: Forwarded unchanged to the parent ``run_trial``,
                except for ``callbacks``, which is extended.

        Returns:
            Whatever the parent ``run_trial`` returns.
        """
        # Keep callbacks supplied by the caller instead of discarding them.
        caller_callbacks = list(kwargs.get('callbacks') or [])
        # Keyword arguments: the positional call had the two values swapped
        # relative to MLflowCallback(trial_id, trial_hyperparameters).
        mlflow_callback = MLflowCallback(
            trial_id=trial.trial_id,
            trial_hyperparameters=trial.hyperparameters,
        )
        kwargs['callbacks'] = caller_callbacks + base_callbacks + [mlflow_callback]

        return super().run_trial(trial, *args, **kwargs)

In [None]:
# Building Model training phase
# Hyperparameter search for the parametrized SVC, wrapped in a parent MLflow
# run named "Main_run" (MLflowCallback opens its own runs with nested=True).
# TUNER_DIR is the value passed to the tuner as `directory`.
TUNER_DIR = "src/tuning" 
with mlflow.start_run(run_name="Main_run"):
    mlflow.set_tag("stage", "hyperparameter_tuning")
    mlflow.set_tag("model", "parametrized_SVC")

    # NOTE(review): build_SVM returns an sklearn SVC, while the objective
    # ('val_accuracy') and the callbacks injected by MyTuner are Keras-style.
    # Confirm this tuner class can actually train that model.
    tuner = MyTuner(
        build_SVM,
        objective='val_accuracy',
        max_trials=20,
        executions_per_trial=1,
        directory=TUNER_DIR,
        project_name='svm_parametrized'
    )

    # NOTE(review): search() is called without any arguments, so no training
    # or validation data is supplied here yet.
    tuner.search(

    )

    # TODO: the rest of this cell is not implemented yet.
# NOTE(review): the `with` block above presumably already closes "Main_run"
# on exit, which would make this call redundant. Confirm before removing.
mlflow.end_run()