In [1]:
# Setup imports and path for local modules
import sys
sys.path.append("../src")

from data_preprocessing import load_and_preprocess
from hpo_optuna import run_optimization
from train_model import train
from batch_inference import batch_predict
# from drift_detection import detect_drift  # Optional bonus

import joblib
import mlflow
import pandas as pd

# Single source of truth for the dataset location, so the preview below and
# the preprocessing step cannot drift onto different files.
DATA_PATH = "../data/diabetes.csv"

# Quick look at the raw data before any preprocessing.
df = pd.read_csv(DATA_PATH)
print(df.columns)
# Only the final expression of a cell is auto-displayed; a bare `df.head()`
# in the middle of the cell is evaluated and discarded, so print it explicitly.
print(df.head())


# Step 1: Preprocess Data
# load_and_preprocess returns the train/test splits plus a scaler object
# (presumably already fitted on the training split -- confirm in
# data_preprocessing).
X_train, X_test, y_train, y_test, scaler = load_and_preprocess(DATA_PATH)
joblib.dump(scaler, "../models/scaler.pkl")  # Save scaler for inference




  from .autonotebook import tqdm as notebook_tqdm


Index(['PatientID', 'Pregnancies', 'PlasmaGlucose', 'DiastolicBloodPressure',
       'TricepsThickness', 'SerumInsulin', 'BMI', 'DiabetesPedigree', 'Age',
       'Diabetic'],
      dtype='object')


['../models/scaler.pkl']

In [2]:
# Step 2: Hyperparameter Tuning with Optuna
# NOTE(review): run_optimization() is called without arguments, so it does not
# receive the splits created in Step 1 -- presumably it loads its own data;
# confirm it does not tune against the held-out test split.
best_params = run_optimization()
print(f"Best Parameters: {best_params}")


[I 2025-07-18 02:19:58,839] A new study created in memory with name: no-name-be8b8938-354b-4140-b265-4421a9d03fa4
[I 2025-07-18 02:20:04,227] Trial 0 finished with value: 0.9824350725829173 and parameters: {'n_estimators': 144, 'max_depth': 10, 'learning_rate': 0.07295663478268426}. Best is trial 0 with value: 0.9824350725829173.
[I 2025-07-18 02:20:06,444] Trial 1 finished with value: 0.9868828835486418 and parameters: {'n_estimators': 105, 'max_depth': 6, 'learning_rate': 0.056403065556954506}. Best is trial 1 with value: 0.9868828835486418.
[I 2025-07-18 02:20:11,973] Trial 2 finished with value: 0.9875144911805868 and parameters: {'n_estimators': 169, 'max_depth': 9, 'learning_rate': 0.15476034020745788}. Best is trial 2 with value: 0.9875144911805868.
[I 2025-07-18 02:20:14,217] Trial 3 finished with value: 0.9874937070171955 and parameters: {'n_estimators': 163, 'max_depth': 4, 'learning_rate': 0.0470937102945233}. Best is trial 2 with value: 0.9875144911805868.
[I 2025-07-18 02:

Best trial: {'n_estimators': 196, 'max_depth': 2, 'learning_rate': 0.19679701673570765}
Best Parameters: {'n_estimators': 196, 'max_depth': 2, 'learning_rate': 0.19679701673570765}


In [3]:
# Step 3: Train final model using best parameters
# train() hands back the model together with its accuracy and AUC; the
# recorded output suggests it also writes the model to ../models/model.pkl.
best_model, acc, auc = train(
    X_train,
    X_test,
    y_train,
    y_test,
    best_params,
)
print("Final Accuracy: {:.4f}, AUC: {:.4f}".format(acc, auc))





Model saved to ../models/model.pkl
Final Accuracy: 0.9550, AUC: 0.9921


In [5]:
# Step 4: Model Registration

# Kept as a cell-local import: moving it to the first cell is only safe if
# importing model_registration has no import-time side effects (not visible
# from this notebook).
from model_registration import register_model

# mlflow is already imported in the first cell, so it is not re-imported here.
# Close any run that is still active from the earlier steps so registration
# does not attach to a leftover run.
mlflow.end_run()

# Register the trained model, passing the held-out test data and the tuned
# hyperparameters; register_model returns an (accuracy, AUC) pair.
# NOTE: this rebinds `acc` / `auc` from Step 3.
acc, auc = register_model(best_model, X_test, y_test, best_params)

# Same 4-decimal formatting as Step 3 so the two reports are directly comparable.
print(f"Final Accuracy: {acc:.4f}")
print(f"Final AUC: {auc:.4f}")




✅ Final model saved at: models/final_model.pkl




✅ Model registered to MLflow
Final Accuracy: 0.955
Final AUC: 0.9920592949088037
