In [1]:
# Install the packages listed in ../requirements.txt. The %pip magic (rather
# than !pip) installs into the environment of the running kernel.
# NOTE(review): the captured install log shows `mlflow` being collected without
# a version specifier while the other requirements are pinned with `==` --
# consider pinning it in requirements.txt so re-runs resolve the same version.
%pip install -r ../requirements.txt

Collecting fastapi==0.115.6 (from -r ../requirements.txt (line 1))
  Using cached fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)
Collecting matplotlib==3.9.2 (from -r ../requirements.txt (line 6))
  Using cached matplotlib-3.9.2-cp39-cp39-win_amd64.whl.metadata (11 kB)
Collecting scikit-learn==1.5.2 (from -r ../requirements.txt (line 8))
  Using cached scikit_learn-1.5.2-cp39-cp39-win_amd64.whl.metadata (13 kB)
Collecting nbconvert==7.16.4 (from -r ../requirements.txt (line 11))
  Using cached nbconvert-7.16.4-py3-none-any.whl.metadata (8.5 kB)
Collecting xverse==1.0.5 (from -r ../requirements.txt (line 12))
  Using cached xverse-1.0.5-py3-none-any.whl.metadata (19 kB)
Collecting mlflow (from -r ../requirements.txt (line 14))
  Using cached mlflow-3.1.4-py3-none-any.whl.metadata (29 kB)
Collecting starlette<0.42.0,>=0.40.0 (from fastapi==0.115.6->-r ../requirements.txt (line 1))
  Using cached starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)
Collecting mlflow-skinny==3.1.4 (from 



In [2]:
import pandas as pd
import numpy as np
import mlflow
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

# Read the processed customer risk profiles from disk
data_path = '../data/processed/customer_risk_profiles.csv'
df = pd.read_csv(data_path)

# Column names used as model inputs and as the label
features = ['Recency', 'Frequency', 'Monetary']
target = 'is_high_risk'

# Feature matrix and label vector
X, y = df[features], df[target]

# Hold out 20% of the rows for testing; stratifying on the label keeps the
# class proportions the same in both partitions, and the fixed seed makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

print('Training set shape:', X_train.shape)
print('Testing set shape:', X_test.shape)

Training set shape: (2993, 3)
Testing set shape: (749, 3)


In [3]:
# Select (or create) the MLflow experiment that collects every run of this comparison
mlflow.set_experiment('Credit_Risk_Model_Comparison')

# --- Model 1: Logistic Regression with GridSearchCV ---
with mlflow.start_run(run_name='Logistic_Regression_GridSearch'):
    # Base estimator and the hyper-parameter grid to search over
    log_reg = LogisticRegression(random_state=42, max_iter=1000)
    param_grid = {
        'C': [0.01, 0.1, 1, 10, 100],
        'solver': ['liblinear', 'saga']
    }

    # 5-fold cross-validated grid search scored by ROC AUC, using all CPU cores
    grid_search = GridSearchCV(
        estimator=log_reg,
        param_grid=param_grid,
        cv=5,
        scoring='roc_auc',
        n_jobs=-1,
    )
    grid_search.fit(X_train, y_train)

    # Record the winning hyper-parameters and their mean cross-validated score
    mlflow.log_params(grid_search.best_params_)
    mlflow.log_metric('best_roc_auc_cv', grid_search.best_score_)

    # best_estimator_ has already been refit by the search, so no further
    # training happens here: it is only scored on the held-out test set.
    best_log_reg = grid_search.best_estimator_
    y_pred = best_log_reg.predict(X_test)
    y_pred_proba = best_log_reg.predict_proba(X_test)[:, 1]

    # Test-set metrics, computed and logged one at a time in this order.
    # ROC AUC is the only one that consumes probabilities instead of labels.
    test_metrics = [
        ('accuracy', accuracy_score, y_pred),
        ('precision', precision_score, y_pred),
        ('recall', recall_score, y_pred),
        ('f1_score', f1_score, y_pred),
        ('roc_auc', roc_auc_score, y_pred_proba),
    ]
    for metric_name, scorer, estimate in test_metrics:
        mlflow.log_metric(metric_name, scorer(y_test, estimate))

    # Store the fitted estimator as an artifact of this run
    mlflow.sklearn.log_model(best_log_reg, 'logistic_regression_model')

    print('Logistic Regression experiment logged.')

2025/12/16 10:40:09 INFO mlflow.tracking.fluent: Experiment with name 'Credit_Risk_Model_Comparison' does not exist. Creating a new experiment.


Logistic Regression experiment logged.


In [4]:
# --- Model 2: Random Forest with GridSearchCV ---
with mlflow.start_run(run_name='Random_Forest_GridSearch'):
    # Base estimator and the hyper-parameter grid to search over
    rf = RandomForestClassifier(random_state=42)
    param_grid_rf = {
        'n_estimators': [100, 200],
        'max_depth': [10, 20, None],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2]
    }

    # 5-fold cross-validated grid search scored by ROC AUC, using all CPU cores
    grid_search_rf = GridSearchCV(
        estimator=rf,
        param_grid=param_grid_rf,
        cv=5,
        scoring='roc_auc',
        n_jobs=-1,
    )
    grid_search_rf.fit(X_train, y_train)

    # Record the winning hyper-parameters and their mean cross-validated score
    mlflow.log_params(grid_search_rf.best_params_)
    mlflow.log_metric('best_roc_auc_cv', grid_search_rf.best_score_)

    # best_estimator_ has already been refit by the search, so no further
    # training happens here: it is only scored on the held-out test set.
    best_rf = grid_search_rf.best_estimator_
    y_pred_rf = best_rf.predict(X_test)
    y_pred_proba_rf = best_rf.predict_proba(X_test)[:, 1]

    # Test-set metrics, computed and logged one at a time in this order.
    # ROC AUC is the only one that consumes probabilities instead of labels.
    test_metrics_rf = [
        ('accuracy', accuracy_score, y_pred_rf),
        ('precision', precision_score, y_pred_rf),
        ('recall', recall_score, y_pred_rf),
        ('f1_score', f1_score, y_pred_rf),
        ('roc_auc', roc_auc_score, y_pred_proba_rf),
    ]
    for metric_name, scorer, estimate in test_metrics_rf:
        mlflow.log_metric(metric_name, scorer(y_test, estimate))

    # Store the fitted estimator as an artifact of this run
    mlflow.sklearn.log_model(best_rf, 'random_forest_model')

    print('Random Forest experiment logged.')



Random Forest experiment logged.


In [5]:
# --- Identify and Register the Best Model ---
#
# Picks the finished run with the highest test-set `roc_auc` in the comparison
# experiment and registers the model that *that run* logged.
#
# Each training cell stores its model under a different artifact path, so the
# path must be looked up from the winning run. Hard-coding the Random Forest
# path would produce a URI that does not exist whenever Logistic Regression wins.

experiment_name = 'Credit_Risk_Model_Comparison'
model_name = 'CreditRiskModel'

# Run name (as passed to mlflow.start_run) -> artifact path passed to log_model.
# Keep in sync with the training cells when adding another candidate model.
artifact_path_by_run_name = {
    'Logistic_Regression_GridSearch': 'logistic_regression_model',
    'Random_Forest_GridSearch': 'random_forest_model',
}

experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    raise RuntimeError(
        f'MLflow experiment "{experiment_name}" not found; run the training cells first.'
    )

# Only FINISHED runs are considered: a run that failed part-way may have logged
# its metrics without ever logging a model artifact.
ranked_runs = mlflow.search_runs(
    experiment_ids=[experiment.experiment_id],
    filter_string="attributes.status = 'FINISHED'",
    order_by=['metrics.roc_auc DESC'],
    max_results=1
)
if ranked_runs.empty:
    raise RuntimeError(
        f'No finished runs found in experiment "{experiment_name}"; nothing to register.'
    )
best_run = ranked_runs.iloc[0]

best_run_id = best_run['run_id']
# search_runs exposes run tags as `tags.<key>` columns; the run name lives in
# the `mlflow.runName` tag.
best_run_name = best_run.get('tags.mlflow.runName')
if best_run_name not in artifact_path_by_run_name:
    raise RuntimeError(
        f'Best run {best_run_id} has unrecognised run name {best_run_name!r}; '
        f'expected one of {sorted(artifact_path_by_run_name)}.'
    )
best_model_uri = f'runs:/{best_run_id}/{artifact_path_by_run_name[best_run_name]}'

# Register the best model (creates the registered model on first use, otherwise
# adds a new version to it)
mv = mlflow.register_model(model_uri=best_model_uri, name=model_name)

print(f'Best run ID: {best_run_id}')
print(f'Model "{model_name}" registered with version {mv.version}')

Successfully registered model 'CreditRiskModel'.
Created version '1' of model 'CreditRiskModel'.


Best run ID: 3174b68102f946fda1c6a414ebb15c0b
Model "CreditRiskModel" registered with version 1


In [7]:
# --- Evaluate the Best Model from the Registry ---
#
# Loads the newest registered version of the model back from the MLflow Model
# Registry and re-scores it on the held-out test set, as a round-trip check
# that the registered artifact is the model that was evaluated during training.

model_name = 'CreditRiskModel'

# Choose the newest version explicitly by its number instead of relying on the
# order in which the registry happens to return results.
registered_versions = mlflow.search_model_versions(filter_string=f"name='{model_name}'")
if not registered_versions:
    raise RuntimeError(
        f'No versions of model "{model_name}" are registered; run the registration cell first.'
    )
latest_version = max(registered_versions, key=lambda mv: int(mv.version)).version
loaded_model = mlflow.sklearn.load_model(f'models:/{model_name}/{latest_version}')

# Make predictions on the test set: hard labels for the threshold-based
# metrics, positive-class probabilities for ROC AUC
predictions = loaded_model.predict(X_test)
probs = loaded_model.predict_proba(X_test)[:, 1]

# Evaluate the model's performance
# NOTE(review): the recorded output of this cell shows near-perfect scores
# (ROC AUC 0.9999). If `is_high_risk` was itself derived from the
# Recency/Frequency/Monetary features used as inputs, that would be target
# leakage rather than genuine predictive power -- confirm how the label is built.
accuracy = accuracy_score(y_test, predictions)
precision = precision_score(y_test, predictions)
recall = recall_score(y_test, predictions)
f1 = f1_score(y_test, predictions)
roc_auc = roc_auc_score(y_test, probs)

print(f'Best Model (Version {latest_version}) - Test Set Performance:')
print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1-Score: {f1:.4f}')
print(f'ROC AUC: {roc_auc:.4f}')

Best Model (Version 1) - Test Set Performance:
Accuracy: 0.9933
Precision: 0.9930
Recall: 0.9895
F1-Score: 0.9912
ROC AUC: 0.9999
