In [43]:
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    confusion_matrix
)
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC

In [44]:
TEST_SIZE = 0.2
RANDOM_STATE = 42
MLFLOW_TRACKING_URI = 'http://127.0.0.1:5000'
EXPERIMENT_NAME = 'Loan-Defaulters-Models'

MODELS = {
    'RandomForest': {
        'model': RandomForestClassifier,
        'params': {
            'n_estimators': 150,
            'class_weight': 'balanced',
            'random_state': RANDOM_STATE,
            'bootstrap': False
        }
    },
    'LogisticRegression': {
        'model': LogisticRegression,
        'params': {
            'solver': 'liblinear',
            'class_weight': 'balanced',
            'random_state': RANDOM_STATE
        }
    },
    'GradientBoosting': {
        'model': GradientBoostingClassifier,
        'params': {
            'n_estimators': 100,
            'learning_rate': 0.1,
            'max_depth': 3,
            'random_state': RANDOM_STATE
        }
    },
    'XGBoost': {
        'model': XGBClassifier,
        'params': {
            'n_estimators': 100,
            'learning_rate': 0.1,
            'use_label_encoder': False,
            'eval_metric': 'logloss',
            'random_state': RANDOM_STATE
        }
    },
    'SVC': {
        'model': SVC,
        'params': {
            'probability': True,
            'kernel': 'rbf',
            'C': 1.0,
            'class_weight': 'balanced',
            'random_state': RANDOM_STATE
        }
    }
}

In [45]:
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='mlflow-artifacts:/213950789839913881', creation_time=1749320142393, experiment_id='213950789839913881', last_update_time=1749320142393, lifecycle_stage='active', name='Loan-Defaulters-Models', tags={}>

In [46]:
data = pd.read_csv('../training_data/balanced_training_data.csv')
y = data['loan_status']
X = data.drop('loan_status', axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE)

In [None]:
for model_name, config in MODELS.items():
    with mlflow.start_run(run_name=model_name):
        ModelClass = config['model']
        params = config['params']

        model = ModelClass(**params)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        y_proba = model.predict_proba(X_test)[:, 1] if hasattr(model, 'predict_proba') else None

        # Metrics
        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred)
        rec = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        roc_auc = roc_auc_score(y_test, y_proba) if y_proba is not None else None

        tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
        specificity = tn / (tn + fp)

        # Log
        mlflow.log_params(params)
        metrics = {
            'accuracy': acc,
            'precision': prec,
            'recall': rec,
            'specificity': specificity,
            'f1_score': f1,
            'true_positives': tp,
            'false_positives': fp,
            'true_negatives': tn,
            'false_negatives': fn
        }
        if roc_auc is not None:
            metrics['roc_auc'] = roc_auc
        mlflow.log_metrics(metrics)

        mlflow.sklearn.log_model(model, artifact_path=f"{model_name}_model")



🏃 View run RandomForest at: http://127.0.0.1:5000/#/experiments/213950789839913881/runs/bd90d4ad6c434bd48738aa05261f4d49
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/213950789839913881




🏃 View run LogisticRegression at: http://127.0.0.1:5000/#/experiments/213950789839913881/runs/a7cad9ff3e764e9b9931776a12a1c8e1
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/213950789839913881


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🏃 View run GradientBoosting at: http://127.0.0.1:5000/#/experiments/213950789839913881/runs/8fdde5e7ddd24876a9a90251aa0f3063
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/213950789839913881




🏃 View run XGBoost at: http://127.0.0.1:5000/#/experiments/213950789839913881/runs/e1843085fde94008856a4ae6fcd46f01
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/213950789839913881
