In [1]:
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.svm import SVC

In [2]:
TEST_SIZE = 0.2
RANDOM_STATE = 42
MLFLOW_TRACKING_URI = 'http://127.0.0.1:5000'
EXPERIMENT_NAME = 'Loan-Defaulters-Models-Vanilla'

MODELS = {
    'RandomForest': {
        'model': RandomForestClassifier,
        'params': {
            'n_estimators': 150,
            'class_weight': 'balanced',
            'random_state': RANDOM_STATE,
            'bootstrap': False
        }
    },
    'LogisticRegression': {
        'model': LogisticRegression,
        'params': {
            'solver': 'liblinear',
            'class_weight': 'balanced',
            'random_state': RANDOM_STATE
        }
    },
    'GradientBoosting': {
        'model': GradientBoostingClassifier,
        'params': {
            'n_estimators': 100,
            'learning_rate': 0.1,
            'max_depth': 3,
            'random_state': RANDOM_STATE
        }
    },
    'XGBoost': {
        'model': XGBClassifier,
        'params': {
            'n_estimators': 100,
            'learning_rate': 0.1,
            'use_label_encoder': False,
            'eval_metric': 'logloss',
            'random_state': RANDOM_STATE
        }
    },
    'SVC': {
        'model': SVC,
        'params': {
            'probability': True,
            'kernel': 'rbf',
            'C': 1.0,
            'class_weight': 'balanced',
            'random_state': RANDOM_STATE
        }
    }
}

In [3]:
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(EXPERIMENT_NAME)

2025/06/08 13:47:32 INFO mlflow.tracking.fluent: Experiment with name 'Loan-Defaulters-Models-Vanilla' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/937231393866999261', creation_time=1749383252995, experiment_id='937231393866999261', last_update_time=1749383252995, lifecycle_stage='active', name='Loan-Defaulters-Models-Vanilla', tags={}>

In [4]:
data = pd.read_csv('../training_data/balanced_training_data.csv')
y = data['loan_status']
X = data.drop('loan_status', axis = 1)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE)

In [None]:
for model_name, config in MODELS.items():
    print(model_name)
    with mlflow.start_run(run_name=model_name):
        ModelClass = config['model']
        params = config['params']

        model = ModelClass(**params)
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)
        y_proba = model.predict_proba(X_test)[:, 1] if hasattr(model, 'predict_proba') else None

        report_dict = classification_report(y_test, y_pred, output_dict=True)
        
        # Log
        mlflow.log_params(params)
        mlflow.sklearn.log_model(model, artifact_path=f"{model_name}_model")
        mlflow.log_metrics(
            {
                'accuracy': report_dict['accuracy'],
                'recall_class_0': report_dict['0']['recall'],
                'recall_class_1': report_dict['1']['recall'],
                'precision_class_0': report_dict['0']['precision'],
                'precision_class_1': report_dict['1']['precision'],
                'f1_score_macro': report_dict['macro avg']['f1-score'],
            }
        )
        


RandomForest




🏃 View run RandomForest at: http://127.0.0.1:5000/#/experiments/937231393866999261/runs/8b7d3144dcd240d1922f0180cc355f7d
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/937231393866999261
LogisticRegression




🏃 View run LogisticRegression at: http://127.0.0.1:5000/#/experiments/937231393866999261/runs/dce3a5d3a0fc4fa9b989d4937f005e04
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/937231393866999261
GradientBoosting


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


🏃 View run GradientBoosting at: http://127.0.0.1:5000/#/experiments/937231393866999261/runs/8091c620f1de4ee49c4a09cf4a682a2b
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/937231393866999261
XGBoost




🏃 View run XGBoost at: http://127.0.0.1:5000/#/experiments/937231393866999261/runs/c42ae69a080f409691c9d0a920d775b0
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/937231393866999261
SVC
