In [73]:
!python --version

Python 3.8.10


IMPORT LIBRARIES

In [74]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

from sklearn.feature_extraction import DictVectorizer
from sklearn.linear_model import LogisticRegression

from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score

import xgboost as xgb
import pickle

STANDARDIZATION

In [75]:
X_train = pd.read_parquet('../processed_data/X_train.parquet')
X_val = pd.read_parquet('../processed_data/X_val.parquet')

y_train = np.loadtxt('../processed_data/y_train.txt').astype(int)
y_val = np.loadtxt('../processed_data/y_val.txt').astype(int)

In [77]:
# Seperation of columns into numeric and categorical columns
num_cols = np.array(X_train.select_dtypes(include= ['int64','float64']).columns).tolist()
cat_cols = np.array(X_train.select_dtypes(include= ['category','object']).columns).tolist()

In [78]:
print(cat_cols)
print(num_cols)
print(set(cat_cols + num_cols) - set(X_train.columns))  # Should be empty

['AGE_GROUP', 'YEARS_EMPLOYED_GROUP', 'PHONE_CHANGE_GROUP']
['REGION_RATING_CLIENT_W_CITY', 'REGION_RATING_CLIENT', 'EXT_SOURCE_3', 'EXT_SOURCE_2', 'EXT_SOURCE_1', 'FLOORSMAX_AVG']
set()


In [79]:
train_dicts = X_train[cat_cols + num_cols].to_dict(orient='records')
val_dicts = X_val[cat_cols + num_cols].to_dict(orient='records')

In [80]:
dv = DictVectorizer()
X_train = dv.fit_transform(train_dicts)
X_val = dv.transform(val_dicts)

EXPERIMENT TRACKING

In [81]:
import mlflow

mlflow.set_tracking_uri('sqlite:///../cred_risk_sqlite_mlflow.db')
mlflow.set_experiment('credit_default_risk_experiment_tracking')

<Experiment: artifact_location='/Users/mac/Projects/MLops_credit_default_risk_prediction/02-experiment-tracking/mlruns/1', creation_time=1754811864406, experiment_id='1', last_update_time=1754811864406, lifecycle_stage='active', name='credit_default_risk_experiment_tracking', tags={}>

In [82]:

with mlflow.start_run():
    mlflow.set_tag("engineer", "adeakinwe")
    mlflow.set_tag("model", "Logistic Regression")

    mlflow.log_param("train_data_path", "../processed_data/X_train.parquet")
    mlflow.log_param("val_data_path", "../processed_data/X_val.parquet")

    lr = LogisticRegression(class_weight='balanced')
    lr.fit(X_train, y_train)

    y_pred = lr.predict(X_val)
    y_proba = lr.predict_proba(X_val)[:, 1]

    accuracy = round(accuracy_score(y_val, y_pred), 3)
    auc = round(roc_auc_score(y_val, y_proba), 3)

    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("auc", auc)

In [83]:
with mlflow.start_run():
    mlflow.set_tag("engineer", "adeakinwe")
    mlflow.set_tag("model", "XGBoost")

    mlflow.log_param("train_data_path", "../processed_data/X_train.parquet")
    mlflow.log_param("val_data_path", "../processed_data/X_val.parquet")

    # Prepare DMatrix
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dval = xgb.DMatrix(X_val, label=y_val)

    # Compute scale_pos_weight if needed
    class_counts = pd.Series(y_train).value_counts()
    scale_pos_weight = class_counts[0] / class_counts[1]

    # XGBoost params
    params = {
        "objective": "binary:logistic",
        "eval_metric": "auc",
        "scale_pos_weight": scale_pos_weight,
    }

    mlflow.log_param("scale_pos_weight", scale_pos_weight)

    # Train with early stopping
    model = xgb.train(
        params=params,
        dtrain=dtrain,
        num_boost_round=100,
        evals=[(dtrain, "train"), (dval, "eval")],
        early_stopping_rounds=50,
        verbose_eval=False
    )

    # Predict and evaluate
    y_pred_proba = model.predict(dval)
    y_pred = (y_pred_proba > 0.5).astype(int)

    accuracy = round(accuracy_score(y_val, y_pred), 3)
    auc = round(roc_auc_score(y_val, y_pred_proba), 3)

    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("auc", auc)

HYPER-PARAMETER TUNING

In [84]:
from hyperopt import fmin, tpe, hp, Trials, STATUS_OK
from hyperopt.pyll import scope
import mlflow.xgboost

In [85]:
# Search space for hyperopt
search_space = {
    'max_depth': scope.int(hp.quniform('max_depth', 4, 100, 1)),
    'learning_rate': hp.loguniform('learning_rate', -3, 0),
    'reg_alpha': hp.loguniform('reg_alpha', -5, -1),
    'reg_lambda': hp.loguniform('reg_lambda', -6, -1),
    'min_child_weight': hp.loguniform('min_child_weight', -1, 3),
    'subsample': hp.uniform('subsample', 0.5, 1.0),
    'colsample_bytree': hp.uniform('colsample_bytree', 0.5, 1.0),
}

# Objective function for hyperopt
def objective(params):
    # Calculate imbalance ratio
    neg, pos = np.bincount(y_train)
    scale_pos_weight = neg / pos

    # Add required static params
    params['objective'] = 'binary:logistic'
    params['seed'] = 42
    params['eval_metric'] = 'auc'
    params['scale_pos_weight'] = scale_pos_weight

    run_name = f"xgb-md{params['max_depth']}-lr{params['learning_rate']:.3f}"

    with mlflow.start_run(nested=True, run_name=run_name):
        mlflow.set_tag("model", "XGBoost")
        mlflow.set_tag("engineer", "adeakinwe")

        mlflow.log_param("train_data", "../processed_data/X_train.parquet")
        mlflow.log_param("val_data", "../processed_data/X_val.parquet")

        mlflow.log_params({k: round(v, 5) if isinstance(v, float) else v for k, v in params.items()})

        dtrain = xgb.DMatrix(X_train, label=y_train)
        dval = xgb.DMatrix(X_val, label=y_val)

        model = xgb.train(
            params,
            dtrain,
            num_boost_round=200,
            evals=[(dval, 'eval')],
            early_stopping_rounds=50,
            verbose_eval=10
        )

        y_pred_proba = model.predict(dval)
        y_pred = (y_pred_proba > 0.5).astype(int)

        accuracy = round(accuracy_score(y_val, y_pred), 3)
        auc = round(roc_auc_score(y_val, y_pred_proba), 3)

        mlflow.log_metric("accuracy", accuracy)
        mlflow.log_metric("auc", auc)

        mlflow.xgboost.log_model(model, artifact_path="models")

        return {'loss': -auc, 'status': STATUS_OK}

# Run outer MLflow parent run
with mlflow.start_run(run_name="xgboost-hyperopt"):
    trials = Trials()
    best_result = fmin(
        fn=objective,
        space=search_space,
        algo=tpe.suggest,
        max_evals=50,
        trials=trials
    )

    # Log best hyperparameters found
    mlflow.log_params({f"best_{k}": v for k, v in best_result.items()})

[0]	eval-auc:0.61075                                  
[10]	eval-auc:0.66212                                 
[20]	eval-auc:0.66643                                 
[30]	eval-auc:0.66567                                 
[40]	eval-auc:0.66182                                 
[50]	eval-auc:0.65924                                 
[60]	eval-auc:0.65760                                 
[67]	eval-auc:0.65682                                 
  0%|          | 0/50 [00:15<?, ?trial/s, best loss=?]




[0]	eval-auc:0.60377                                                
[10]	eval-auc:0.64307                                               
[20]	eval-auc:0.65915                                               
[30]	eval-auc:0.66392                                               
[40]	eval-auc:0.66588                                               
[50]	eval-auc:0.66664                                               
[60]	eval-auc:0.66559                                               
[70]	eval-auc:0.66545                                               
[80]	eval-auc:0.66469                                               
[90]	eval-auc:0.66397                                               
[100]	eval-auc:0.66226                                              
[101]	eval-auc:0.66225                                              
  2%|▏         | 1/50 [00:45<18:57, 23.22s/trial, best loss: -0.657]




[0]	eval-auc:0.61617                                                
[10]	eval-auc:0.68345                                               
[20]	eval-auc:0.68966                                               
[30]	eval-auc:0.68560                                               
[40]	eval-auc:0.67814                                               
[50]	eval-auc:0.67476                                               
[60]	eval-auc:0.67421                                               
[66]	eval-auc:0.67189                                               
  4%|▍         | 2/50 [00:59<21:17, 26.61s/trial, best loss: -0.662]




[0]	eval-auc:0.61149                                                
[10]	eval-auc:0.64766                                               
[20]	eval-auc:0.65107                                               
[30]	eval-auc:0.65057                                               
[40]	eval-auc:0.64753                                               
[50]	eval-auc:0.64580                                               
[60]	eval-auc:0.64398                                               
[70]	eval-auc:0.64241                                               
[74]	eval-auc:0.64211                                               
  6%|▌         | 3/50 [01:15<15:28, 19.75s/trial, best loss: -0.672]




[0]	eval-auc:0.62561                                                
[10]	eval-auc:0.69622                                               
[20]	eval-auc:0.70061                                               
[30]	eval-auc:0.69932                                               
[40]	eval-auc:0.69576                                               
[50]	eval-auc:0.69286                                               
[60]	eval-auc:0.69125                                               
[70]	eval-auc:0.68834                                               
[72]	eval-auc:0.68802                                               
  8%|▊         | 4/50 [01:34<14:41, 19.15s/trial, best loss: -0.672]




[0]	eval-auc:0.66498                                                
[10]	eval-auc:0.64594                                               
[20]	eval-auc:0.64085                                               
[30]	eval-auc:0.63957                                               
[40]	eval-auc:0.63791                                               
[49]	eval-auc:0.63610                                               
 10%|█         | 5/50 [01:44<14:08, 18.86s/trial, best loss: -0.688]




[0]	eval-auc:0.61196                                                
[10]	eval-auc:0.63177                                               
[20]	eval-auc:0.63727                                               
[30]	eval-auc:0.63285                                               
[40]	eval-auc:0.63242                                               
[50]	eval-auc:0.63016                                               
[54]	eval-auc:0.63047                                               
 12%|█▏        | 6/50 [01:55<11:04, 15.11s/trial, best loss: -0.688]




[0]	eval-auc:0.61013                                                
[10]	eval-auc:0.61171                                               
[20]	eval-auc:0.62103                                               
[30]	eval-auc:0.61897                                               
[40]	eval-auc:0.61932                                               
[50]	eval-auc:0.61845                                               
[60]	eval-auc:0.62003                                               
[70]	eval-auc:0.62159                                               
 14%|█▍        | 7/50 [02:11<09:53, 13.80s/trial, best loss: -0.688]




[0]	eval-auc:0.62131                                                
[10]	eval-auc:0.69189                                               
[20]	eval-auc:0.69566                                               
[30]	eval-auc:0.69340                                               
[40]	eval-auc:0.68857                                               
[50]	eval-auc:0.68581                                               
[60]	eval-auc:0.68441                                               
[65]	eval-auc:0.68312                                               
 16%|█▌        | 8/50 [02:32<10:43, 15.32s/trial, best loss: -0.688]




[0]	eval-auc:0.62172                                                
[10]	eval-auc:0.68367                                               
[20]	eval-auc:0.69071                                               
[30]	eval-auc:0.68877                                               
[40]	eval-auc:0.68576                                               
[50]	eval-auc:0.68247                                               
[60]	eval-auc:0.67782                                               
[69]	eval-auc:0.67534                                               
 18%|█▊        | 9/50 [02:45<11:26, 16.74s/trial, best loss: -0.688]




[0]	eval-auc:0.61140                                                 
[10]	eval-auc:0.62642                                                
[20]	eval-auc:0.62581                                                
[30]	eval-auc:0.62477                                                
[40]	eval-auc:0.62268                                                
[50]	eval-auc:0.62291                                                
[60]	eval-auc:0.62234                                                
 20%|██        | 10/50 [03:03<10:22, 15.57s/trial, best loss: -0.688]




[0]	eval-auc:0.66077                                                 
[10]	eval-auc:0.71332                                                
[20]	eval-auc:0.72018                                                
[30]	eval-auc:0.72012                                                
[40]	eval-auc:0.71830                                                
[50]	eval-auc:0.71638                                                
[60]	eval-auc:0.71535                                                
[70]	eval-auc:0.71373                                                
[78]	eval-auc:0.71248                                                
 22%|██▏       | 11/50 [03:12<10:26, 16.06s/trial, best loss: -0.688]




[0]	eval-auc:0.63111                                                 
[10]	eval-auc:0.66859                                                
[20]	eval-auc:0.66411                                                
[30]	eval-auc:0.65709                                                
[40]	eval-auc:0.65427                                                
[50]	eval-auc:0.65241                                                
[54]	eval-auc:0.65280                                                
 24%|██▍       | 12/50 [03:21<08:31, 13.46s/trial, best loss: -0.712]




[0]	eval-auc:0.60179                                                 
[10]	eval-auc:0.63845                                                
[20]	eval-auc:0.65352                                                
[30]	eval-auc:0.66173                                                
[40]	eval-auc:0.66508                                                
[50]	eval-auc:0.66663                                                
[60]	eval-auc:0.66783                                                
[70]	eval-auc:0.66817                                                
[80]	eval-auc:0.66750                                                
[90]	eval-auc:0.66554                                                
[100]	eval-auc:0.66498                                               
[110]	eval-auc:0.66396                                               
[120]	eval-auc:0.66239                                               
[122]	eval-auc:0.66246                                               
 26%|██▌       | 13/




[0]	eval-auc:0.62405                                                 
[10]	eval-auc:0.68876                                                
[20]	eval-auc:0.68242                                                
[30]	eval-auc:0.67836                                                
[40]	eval-auc:0.67368                                                
[50]	eval-auc:0.67039                                                
[59]	eval-auc:0.66861                                                
 28%|██▊       | 14/50 [04:17<12:26, 20.75s/trial, best loss: -0.712]




[0]	eval-auc:0.60713                                                 
[10]	eval-auc:0.67008                                                
[20]	eval-auc:0.68185                                                
[30]	eval-auc:0.68612                                                
[40]	eval-auc:0.68714                                                
[50]	eval-auc:0.68322                                                
[60]	eval-auc:0.67974                                                
[70]	eval-auc:0.67858                                                
[80]	eval-auc:0.67757                                                
[87]	eval-auc:0.67726                                                
 30%|███       | 15/50 [04:36<11:32, 19.78s/trial, best loss: -0.712]




[0]	eval-auc:0.61349                                                 
[10]	eval-auc:0.67704                                                
[20]	eval-auc:0.67702                                                
[30]	eval-auc:0.67226                                                
[40]	eval-auc:0.66848                                                
[50]	eval-auc:0.66416                                                
[58]	eval-auc:0.66317                                                
 32%|███▏      | 16/50 [04:53<11:11, 19.75s/trial, best loss: -0.712]




[0]	eval-auc:0.62864                                                 
[10]	eval-auc:0.69818                                                
[20]	eval-auc:0.70745                                                
[30]	eval-auc:0.70991                                                
[40]	eval-auc:0.70553                                                
[50]	eval-auc:0.70270                                                
[60]	eval-auc:0.70004                                                
[70]	eval-auc:0.69780                                                
[79]	eval-auc:0.69500                                                
 34%|███▍      | 17/50 [05:06<10:03, 18.28s/trial, best loss: -0.712]




[0]	eval-auc:0.66608                                                 
[10]	eval-auc:0.71111                                                
[20]	eval-auc:0.70822                                                
[30]	eval-auc:0.70109                                                
[40]	eval-auc:0.69657                                                
[50]	eval-auc:0.69375                                                
[54]	eval-auc:0.69236                                                
 36%|███▌      | 18/50 [05:13<08:56, 16.77s/trial, best loss: -0.712]




[0]	eval-auc:0.65536                                                 
[10]	eval-auc:0.67350                                                
[20]	eval-auc:0.66664                                                
[30]	eval-auc:0.65935                                                
[40]	eval-auc:0.65559                                                
[50]	eval-auc:0.65394                                                
[56]	eval-auc:0.65351                                                
 38%|███▊      | 19/50 [05:21<06:54, 13.38s/trial, best loss: -0.712]




[0]	eval-auc:0.66794                                                 
[10]	eval-auc:0.71933                                                
[20]	eval-auc:0.72736                                                
[30]	eval-auc:0.72803                                                
[40]	eval-auc:0.72917                                                
[50]	eval-auc:0.73005                                                
[60]	eval-auc:0.73078                                                
[70]	eval-auc:0.73172                                                
[80]	eval-auc:0.73212                                                
[90]	eval-auc:0.73258                                                
[100]	eval-auc:0.73294                                               
[110]	eval-auc:0.73333                                               
[120]	eval-auc:0.73350                                               
[130]	eval-auc:0.73365                                               
[140]	eval-auc:0.733




[0]	eval-auc:0.67521                                                 
[10]	eval-auc:0.71966                                                
[20]	eval-auc:0.72812                                                
[30]	eval-auc:0.72910                                                
[40]	eval-auc:0.72907                                                
[50]	eval-auc:0.72995                                                
[60]	eval-auc:0.73096                                                
[70]	eval-auc:0.73145                                                
[80]	eval-auc:0.73189                                                
[90]	eval-auc:0.73204                                                
[100]	eval-auc:0.73252                                               
[110]	eval-auc:0.73289                                               
[120]	eval-auc:0.73301                                               
[130]	eval-auc:0.73324                                               
[140]	eval-auc:0.733




[0]	eval-auc:0.66813                                                 
[10]	eval-auc:0.71752                                                
[20]	eval-auc:0.72650                                                
[30]	eval-auc:0.72684                                                
[40]	eval-auc:0.72700                                                
[50]	eval-auc:0.72808                                                
[60]	eval-auc:0.72922                                                
[70]	eval-auc:0.72977                                                
[80]	eval-auc:0.73030                                                
[90]	eval-auc:0.73070                                                
[100]	eval-auc:0.73106                                               
[110]	eval-auc:0.73158                                               
[120]	eval-auc:0.73195                                               
[130]	eval-auc:0.73226                                               
[140]	eval-auc:0.732




[0]	eval-auc:0.67560                                                 
[10]	eval-auc:0.72376                                                
[20]	eval-auc:0.73006                                                
[30]	eval-auc:0.73077                                                
[40]	eval-auc:0.73225                                                
[50]	eval-auc:0.73252                                                
[60]	eval-auc:0.73239                                                
[70]	eval-auc:0.73244                                                
[80]	eval-auc:0.73253                                                
[90]	eval-auc:0.73226                                                
[100]	eval-auc:0.73217                                               
[110]	eval-auc:0.73213                                               
[114]	eval-auc:0.73219                                               
 46%|████▌     | 23/50 [06:56<10:19, 22.94s/trial, best loss: -0.734]




[0]	eval-auc:0.61092                                                 
[10]	eval-auc:0.68293                                                
[20]	eval-auc:0.69572                                                
[30]	eval-auc:0.69700                                                
[40]	eval-auc:0.69651                                                
[50]	eval-auc:0.69508                                                
[60]	eval-auc:0.69435                                                
[70]	eval-auc:0.69245                                                
[79]	eval-auc:0.69074                                                
 48%|████▊     | 24/50 [07:36<09:23, 21.68s/trial, best loss: -0.734]




[0]	eval-auc:0.63480                                                 
[10]	eval-auc:0.69502                                                
[20]	eval-auc:0.70137                                                
[30]	eval-auc:0.69725                                                
[40]	eval-auc:0.69392                                                
[50]	eval-auc:0.69066                                                
[60]	eval-auc:0.68793                                                
[66]	eval-auc:0.68617                                                
 50%|█████     | 25/50 [08:11<11:54, 28.57s/trial, best loss: -0.734]




[0]	eval-auc:0.67609                                                 
[10]	eval-auc:0.72341                                                
[20]	eval-auc:0.72901                                                
[30]	eval-auc:0.72987                                                
[40]	eval-auc:0.73020                                                
[50]	eval-auc:0.73097                                                
[60]	eval-auc:0.73217                                                
[70]	eval-auc:0.73265                                                
[80]	eval-auc:0.73286                                                
[90]	eval-auc:0.73289                                                
[100]	eval-auc:0.73304                                               
[110]	eval-auc:0.73334                                               
[120]	eval-auc:0.73344                                               
[130]	eval-auc:0.73350                                               
[140]	eval-auc:0.733




[0]	eval-auc:0.62085                                                 
[10]	eval-auc:0.68257                                                
[20]	eval-auc:0.69253                                                
[30]	eval-auc:0.69360                                                
[40]	eval-auc:0.69153                                                
[50]	eval-auc:0.69043                                                
[60]	eval-auc:0.68997                                                
[65]	eval-auc:0.68842                                                
 54%|█████▍    | 27/50 [09:16<11:30, 30.01s/trial, best loss: -0.734]




[0]	eval-auc:0.63879                                                 
[10]	eval-auc:0.69498                                                
[20]	eval-auc:0.69101                                                
[30]	eval-auc:0.68523                                                
[40]	eval-auc:0.68108                                                
[50]	eval-auc:0.67654                                                
[58]	eval-auc:0.67420                                                
 56%|█████▌    | 28/50 [10:02<12:29, 34.05s/trial, best loss: -0.734]




[0]	eval-auc:0.65392                                                 
[10]	eval-auc:0.70700                                                
[20]	eval-auc:0.71123                                                
[30]	eval-auc:0.70857                                                
[40]	eval-auc:0.70524                                                
[50]	eval-auc:0.70300                                                
[60]	eval-auc:0.70111                                                
[67]	eval-auc:0.69963                                                
 58%|█████▊    | 29/50 [10:24<11:57, 34.16s/trial, best loss: -0.734]




[0]	eval-auc:0.62204                                                 
[10]	eval-auc:0.69469                                                
[20]	eval-auc:0.69967                                                
[30]	eval-auc:0.69798                                                
[40]	eval-auc:0.69365                                                
[50]	eval-auc:0.69099                                                
[60]	eval-auc:0.68940                                                
[64]	eval-auc:0.68738                                                
 60%|██████    | 30/50 [10:50<09:52, 29.63s/trial, best loss: -0.734]




[0]	eval-auc:0.61305                                                 
[10]	eval-auc:0.66220                                                
[20]	eval-auc:0.66837                                                
[30]	eval-auc:0.66856                                                
[40]	eval-auc:0.66556                                                
[50]	eval-auc:0.66448                                                
[60]	eval-auc:0.66428                                                
[64]	eval-auc:0.66229                                                
 62%|██████▏   | 31/50 [11:21<09:17, 29.32s/trial, best loss: -0.734]




[0]	eval-auc:0.61224                                                 
[10]	eval-auc:0.68220                                                
[20]	eval-auc:0.69409                                                
[30]	eval-auc:0.69721                                                
[40]	eval-auc:0.69638                                                
[50]	eval-auc:0.69386                                                
[60]	eval-auc:0.69386                                                
[70]	eval-auc:0.69259                                                
[80]	eval-auc:0.69100                                                
 64%|██████▍   | 32/50 [12:11<09:06, 30.39s/trial, best loss: -0.734]




[0]	eval-auc:0.61079                                                 
[10]	eval-auc:0.66438                                                
[20]	eval-auc:0.67794                                                
[30]	eval-auc:0.67674                                                
[40]	eval-auc:0.67327                                                
[50]	eval-auc:0.66867                                                
[60]	eval-auc:0.66581                                                
[67]	eval-auc:0.66455                                                
 66%|██████▌   | 33/50 [12:52<10:23, 36.67s/trial, best loss: -0.734]




[0]	eval-auc:0.60992                                                 
[10]	eval-auc:0.64167                                                
[20]	eval-auc:0.65629                                                
[30]	eval-auc:0.66035                                                
[40]	eval-auc:0.66335                                                
[50]	eval-auc:0.66285                                                
[60]	eval-auc:0.66218                                                
[70]	eval-auc:0.66092                                                
[80]	eval-auc:0.65927                                                
[90]	eval-auc:0.65916                                                
[96]	eval-auc:0.65795                                                
 68%|██████▊   | 34/50 [13:43<10:00, 37.56s/trial, best loss: -0.734]




[0]	eval-auc:0.62017                                                 
[10]	eval-auc:0.68624                                                
[20]	eval-auc:0.69596                                                
[30]	eval-auc:0.69667                                                
[40]	eval-auc:0.69444                                                
[50]	eval-auc:0.69304                                                
[60]	eval-auc:0.69147                                                
[70]	eval-auc:0.68936                                                
[78]	eval-auc:0.68769                                                
 70%|███████   | 35/50 [14:39<10:38, 42.59s/trial, best loss: -0.734]




[0]	eval-auc:0.61975                                                 
[10]	eval-auc:0.67190                                                
[20]	eval-auc:0.66697                                                
[30]	eval-auc:0.66034                                                
[40]	eval-auc:0.65405                                                
[50]	eval-auc:0.65213                                                
[55]	eval-auc:0.65135                                                
 72%|███████▏  | 36/50 [15:56<10:42, 45.89s/trial, best loss: -0.734]




[0]	eval-auc:0.67918                                                 
[10]	eval-auc:0.72364                                                
[20]	eval-auc:0.72973                                                
[30]	eval-auc:0.72877                                                
[40]	eval-auc:0.72852                                                
[50]	eval-auc:0.72786                                                
[60]	eval-auc:0.72741                                                
[68]	eval-auc:0.72686                                                
 74%|███████▍  | 37/50 [16:12<11:46, 54.35s/trial, best loss: -0.734]




[0]	eval-auc:0.62250                                                 
[10]	eval-auc:0.67908                                                
[20]	eval-auc:0.67697                                                
[30]	eval-auc:0.67240                                                
[40]	eval-auc:0.66904                                                
[50]	eval-auc:0.66616                                                
[58]	eval-auc:0.66422                                                
 76%|███████▌  | 38/50 [16:39<08:24, 42.04s/trial, best loss: -0.734]




[0]	eval-auc:0.63698                                                 
[10]	eval-auc:0.70644                                                
[20]	eval-auc:0.71497                                                
[30]	eval-auc:0.71716                                                
[40]	eval-auc:0.71445                                                
[50]	eval-auc:0.71267                                                
[60]	eval-auc:0.71164                                                
[70]	eval-auc:0.70956                                                
[79]	eval-auc:0.70801                                                
 78%|███████▊  | 39/50 [17:22<06:53, 37.58s/trial, best loss: -0.734]




[0]	eval-auc:0.62762                                                 
[10]	eval-auc:0.69497                                                
[20]	eval-auc:0.70154                                                
[30]	eval-auc:0.69976                                                
[40]	eval-auc:0.69703                                                
[50]	eval-auc:0.69459                                                
[60]	eval-auc:0.69301                                                
[66]	eval-auc:0.69086                                                
 80%|████████  | 40/50 [17:59<07:07, 42.72s/trial, best loss: -0.734]




[0]	eval-auc:0.60807                                                 
[10]	eval-auc:0.63698                                                
[20]	eval-auc:0.64307                                                
[30]	eval-auc:0.64082                                                
[40]	eval-auc:0.64076                                                
[50]	eval-auc:0.63943                                                
[60]	eval-auc:0.63810                                                
[70]	eval-auc:0.63759                                                
[72]	eval-auc:0.63789                                                
 82%|████████▏ | 41/50 [18:37<05:47, 38.59s/trial, best loss: -0.734]




[0]	eval-auc:0.61089                                                 
[10]	eval-auc:0.66303                                                
[20]	eval-auc:0.66801                                                
[30]	eval-auc:0.66640                                                
[40]	eval-auc:0.66383                                                
[50]	eval-auc:0.66194                                                
[60]	eval-auc:0.66075                                                
[67]	eval-auc:0.65826                                                
 84%|████████▍ | 42/50 [20:45<05:13, 39.21s/trial, best loss: -0.734]




[0]	eval-auc:0.61927                                                 
[10]	eval-auc:0.67444                                                
[20]	eval-auc:0.67683                                                
[30]	eval-auc:0.67076                                                
[40]	eval-auc:0.66492                                                
[50]	eval-auc:0.66182                                                
[60]	eval-auc:0.66117                                                
[65]	eval-auc:0.65980                                                
 86%|████████▌ | 43/50 [21:11<07:41, 65.96s/trial, best loss: -0.734]




[0]	eval-auc:0.65306                                                 
[10]	eval-auc:0.71543                                                
[20]	eval-auc:0.71862                                                
[30]	eval-auc:0.71664                                                
[40]	eval-auc:0.71528                                                
[50]	eval-auc:0.71480                                                
[60]	eval-auc:0.71343                                                
[70]	eval-auc:0.71291                                                
 88%|████████▊ | 44/50 [21:27<05:13, 52.20s/trial, best loss: -0.734]




[0]	eval-auc:0.62079                                                 
[10]	eval-auc:0.68970                                                
[20]	eval-auc:0.69731                                                
[30]	eval-auc:0.69801                                                
[40]	eval-auc:0.69491                                                
[50]	eval-auc:0.69254                                                
[60]	eval-auc:0.69078                                                
[66]	eval-auc:0.68895                                                
 90%|█████████ | 45/50 [21:50<03:25, 41.06s/trial, best loss: -0.734]




[0]	eval-auc:0.61387                                                 
[10]	eval-auc:0.62516                                                
[20]	eval-auc:0.62298                                                
[30]	eval-auc:0.62132                                                
[40]	eval-auc:0.62349                                                
[50]	eval-auc:0.62330                                                
[58]	eval-auc:0.62225                                                
 92%|█████████▏| 46/50 [22:10<02:24, 36.24s/trial, best loss: -0.734]




[0]	eval-auc:0.60167                                                 
[10]	eval-auc:0.64483                                                
[20]	eval-auc:0.65690                                                
[30]	eval-auc:0.65999                                                
[40]	eval-auc:0.66015                                                
[50]	eval-auc:0.66076                                                
[60]	eval-auc:0.66061                                                
[70]	eval-auc:0.65894                                                
[80]	eval-auc:0.65749                                                
[90]	eval-auc:0.65856                                                
[100]	eval-auc:0.65852                                               
[101]	eval-auc:0.65863                                               
 94%|█████████▍| 47/50 [23:07<01:31, 30.56s/trial, best loss: -0.734]




[0]	eval-auc:0.64125                                                 
[10]	eval-auc:0.71102                                                
[20]	eval-auc:0.71701                                                
[30]	eval-auc:0.71690                                                
[40]	eval-auc:0.71330                                                
[50]	eval-auc:0.71118                                                
[60]	eval-auc:0.70889                                                
[70]	eval-auc:0.70651                                                
[75]	eval-auc:0.70482                                                
 96%|█████████▌| 48/50 [23:41<01:22, 41.26s/trial, best loss: -0.734]

train and save best model

In [None]:
#best params from hyper parameter tuning
best_params = {
    'max_depth': 4,
    'learning_rate': 0.0566,
    'reg_alpha': 0.04744,
    'reg_lambda': 0.04301,
    'min_child_weight': 1.42993,
    'subsample': 0.65727,
    'colsample_bytree': 0.60704,
    'objective': 'binary:logistic',
    'seed': 42,
    'eval_metric': 'auc',
    'scale_pos_weight': 11.38747
}

with mlflow.start_run(run_name="xgboost-final-auc"):

    mlflow.set_tag("model", "XGBoost")
    mlflow.set_tag("engineer", "adeakinwe")
    mlflow.log_param("train_data_path", "../processed_data/X_train.parquet")
    mlflow.log_param("val_data_path", "../processed_data/X_val.parquet")

    mlflow.log_params(best_params)

    # Prepare DMatrix
    dtrain = xgb.DMatrix(X_train, label=y_train)
    dval = xgb.DMatrix(X_val, label=y_val)

    # Train model
    model = xgb.train(
        best_params,
        dtrain,
        num_boost_round=200,
        evals=[(dval, 'eval')],
        early_stopping_rounds=50,
        verbose_eval=10
    )

    # Evaluate
    y_proba = model.predict(dval)
    auc = round(roc_auc_score(y_val, y_proba), 3)
    mlflow.log_metric("auc", auc)

    # Log model (native)
    mlflow.xgboost.log_model(model, artifact_path="models/xgboost_model")

    # Dump model + vectorizer together
    model_bundle = {
        "model": model,
        "vectorizer": dv  # dict_vectorizer
    }

    bundle_path = "../models/xgb_credit_pred.bin"
    with open(bundle_path, "wb") as f_out:
        pickle.dump(model_bundle, f_out)

    mlflow.log_artifact(bundle_path)

load model with mlflow run id and predict

In [None]:
logged_model = 'runs:/2c2f5792316545ed84ddf88b09b072a9/models/xgboost_model'

# Load model as a PyFuncModel.
loaded_model = mlflow.pyfunc.load_model(logged_model)
loaded_model

In [None]:
xgboost_model = mlflow.xgboost.load_model(logged_model)
xgboost_model

In [None]:
y_pred = xgboost_model.predict(dval)

In [None]:
y_pred[:10]