In [1]:

import pandas as pd
import numpy as np
import pickle
import mlflow
import xgboost as xgb
from sklearn.metrics import roc_auc_score
from mlflow.tracking import MlflowClient

# ======== CONFIGURATION ========
# MLflow run that holds the model bundle and receives the logged test metric
run_id = "2c2f5792316545ed84ddf88b09b072a9"
# Path, inside that run's artifacts, of the pickled model bundle
model_bundle_artifact_path = "xgb_credit_pred.bin"

# Evaluation inputs: feature table (Parquet) and labels (plain text)
x_test_path = "../processed_data/X_test.parquet"
y_test_path = "../processed_data/y_test.txt"


In [3]:

# ======== EVALUATION FUNCTION ========
def evaluate_model(
    x_test_path,
    y_test_path,
    run_id,
    model_bundle_artifact_path,
    tracking_uri="sqlite:///../cred_risk_sqlite_mlflow.db",
):
    """Score the model bundle stored in an MLflow run on the test set.

    The bundle is downloaded from the run's artifacts, unpickled, and used to
    transform the test features and predict scores. The resulting ROC AUC is
    printed, logged to the same run as ``test_auc``, and returned.

    Parameters
    ----------
    x_test_path : str
        Path to the Parquet file with the test features. It must contain the
        categorical and numeric columns listed in the function body.
    y_test_path : str
        Path to a text file readable by ``np.loadtxt`` holding the test labels,
        one per test row.
    run_id : str
        MLflow run that holds the model bundle artifact; the metric is logged
        back to this run.
    model_bundle_artifact_path : str
        Artifact path (relative to the run) of the pickled bundle. The bundle
        is indexed with the keys ``"model"`` and ``"vectorizer"``.
    tracking_uri : str, optional
        MLflow tracking URI. Defaults to the project's local SQLite store.

    Returns
    -------
    float
        ROC AUC of the predictions on the test set.

    Raises
    ------
    ValueError
        If the number of labels differs from the number of feature rows.
    """
    # Load test data
    print("Loading test data...")
    X_test = pd.read_parquet(x_test_path)
    # np.loadtxt yields a 0-d array for a single-value file; normalise so
    # len() is always defined.
    y_test = np.atleast_1d(np.loadtxt(y_test_path))

    # Fail fast, before the artifact download and prediction, instead of
    # letting the metric computation surface the mismatch at the very end.
    if len(y_test) != len(X_test):
        raise ValueError(
            f"Test labels ({len(y_test)}) and test features ({len(X_test)}) "
            "have different numbers of rows."
        )

    # Set tracking URI (global MLflow setting; the client picks it up)
    mlflow.set_tracking_uri(tracking_uri)
    client = MlflowClient()

    # Download and load model bundle
    print("Loading model + vectorizer bundle from MLflow...")
    bundle_path = client.download_artifacts(run_id, model_bundle_artifact_path)
    # SECURITY: pickle.load executes arbitrary code from the file. Only
    # evaluate runs whose artifacts come from a trusted tracking store.
    with open(bundle_path, "rb") as f:
        model_bundle = pickle.load(f)

    model = model_bundle["model"]
    dv = model_bundle["vectorizer"]

    # Transform test data
    # NOTE(review): presumably these must match the columns the vectorizer
    # was fitted on during training; confirm against the training notebook.
    cat_cols = ['AGE_GROUP', 'YEARS_EMPLOYED_GROUP', 'PHONE_CHANGE_GROUP']
    num_cols = [
        'REGION_RATING_CLIENT_W_CITY', 'REGION_RATING_CLIENT',
        'EXT_SOURCE_3', 'EXT_SOURCE_2', 'EXT_SOURCE_1', 'FLOORSMAX_AVG'
    ]
    X_test_transformed = dv.transform(X_test[cat_cols + num_cols].to_dict(orient="records"))

    # Predictions (used as scores for the AUC)
    print("Making predictions...")
    dtest = xgb.DMatrix(X_test_transformed)
    y_pred_proba = model.predict(dtest)

    # AUC metric
    auc = float(roc_auc_score(y_test, y_pred_proba))
    print(f"Test AUC: {auc:.4f}")

    # Log AUC back to MLflow
    print("Logging test metrics to MLflow...")
    client.log_metric(run_id, "test_auc", auc)

    print("Evaluation complete.")
    return auc


In [4]:

# ======== RUN EVALUATION ========
evaluate_model(
    x_test_path=x_test_path,
    y_test_path=y_test_path,
    run_id=run_id,
    model_bundle_artifact_path=model_bundle_artifact_path,
)


Loading test data...
Loading model + vectorizer bundle from MLflow...
Making predictions...
Test AUC: 0.7332
Logging test metrics to MLflow...
Evaluation complete.
