In [13]:

import os

import numpy as np
import pandas as pd
import joblib
import cloudpickle
import json

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.metrics import (
    accuracy_score,
    f1_score,
    roc_auc_score,
    average_precision_score,
    precision_recall_fscore_support,
    confusion_matrix,
    roc_curve,
    RocCurveDisplay,
    precision_recall_curve,
    PrecisionRecallDisplay,
)
from scipy.stats import ks_2samp

In [None]:

# ---------------- CONFIG ----------------
saved_folder = 'classification_package'

MODEL_PATH = os.path.join(saved_folder,"model.pkl")
PREPROCESSOR_PATH = os.path.join(saved_folder,"preprocessor.pkl")
REFERENCE_DATA_PATH = os.path.join(saved_folder,"X_rest.pkl")  
REFERENCE_TARGET_PATH = os.path.join(saved_folder,"Y_rest.pkl")  
UNSEEN_DATA_PATH = os.path.join(saved_folder,"X_test.pkl")
UNSEEN_TARGET_PATH = os.path.join(saved_folder,"Y_test.pkl")


# Load
with open(PREPROCESSOR_PATH, 'rb') as f:
    preprocessor = cloudpickle.load(f)
model = joblib.load(MODEL_PATH)

print("Loading unseen data...")
X_test = joblib.load(UNSEEN_DATA_PATH)
Y_test = joblib.load(UNSEEN_TARGET_PATH)

In [None]:
# ---------------- PREPROCESS ----------------

print("Applying preprocessing...")
X_test_transformed = preprocessor.transform(X_test)


# ---------------- PREDICT ----------------

print("Generating predictions...")
y_pred = model.predict(X_test_transformed)
y_prob = model.predict_proba(X_test_transformed)[:, 1]  # for ROC/PR

Applying preprocessing...
Generating predictions...
Basic metrics:
Accuracy: 0.43636363636363634




ValueError: multi_class must be in ('ovo', 'ovr')

In [None]:
print("Accuracy:", accuracy_score(Y_test, y_pred))
print("ROC AUC:", roc_auc_score(Y_test, y_prob))
print("PR AUC (Average Precision):", average_precision_score(Y_test, y_prob))

prec, rec, f1, _ = precision_recall_fscore_support(Y_test, y_pred, average="binary", zero_division=0)
print(f"Precision: {prec:.4f} | Recall: {rec:.4f} | F1: {f1:.4f}")

print("Confusion matrix:")
print(confusion_matrix(Y_test, y_pred))

Accuracy: 0.43636363636363634


ValueError: multi_class must be in ('ovo', 'ovr')

In [None]:
# Curves
fig, ax = plt.subplots(1, 2, figsize=(12, 5))
RocCurveDisplay.from_predictions(Y_test, y_prob, name="ROC", ax=ax[0])
ax[0].set_title("ROC Curve")
PrecisionRecallDisplay.from_predictions(Y_test, y_prob, name="PR", ax=ax[1])
ax[1].set_title("Precision-Recall Curve")
plt.tight_layout()
plt.show()

In [None]:

print("Checking drift...")
reference_df = pd.read_csv(REFERENCE_DATA_PATH)
psi_results = {}
ks_results = {}

for col in X_test.columns:
    ref = reference_df[col]
    new = X_unseen[col]
    # PSI (simplified)
    bins = pd.qcut(ref, q=10, duplicates="drop")
    ref_dist = bins.value_counts(normalize=True)
    new_bins = pd.cut(new, bins=bins.cat.categories)
    new_dist = new_bins.value_counts(normalize=True)
    psi = sum((new_dist - ref_dist) * np.log(new_dist / ref_dist))
    psi_results[col] = psi
    # KS
    ks_stat, ks_p = ks_2samp(ref, new)
    ks_results[col] = {"ks_stat": ks_stat, "p_value": ks_p}
