In [1]:
import joblib
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    roc_curve,
    roc_auc_score,
    precision_recall_curve,
    average_precision_score,
    confusion_matrix,
    classification_report
)

In [2]:
# Load the "credit-g" dataset from OpenML as a pandas DataFrame.
data = fetch_openml(name='credit-g', version=1, as_frame=True)
df = data.frame

X = df.drop('class', axis=1)

# Encode the target so that 'bad' is the positive class (1) and 'good' is 0.
y = df['class'].map({'good': 0, 'bad': 1})

# Any label outside the mapping becomes NaN; fail loudly instead of letting it
# leak into the split and the metrics.
assert y.notna().all(), "Unexpected label in 'class' column; expected only 'good'/'bad'"

# If OpenML delivers 'class' as a pandas categorical, the mapped result may
# still be categorical; cast so downstream code receives plain integer labels.
y = y.astype(int)

# Hold out 20% of the rows, stratified on the target, with a fixed seed so the
# split is reproducible.
# NOTE(review): this should reproduce the split used when the saved model was
# trained, otherwise X_test may overlap its training rows — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42,
    stratify=y
)

In [3]:
# Location of the serialized model, relative to the notebook's working directory.
MODEL_PATH = "best_credit_model.pkl"

# SECURITY: joblib.load unpickles the file and can therefore execute arbitrary
# code; only load artifacts that come from a trusted source.
try:
    best_model = joblib.load(MODEL_PATH)
except FileNotFoundError as exc:
    # Re-raise with guidance: the bare errno message does not say what to do.
    raise FileNotFoundError(
        f"Model artifact {MODEL_PATH!r} was not found in the working directory. "
        "Train and save the model first, or point MODEL_PATH at the existing file."
    ) from exc

FileNotFoundError: [Errno 2] No such file or directory: 'best_credit_model.pkl'

In [None]:
# Label that denotes the positive ('bad') class in y, per the mapping above.
POSITIVE_LABEL = 1

# Resolve the probability column from the model's own class ordering instead of
# assuming it is column 1; a model fitted on differently encoded labels would
# otherwise yield probabilities for the wrong class without any error.
model_classes = list(best_model.classes_)
if POSITIVE_LABEL not in model_classes:
    raise ValueError(
        f"best_model.classes_ is {model_classes!r}; expected it to contain "
        f"the positive label {POSITIVE_LABEL!r}."
    )

y_probs = best_model.predict_proba(X_test)[:, model_classes.index(POSITIVE_LABEL)]

In [None]:
# ROC analysis on the held-out set. The threshold array gets its own name so it
# cannot be mistaken for (or clobber) the cost-sweep `thresholds` grid that a
# later cell defines.
fpr, tpr, roc_thresholds = roc_curve(y_test, y_probs)
roc_auc = roc_auc_score(y_test, y_probs)

roc_fig, roc_ax = plt.subplots()
roc_ax.plot(fpr, tpr, label=f"Model (AUC = {roc_auc:.3f})")
# Diagonal reference line: the curve of a classifier that guesses at random.
roc_ax.plot([0, 1], [0, 1], linestyle="--", color="grey", label="Chance")
roc_ax.set_xlabel("False Positive Rate")
roc_ax.set_ylabel("True Positive Rate")
roc_ax.set_title("ROC Curve")
roc_ax.legend(loc="lower right")
plt.show()

print("ROC AUC Score:", roc_auc)

In [None]:
# Precision-recall analysis on the held-out set; the summary number printed
# below is the average precision score.
pr_auc = average_precision_score(y_test, y_probs)
precision, recall, pr_thresholds = precision_recall_curve(y_test, y_probs)

pr_fig, pr_ax = plt.subplots()
pr_ax.plot(recall, precision)
pr_ax.set_xlabel("Recall")
pr_ax.set_ylabel("Precision")
pr_ax.set_title("Precision-Recall Curve")
plt.show()

print("PR AUC Score:", pr_auc)

In [None]:
def business_cost(y_true, y_pred, fp_cost=50000, fn_cost=5000):
    """Return the total misclassification cost of binary 0/1 predictions.

    Parameters
    ----------
    y_true : array-like of 0/1
        Ground-truth labels, where 1 is the positive class.
    y_pred : array-like of 0/1
        Predicted labels, aligned with ``y_true`` by position.
    fp_cost : number, default 50000
        Cost charged per false positive (true 0, predicted 1).
    fn_cost : number, default 5000
        Cost charged per false negative (true 1, predicted 0).

    Returns
    -------
    number
        ``fp * fp_cost + fn * fn_cost``.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not have the same number of elements.

    Notes
    -----
    The errors are counted directly rather than by unpacking a confusion
    matrix, so the function also works when only one class occurs in the
    inputs (a degenerate matrix cannot be unpacked into four cells).

    NOTE(review): in this notebook 1 encodes 'bad' credit, so a false negative
    is a bad applicant that gets approved. The default weights price that at a
    tenth of a false positive — confirm this is intended and not swapped.
    """
    # Compare by position as flat arrays, ignoring any pandas index.
    actual = np.asarray(y_true).ravel()
    predicted = np.asarray(y_pred).ravel()
    if actual.shape != predicted.shape:
        raise ValueError(
            "y_true and y_pred must have the same length, got "
            f"{actual.shape[0]} and {predicted.shape[0]}"
        )

    fp = np.count_nonzero((actual == 0) & (predicted == 1))
    fn = np.count_nonzero((actual == 1) & (predicted == 0))

    return (fp * fp_cost) + (fn * fn_cost)

In [None]:
# Baseline decision rule: predict the positive class once the score reaches 0.5.
DEFAULT_THRESHOLD = 0.5

default_flags = y_probs >= DEFAULT_THRESHOLD
y_default = default_flags.astype(int)

# Cost of the baseline rule; later cells compare the tuned threshold against it.
default_cost = business_cost(y_test, y_default)

print("Default Threshold Cost:", default_cost)
print(confusion_matrix(y_test, y_default))

In [None]:
# Candidate decision thresholds 0.10, 0.15, ..., 0.85 (16 values). Building the
# grid from integers and scaling afterwards avoids the rounding drift and the
# unstable endpoint that a float-step np.arange can produce.
thresholds = np.arange(10, 90, 5) / 100

# Business cost of the test-set predictions at each candidate threshold.
# NOTE(review): the costs are measured on y_test, so a threshold chosen from
# this sweep is tuned on the same data it is later evaluated on; a separate
# validation split would give an unbiased estimate.
costs = [
    business_cost(y_test, (y_probs >= thresh).astype(int))
    for thresh in thresholds
]

In [None]:
# Visualise how the total business cost changes across the swept thresholds.
cost_fig, cost_ax = plt.subplots()
cost_ax.plot(thresholds, costs)
cost_ax.set_xlabel("Threshold")
cost_ax.set_ylabel("Business Cost")
cost_ax.set_title("Threshold vs Business Cost")
plt.show()

In [None]:
# Position of the cheapest entry in the sweep; np.argmin returns the first
# minimum when several thresholds tie.
optimal_index = np.argmin(costs)

# Read the threshold and its cost from the same position.
optimal_threshold, optimal_cost = thresholds[optimal_index], costs[optimal_index]

print("Optimal Threshold:", optimal_threshold)
print("Minimum Business Cost:", optimal_cost)

In [None]:
# Re-label the test set using the cost-minimising threshold and summarise it.
optimal_flags = y_probs >= optimal_threshold
y_optimal = optimal_flags.astype(int)

optimal_confusion = confusion_matrix(y_test, y_optimal)
optimal_report = classification_report(y_test, y_optimal)

print("Confusion Matrix at Optimal Threshold:")
print(optimal_confusion)

print("\nClassification Report:")
print(optimal_report)

In [None]:
# Relative saving of the tuned threshold over the default-threshold baseline,
# expressed as a percentage of the baseline cost.
if default_cost == 0:
    # A zero baseline leaves nothing to reduce; report 0% rather than dividing
    # by zero.
    reduction_percent = 0.0
else:
    reduction_percent = ((default_cost - optimal_cost) / default_cost) * 100

print("Business Cost Reduced by: {:.2f}%".format(reduction_percent))