In [1]:
import joblib
import pandas as pd

# Restore the previously fitted classifier from disk.
# NOTE: joblib.load unpickles the file, so only load artifacts from a trusted source.
model = joblib.load("../models/logistic_credit_model.pkl")

# Read the credit dataset. The path points at data/raw/, i.e. the raw file.
# NOTE(review): confirm the saved model expects these raw columns as-is.
df = pd.read_csv("../data/raw/credit_data.csv")

# Separate the "default" target column from the remaining feature columns.
y = df["default"]
X = df.drop(columns="default")


In [2]:
from sklearn.model_selection import train_test_split

# Stratified 80/20 split with a fixed seed, so the same rows land in the
# test set on every run.
# NOTE(review): the model is loaded already fitted, so X_test is only a true
# hold-out if training used this exact split — confirm against the training code.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [3]:
# predict_proba yields one probability column per class; column 1 is kept and
# used downstream as the probability of default.
# NOTE(review): assumes model.classes_ is ordered [0, 1] — confirm.
class_probabilities = model.predict_proba(X_test)
y_pred_prob = class_probabilities[:, 1]


In [4]:
def credit_decision(prob, approve_below=0.30, review_below=0.60):
    """Map a predicted default probability to a three-tier credit decision.

    Parameters
    ----------
    prob : float
        Predicted probability of default for one applicant.
    approve_below : float, default 0.30
        Probabilities strictly below this cut-off are approved.
    review_below : float, default 0.60
        Probabilities at or above ``approve_below`` and strictly below this
        cut-off are sent to manual review. Anything else is rejected.

    Returns
    -------
    str
        One of ``"Low Risk - Approve"``, ``"Medium Risk - Manual Review"``
        or ``"High Risk - Reject"``.

    Raises
    ------
    ValueError
        If ``approve_below`` is greater than ``review_below``, which would
        make the tiers overlap.
    """
    if approve_below > review_below:
        raise ValueError(
            "approve_below must not exceed review_below "
            f"(got {approve_below} > {review_below})"
        )

    if prob < approve_below:
        return "Low Risk - Approve"
    if prob < review_below:
        return "Medium Risk - Manual Review"
    # Also reached when prob is NaN: both comparisons above are False, so a
    # missing score falls through to the most conservative outcome.
    return "High Risk - Reject"


In [5]:
# Build a per-applicant results table: the test features plus the true label,
# the predicted default probability and the resulting credit decision.
# assign() returns a new frame, so X_test itself is left untouched.
results = X_test.assign(
    actual_default=y_test.values,
    default_probability=y_pred_prob,
    decision=lambda frame: frame["default_probability"].apply(credit_decision),
)

# Preview the first rows.
results.head()


Unnamed: 0,ID,LIMIT_BAL,SEX,EDUCATION,MARRIAGE,AGE,PAY_0,PAY_2,PAY_3,PAY_4,...,BILL_AMT6,PAY_AMT1,PAY_AMT2,PAY_AMT3,PAY_AMT4,PAY_AMT5,PAY_AMT6,actual_default,default_probability,decision
6907,6908,50000,1,2,2,46,-1,-1,-1,-1,...,2764,26060,0,3472,2320,1764,2841,0,0.352288,Medium Risk - Manual Review
24575,24576,150000,1,1,1,31,-1,-1,-2,-2,...,11694,0,0,0,0,11694,30000,0,0.251452,Low Risk - Approve
26766,26767,50000,1,2,2,25,0,0,0,0,...,50702,1800,1844,2200,2000,1800,2038,0,0.494108,Medium Risk - Manual Review
2156,2157,290000,2,1,2,25,0,0,0,0,...,230925,15000,10500,10000,15000,7844,23333,1,0.255278,Low Risk - Approve
3179,3180,500000,2,2,1,27,-2,-2,-2,-2,...,10000,9983,13587,10000,10000,10000,25304,0,0.090991,Low Risk - Approve


In [6]:
# Percentage of test-set applicants falling into each decision tier.
results["decision"].value_counts(normalize=True).mul(100)


decision
Medium Risk - Manual Review    58.866667
Low Risk - Approve             21.333333
High Risk - Reject             19.800000
Name: proportion, dtype: float64

In [7]:
# Fraction of test-set applicants whose decision is "Low Risk - Approve".
approval_rate = results["decision"].eq("Low Risk - Approve").mean()
print(f"Approval Rate: {approval_rate*100:.2f}%")


Approval Rate: 21.33%


In [8]:
# Fraction of test-set applicants whose decision is "High Risk - Reject".
rejection_rate = results["decision"].eq("High Risk - Reject").mean()
print(f"Rejection Rate: {rejection_rate*100:.2f}%")


Rejection Rate: 19.80%


In [9]:
from sklearn.metrics import recall_score
import numpy as np

# Sweep classification cut-offs 0.20, 0.25, ..., 0.80 (inclusive).
# The grid is built from integers and then scaled: np.arange with a float
# step has an unreliable length, and the previous np.arange(0.2, 0.8, 0.05)
# only reached 0.80 because of floating-point round-off.
thresholds = np.arange(20, 81, 5) / 100

for t in thresholds:
    # Predict "default" (1) when the default probability reaches the cut-off.
    preds = (y_pred_prob >= t).astype(int)
    # Recall on the positive class: share of actual defaulters that are flagged.
    recall = recall_score(y_test, preds)
    print(f"Threshold {t:.2f} → Default Recall: {recall:.2f}")


Threshold 0.20 → Default Recall: 0.95
Threshold 0.25 → Default Recall: 0.92
Threshold 0.30 → Default Recall: 0.88
Threshold 0.35 → Default Recall: 0.81
Threshold 0.40 → Default Recall: 0.75
Threshold 0.45 → Default Recall: 0.69
Threshold 0.50 → Default Recall: 0.61
Threshold 0.55 → Default Recall: 0.52
Threshold 0.60 → Default Recall: 0.43
Threshold 0.65 → Default Recall: 0.38
Threshold 0.70 → Default Recall: 0.32
Threshold 0.75 → Default Recall: 0.25
Threshold 0.80 → Default Recall: 0.19
