In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, roc_auc_score

# Example dataset
data = {
    "credit_score": [700, 650, 600, 550, 750, 800, 680, 620, 690, 710],
    "monthly_income": [5000, 4800, 4500, 4200, 5500, 6000, 4700, 4400, 4600, 5200],
    "account_age_months": [24, 36, 12, 48, 60, 72, 30, 18, 36, 54],
    "credit_limit": [10000, 8000, 6000, 4000, 12000, 15000, 7000, 5000, 6500, 9000],
    "current_balance": [2000, 3000, 4000, 3500, 1000, 500, 2500, 4500, 3200, 2800],
    "payment_history": [1, 1, 0, 0, 1, 1, 1, 0, 1, 1],  # 1: Good, 0: Bad
    "delinquency_30_days": [0, 1, 2, 3, 0, 0, 1, 2, 1, 0],
    "defaulted": [0, 0, 1, 1, 0, 0, 0, 1, 0, 0],  # 1: Defaulted, 0: Not Defaulted
}
df = pd.DataFrame(data)

# Feature engineering
df["utilization_rate"] = df["current_balance"] / df["credit_limit"]

# Select features and target variable
features = [
    "credit_score",
    "monthly_income",
    "account_age_months",
    "credit_limit",
    "utilization_rate",
    "payment_history",
    "delinquency_30_days",
]
target = "defaulted"

X = df[features]
y = df[target]

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# Train logistic regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict probabilities of default
y_pred_prob = model.predict_proba(X_test)[:, 1]  # Probability of default

# Convert probabilities to percentages
y_pred_percent = y_pred_prob * 100
print(y_pred_percent)

# Evaluate the model
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
# print(f"ROC-AUC Score: {roc_auc_score(y_test, y_pred_prob):.2f}")

# Add predicted PD as a percentage to the dataframe
df["predicted_PD"] = model.predict_proba(X)[:, 1] * 100
df

[4.72259105e+01 3.34030432e-12 8.37900506e-71]
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         3

    accuracy                           1.00         3
   macro avg       1.00      1.00      1.00         3
weighted avg       1.00      1.00      1.00         3



Unnamed: 0,credit_score,monthly_income,account_age_months,credit_limit,current_balance,payment_history,delinquency_30_days,defaulted,utilization_rate,predicted_PD
0,700,5000,24,10000,2000,1,0,0,0.2,3.643008e-31
1,650,4800,36,8000,3000,1,1,0,0.375,3.340304e-12
2,600,4500,12,6000,4000,0,2,1,0.666667,99.98874
3,550,4200,48,4000,3500,0,3,1,0.875,100.0
4,750,5500,60,12000,1000,1,0,0,0.083333,1.799701e-45
5,800,6000,72,15000,500,1,0,0,0.033333,8.379005e-71
6,680,4700,30,7000,2500,1,1,0,0.357143,0.01000953
7,620,4400,18,5000,4500,0,2,1,0.9,100.0
8,690,4600,36,6500,3200,1,1,0,0.492308,47.22591
9,710,5200,54,9000,2800,1,0,0,0.311111,5.099569e-17
