# ⚠️ Loan Default Prediction with XGBoost

In [10]:

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score


In [11]:

# Load enhanced dataset
df = pd.read_csv("/Users/devanshudixit/Desktop/projects/BankIQ/data/processed/enhanced_customers.csv")

# Drop rows with missing target
df = df.dropna(subset=["HighRiskLoan"])

# Features and target
features = [
    "LoanBurdenScore", "AvgLoanAmount", "AvgEMItoIncomeRatio",
    "CreditScore", "Income", "Age"
]
target = "HighRiskLoan"

X = df[features]
y = df[target]

# Check class balance
print("Class Distribution:")
print(y.value_counts())

# Calculate scale_pos_weight
scale_pos_weight = (y == 0).sum() / (y == 1).sum()
print("scale_pos_weight:", round(scale_pos_weight, 2))

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


Class Distribution:
HighRiskLoan
0.0    900
1.0    100
Name: count, dtype: int64
scale_pos_weight: 9.0


In [12]:

# Train XGBoost with imbalance handling
model = XGBClassifier(scale_pos_weight=scale_pos_weight, use_label_encoder=False, eval_metric="logloss")
model.fit(X_train, y_train)

# Predict
# y_pred = model.predict(X_test)
y_prob = model.predict_proba(X_test)[:, 1]
y_pred = (y_prob > 0.3).astype(int)



Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [13]:

print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

print("ROC AUC Score:", roc_auc_score(y_test, y_prob))


Confusion Matrix:
[[169  12]
 [ 15   4]]

Classification Report:
              precision    recall  f1-score   support

         0.0       0.92      0.93      0.93       181
         1.0       0.25      0.21      0.23        19

    accuracy                           0.86       200
   macro avg       0.58      0.57      0.58       200
weighted avg       0.85      0.86      0.86       200

ROC AUC Score: 0.8711834835708054
