<a href="https://colab.research.google.com/github/gburv25-collab/AI-_class_loan_approval/blob/main/Loan_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
#  Code: Train a PD Model and Make Approval Decisions
# Python (scikit-learn) - Logistic Regression for Credit Risk (PD)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix, classification_report
from sklearn.utils.class_weight import compute_class_weight
# ---- Data ------------------------------------------------------------
# Read the loan applications table and separate model inputs from the label.
df = pd.read_csv("/content/loan_applications.csv")
target_col = "default_12m"
# loan_term_months is listed with the categoricals, so it is one-hot encoded
# rather than scaled.
categorical = ["purpose", "home_ownership", "channel", "region", "loan_term_months"]
numeric = [
    "age", "annual_income", "employment_length", "credit_score", "debt_to_income",
    "num_open_accounts", "delinquencies_2y", "inquiries_6m", "loan_amount", "interest_rate",
]
feature_cols = categorical + numeric
X = df[feature_cols]
y = df[target_col].astype(int)

# Hold out 25% of the rows for evaluation; stratify on the label so both
# partitions keep the same class mix, and fix the seed for repeatability.
split_parts = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts

# ---- Class weights ---------------------------------------------------
# "balanced" weights are derived from the training labels only.
classes = np.array([0, 1])
cw = compute_class_weight("balanced", classes=classes, y=y_train)
cw_dict = dict(zip(classes, cw))

# ---- Model -----------------------------------------------------------
# Numeric columns are standardised; categorical columns are one-hot encoded,
# with categories unseen during fit ignored at transform time.
numeric_step = ("num", StandardScaler(), numeric)
categorical_step = ("cat", OneHotEncoder(handle_unknown="ignore"), categorical)
preprocess = ColumnTransformer([numeric_step, categorical_step])
lr = LogisticRegression(max_iter=2000, class_weight=cw_dict, solver="lbfgs")
pipe = Pipeline([("prep", preprocess), ("model", lr)])
pipe.fit(X_train, y_train)

# ---- Evaluation ------------------------------------------------------
# Second column of predict_proba is used as the default score (label 1).
test_proba = pipe.predict_proba(X_test)
y_proba = test_proba[:, 1]
roc_auc = roc_auc_score(y_test, y_proba)
print("ROC-AUC:", roc_auc)
pr_auc = average_precision_score(y_test, y_proba)
print("PR-AUC :", pr_auc)

# ---- Approval policy: approve if PD < 0.05 ---------------------------
# NOTE(review): the model is fit with balanced class weights, so these scores
# may not track the observed default rate -- confirm the cutoff is meant to be
# applied to the weighted scores.
pd_cutoff = 0.05
reject_mask = y_proba >= pd_cutoff
y_pred_policy = reject_mask.astype(int)  # 1 = predict default (reject)
policy_cm = confusion_matrix(y_test, y_pred_policy)
print("Confusion Matrix (reject=1 at cutoff 0.05):\n", policy_cm)
policy_report = classification_report(y_test, y_pred_policy, digits=3)
print(policy_report)

ROC-AUC: 0.6527425783531493
PR-AUC : 0.30859425653347505
Confusion Matrix (reject=1 at cutoff 0.05):
 [[   1 2016]
 [   0  483]]
              precision    recall  f1-score   support

           0      1.000     0.000     0.001      2017
           1      0.193     1.000     0.324       483

    accuracy                          0.194      2500
   macro avg      0.597     0.500     0.162      2500
weighted avg      0.844     0.194     0.063      2500



In [17]:
#  Code: Train a PD Model and Make Approval Decisions
# Python (scikit-learn) - Logistic Regression for Credit Risk (PD)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix, classification_report
from sklearn.utils.class_weight import compute_class_weight
# Load the loan applications table and separate model inputs from the label.
df = pd.read_csv("/content/loan_applications.csv")
target_col = "default_12m"
# loan_term_months is listed with the categoricals, so it is one-hot encoded.
categorical = ["purpose", "home_ownership", "channel", "region", "loan_term_months"]
numeric = ["age", "annual_income", "employment_length", "credit_score", "debt_to_income",
          "num_open_accounts", "delinquencies_2y", "inquiries_6m", "loan_amount", "interest_rate"]
X = df[categorical + numeric]
y = df[target_col].astype(int)
# 25% hold-out, stratified on the label, with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42, stratify=y)
# Class weights for imbalance (computed from the training labels only)
classes = np.array([0,1])
cw = compute_class_weight("balanced", classes=classes, y=y_train)
cw_dict = {cls:w for cls,w in zip(classes, cw)}
# Scale numeric columns; one-hot encode categoricals, ignoring categories
# that were not seen during fit.
preprocess = ColumnTransformer([
   ("num", StandardScaler(), numeric),
   ("cat", OneHotEncoder(handle_unknown="ignore"), categorical),
])
lr = LogisticRegression(max_iter=2000, class_weight=cw_dict, solver="lbfgs")
pipe = Pipeline([("prep", preprocess), ("model", lr)])
pipe.fit(X_train, y_train)
# Evaluate: second column of predict_proba is used as the default score.
y_proba = pipe.predict_proba(X_test)[:,1]
print("ROC-AUC:", roc_auc_score(y_test, y_proba))
print("PR-AUC :", average_precision_score(y_test, y_proba))
# Policy: approve if PD < 0.8
# NOTE(review): confirm 0.8 (rather than 0.08) is the intended cutoff.
pd_cutoff = 0.8
y_pred_policy = (y_proba >= pd_cutoff).astype(int)  # 1 = predict default (reject)
# The label is built from pd_cutoff so the printed cutoff always matches the
# threshold that was actually applied.
print(f"Confusion Matrix (reject=1 at cutoff {pd_cutoff}):\n", confusion_matrix(y_test, y_pred_policy))
print(classification_report(y_test, y_pred_policy, digits=3))

ROC-AUC: 0.6527425783531493
PR-AUC : 0.30859425653347505
Confusion Matrix (reject=1 at cutoff 0.08):
 [[1998   19]
 [ 465   18]]
              precision    recall  f1-score   support

           0      0.811     0.991     0.892      2017
           1      0.486     0.037     0.069       483

    accuracy                          0.806      2500
   macro avg      0.649     0.514     0.481      2500
weighted avg      0.748     0.806     0.733      2500



In [18]:
#  Code: Train a PD Model and Make Approval Decisions
# Python (scikit-learn) - Logistic Regression for Credit Risk (PD)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score, average_precision_score, confusion_matrix, classification_report
from sklearn.utils.class_weight import compute_class_weight
# == 1. Load data and choose columns ===================================
df = pd.read_csv("/content/loan_applications.csv")
target_col = "default_12m"
# Columns to one-hot encode (loan_term_months is handled as a category here).
categorical = [
    "purpose",
    "home_ownership",
    "channel",
    "region",
    "loan_term_months",
]
# Columns to standardise.
numeric = [
    "age",
    "annual_income",
    "employment_length",
    "credit_score",
    "debt_to_income",
    "num_open_accounts",
    "delinquencies_2y",
    "inquiries_6m",
    "loan_amount",
    "interest_rate",
]
model_columns = categorical + numeric
X = df[model_columns]
y = df[target_col].astype(int)

# == 2. Train / test split =============================================
# A quarter of the rows is held out; the split is stratified on the label
# and seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
    stratify=y,
)

# == 3. Class weights for imbalance ====================================
# Weights come from the training labels only and are handed to the model
# as an explicit {label: weight} mapping.
classes = np.array([0, 1])
cw = compute_class_weight("balanced", classes=classes, y=y_train)
cw_dict = dict(zip(classes, cw))

# == 4. Pipeline: preprocessing + logistic regression ==================
scaler = StandardScaler()
encoder = OneHotEncoder(handle_unknown="ignore")  # unseen categories are ignored
preprocess = ColumnTransformer([
    ("num", scaler, numeric),
    ("cat", encoder, categorical),
])
lr = LogisticRegression(max_iter=2000, class_weight=cw_dict, solver="lbfgs")
pipeline_steps = [("prep", preprocess), ("model", lr)]
pipe = Pipeline(pipeline_steps)
pipe.fit(X_train, y_train)

# == 5. Ranking metrics on the hold-out set ============================
# The second predict_proba column is used as the default score (label 1).
holdout_proba = pipe.predict_proba(X_test)
y_proba = holdout_proba[:, 1]
print("ROC-AUC:", roc_auc_score(y_test, y_proba))
print("PR-AUC :", average_precision_score(y_test, y_proba))

# == 6. Policy: approve if PD < 0.12 ===================================
# NOTE(review): scores come from a class-weighted fit and may not match the
# observed default rate -- confirm the cutoff is intended for these scores.
pd_cutoff = 0.12
at_or_above_cutoff = y_proba >= pd_cutoff
y_pred_policy = at_or_above_cutoff.astype(int)  # 1 = predict default (reject)
cutoff_confusion = confusion_matrix(y_test, y_pred_policy)
print("Confusion Matrix (reject=1 at cutoff 0.12):\n", cutoff_confusion)
cutoff_report = classification_report(y_test, y_pred_policy, digits=3)
print(cutoff_report)

ROC-AUC: 0.6527425783531493
PR-AUC : 0.30859425653347505
Confusion Matrix (reject=1 at cutoff 0.12):
 [[  18 1999]
 [   3  480]]
              precision    recall  f1-score   support

           0      0.857     0.009     0.018      2017
           1      0.194     0.994     0.324       483

    accuracy                          0.199      2500
   macro avg      0.525     0.501     0.171      2500
weighted avg      0.729     0.199     0.077      2500

