In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

In [3]:
# 1) Read the raw loan book from the CSV that sits next to this notebook
df = pd.read_csv(filepath_or_buffer='Loan_Data.csv')

In [4]:
# 1a) Derived ratio features: each balance column is divided by income.
#     'debt_to_income'    <- total_debt_outstanding / income
#     'payment_to_income' <- loan_amt_outstanding   / income
df['debt_to_income'] = df['total_debt_outstanding'].div(df['income'])
df['payment_to_income'] = df['loan_amt_outstanding'].div(df['income'])

In [5]:
# 1b) Model inputs: the predictor columns (in this order) and the target
features = [
    'credit_lines_outstanding',
    'debt_to_income',
    'payment_to_income',
    'years_employed',
    'fico_score',
]
X = df.loc[:, features]      # design matrix, one column per entry in `features`
y = df.loc[:, 'default']     # target column

In [6]:
# 2) Fit the logistic-regression PD (probability of default) model.
#    The estimator is configured first and then fitted on X, y;
#    `clf` is the fitted estimator used by the cells below.
clf = LogisticRegression(solver='liblinear', tol=1e-5, max_iter=10_000, random_state=0)
clf.fit(X, y)

In [7]:
# 2a) Coefficients
# Pad every label to the longest feature name so the colons line up.
# A fixed width of 20 is narrower than 'credit_lines_outstanding'
# (24 characters), which pushed that row out of alignment.
label_width = max((len(feat) for feat in features), default=0)

print("Model coefficients:")
for feat, coef in zip(features, clf.coef_[0]):
    # clf.coef_[0] is paired positionally with `features`
    print(f"  {feat.ljust(label_width)}: {coef:.4f}")
print(f"Intercept: {clf.intercept_[0]:.4f}\n")

Model coefficients:
  credit_lines_outstanding: 8.1852
  debt_to_income      : 0.5449
  payment_to_income   : 0.0199
  years_employed      : -2.7763
  fico_score          : -0.0242
Intercept: -0.0916



In [9]:
# 3) Evaluate performance on the full sample.
#    Predictions are made on the same X, y the model was fitted on,
#    so these are in-sample figures.
y_prob = clf.predict_proba(X)[:, 1]   # probability of the positive class (column 1)
y_pred = clf.predict(X)               # hard class labels

# Share of rows where the predicted label equals the observed one
accuracy = (y == y_pred).mean()

# Area under the ROC curve, built from the predicted probabilities
fpr, tpr, _ = metrics.roc_curve(y, y_prob)
auc_score = metrics.auc(fpr, tpr)

print(
    f"Accuracy: {accuracy:.3f}",
    f"ROC AUC : {auc_score:.3f}\n",
    sep="\n",
)

Accuracy: 0.996
ROC AUC : 1.000



In [10]:
# 4) Expected‐loss helper
def predict_expected_loss(
    borrower: dict,
    loan_amount: float,
    recovery_rate: float = 0.10
) -> float:
    """
    Estimate the expected loss on a loan for a single borrower.

    Expected loss = PD * EAD * (1 - recovery_rate), where PD is the
    default probability returned by the module-level fitted `clf`.

    Parameters
    ----------
    borrower : dict
        {feature_name: value, ...}. Must contain every name in the
        module-level `features` list; extra keys are ignored.
    loan_amount : float
        Exposure at default (EAD). Must be non-negative.
    recovery_rate : float, default 0.10
        Fraction of the exposure recovered if the borrower defaults.
        Must lie in [0, 1].

    Returns
    -------
    float
        Expected loss, in the same units as `loan_amount`.

    Raises
    ------
    ValueError
        If `recovery_rate` is outside [0, 1] (including NaN) or
        `loan_amount` is negative.
    KeyError
        If `borrower` lacks one or more of the model features.
    """
    # Validate the scalar inputs first: out-of-range values would otherwise
    # silently produce a negative or inflated "loss".
    if not 0.0 <= recovery_rate <= 1.0:
        raise ValueError(
            f"recovery_rate must be between 0 and 1, got {recovery_rate!r}"
        )
    if loan_amount < 0:
        raise ValueError(
            f"loan_amount must be non-negative, got {loan_amount!r}"
        )

    # Report all missing inputs at once instead of an opaque pandas KeyError.
    missing = [name for name in features if name not in borrower]
    if missing:
        raise KeyError(f"borrower is missing model features: {missing}")

    # One-row frame with columns in the order given by `features`.
    row = pd.DataFrame([borrower])[features]
    pd_prob = clf.predict_proba(row)[0, 1]  # column 1 = positive class

    # Cast so the result is a plain float, as the annotation promises.
    return float(pd_prob * loan_amount * (1 - recovery_rate))

In [11]:
# 5) Example usage: score the borrower stored under row label 0 of `df`
sample = {name: df.loc[0, name] for name in features}

loan_amt = 15_000  # exposure used for the illustration, i.e. $15k
loss_est = predict_expected_loss(sample, loan_amt)

print(f"Sample borrower expected loss on a ${loan_amt:,} loan: ${loss_est:,.2f}")

Sample borrower expected loss on a $15,000 loan: $0.00
