Task 3 of The Forage Quantitative Research Module | JPMC

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score


In [3]:
# Read the loan book from the project's data directory
loan_data = pd.read_csv("data/loan_data.csv")

# Predictor columns, in the order they are handed to the models
features = [
    "credit_lines_outstanding", "loan_amt_outstanding", "total_debt_outstanding",
    "income", "years_employed", "fico_score",
]

# Design matrix and target column
X, y = loan_data[features], loan_data["default"]

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.3,
)

In [4]:
# Standardise the predictors: statistics are learned on the training split only
# and then reused unchanged on the held-out split.
scaler = StandardScaler()
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit the logistic regression on the standardised training data
logit_model = LogisticRegression(max_iter=1000).fit(X_train_scaled, y_train)

# Out-of-sample check: column 1 of predict_proba is used as the default probability
logit_pd_test = logit_model.predict_proba(X_test_scaled)[:, 1]
logit_auc = roc_auc_score(y_test, logit_pd_test)
# NOTE(review): the recorded AUC is ~1.0, which is unusually high — confirm the
# features do not leak the target.
print("Logistic Regression AUC:", logit_auc)


Logistic Regression AUC: 0.9999766973184627


In [5]:
# Shallow tree with a minimum leaf size; fitted on the unscaled predictors
tree_model = DecisionTreeClassifier(
    random_state=42,
    max_depth=4,
    min_samples_leaf=50,
).fit(X_train, y_train)

# Out-of-sample check: column 1 of predict_proba is used as the default probability
tree_pd_test = tree_model.predict_proba(X_test)[:, 1]
tree_auc = roc_auc_score(y_test, tree_pd_test)
print("Decision Tree AUC:", tree_auc)


Decision Tree AUC: 0.9993099399467346


In [6]:
# Default share of the exposure assumed to be recovered after a default.
RECOVERY_RATE = 0.10

def expected_loss(
    credit_lines_outstanding,
    loan_amt_outstanding,
    total_debt_outstanding,
    income,
    years_employed,
    fico_score,
    model="logistic",
    recovery_rate=None,
):
    """
    Return the expected loss for a single loan, computed as PD * EAD * LGD.

    PD is the probability of default predicted by the selected model, EAD is
    ``loan_amt_outstanding`` and LGD is ``1 - recovery_rate``.

    Parameters
    ----------
    credit_lines_outstanding, loan_amt_outstanding, total_debt_outstanding,
    income, years_employed, fico_score
        Borrower attributes; they are passed to the model under the column
        names of the same spelling.
    model : str, default "logistic"
        ``"logistic"`` scores with ``scaler`` + ``logit_model``;
        ``"tree"`` (or ``"decision_tree"``) scores with ``tree_model``.
    recovery_rate : float or None, default None
        Fraction of the exposure recovered after default, in ``[0, 1]``.
        ``None`` means "use the module-level ``RECOVERY_RATE``", read at call
        time.

    Returns
    -------
    float
        Expected loss in the same units as ``loan_amt_outstanding``.

    Raises
    ------
    ValueError
        If ``model`` is not a recognised name or ``recovery_rate`` lies
        outside ``[0, 1]``.
    """
    logistic_names = ("logistic",)
    tree_names = ("tree", "decision_tree")

    # Validate before touching any model: previously every unrecognised name
    # (including typos such as "logit") silently fell through to the tree.
    if model not in logistic_names + tree_names:
        raise ValueError(
            f"Unknown model {model!r}; expected one of "
            f"{logistic_names + tree_names}"
        )

    rate = RECOVERY_RATE if recovery_rate is None else recovery_rate
    if not 0 <= rate <= 1:
        raise ValueError(f"recovery_rate must be within [0, 1], got {rate!r}")

    # One-row frame whose column names/order mirror the training features.
    loan_features = pd.DataFrame([{
        "credit_lines_outstanding": credit_lines_outstanding,
        "loan_amt_outstanding": loan_amt_outstanding,
        "total_debt_outstanding": total_debt_outstanding,
        "income": income,
        "years_employed": years_employed,
        "fico_score": fico_score
    }])

    if model in logistic_names:
        # The logistic model was fitted on standardised inputs, so the same
        # fitted scaler must be applied before scoring.
        loan_scaled = scaler.transform(loan_features)
        pd_estimate = logit_model.predict_proba(loan_scaled)[0, 1]
    else:
        # The tree was fitted on the raw (unscaled) features.
        pd_estimate = tree_model.predict_proba(loan_features)[0, 1]

    ead = loan_amt_outstanding
    lgd = 1 - rate

    return float(pd_estimate * ead * lgd)


In [7]:
# Example borrower, scored with the logistic model
example_loan = {
    "credit_lines_outstanding": 3,
    "loan_amt_outstanding": 15000,
    "total_debt_outstanding": 22000,
    "income": 55000,
    "years_employed": 4,
    "fico_score": 680,
    "model": "logistic",
}

loss = expected_loss(**example_loan)

print(f"Expected Loss: ${loss:,.2f}")


Expected Loss: $13,403.96
