# Credit Default Prediction: The 'Hello World' of Quant Finance

This notebook demonstrates how to build a simple Logistic Regression model that predicts a loan's probability of default (PD).

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_curve, auc, confusion_matrix, classification_report

# 1. Generate Synthetic Data
# Seed the global NumPy generator so every run draws the same sample.
np.random.seed(42)
n_samples = 1000

# (mean, std) of each normally distributed feature, listed in draw order:
# Income (k$), Leverage (x), Liquidity (Ratio)
feature_params = [(80, 20), (3.5, 1.0), (1.5, 0.5)]
income, leverage, liquidity = (
    np.random.normal(mean, std, n_samples) for mean, std in feature_params
)

# Target: the score z rises with Leverage and falls with Income and Liquidity.
# Standard-normal noise is added so the label is not a pure function of the
# three features.
noise = np.random.normal(0, 1, n_samples)
z = -2 + 0.8 * leverage - 0.05 * income - 0.5 * liquidity + noise

# Map the score through the logistic function, then label a row as a default
# (1) when the resulting probability exceeds 0.5, otherwise 0.
prob = 1 / (1 + np.exp(-z))
default = (prob > 0.5).astype(int)

df = pd.DataFrame(
    {
        'Income': income,
        'Leverage': leverage,
        'Liquidity': liquidity,
        'Default': default,
    }
)
print("Dataset Head:", df.head(), sep="\n")

## 2. Train the Model

In [None]:
# Separate the three explanatory features from the binary default label.
feature_cols = ['Income', 'Leverage', 'Liquidity']
X = df[feature_cols]
y = df['Default']

# The synthetic log-odds are centred well below zero, so defaults are a small
# minority of the rows. Stratifying on the label keeps the default rate the
# same in the train and test sets, so the test set is not left with too few
# positives to evaluate on. Stratification needs at least two rows per class;
# fall back to a plain random split if that is not the case.
stratify_on = y if y.value_counts().min() >= 2 else None

# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=stratify_on
)

# Fit a logistic regression with scikit-learn's default settings.
model = LogisticRegression()
model.fit(X_train, y_train)

# Coefficients are reported in the same order as feature_cols.
print("Model Coefficients:", model.coef_)
print("Intercept:", model.intercept_)

## 3. Evaluate Performance (ROC Curve)

In [None]:
# Score the held-out rows: column 1 of predict_proba is the probability of
# the positive class (Default == 1).
y_pred_prob = model.predict_proba(X_test)[:, 1]

# Trace the ROC curve over every score threshold and integrate it to get AUC.
fpr, tpr, thresholds = roc_curve(y_test, y_pred_prob)
roc_auc = auc(fpr, tpr)

# Draw the model's ROC curve against the diagonal of a no-skill classifier.
fig, ax = plt.subplots(figsize=(8, 6))
ax.plot(
    fpr,
    tpr,
    color='darkorange',
    lw=2,
    label=f'ROC curve (area = {roc_auc:.2f})',
)
ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])  # small headroom so the curve's top edge is visible
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver Operating Characteristic (ROC)')
ax.legend(loc="lower right")
plt.show()