# Credit Risk Model — DFS + MIV Pipeline Walkthrough

Interactive walkthrough of the Revolut DFS + MIV credit scorecard methodology.

This notebook replicates the methodology from *"Enhancing Credit Risk Models at Revolut by Combining Deep Feature Synthesis and Marginal Information Value"* (Spinella & Krisciunas, 2025).

In [None]:
import warnings
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm
from sklearn.metrics import roc_auc_score, roc_curve

# Pipeline modules
from revolut_credit_risk import config
from revolut_credit_risk.logging_config import setup_logging
from revolut_credit_risk.data.synthetic_data import generate_synthetic_data, save_dataset
from revolut_credit_risk.features.dfs_engine import run_dfs
from revolut_credit_risk.features.binning import bin_features, transform_woe
from revolut_credit_risk.features.variable_config import get_variable_configs
from revolut_credit_risk.selection.information_value import collect_iv, filter_by_iv, bivariate_analysis
from revolut_credit_risk.selection.miv_selector import run_miv_selection
from revolut_credit_risk.model.scorecard import train_scorecard, benchmark_models
from revolut_credit_risk.model.calibration import calibrate_isotonic
from revolut_credit_risk.evaluation.metrics import compute_all_splits, plot_roc_curve, plot_lorenz_curve, plot_calibration_curve, plot_miv_selection
from revolut_credit_risk.monitoring.residual_monitor import run_residual_monitoring

# One-time notebook setup: project logging helper, then the seaborn theme for all figures.
setup_logging()
sns.set_theme(style='whitegrid')
# IPython magic (not plain Python): render matplotlib figures inline in the cell output.
%matplotlib inline

# Use smaller dataset for notebook speed
# These assignments overwrite attributes on the imported `config` module for this kernel
# session, so they have to run before any cell that reads them.
# NOTE(review): presumably consumed by generate_synthetic_data() and run_dfs() — confirm.
config.N_CUSTOMERS = 2000
config.DFS_MAX_FEATURES = 200
print('Setup complete.')

## 1. Data Exploration

Generate synthetic relational data mimicking a digital bank's entity structure.

In [None]:
# Build the synthetic relational dataset once; every later cell reads from `data`.
data = generate_synthetic_data()

# Row counts per entity table, printed with thousands separators.
row_counts = [
    ('Customers:    ', data.customers),
    ('Accounts:     ', data.accounts),
    ('Transactions: ', data.transactions),
    ('Applications: ', data.credit_applications),
]
for prefix, table in row_counts:
    print(f'{prefix}{len(table):,}')

# Share of loans flagged as defaulted in the performance table.
default_rate = data.loan_performance["is_default"].mean()
print(f'Default rate: {default_rate:.1%}')

In [None]:
# Entity relationship overview: show the first rows of each table under its heading.
sections = [
    ('=== Customers ===', data.customers),
    ('\n=== Accounts ===', data.accounts),
    ('\n=== Transactions (sample) ===', data.transactions),
    ('\n=== Credit Applications ===', data.credit_applications),
    ('\n=== Loan Performance ===', data.loan_performance),
]
for heading, frame in sections:
    print(heading)
    display(frame.head())

In [None]:
# Default rate by customer demographics
# Attach each loan outcome to its applicant: loan_performance -> credit_applications
# (to obtain customer_id) -> customers. The first merge passes no `on=`, so pandas joins
# on every column name the two frames share.
merged = (
    data.loan_performance
    .merge(data.credit_applications[['application_id', 'customer_id']])
    .merge(data.customers, on='customer_id')
)

# Five equal-width age bands; the result is categorical, which is why `observed` matters below.
age_bands = pd.cut(merged['age'], bins=5)

panels = [
    ('income_band', 'Default Rate by Income'),
    ('employment_status', 'Default Rate by Employment'),
    (age_bands, 'Default Rate by Age'),
]

fig, axes = plt.subplots(1, 3, figsize=(15, 4))

for ax, (grouper, title) in zip(axes, panels):
    # observed=False is stated explicitly: relying on the implicit default for categorical
    # groupers is deprecated (the warning is hidden by the notebook's blanket warnings
    # filter) and the default differs between pandas versions. Pinning it keeps empty
    # bands on the axis regardless of the installed pandas.
    rate = merged.groupby(grouper, observed=False)['is_default'].mean()
    rate.plot(kind='bar', ax=ax, title=title)
    ax.set_ylabel('Default Rate')
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 2. Deep Feature Synthesis [Paper §2.2.4]

Automatically generate features from the relational data using featuretools.

In [None]:
# Run Deep Feature Synthesis over the relational dataset and preview the result.
feature_matrix = run_dfs(data)
print(f'Feature matrix shape: {feature_matrix.shape}')

# List at most the first 20 generated column names.
print('\nSample feature names:')
preview_columns = feature_matrix.columns[:20]
for column in preview_columns:
    print(f'  {column}')

In [None]:
# Merge with target and split
# The inner join keeps only feature rows whose index label has a matching application_id
# in the performance table.
target = data.loan_performance[['application_id', 'is_default']].copy()
full = feature_matrix.join(target.set_index('application_id'), how='inner')
# Label-based .loc selection below would silently duplicate rows on a non-unique index.
assert full.index.is_unique, 'duplicate application ids after joining the target'

y = full['is_default']
X = full.drop(columns=['is_default'])

# Time-based split
app_dates = data.credit_applications.set_index('application_id')['application_date']
app_dates = app_dates.loc[X.index]
assert len(app_dates) == len(X), 'application dates do not map one-to-one onto feature rows'
# A stable sort keeps applications that share a date in their original order, so the
# train/test/OOT boundaries are reproducible when dates tie.
sorted_idx = app_dates.sort_values(kind='mergesort').index

n = len(sorted_idx)
n_train = int(n * config.TRAIN_RATIO)
n_test = int(n * config.TEST_RATIO)

# Oldest applications train the model, the next slice is the test set, and whatever
# remains (the most recent applications) is the out-of-time sample.
train_idx = sorted_idx[:n_train]
test_idx = sorted_idx[n_train:n_train + n_test]
oot_idx = sorted_idx[n_train + n_test:]

X_train, y_train = X.loc[train_idx], y.loc[train_idx]
X_test, y_test = X.loc[test_idx], y.loc[test_idx]
X_oot, y_oot = X.loc[oot_idx], y.loc[oot_idx]

# Fail here, with a clear message, rather than deep inside a later metric call.
assert len(X_train) + len(X_test) + len(X_oot) == n, 'splits do not partition the data'
assert min(len(X_train), len(X_test), len(X_oot)) > 0, (
    'empty split: check config.TRAIN_RATIO / config.TEST_RATIO'
)

print(f'Train: {len(X_train)}, Test: {len(X_test)}, OOT: {len(X_oot)}')

## 3. Binning & WoE Transformation [Paper §2.2.5, §2.2.6]

Coarse binning with optbinning and Weight of Evidence transformation.

In [None]:
# Binning is fitted on the training split only.
train_columns = X_train.columns.tolist()
var_configs = get_variable_configs(train_columns)
binning_results = bin_features(X_train, y_train, var_configs)

# Apply the same fitted bins to each split, in train / test / OOT order.
X_woe_train, X_woe_test, X_woe_oot = (
    transform_woe(split, binning_results) for split in (X_train, X_test, X_oot)
)

n_binned = len(binning_results.results)
n_woe_columns = X_woe_train.shape[1]
print(f'Binned features: {n_binned}')
print(f'WoE columns: {n_woe_columns}')

In [None]:
# Show binning table for the top feature by IV
iv_table = collect_iv(binning_results)
if iv_table.empty:
    raise ValueError('collect_iv returned no features; cannot pick a top feature')

# Select the row with the largest IV explicitly instead of trusting row 0: the ordering
# collect_iv returns is not visible from this notebook. If the table is already sorted
# by descending IV, this picks the same first row.
top_row = iv_table.iloc[iv_table['iv'].argmax()]
top_feat = top_row['feature']
print(f'Top feature: {top_feat} (IV={top_row["iv"]:.4f})')
display(binning_results.results[top_feat].binning_table)

## 4. IV Distribution & Bivariate Analysis [Paper §2.3.2]

In [None]:
# IV distribution histogram
fig, ax = plt.subplots(figsize=(10, 5))
ax.hist(iv_table['iv'], bins=30, edgecolor='black', alpha=0.7)
# The 0.02 cut-off is hard-coded for the plot.
# NOTE(review): confirm it matches the threshold filter_by_iv applies in the next section.
ax.axvline(x=0.02, color='red', linestyle='--', label='IV threshold (0.02)')
ax.set(
    xlabel='Information Value',
    ylabel='Count',
    title='IV Distribution of DFS Features',
)
ax.legend()
plt.show()

# Bivariate analysis: show the first 15 rows of the returned table.
biv = bivariate_analysis(binning_results, X_woe_train, y_train)
display(biv.head(15))

## 5. MIV Feature Selection [Paper §2.3.2, Fig. 5]

Greedy forward selection using Marginal Information Value.

In [None]:
# Pre-filter features by IV, then run the forward selection on the train and test splits.
candidates = filter_by_iv(iv_table)
miv_result = run_miv_selection(
    X_woe_train, y_train, X_woe_test, y_test,
    iv_table, binning_results, candidates,
)

# One line per selection step, followed by the reason the search stopped.
chosen_features = miv_result.selected_features
print(f'Selected {len(chosen_features)} features:')
for step in miv_result.steps:
    step_line = f'  Step {step.step}: {step.feature_added} (MIV={step.miv:.4f}, AUC test={step.auc_test:.4f})'
    print(step_line)
print(f'Stopping reason: {miv_result.stopping_reason}')

In [None]:
# Replicate Paper Fig. 5: AUC vs step + MIV bar chart
# Nothing is drawn when the selection recorded no steps.
if miv_result.steps:
    steps = miv_result.steps
    # Extract each per-step series once; both panels share the same x positions.
    step_numbers = [s.step for s in steps]
    train_aucs = [s.auc_train for s in steps]
    test_aucs = [s.auc_test for s in steps]
    miv_values = [s.miv for s in steps]

    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 8), sharex=True)

    # Top panel: train and test AUC after each added feature.
    ax1.plot(step_numbers, train_aucs, 'b-o', label='Train')
    ax1.plot(step_numbers, test_aucs, 'r-o', label='Test')
    ax1.set(ylabel='ROC AUC', title='MIV Feature Selection Progress [Paper Fig. 5]')
    ax1.legend()
    ax1.grid(True, alpha=0.3)

    # Bottom panel: MIV of the feature added at each step, on a log axis.
    ax2.bar(step_numbers, miv_values, color='steelblue')
    ax2.set(xlabel='Selection Step', ylabel='MIV')
    ax2.set_yscale('log')
    ax2.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    plt.show()

## 6. Final Model — Logistic Regression Scorecard [Paper §2.4.2]

In [None]:
# Fit the final scorecard on the training WoE matrix, restricted to the MIV-selected features.
# NOTE(review): train_scorecard's internals are not visible in this notebook; later cells
# read .coefficient_table, .scorecard_points and .model from the object it returns.
selected = miv_result.selected_features
scorecard = train_scorecard(X_woe_train, y_train, selected, binning_results)
# Print the text summary carried on the returned object.
print(scorecard.summary_text)

In [None]:
# Coefficient table of the fitted scorecard.
coefficient_table = scorecard.coefficient_table
display(coefficient_table)

# Scorecard points are optional: only render them when the attribute is populated.
points_table = scorecard.scorecard_points
if points_table is not None:
    display(points_table)

## 7. Model Performance [Paper §2.4.1, Fig. 6, Fig. 7]

In [None]:
# WoE column names of the selected features (the 'woe_' prefix is the naming used here).
woe_cols = [f'woe_{f}' for f in selected]


def _predict_pd(X_woe):
    """Return the scorecard's predictions for the selected WoE columns of ``X_woe``.

    The intercept column is added with ``has_constant='add'``. With the default
    (``'skip'``), ``sm.add_constant`` returns the data unchanged when it already
    contains a constant column. That can happen when a selected WoE feature takes
    a single value within a small test or OOT split, which would leave the design
    matrix one column short of what the fitted model expects.
    """
    design = sm.add_constant(X_woe[woe_cols], has_constant='add')
    return scorecard.model.predict(design)


prob_train = _predict_pd(X_woe_train)
prob_test = _predict_pd(X_woe_test)
prob_oot = _predict_pd(X_woe_oot)

# Metrics for all three splits in one table.
metrics_df = compute_all_splits(
    y_train.values, prob_train,
    y_test.values, prob_test,
    y_oot.values, prob_oot,
)
display(metrics_df)

In [None]:
# ROC Curve
# roc_curve is imported in the notebook's top import cell together with roc_auc_score.
fig, ax = plt.subplots(figsize=(7, 6))
for y_true, y_prob, label in [
    (y_train.values, prob_train, 'Train'),
    (y_test.values, prob_test, 'Test'),
    (y_oot.values, prob_oot, 'OOT'),
]:
    # The thresholds go to a named throwaway: assigning to `_` at notebook top level
    # would shadow IPython's last-output variable.
    fpr, tpr, _thresholds = roc_curve(y_true, y_prob)
    auc = roc_auc_score(y_true, y_prob)
    ax.plot(fpr, tpr, lw=2, label=f'{label} (AUC={auc:.3f})')

# Diagonal: performance of a random classifier.
ax.plot([0, 1], [0, 1], 'k--')
ax.set_xlabel('FPR')
ax.set_ylabel('TPR')
ax.set_title('ROC Curve')
ax.legend()
plt.show()

In [None]:
# Lorenz Curve [Paper Fig. 6]
fig, ax = plt.subplots(figsize=(7, 6))

# Work on plain arrays so positional indexing does not depend on the pandas index.
scores = np.asarray(prob_test, dtype=float)
outcomes = y_test.to_numpy()

# Riskiest first. A stable sort keeps tied scores in input order, so the curve is
# reproducible when scores tie.
order = np.argsort(-scores, kind='stable')
y_sorted = outcomes[order]

n_bads = y_sorted.sum()
if n_bads == 0:
    # Without this guard the division below would produce an all-NaN curve.
    raise ValueError('Lorenz curve is undefined: the test split contains no defaults')

# Anchor the curve at the origin so it starts where the random diagonal starts.
cum_bads = np.concatenate(([0.0], np.cumsum(y_sorted) / n_bads))
x = np.arange(0, len(y_sorted) + 1) / len(y_sorted)

ax.plot(x, cum_bads, lw=2, label='Model')
ax.plot([0, 1], [0, 1], 'k--', label='Random')
ax.set_xlabel('Fraction of Population')
ax.set_ylabel('Cumulative Fraction of Defaults')
ax.set_title('Lorenz Curve [Paper Fig. 6]')
ax.legend()
plt.show()

## 8. PD Calibration [Paper §2.4.1, Fig. 7]

In [None]:
# Calibrate on the training predictions and report the Brier score on the test split.
cal = calibrate_isotonic(prob_train, y_train.values, prob_test, y_test.values)
print(f'Brier Score before: {cal.brier_before:.4f}')
print(f'Brier Score after:  {cal.brier_after:.4f}')

# Calibration curve [Paper Fig. 7]
# Ten equal-width probability bins on [0, 1], shared by both curves.
n_bins = 10
bin_edges = np.linspace(0, 1, n_bins + 1)
y_observed = y_test.values

fig, ax = plt.subplots(figsize=(7, 6))
curves = [(prob_test, 'Before'), (cal.calibrated_probs_test, 'After')]
for probs, label in curves:
    # digitize is 1-based; shift to 0-based and clip so p == 1.0 lands in the last bin.
    bin_idx = np.clip(np.digitize(probs, bin_edges) - 1, 0, n_bins - 1)
    # np.unique returns only the occupied bins, in ascending order, so empty bins are skipped.
    occupied = np.unique(bin_idx)
    obs = [y_observed[bin_idx == b].mean() for b in occupied]
    pred = [probs[bin_idx == b].mean() for b in occupied]
    ax.plot(pred, obs, 'o-', label=label)

# Reference diagonal, drawn up to 0.5 only.
ax.plot([0, 0.5], [0, 0.5], 'k--', label='Perfect')
ax.set(
    xlabel='Predicted PD',
    ylabel='Observed Default Rate',
    title='Calibration Curve [Paper Fig. 7]',
)
ax.legend()
plt.show()

## 9. Benchmarking [Paper §2.4.2, Table 1]

In [None]:
# AUC of the logistic-regression scorecard on each split, in train / test / OOT order.
lr_auc_train, lr_auc_test, lr_auc_oot = (
    roc_auc_score(labels, scores)
    for labels, scores in (
        (y_train, prob_train),
        (y_test, prob_test),
        (y_oot, prob_oot),
    )
)

# The benchmark receives the raw (non-WoE) feature splits plus the scorecard AUCs.
bench = benchmark_models(
    X_train, y_train, X_test, y_test, X_oot, y_oot,
    lr_auc_train, lr_auc_test, lr_auc_oot,
)
display(bench)

## 10. Residual Monitoring [Paper §3, Fig. 9]

In [None]:
# run_residual_monitoring is imported in the notebook's top import cell with the other
# pipeline modules, so a missing dependency shows up at setup rather than in the last cell.
# The monitor is run on the training split: predictions, outcomes, WoE and raw features.
monitor = run_residual_monitoring(
    model_probs=prob_train,
    y_true=y_train.values,
    X_woe=X_woe_train,
    X_raw=X_train,
    selected_features=selected,
    binning_results=binning_results,
)

# The candidate table may be absent; show its first 10 rows when present.
if monitor.candidate_table is not None:
    display(monitor.candidate_table.head(10))

---

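**Pipeline complete.** Every step above follows the methodology of the Revolut DFS + MIV paper. Because the notebook runs on a small synthetic dataset, the numbers are illustrative and should not be compared directly with the results reported in the paper.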