In [1]:
# ⚙️ Install and Import Dependencies (Optional: d3rlpy)
# !pip install d3rlpy

import os, random, math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score, f1_score, confusion_matrix

# Reproducibility
RND = 42
random.seed(RND)
np.random.seed(RND)
os.environ['PYTHONHASHSEED'] = str(RND)


In [2]:
# Define File Paths and Verify Data Availability
PROC_PATH = "processed_loan_data.csv"
PROBS_PATH = "test_predictions.csv"
RAW_PATH = "accepted_2007_to_2018Q4.csv"

assert os.path.exists(PROC_PATH), f"Missing {PROC_PATH}"
assert os.path.exists(PROBS_PATH), f"Missing {PROBS_PATH}"
assert os.path.exists(RAW_PATH), f"Missing {RAW_PATH}"

proc = pd.read_csv(PROC_PATH, low_memory=False)
pred = pd.read_csv(PROBS_PATH, low_memory=False)
raw = pd.read_csv(RAW_PATH, low_memory=False)

print("Shapes -> processed:", proc.shape, "| predictions:", pred.shape, "| raw:", raw.shape)


Shapes -> processed: (1803164, 42) | predictions: (270475, 2) | raw: (2260701, 151)


In [3]:
# Extract Loan Amount and Interest Rate Columns
if 'loan_amnt' in proc.columns and 'int_rate' in proc.columns:
    loan_all = proc['loan_amnt'].values
    rate_all = proc['int_rate'].values
else:
    raw_small = raw[['loan_amnt', 'int_rate', 'loan_status']].copy()
    raw_small['int_rate'] = raw_small['int_rate'].astype(str).str.rstrip('%').replace('', np.nan).astype(float)
    if raw_small['int_rate'].median() > 1:
        raw_small['int_rate'] /= 100.0
    loan_all = raw_small['loan_amnt'].values[:len(proc)]
    rate_all = raw_small['int_rate'].values[:len(proc)]


In [4]:
# Prepare Features and Target
proc = proc.dropna(subset=['target'])
X = proc.drop(columns=['target'], errors='ignore').values.astype(np.float32)
y = proc['target'].astype(int).values


In [5]:
# Stratified Split with Index Tracking
X_train, X_temp, y_train, y_temp, idx_train, idx_temp = train_test_split(
    X, y, np.arange(len(y)), test_size=0.30, stratify=y, random_state=RND
)
X_val, X_test, y_val, y_test, idx_val, idx_test = train_test_split(
    X_temp, y_temp, idx_temp, test_size=0.50, stratify=y_temp, random_state=RND
)

loan_train, rate_train = loan_all[idx_train], rate_all[idx_train]
loan_val, rate_val = loan_all[idx_val], rate_all[idx_val]
loan_test, rate_test = loan_all[idx_test], rate_all[idx_test]


In [6]:
# Define Reward Functions
def realized_reward(action, loan_amt, int_rate, true_label):
    if action == 0:
        return 0.0
    if true_label == 0:
        return loan_amt * int_rate
    return -loan_amt

def expected_reward_from_p(p_default, loan_amt, int_rate):
    return (1 - p_default) * (loan_amt * int_rate) - p_default * loan_amt


In [7]:
# Load or Compute Default Probabilities
if {'p_default', 'y_true'}.issubset(pred.columns):
    p_test = pred['p_default'].values
    y_test_saved = pred['y_true'].values
else:
    try:
        import tensorflow as tf
        model = tf.keras.models.load_model("keras_default_pred.h5")
        p_test = model.predict(X_test, batch_size=512).ravel()
        y_test_saved = y_test
    except Exception:
        rf = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=RND, n_jobs=-1)
        rf.fit(X_train, y_train)
        p_test = rf.predict_proba(X_test)[:, 1]
        y_test_saved = y_test


In [8]:
# Compute Expected Rewards and Build Test DataFrame
n = len(p_test)
loan_test_aligned = proc['loan_amnt'].iloc[:n].values
rate_test_aligned = proc['int_rate'].iloc[:n].values

exp_rew_test = expected_reward_from_p(p_test, loan_test_aligned, rate_test_aligned)
df_test = pd.DataFrame({
    'y_true': y_test_saved[:n],
    'p_default': p_test,
    'loan_amnt': loan_test_aligned,
    'int_rate': rate_test_aligned,
    'exp_reward': exp_rew_test
})


In [9]:
# Evaluate Static Quantile-Based Policies
quantiles = {'top_80': 0.2, 'top_50': 0.5, 'top_19': 0.81, 'top_5': 0.95}
for name, q in quantiles.items():
    thr = df_test['exp_reward'].quantile(q)
    selected = df_test[df_test['exp_reward'] >= thr]
    print(f"{name}: selected {len(selected)}/{len(df_test)} | avg expected reward = {selected['exp_reward'].mean():.2f}")


top_80: selected 216380/270475 | avg expected reward = 0.34
top_50: selected 135238/270475 | avg expected reward = 0.61
top_19: selected 51391/270475 | avg expected reward = 1.07
top_5: selected 13524/270475 | avg expected reward = 1.67


In [10]:
# Attempt to Import d3rlpy for Offline RL
use_d3rlpy = False
try:
    import d3rlpy
    from d3rlpy.dataset import MDPDataset
    from d3rlpy.algos import CQL
    use_d3rlpy = True
except Exception as e:
    print("d3rlpy unavailable or failed import:", e)


Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [11]:
# Prepare MDPDataset for Offline RL (if d3rlpy Available)
if use_d3rlpy:
    obs = X_train.astype(np.float32)
    acts = np.ones((len(obs), 1), dtype=np.float32)
    rewards_train = np.array([realized_reward(1, la, r, t) for la, r, t in zip(loan_train, rate_train, y_train)], dtype=np.float32)
    terminals = np.ones_like(rewards_train, dtype=bool)
    mdp = MDPDataset(observations=obs, actions=acts, rewards=rewards_train, terminals=terminals)
    print("MDPDataset prepared with size:", len(obs))


2025-10-26 12:26.33 [info     ] Signatures have been automatically determined. action_signature=Signature(dtype=[dtype('float32')], shape=[(1,)]) observation_signature=Signature(dtype=[dtype('float32')], shape=[(41,)]) reward_signature=Signature(dtype=[dtype('float32')], shape=[(1,)])
2025-10-26 12:26.33 [info     ] Action-space has been automatically determined. action_space=<ActionSpace.DISCRETE: 2>
2025-10-26 12:26.36 [info     ] Action size has been automatically determined. action_size=2
MDPDataset prepared with size: 941483


In [12]:
use_d3rlpy = False
try:
    import d3rlpy
    from d3rlpy.dataset import MDPDataset
    from d3rlpy.algos import CQL
    use_d3rlpy = True
    print("d3rlpy available. Preparing dataset for CQL.")
except Exception as e:
    print("d3rlpy unavailable or failed import:", e)
    use_d3rlpy = False


d3rlpy available. Preparing dataset for CQL.


In [14]:
# ⚙️ Configure and Train Discrete CQL Agent (d3rlpy ≥2.8.1)
if use_d3rlpy:
    import torch
    from d3rlpy.algos import DiscreteCQL, DiscreteCQLConfig
    from d3rlpy.models.encoders import DefaultEncoderFactory
    from d3rlpy.models.q_functions import MeanQFunctionFactory

    # Build factories and config
    encoder_factory = DefaultEncoderFactory()
    q_func_factory = MeanQFunctionFactory()
    cql_config = DiscreteCQLConfig(
        learning_rate=1e-4,
        batch_size=256,
        gamma=0.99,
        encoder_factory=encoder_factory,
        q_func_factory=q_func_factory,
    )

    # Instantiate agent (new signature requires enable_ddp)
    device = "cuda" if torch.cuda.is_available() else "cpu"
    enable_ddp = False
    cql = DiscreteCQL(cql_config, device, enable_ddp)

    # Train agent
    cql.fit(dataset=mdp, n_steps=50000, n_steps_per_epoch=5000)

    # Save model
    os.makedirs("models", exist_ok=True)
    cql.save_model("models/discrete_cql_model")
    print("CQL model trained and saved.")


2025-10-26 12:27.22 [info     ] dataset info                   dataset_info=DatasetInfo(observation_signature=Signature(dtype=[dtype('float32')], shape=[(41,)]), action_signature=Signature(dtype=[dtype('float32')], shape=[(1,)]), reward_signature=Signature(dtype=[dtype('float32')], shape=[(1,)]), action_space=<ActionSpace.DISCRETE: 2>, action_size=2)
2025-10-26 12:27.22 [debug    ] Building models...            
2025-10-26 12:27.24 [debug    ] Models have been built.       
2025-10-26 12:27.24 [info     ] Directory is created at d3rlpy_logs\DiscreteCQL_20251026122724
2025-10-26 12:27.24 [info     ] Parameters                     params={'observation_shape': [41], 'action_size': 2, 'config': {'type': 'discrete_cql', 'params': {'batch_size': 256, 'gamma': 0.99, 'observation_scaler': {'type': 'none', 'params': {}}, 'action_scaler': {'type': 'none', 'params': {}}, 'reward_scaler': {'type': 'none', 'params': {}}, 'compile_graph': False, 'learning_rate': 0.0001, 'optim_factory': {'type': 'ad

Epoch 1/10:   0%|          | 0/5000 [00:00<?, ?it/s]

2025-10-26 12:29.40 [info     ] DiscreteCQL_20251026122724: epoch=1 step=5000 epoch=1 metrics={'time_sample_batch': 0.009399208784103394, 'time_algorithm_update': 0.017446479415893553, 'loss': nan, 'td_loss': nan, 'conservative_loss': nan, 'time_step': 0.0269882520198822} step=5000
2025-10-26 12:29.40 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20251026122724\model_5000.d3


Epoch 2/10:   0%|          | 0/5000 [00:00<?, ?it/s]

2025-10-26 12:31.53 [info     ] DiscreteCQL_20251026122724: epoch=2 step=10000 epoch=2 metrics={'time_sample_batch': 0.009236612462997437, 'time_algorithm_update': 0.017077057695388795, 'loss': nan, 'td_loss': nan, 'conservative_loss': nan, 'time_step': 0.02646240816116333} step=10000
2025-10-26 12:31.53 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20251026122724\model_10000.d3


Epoch 3/10:   0%|          | 0/5000 [00:00<?, ?it/s]

2025-10-26 12:33.36 [info     ] DiscreteCQL_20251026122724: epoch=3 step=15000 epoch=3 metrics={'time_sample_batch': 0.006638333654403686, 'time_algorithm_update': 0.01369930419921875, 'loss': nan, 'td_loss': nan, 'conservative_loss': nan, 'time_step': 0.020479881858825683} step=15000
2025-10-26 12:33.36 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20251026122724\model_15000.d3


Epoch 4/10:   0%|          | 0/5000 [00:00<?, ?it/s]

2025-10-26 12:35.40 [info     ] DiscreteCQL_20251026122724: epoch=4 step=20000 epoch=4 metrics={'time_sample_batch': 0.008498343276977538, 'time_algorithm_update': 0.01580895586013794, 'loss': nan, 'td_loss': nan, 'conservative_loss': nan, 'time_step': 0.024459044313430787} step=20000
2025-10-26 12:35.40 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20251026122724\model_20000.d3


Epoch 5/10:   0%|          | 0/5000 [00:00<?, ?it/s]

2025-10-26 12:38.46 [info     ] DiscreteCQL_20251026122724: epoch=5 step=25000 epoch=5 metrics={'time_sample_batch': 0.013325560283660888, 'time_algorithm_update': 0.023487577295303343, 'loss': nan, 'td_loss': nan, 'conservative_loss': nan, 'time_step': 0.03699236278533936} step=25000
2025-10-26 12:38.46 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20251026122724\model_25000.d3


Epoch 6/10:   0%|          | 0/5000 [00:00<?, ?it/s]

2025-10-26 12:40.39 [info     ] DiscreteCQL_20251026122724: epoch=6 step=30000 epoch=6 metrics={'time_sample_batch': 0.007624328374862671, 'time_algorithm_update': 0.014611360883712768, 'loss': nan, 'td_loss': nan, 'conservative_loss': nan, 'time_step': 0.02235144238471985} step=30000
2025-10-26 12:40.39 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20251026122724\model_30000.d3


Epoch 7/10:   0%|          | 0/5000 [00:00<?, ?it/s]

2025-10-26 12:42.30 [info     ] DiscreteCQL_20251026122724: epoch=7 step=35000 epoch=7 metrics={'time_sample_batch': 0.007834521579742432, 'time_algorithm_update': 0.01415584478378296, 'loss': nan, 'td_loss': nan, 'conservative_loss': nan, 'time_step': 0.02209691252708435} step=35000
2025-10-26 12:42.30 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20251026122724\model_35000.d3


Epoch 8/10:   0%|          | 0/5000 [00:00<?, ?it/s]

2025-10-26 12:43.59 [info     ] DiscreteCQL_20251026122724: epoch=8 step=40000 epoch=8 metrics={'time_sample_batch': 0.0059789172649383545, 'time_algorithm_update': 0.011572920513153076, 'loss': nan, 'td_loss': nan, 'conservative_loss': nan, 'time_step': 0.017669126129150392} step=40000
2025-10-26 12:43.59 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20251026122724\model_40000.d3


Epoch 9/10:   0%|          | 0/5000 [00:00<?, ?it/s]

2025-10-26 12:45.22 [info     ] DiscreteCQL_20251026122724: epoch=9 step=45000 epoch=9 metrics={'time_sample_batch': 0.00590876669883728, 'time_algorithm_update': 0.010608326148986817, 'loss': nan, 'td_loss': nan, 'conservative_loss': nan, 'time_step': 0.016616143894195558} step=45000
2025-10-26 12:45.22 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20251026122724\model_45000.d3


Epoch 10/10:   0%|          | 0/5000 [00:00<?, ?it/s]

2025-10-26 12:47.42 [info     ] DiscreteCQL_20251026122724: epoch=10 step=50000 epoch=10 metrics={'time_sample_batch': 0.009921845865249634, 'time_algorithm_update': 0.017585837078094482, 'loss': nan, 'td_loss': nan, 'conservative_loss': nan, 'time_step': 0.02765393533706665} step=50000
2025-10-26 12:47.42 [info     ] Model parameters are saved to d3rlpy_logs\DiscreteCQL_20251026122724\model_50000.d3
✅ CQL model trained and saved.


In [15]:
# 🔮 Predict Actions using Trained CQL Agent
if use_d3rlpy:
    obs_test = X_test.astype(np.float32)
    cql_actions = np.array(cql.predict(obs_test)).astype(int).ravel()

    unique, counts = np.unique(cql_actions, return_counts=True)
    print("Action distribution:", dict(zip(unique, counts)))

    approvals = cql_actions.sum()
    print(f"\nPredicted {len(cql_actions)} actions | Approvals: {approvals} ({100 * approvals / len(cql_actions):.2f}%)")


Action distribution: {0: 201747}

Predicted 201747 actions | Approvals: 0 (0.00%)
