In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [1]:
# Install d3rlpy for Offline RL
!pip install d3rlpy torch scikit-learn pandas numpy matplotlib seaborn --quiet

import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import roc_auc_score, f1_score
import d3rlpy
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
import os

# Suppress warnings
warnings.filterwarnings('ignore')

# Set device 
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Setup complete. Using device: {device}")

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m721.7/721.7 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m201.1/201.1 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m958.1/958.1 kB[0m [31m36.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m90.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m80.1 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[2K   [90m━━━━━━━━━━━━━━━

In [2]:
# LOAD DATA 

FILE_PATH = '/kaggle/input/lending-club/accepted_2007_to_2018Q4.csv.gz'

print(f"Attempting to load: {FILE_PATH}")

# We only load the columns we need to prevent RAM crashes 
cols_to_keep = [
    'loan_amnt', 'term', 'int_rate', 'installment', 'grade', 'sub_grade',
    'emp_length', 'home_ownership', 'annual_inc', 'verification_status',
    'loan_status', 'dti', 'fico_range_low', 'fico_range_high', 'open_acc',
    'pub_rec', 'revol_bal', 'revol_util', 'total_acc', 'mort_acc', 'pub_rec_bankruptcies'
]


# We use compression='gzip' to handle the .gz extension correctly.
# We limit to 500,000 rows for speed. 
try:
    df = pd.read_csv(FILE_PATH, usecols=cols_to_keep, compression='gzip', nrows=500000)
    print("File loaded successfully.")
except FileNotFoundError:
    print("ERROR: File not found. Please check the 'Add Data' panel on the right.")
    print("Make sure you added the 'All Lending Club loan data' by Nathan George.")


# FILTERING & CLEANING
print("Filtering for valid loan statuses...")
# We only want loans that have finished: Paid or Defaulted.
# We DROP 'Current' because the outcome is unknown.
valid_statuses = ['Fully Paid', 'Charged Off', 'Default']
df = df[df['loan_status'].isin(valid_statuses)]

# CREATE TARGET VARIABLE
# 0 = Fully Paid (Good outcome)
# 1 = Default / Charged Off (Bad outcome)
df['target'] = df['loan_status'].apply(lambda x: 0 if x == 'Fully Paid' else 1)

# MISSING VALUES & ENCODING
# Clean 'emp_length' (e.g. "10+ years" becomes 10.0)
df['emp_length'] = df['emp_length'].str.extract('(\d+)').astype(float).fillna(0)
df.fillna(0, inplace=True)

# Encode text columns to numbers
cat_cols = ['term', 'grade', 'sub_grade', 'home_ownership', 'verification_status']
for col in cat_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col].astype(str))

print(f"Data Preparation Complete.")
print(f"Final Data Shape: {df.shape}")
print(f"Columns: {list(df.columns)}")

Attempting to load: /kaggle/input/lending-club/accepted_2007_to_2018Q4.csv.gz
File loaded successfully.
Filtering for valid loan statuses...
Data Preparation Complete.
Final Data Shape: (391168, 22)
Columns: ['loan_amnt', 'term', 'int_rate', 'installment', 'grade', 'sub_grade', 'emp_length', 'home_ownership', 'annual_inc', 'verification_status', 'loan_status', 'dti', 'fico_range_low', 'fico_range_high', 'open_acc', 'pub_rec', 'revol_bal', 'revol_util', 'total_acc', 'mort_acc', 'pub_rec_bankruptcies', 'target']


In [3]:
# FEATURE ENGINEERING & SPLITTING


print("--- Engineering Rewards & Splitting Data ---")

# DEFINE REWARD FUNCTION. We define "Value" based on business logic.
# Rule A: If we approve and it's paid back, we earn Interest.
# Rule B: If we approve and it defaults, we lose the Principal amount.
# Rule C: If we deny, we earn 0 and lose 0.

# Calculate the potential outcomes for every row
df['potential_profit'] = df['loan_amnt'] * (df['int_rate'] / 100)
df['potential_loss'] = -df['loan_amnt']

# Create 'reward' column based on the ACTUAL outcome in the dataset
df['reward'] = df.apply(lambda x: x['potential_profit'] if x['target'] == 0 else x['potential_loss'], axis=1)

# PREPARE FEATURES
# We drop columns that are not predictive features (like the target itself or the reward)
drop_cols = ['loan_status', 'target', 'potential_profit', 'potential_loss', 'reward']
feature_cols = [c for c in df.columns if c not in drop_cols]

# Convert to numpy arrays
X = df[feature_cols].values
y = df['target'].values
rewards = df['reward'].values

print(f"Features selected: {len(feature_cols)}")

# SCALE FEATURES
# Neural Networks require scaled data (mean 0, variance 1) to converge.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 80% for Training, 20% for Testing
X_train, X_test, y_train, y_test, r_train, r_test = train_test_split(
    X_scaled, y, rewards, test_size=0.2, random_state=42
)

print("Data successfully split.")
print(f"Training set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")

--- Engineering Rewards & Splitting Data ---
Features selected: 20
Data successfully split.
Training set: 312934 samples
Test set: 78234 samples


In [4]:
# MODEL ARCHITECTURE

class LoanClassifier(nn.Module):
    def __init__(self, input_dim):
        super(LoanClassifier, self).__init__()
        # Multi-Layer Perceptron (MLP)
        self.model = nn.Sequential(
            nn.Linear(input_dim, 256),   # Input layer -> Hidden 1
            nn.ReLU(),                   # Activation
            nn.Dropout(0.3),             # Regularization to prevent overfitting
            nn.Linear(256, 128),         # Hidden 1 -> Hidden 2
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(128, 1),           # Hidden 2 -> Output
            nn.Sigmoid()                 # Sigmoid ensures output is between 0 and 1 (Probability)
        )
    
    def forward(self, x):
        return self.model(x)

print("Deep Learning Model Architecture defined.")

Deep Learning Model Architecture defined.


In [5]:
# TRAIN SUPERVISED DL MODEL 
print("--- Training Deep Learning Model ---")

model = LoanClassifier(X_train.shape[1]).to(device)
criterion = nn.BCELoss() # Binary Cross Entropy Loss for classification
optimizer = optim.Adam(model.parameters(), lr=0.001)

# DATA LOADER
train_data = TensorDataset(torch.FloatTensor(X_train), torch.FloatTensor(y_train))
train_loader = DataLoader(train_data, batch_size=2048, shuffle=True)

# TRAINING 
epochs = 5
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs.squeeze(), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs} - Loss: {running_loss/len(train_loader):.4f}")

# EVALUATION
print("\nEvaluating on Test Set...")
model.eval()
with torch.no_grad():
    X_test_tensor = torch.FloatTensor(X_test).to(device)
    y_pred_prob = model(X_test_tensor).cpu().numpy().squeeze()

# Metrics
auc = roc_auc_score(y_test, y_pred_prob)
y_pred_class = (y_pred_prob > 0.5).astype(int)
f1 = f1_score(y_test, y_pred_class)

print(f"Final DL Results: AUC = {auc:.4f}, F1-Score = {f1:.4f}")

--- Training Deep Learning Model ---
Epoch 1/5 - Loss: 0.4610
Epoch 2/5 - Loss: 0.4477
Epoch 3/5 - Loss: 0.4464
Epoch 4/5 - Loss: 0.4460
Epoch 5/5 - Loss: 0.4454

Evaluating on Test Set...
Final DL Results: AUC = 0.7344, F1-Score = 0.2036


In [7]:
# TRAIN OFFLINE RL AGENT 

print("--- Training Offline RL Agent (CQL) ---")

# CREATE RL DATASET (DATA AUGMENTATION). The dataset only contains loans that were APPROVED (Action = 1).
# To teach the agent, we must show it what happens if we DENY (Action = 0).
# We assume Deny = Reward 0.

print("Constructing RL Experience Replay Buffer...")

# Real Data (Action = Approve = 1)
obs_1 = X_train
act_1 = np.ones(len(X_train)) # All 1s
rew_1 = r_train

# Synthetic Data (Action = Deny = 0)
# We duplicate the observations but assign Action 0 and Reward 0
obs_0 = X_train
act_0 = np.zeros(len(X_train)) # All 0s
rew_0 = np.zeros(len(X_train)) 

# Combine them
obs_rl = np.vstack([obs_1, obs_0])
act_rl = np.concatenate([act_1, act_0])
rew_rl = np.concatenate([rew_1, rew_0])
terminals = np.ones(len(obs_rl)) # All episodes end after 1 step

# Create d3rlpy Dataset object
dataset = d3rlpy.dataset.MDPDataset(
    observations=obs_rl,
    actions=act_rl,
    rewards=rew_rl,
    terminals=terminals,
)

# CONFIGURE ALGORITHM
# We use Discrete CQL (Conservative Q-Learning)
cql = d3rlpy.algos.DiscreteCQLConfig(
    batch_size=2048,
    learning_rate=1e-4,
).create(device=True if torch.cuda.is_available() else False)

# TRAIN AGENT
print("Starting Training (this may take a few minutes)...")


cql.fit(
    dataset,
    n_steps=10000
)

print("RL Agent Training Complete.")

--- Training Offline RL Agent (CQL) ---
Constructing RL Experience Replay Buffer...
[2m2025-12-09 18:37.55[0m [[32m[1minfo     [0m] [1mSignatures have been automatically determined.[0m [36maction_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(1,)])[0m [36mobservation_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(20,)])[0m [36mreward_signature[0m=[35mSignature(dtype=[dtype('float64')], shape=[(1,)])[0m
[2m2025-12-09 18:37.55[0m [[32m[1minfo     [0m] [1mAction-space has been automatically determined.[0m [36maction_space[0m=[35m<ActionSpace.DISCRETE: 2>[0m
[2m2025-12-09 18:37.57[0m [[32m[1minfo     [0m] [1mAction size has been automatically determined.[0m [36maction_size[0m=[35m2[0m
Starting Training (this may take a few minutes)...
[2m2025-12-09 18:37.58[0m [[32m[1minfo     [0m] [1mdataset info                  [0m [36mdataset_info[0m=[35mDatasetInfo(observation_signature=Signature(dtype=[dtype('float64')], s

Epoch 1/1:   0%|          | 0/10000 [00:00<?, ?it/s]

[2m2025-12-09 18:45.29[0m [[32m[1minfo     [0m] [1mDiscreteCQL_20251209183758: epoch=1 step=10000[0m [36mepoch[0m=[35m1[0m [36mmetrics[0m=[35m{'time_sample_batch': 0.03880418281555176, 'time_algorithm_update': 0.005839399576187134, 'loss': 2275.5941565063476, 'td_loss': 2274.540419177246, 'conservative_loss': 1.0537365218222141, 'time_step': 0.04485613431930542}[0m [36mstep[0m=[35m10000[0m
[2m2025-12-09 18:45.29[0m [[32m[1minfo     [0m] [1mModel parameters are saved to d3rlpy_logs/DiscreteCQL_20251209183758/model_10000.d3[0m
RL Agent Training Complete.


In [8]:
# FINAL COMPARISON & REPORT

print("--- Comparative Analysis: DL vs RL ---")

# DECISIONS ON TEST SET
# DL Strategy: Deny if Risk > 50% 
dl_actions = np.where(y_pred_prob > 0.5, 0, 1) # 0=Deny, 1=Approve

# RL Strategy: Agent predicts the action with highest Q-Value
rl_actions = cql.predict(X_test)

# PORTFOLIO VALUE (This calculates how much money each strategy would have made on the test set)
def get_portfolio_value(actions, actual_rewards):
    total = 0
    for act, r in zip(actions, actual_rewards):
        if act == 1:
            total += r
        # If act == 0 (Deny), reward is 0, so we add nothing.
    return total

dl_val = get_portfolio_value(dl_actions, r_test)
rl_val = get_portfolio_value(rl_actions, r_test)

print(f"Total Portfolio Value (Deep Learning):  ${dl_val:,.2f}")
print(f"Total Portfolio Value (Reinforcement Learning): ${rl_val:,.2f}")

# ANALYZE DISAGREEMENTS
# We want to find cases where the RL agent APPROVED (1) but the DL model DENIED (0).
# This usually happens when the Interest Rate is high enough to justify the Risk.
disagreements = np.where((rl_actions == 1) & (dl_actions == 0))[0]

print(f"\nNumber of loans where RL Approves but DL Denies: {len(disagreements)}")

if len(disagreements) > 0:
    #top 3 examples
    print("\n--- Insight: Why did RL approve these? ---")
    for i in range(min(3, len(disagreements))):
        idx = disagreements[i]
        print(f"\nLoan Example #{idx}")
        print(f"  DL Predicted Default Probability: {y_pred_prob[idx]*100:.2f}% (High Risk)")
        print(f"  Actual Profit/Loss (if approved): ${r_test[idx]:.2f}")
        
        # Note: Since we scaled X, we can't easily see the original Interest Rate here.
        # But generally, RL approves high-risk loans only if the reward (Interest) is massive.
        if r_test[idx] > 0:
            print("  OUTCOME: The loan was actually PAID back. RL was right!")
        else:
            print("  OUTCOME: The loan Defaulted. RL took a risk and lost.")
else:
    print("No disagreements found in this test batch.")

--- Comparative Analysis: DL vs RL ---
Total Portfolio Value (Deep Learning):  $-113,689,828.21
Total Portfolio Value (Reinforcement Learning): $-110,073,573.73

Number of loans where RL Approves but DL Denies: 273

--- Insight: Why did RL approve these? ---

Loan Example #133
  DL Predicted Default Probability: 50.09% (High Risk)
  Actual Profit/Loss (if approved): $1853.60
  OUTCOME: The loan was actually PAID back. RL was right!

Loan Example #271
  DL Predicted Default Probability: 54.07% (High Risk)
  Actual Profit/Loss (if approved): $2979.00
  OUTCOME: The loan was actually PAID back. RL was right!

Loan Example #523
  DL Predicted Default Probability: 50.36% (High Risk)
  Actual Profit/Loss (if approved): $-12000.00
  OUTCOME: The loan Defaulted. RL took a risk and lost.
