In [13]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader

import numpy as np
import pandas as pd
import joblib

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, f1_score


In [14]:
df = pd.read_csv("sample_200k.csv")

df['target'] = df['loan_status'].apply(
    lambda x: 1 if str(x) in ["Charged Off", "Default"] else 0
)

X = df.drop(columns=['loan_status', 'target'], errors='ignore')
y = df['target']

print(X.shape, y.shape)


  df = pd.read_csv("sample_200k.csv")


(200000, 150) (200000,)


In [15]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=42, stratify=y
)

print("Train:", X_train.shape, "Test:", X_test.shape)


Train: (160000, 150) Test: (40000, 150)


In [17]:
preprocessor = joblib.load("final_preprocessor.pkl")

preprocessor.fit(X_train)

X_train_prep = preprocessor.transform(X_train)
X_test_prep  = preprocessor.transform(X_test)

print("Transformed Train:", X_train_prep.shape)
print("Transformed Test :", X_test_prep.shape)


Transformed Train: (160000, 148)
Transformed Test : (40000, 148)


In [20]:
X_train_dense = X_train_prep
X_test_dense  = X_test_prep

X_train_t = torch.tensor(X_train_dense, dtype=torch.float32)
y_train_t = torch.tensor(y_train.values, dtype=torch.float32)

X_test_t = torch.tensor(X_test_dense, dtype=torch.float32)
y_test_t = torch.tensor(y_test.values, dtype=torch.float32)


In [21]:
train_ds = TensorDataset(X_train_t, y_train_t)
train_loader = DataLoader(train_ds, batch_size=512, shuffle=True)

input_size = X_train_t.shape[1]
print("Input size:", input_size)


Input size: 148


In [30]:
class MLP(nn.Module):
    def __init__(self, input_size):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),

            nn.Linear(256, 128),
            nn.BatchNorm1d(128),
            nn.ReLU(),
            nn.Dropout(0.2),

            nn.Linear(128, 64),
            nn.ReLU(),

            nn.Linear(64, 1)
        )

    def forward(self, x):
        return self.net(x)


In [31]:
model = MLP(input_size)

criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001, weight_decay=1e-5)

print("Model Ready!")


Model Ready!


In [32]:
epochs = 20

for epoch in range(epochs):
    model.train()
    total_loss = 0

    for xb, yb in train_loader:
        optimizer.zero_grad()
        preds = model(xb).squeeze()
        loss = criterion(preds, yb)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs} - Loss: {total_loss:.4f}")


Epoch 1/20 - Loss: 129.8229
Epoch 2/20 - Loss: 119.5947
Epoch 3/20 - Loss: 119.0577
Epoch 4/20 - Loss: 119.0054
Epoch 5/20 - Loss: 118.8053
Epoch 6/20 - Loss: 118.7379
Epoch 7/20 - Loss: 118.4502
Epoch 8/20 - Loss: 118.2425
Epoch 9/20 - Loss: 112.2412
Epoch 10/20 - Loss: 89.8083
Epoch 11/20 - Loss: 70.5885
Epoch 12/20 - Loss: 58.3951
Epoch 13/20 - Loss: 48.9651
Epoch 14/20 - Loss: 43.5657
Epoch 15/20 - Loss: 43.6122
Epoch 16/20 - Loss: 42.2416
Epoch 17/20 - Loss: 40.2058
Epoch 18/20 - Loss: 37.6605
Epoch 19/20 - Loss: 36.2833
Epoch 20/20 - Loss: 34.8856


In [33]:
model.eval()
with torch.no_grad():
    logits = model(X_test_t).squeeze()
    test_preds = torch.sigmoid(logits).numpy()

auc = roc_auc_score(y_test, test_preds)
print("AUC:", auc)


AUC: 0.9927463064658267


In [34]:
best_f1 = 0
best_t = 0

for t in np.arange(0.01, 0.99, 0.01):
    pred_labels = (test_preds >= t).astype(int)
    f1 = f1_score(y_test, pred_labels)
    if f1 > best_f1:
        best_f1 = f1
        best_t = t

print("Best Threshold:", best_t)
print("Best F1:", best_f1)


Best Threshold: 0.04
Best F1: 0.9458286145715364


In [35]:
import joblib
import pandas as pd

# Save model
torch.save(model.state_dict(), "final_dl_model.pth")

# Save predictions
df_preds = pd.DataFrame({
    "prob_default": test_preds,
    "true_label": y_test.values
})
df_preds.to_csv("dl_predictions.csv", index=False)

print("Files saved: final_dl_model.pth, dl_predictions.csv")


Files saved: final_dl_model.pth, dl_predictions.csv
