In [None]:
from project_utils.autosave_plots import enable_autosave
from project_utils.plotting import plot_loss_curves

from typing import TypedDict, Tuple, Callable, Sized, cast

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.container import BarContainer
import random
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, TensorDataset, DataLoader
import torch.nn as nn
import torch
from tqdm.auto import tqdm
from sklearn.metrics import average_precision_score, precision_recall_curve

In [None]:
# save plots to results/
# NOTE(review): "deep_learning" presumably names the sub-folder the figures are
# written to and quiet=True presumably silences the helper's logging — confirm
# against project_utils.autosave_plots.
enable_autosave("deep_learning", quiet=True)

In [None]:
# enable retina plots: draw figures inline in the notebook and have the inline
# backend emit them in the high-resolution "retina" format
%matplotlib inline
%config InlineBackend.figure_format = "retina"

In [None]:
# single seed shared by the data splits and by every experiment below
SEED = 42


def set_seed(seed: int = SEED) -> None:
    """Seed the PyTorch, `random` and NumPy global RNGs with the same value."""
    # same seeding order as before: torch first, then random, then numpy
    for seed_rng in (torch.manual_seed, random.seed, np.random.seed):
        seed_rng(seed)

## Load the cleaned data

In [None]:
# read the cleaned credit-card data; the path is relative to the notebook's
# working directory
df = pd.read_csv("../data/processed/creditcard_clean.csv")
# preview the first rows as the cell output
df.head()

## Prepare training and validation data

In [None]:
# target label, and every remaining column as a feature
y = df["Class"]
X = df.drop("Class", axis=1)

# hold out 20% of the rows, stratified on the label; the hold-out is halved below
X_train, X_test_validation, y_train, y_test_validation = train_test_split(
    X, y, stratify=y, test_size=0.2, random_state=SEED
)

# halve the hold-out into test and validation sets, again stratified on the label
X_test, X_validation, y_test, y_validation = train_test_split(
    X_test_validation,
    y_test_validation,
    stratify=y_test_validation,
    test_size=0.5,
    random_state=SEED,
)

In [None]:
# size and fraud rate of each split (stratification should keep the rates close)
split_labels = {"Train": y_train, "Validation": y_validation, "Test": y_test}
for name, labels in split_labels.items():
    print(f"{name}: {len(labels)} samples, {labels.mean()*100:.3f}% fraud")

# feature-matrix shapes of the three splits
print(
    f"Shapes: X_train={X_train.shape}, X_validation={X_validation.shape}, X_test={X_test.shape}"
)

### Tensorification

In [None]:
# pandas -> float32 NumPy arrays, one (features, labels) pair per split
X_train_np = X_train.to_numpy().astype(np.float32)
y_train_np = y_train.to_numpy().astype(np.float32)

X_validation_np = X_validation.to_numpy().astype(np.float32)
y_validation_np = y_validation.to_numpy().astype(np.float32)

X_test_np = X_test.to_numpy().astype(np.float32)
y_test_np = y_test.to_numpy().astype(np.float32)

# NumPy -> PyTorch tensors; labels become (n, 1) columns because
# BCEWithLogitsLoss is given targets shaped like the (batch, 1) logits
X_train_tensor = torch.from_numpy(X_train_np)
y_train_tensor = torch.from_numpy(y_train_np).reshape(-1, 1)

X_validation_tensor = torch.from_numpy(X_validation_np)
y_validation_tensor = torch.from_numpy(y_validation_np).reshape(-1, 1)

X_test_tensor = torch.from_numpy(X_test_np)
y_test_tensor = torch.from_numpy(y_test_np).reshape(-1, 1)

# pair features with labels
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
validation_dataset = TensorDataset(X_validation_tensor, y_validation_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# mini-batch loaders; only the training loader shuffles
Batch = Tuple[torch.Tensor, torch.Tensor]

set_seed(SEED)
train_loader: DataLoader[Batch] = DataLoader(
    cast(Dataset[Batch], train_dataset), shuffle=True, batch_size=32
)

set_seed(SEED)
validation_loader: DataLoader[Batch] = DataLoader(
    cast(Dataset[Batch], validation_dataset), shuffle=False, batch_size=32
)

set_seed(SEED)
test_loader: DataLoader[Batch] = DataLoader(
    cast(Dataset[Batch], test_dataset), shuffle=False, batch_size=32
)

# class-imbalance weight: number of negatives per positive in the training labels
n_negative = (y_train == 0).sum()
n_positive = (y_train == 1).sum()
pos_weight_value = n_negative / n_positive
pos_weight = torch.tensor([pos_weight_value], dtype=torch.float32).to("cpu")
print(f"Positive class weight: {pos_weight.item():.4f}")

## Define training and validation loop

In [None]:
# a loss is any callable taking (logits, targets) and returning a tensor
Criterion = Callable[[torch.Tensor, torch.Tensor], torch.Tensor]


# shape of the dict returned by train_and_evaluate_model
TrainResults = TypedDict(
    "TrainResults",
    {
        "name": str,  # experiment label
        "model": nn.Module,  # the trained network
        "train_losses": list[float],  # one entry per epoch
        "val_losses": list[float],  # one entry per epoch
        "val_aps": float,  # validation average precision after the last epoch
    },
)


def train_and_evaluate_model(
    name: str,
    model: nn.Module,
    optimizer: torch.optim.Optimizer,
    criterion: Criterion,
    train_loader: DataLoader[Batch],
    X_val_tensor: torch.Tensor,
    y_val_np: np.ndarray,
    num_epochs: int = 30,
) -> TrainResults:
    """Train ``model`` and score it on the validation data.

    Every epoch makes one optimisation pass over ``train_loader`` and then
    evaluates the validation loss with a single forward pass over
    ``X_val_tensor``. After the last epoch the validation average precision
    score (APS) is computed from the sigmoid of the model's logits.

    Args:
        name: Experiment label; shown in the progress bar and echoed in the result.
        model: Network mapping a feature batch to logits. It is trained in place.
        optimizer: Optimizer stepped once per training batch.
        criterion: Loss called as ``criterion(logits, targets)``.
        train_loader: Yields ``(features, targets)`` training batches.
        X_val_tensor: Validation features, passed to the model in one call.
        y_val_np: Validation labels; expected to be 1-D so that the added
            trailing dimension matches the ``(batch, 1)`` logits.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        A ``TrainResults`` dict holding the name, the trained model, the
        per-epoch training and validation losses, and the final validation APS.
    """
    train_losses: list[float] = []
    val_losses: list[float] = []
    dataset_size = len(cast(Sized, train_loader.dataset))

    # Loop-invariant: build the (n, 1) float validation target once instead of
    # converting the NumPy labels again in every epoch.
    y_val_tensor = torch.from_numpy(y_val_np).unsqueeze(1).float()

    for _ in tqdm(range(num_epochs), desc=f"Training: {name}"):
        model.train()
        running_loss = 0.0

        for X_batch, y_batch in train_loader:
            optimizer.zero_grad()
            outputs = model(X_batch)  # (batch, 1)
            loss = criterion(outputs, y_batch)  # BCEWithLogitsLoss
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figure is a per-sample mean even
            # when the last batch is smaller (assumes the criterion returns a
            # per-batch mean, the BCEWithLogitsLoss default).
            running_loss += loss.item() * X_batch.size(0)

        train_losses.append(running_loss / dataset_size)

        # validation loss
        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(X_val_tensor), y_val_tensor)
        val_losses.append(val_loss.item())

    # final APS (computed separately so it also works when num_epochs == 0)
    model.eval()
    with torch.no_grad():
        val_probs = torch.sigmoid(model(X_val_tensor)).numpy().ravel()
    # convert the NumPy scalar so the value matches the declared ``float`` type
    aps = float(average_precision_score(y_val_np, val_probs))

    return {
        "name": name,
        "model": model,
        "train_losses": train_losses,
        "val_losses": val_losses,
        "val_aps": aps,
    }

In [None]:
# collect experiments and models, both keyed by experiment name
# experiments: name -> summary metrics (version, val_aps, final train/val loss)
experiments = {}
# trained_models: name -> trained nn.Module
trained_models = {}

## Baseline model

In [None]:
class CreditCardFraudModel(nn.Module):
    """Baseline MLP: one 32-unit ReLU hidden layer and a single-logit output."""

    def __init__(self, input_dim: int) -> None:
        super().__init__()
        hidden = nn.Linear(input_dim, 32)
        head = nn.Linear(32, 1)
        self.network = nn.Sequential(hidden, nn.ReLU(), head)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw logits (no sigmoid applied) for the batch ``x``."""
        logits = self.network(x)
        return cast(torch.Tensor, logits)

### With class-imbalance weighting (`pos_weight`)

In [None]:
# MLP v0 with pos_weights: baseline network, BCE-with-logits loss weighted by
# the full negative/positive ratio, Adam at lr=1e-3, 50 epochs
set_seed(SEED)  # reset rng before the model is built and trained
model_v0 = CreditCardFraudModel(input_dim=X_train.shape[1])
criterion_v0 = nn.BCEWithLogitsLoss(pos_weight=pos_weight)
optimizer_v0 = torch.optim.Adam(model_v0.parameters(), lr=0.001)

result_v0 = train_and_evaluate_model(
    name="MLP_v0_pos_weights",
    model=model_v0,
    criterion=criterion_v0,
    optimizer=optimizer_v0,
    train_loader=train_loader,
    X_val_tensor=X_validation_tensor,
    y_val_np=y_validation_np,
    num_epochs=50,
)

# register summary metrics; "version" is the key the comparison plot sorts on
experiments[result_v0["name"]] = {
    "version": 0,
    "val_aps": result_v0["val_aps"],
    "final_train_loss": result_v0["train_losses"][-1],
    "final_val_loss": result_v0["val_losses"][-1],
}

# keep the trained network under the same experiment name
trained_models[result_v0["name"]] = result_v0["model"]

In [None]:
# results: print the final validation APS and plot the per-epoch loss histories
# NOTE(review): smoothing=3 is forwarded to the project's plot_loss_curves
# helper — presumably a smoothing window; confirm in project_utils.plotting
print(f"Experiment: {result_v0['name']}")
print(f"Validation APS: {result_v0['val_aps']:.4f}")
plot_loss_curves(
    result_v0["train_losses"],
    result_v0["val_losses"],
    title=f"{result_v0['name']} Loss Curves",
    smoothing=3,
)

### Without class-imbalance weighting

In [None]:
# MLP v0.1 without pos_weights: same baseline network and Adam lr=1e-3 as v0,
# but with an unweighted BCE-with-logits loss
set_seed(SEED)  # reset rng before the model is built and trained
model_v0_1 = CreditCardFraudModel(input_dim=X_train.shape[1])
criterion_v0_1 = nn.BCEWithLogitsLoss(pos_weight=None)
optimizer_v0_1 = torch.optim.Adam(model_v0_1.parameters(), lr=0.001)

result_v0_1 = train_and_evaluate_model(
    name="MLP_v0.1_without_pos_weights",
    model=model_v0_1,
    criterion=criterion_v0_1,
    optimizer=optimizer_v0_1,
    train_loader=train_loader,
    X_val_tensor=X_validation_tensor,
    y_val_np=y_validation_np,
    num_epochs=50,
)

# register summary metrics; "version" is the key the comparison plot sorts on
experiments[result_v0_1["name"]] = {
    "version": 0.1,
    "val_aps": result_v0_1["val_aps"],
    "final_train_loss": result_v0_1["train_losses"][-1],
    "final_val_loss": result_v0_1["val_losses"][-1],
}

# keep the trained network under the same experiment name
trained_models[result_v0_1["name"]] = result_v0_1["model"]

In [None]:
# results: print the final validation APS and plot the per-epoch loss histories
print(f"Experiment: {result_v0_1['name']}")
print(f"Validation APS: {result_v0_1['val_aps']:.4f}")
plot_loss_curves(
    result_v0_1["train_losses"],
    result_v0_1["val_losses"],
    title=f"{result_v0_1['name']} Loss Curves",
    smoothing=1,
)

## 2 Layers

In [None]:
class CreditCardFraudModelDense(nn.Module):
    """MLP with two 32-unit ReLU hidden layers, optional dropout, and one output logit."""

    def __init__(
        self, input_dim: int, use_dropout: bool = False, p: float = 0.3
    ) -> None:
        super().__init__()
        hidden: list[nn.Module] = [
            nn.Linear(input_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 32),
            nn.ReLU(),
        ]
        # when enabled, dropout sits between the last hidden activation and the output layer
        regularizer: list[nn.Module] = [nn.Dropout(p=p)] if use_dropout else []
        self.network = nn.Sequential(*hidden, *regularizer, nn.Linear(32, 1))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw logits (no sigmoid applied) for the batch ``x``."""
        logits = self.network(x)
        return cast(torch.Tensor, logits)

In [None]:
# MLP v1 without pos_weights: two-hidden-layer network (no dropout),
# unweighted BCE-with-logits loss, Adam at lr=1e-3, 50 epochs
set_seed(SEED)  # reset rng before the model is built and trained
model_v1 = CreditCardFraudModelDense(input_dim=X_train.shape[1])
criterion_v1 = nn.BCEWithLogitsLoss(pos_weight=None)
optimizer_v1 = torch.optim.Adam(model_v1.parameters(), lr=0.001)

result_v1 = train_and_evaluate_model(
    name="MLP_v1_without_pos_weights",
    model=model_v1,
    criterion=criterion_v1,
    optimizer=optimizer_v1,
    train_loader=train_loader,
    X_val_tensor=X_validation_tensor,
    y_val_np=y_validation_np,
    num_epochs=50,
)

# register summary metrics; "version" is the key the comparison plot sorts on
experiments[result_v1["name"]] = {
    "version": 1,
    "val_aps": result_v1["val_aps"],
    "final_train_loss": result_v1["train_losses"][-1],
    "final_val_loss": result_v1["val_losses"][-1],
}

# keep the trained network under the same experiment name
trained_models[result_v1["name"]] = result_v1["model"]

In [None]:
# results: print the final validation APS and plot the per-epoch loss histories
print(f"Experiment: {result_v1['name']}")
print(f"Validation APS: {result_v1['val_aps']:.4f}")
plot_loss_curves(
    result_v1["train_losses"],
    result_v1["val_losses"],
    title=f"{result_v1['name']} Loss Curves",
    smoothing=1,
)

In [None]:
# MLP v1.1 without pos_weights 1e-4 lr: same as v1 except for the ten times
# smaller Adam learning rate
set_seed(SEED)  # reset rng before the model is built and trained
model_v1_1 = CreditCardFraudModelDense(input_dim=X_train.shape[1])
criterion_v1_1 = nn.BCEWithLogitsLoss(pos_weight=None)
optimizer_v1_1 = torch.optim.Adam(model_v1_1.parameters(), lr=0.0001)

result_v1_1 = train_and_evaluate_model(
    name="MLP_v1.1_without_pos_weights",
    model=model_v1_1,
    criterion=criterion_v1_1,
    optimizer=optimizer_v1_1,
    train_loader=train_loader,
    X_val_tensor=X_validation_tensor,
    y_val_np=y_validation_np,
    num_epochs=50,
)

# register summary metrics; "version" is the key the comparison plot sorts on
experiments[result_v1_1["name"]] = {
    "version": 1.1,
    "val_aps": result_v1_1["val_aps"],
    "final_train_loss": result_v1_1["train_losses"][-1],
    "final_val_loss": result_v1_1["val_losses"][-1],
}

# keep the trained network under the same experiment name
trained_models[result_v1_1["name"]] = result_v1_1["model"]

In [None]:
# results: print the final validation APS and plot the per-epoch loss histories
print(f"Experiment: {result_v1_1['name']}")
print(f"Validation APS: {result_v1_1['val_aps']:.4f}")
plot_loss_curves(
    result_v1_1["train_losses"],
    result_v1_1["val_losses"],
    title=f"{result_v1_1['name']} Loss Curves",
    smoothing=1,
)

### 2 layers + dropout

In [None]:
# MLP v1.2 without pos_weights, 1e-4 lr, dropout 0.3: v1.1 plus a dropout
# layer in front of the output layer
set_seed(SEED)  # reset rng before the model is built and trained
model_v1_2 = CreditCardFraudModelDense(
    input_dim=X_train.shape[1], use_dropout=True, p=0.3
)
criterion_v1_2 = nn.BCEWithLogitsLoss(pos_weight=None)
optimizer_v1_2 = torch.optim.Adam(model_v1_2.parameters(), lr=0.0001)

result_v1_2 = train_and_evaluate_model(
    name="MLP_v1.2_without_pos_weights",
    model=model_v1_2,
    criterion=criterion_v1_2,
    optimizer=optimizer_v1_2,
    train_loader=train_loader,
    X_val_tensor=X_validation_tensor,
    y_val_np=y_validation_np,
    num_epochs=50,
)

# register summary metrics; "version" is the key the comparison plot sorts on
experiments[result_v1_2["name"]] = {
    "version": 1.2,
    "val_aps": result_v1_2["val_aps"],
    "final_train_loss": result_v1_2["train_losses"][-1],
    "final_val_loss": result_v1_2["val_losses"][-1],
}

# keep the trained network under the same experiment name
trained_models[result_v1_2["name"]] = result_v1_2["model"]

In [None]:
# results: print the final validation APS and plot the per-epoch loss histories
print(f"Experiment: {result_v1_2['name']}")
print(f"Validation APS: {result_v1_2['val_aps']:.4f}")
plot_loss_curves(
    result_v1_2["train_losses"],
    result_v1_2["val_losses"],
    title=f"{result_v1_2['name']} Loss Curves",
    smoothing=1,
)

### 2 layers + dropout + mild class-imbalance weighting (sqrt `pos_weight`)

In [None]:
# MLP v1.3 with sqrt pos_weights, 1e-4 lr, dropout 0.3: same network as v1.2,
# but the loss up-weights positives by the square root of the class ratio
set_seed(SEED)  # reset rng before the model is built and trained
model_v1_3 = CreditCardFraudModelDense(
    input_dim=X_train.shape[1], use_dropout=True, p=0.3
)
criterion_v1_3 = nn.BCEWithLogitsLoss(pos_weight=torch.sqrt(pos_weight))
optimizer_v1_3 = torch.optim.Adam(model_v1_3.parameters(), lr=0.0001)

result_v1_3 = train_and_evaluate_model(
    name="MLP_v1.3_with_sqrt_pos_weights",
    model=model_v1_3,
    criterion=criterion_v1_3,
    optimizer=optimizer_v1_3,
    train_loader=train_loader,
    X_val_tensor=X_validation_tensor,
    y_val_np=y_validation_np,
    num_epochs=50,
)

# register summary metrics; "version" is the key the comparison plot sorts on
experiments[result_v1_3["name"]] = {
    "version": 1.3,
    "val_aps": result_v1_3["val_aps"],
    "final_train_loss": result_v1_3["train_losses"][-1],
    "final_val_loss": result_v1_3["val_losses"][-1],
}

# keep the trained network under the same experiment name
trained_models[result_v1_3["name"]] = result_v1_3["model"]

In [None]:
# results: print the final validation APS and plot the per-epoch loss histories
print(f"Experiment: {result_v1_3['name']}")
print(f"Validation APS: {result_v1_3['val_aps']:.4f}")
plot_loss_curves(
    result_v1_3["train_losses"],
    result_v1_3["val_losses"],
    title=f"{result_v1_3['name']} Loss Curves",
    smoothing=1,
)

## 3 Layers (wider)

In [None]:
class CreditCardFraudModelDenseWide(nn.Module):
    """MLP with 64-, 32- and 16-unit ReLU hidden layers, optional dropout, and one output logit."""

    def __init__(
        self, input_dim: int, use_dropout: bool = False, p: float = 0.3
    ) -> None:
        super().__init__()
        front: list[nn.Module] = [
            nn.Linear(input_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
        ]
        # when enabled, dropout is applied after the second hidden activation
        regularizer: list[nn.Module] = [nn.Dropout(p=p)] if use_dropout else []
        back: list[nn.Module] = [
            nn.Linear(32, 16),
            nn.ReLU(),
            nn.Linear(16, 1),
        ]
        self.network = nn.Sequential(*front, *regularizer, *back)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the raw logits (no sigmoid applied) for the batch ``x``."""
        logits = self.network(x)
        return cast(torch.Tensor, logits)

In [None]:
# MLP v2 without pos_weights, 1e-4 lr, dropout 0.3: the wider three-hidden-layer
# network with an unweighted BCE-with-logits loss
set_seed(SEED)  # reset rng before the model is built and trained
model_v2 = CreditCardFraudModelDenseWide(
    input_dim=X_train.shape[1], use_dropout=True, p=0.3
)
criterion_v2 = nn.BCEWithLogitsLoss(pos_weight=None)
optimizer_v2 = torch.optim.Adam(model_v2.parameters(), lr=0.0001)

result_v2 = train_and_evaluate_model(
    name="MLP_v2_without_pos_weights",
    model=model_v2,
    criterion=criterion_v2,
    optimizer=optimizer_v2,
    train_loader=train_loader,
    X_val_tensor=X_validation_tensor,
    y_val_np=y_validation_np,
    num_epochs=50,
)

# register summary metrics; "version" is the key the comparison plot sorts on
experiments[result_v2["name"]] = {
    "version": 2,
    "val_aps": result_v2["val_aps"],
    "final_train_loss": result_v2["train_losses"][-1],
    "final_val_loss": result_v2["val_losses"][-1],
}

# keep the trained network under the same experiment name
trained_models[result_v2["name"]] = result_v2["model"]

In [None]:
# results: print the final validation APS and plot the per-epoch loss histories
print(f"Experiment: {result_v2['name']}")
print(f"Validation APS: {result_v2['val_aps']:.4f}")
plot_loss_curves(
    result_v2["train_losses"],
    result_v2["val_losses"],
    title=f"{result_v2['name']} Loss Curves",
    smoothing=1,
)

### 3 Layers + aggressive dropout

In [None]:
# MLP v2.1 without pos_weights, 1e-4 lr, dropout 0.5: same as v2 except for
# the higher dropout probability
set_seed(SEED)  # reset rng before the model is built and trained
model_v2_1 = CreditCardFraudModelDenseWide(
    input_dim=X_train.shape[1], use_dropout=True, p=0.5
)
criterion_v2_1 = nn.BCEWithLogitsLoss(pos_weight=None)
optimizer_v2_1 = torch.optim.Adam(model_v2_1.parameters(), lr=0.0001)

result_v2_1 = train_and_evaluate_model(
    name="MLP_v2.1_without_pos_weights",
    model=model_v2_1,
    criterion=criterion_v2_1,
    optimizer=optimizer_v2_1,
    train_loader=train_loader,
    X_val_tensor=X_validation_tensor,
    y_val_np=y_validation_np,
    num_epochs=50,
)

# register summary metrics; "version" is the key the comparison plot sorts on
experiments[result_v2_1["name"]] = {
    "version": 2.1,
    "val_aps": result_v2_1["val_aps"],
    "final_train_loss": result_v2_1["train_losses"][-1],
    "final_val_loss": result_v2_1["val_losses"][-1],
}

# keep the trained network under the same experiment name
trained_models[result_v2_1["name"]] = result_v2_1["model"]

In [None]:
# results: print the final validation APS and plot the per-epoch loss histories
print(f"Experiment: {result_v2_1['name']}")
print(f"Validation APS: {result_v2_1['val_aps']:.4f}")
plot_loss_curves(
    result_v2_1["train_losses"],
    result_v2_1["val_losses"],
    title=f"{result_v2_1['name']} Loss Curves",
    smoothing=1,
)

### 3 layers + mild class-imbalance weighting (sqrt `pos_weight`)

In [None]:
# MLP v2.2 with sqrt pos_weights, 1e-4 lr, dropout 0.3: same network as v2,
# but the loss up-weights positives by the square root of the class ratio
set_seed(SEED)  # reset rng before the model is built and trained
model_v2_2 = CreditCardFraudModelDenseWide(
    input_dim=X_train.shape[1], use_dropout=True, p=0.3
)
criterion_v2_2 = nn.BCEWithLogitsLoss(pos_weight=pos_weight.sqrt())
optimizer_v2_2 = torch.optim.Adam(model_v2_2.parameters(), lr=0.0001)

result_v2_2 = train_and_evaluate_model(
    name="MLP_v2.2_with_sqrt_pos_weights",
    model=model_v2_2,
    criterion=criterion_v2_2,
    optimizer=optimizer_v2_2,
    train_loader=train_loader,
    X_val_tensor=X_validation_tensor,
    y_val_np=y_validation_np,
    num_epochs=50,
)

# register summary metrics; "version" is the key the comparison plot sorts on
experiments[result_v2_2["name"]] = {
    "version": 2.2,
    "val_aps": result_v2_2["val_aps"],
    "final_train_loss": result_v2_2["train_losses"][-1],
    "final_val_loss": result_v2_2["val_losses"][-1],
}

# keep the trained network under the same experiment name
trained_models[result_v2_2["name"]] = result_v2_2["model"]

In [None]:
# results: print the final validation APS and plot the per-epoch loss histories
print(f"Experiment: {result_v2_2['name']}")
print(f"Validation APS: {result_v2_2['val_aps']:.4f}")
plot_loss_curves(
    result_v2_2["train_losses"],
    result_v2_2["val_losses"],
    title=f"{result_v2_2['name']} Loss Curves",
    smoothing=1,
)

### 3 layers + even milder class-imbalance weighting (sqrt `pos_weight` / 10)

In [None]:
# MLP v2.3 with one-tenth sqrt pos_weights, 1e-4 lr, dropout 0.3: same as v2.2
# but with the positive-class weight divided by 10
set_seed(SEED)  # reset rng before the model is built and trained
model_v2_3 = CreditCardFraudModelDenseWide(
    input_dim=X_train.shape[1], use_dropout=True, p=0.3
)
criterion_v2_3 = nn.BCEWithLogitsLoss(pos_weight=pos_weight.sqrt() / 10)
optimizer_v2_3 = torch.optim.Adam(model_v2_3.parameters(), lr=0.0001)

result_v2_3 = train_and_evaluate_model(
    name="MLP_v2.3_with_one_tenth_sqrt_pos_weights",
    model=model_v2_3,
    criterion=criterion_v2_3,
    optimizer=optimizer_v2_3,
    train_loader=train_loader,
    X_val_tensor=X_validation_tensor,
    y_val_np=y_validation_np,
    num_epochs=50,
)

# register summary metrics; "version" is the key the comparison plot sorts on
experiments[result_v2_3["name"]] = {
    "version": 2.3,
    "val_aps": result_v2_3["val_aps"],
    "final_train_loss": result_v2_3["train_losses"][-1],
    "final_val_loss": result_v2_3["val_losses"][-1],
}

# keep the trained network under the same experiment name
trained_models[result_v2_3["name"]] = result_v2_3["model"]

In [None]:
# results: print the final validation APS and plot the per-epoch loss histories
print(f"Experiment: {result_v2_3['name']}")
print(f"Validation APS: {result_v2_3['val_aps']:.4f}")
plot_loss_curves(
    result_v2_3["train_losses"],
    result_v2_3["val_losses"],
    title=f"{result_v2_3['name']} Loss Curves",
    smoothing=1,
)

## Results

### Experiment results

In [None]:
# validation APS per experiment, ordered by the numeric "version" tag
experiments_frame = pd.DataFrame(experiments).T
experiments_val_aps = experiments_frame.sort_values(by="version", ascending=True)[
    "val_aps"
]

plt.figure(figsize=(10, 4))
ax = sns.barplot(x=experiments_val_aps.index, y=experiments_val_aps.values)

# write each score above its bar
first_container = cast(BarContainer, ax.containers[0])
ax.bar_label(first_container, fmt="%.4f", label_type="edge", padding=2, fontsize=8)

# experiment names are long, so tilt the tick labels
plt.xticks(rotation=45, ha="right")
ax.set(
    xlabel="Model",
    ylabel="Average Precision Score",
    title="Deep Learning Model Comparison on Validation Set",
)
plt.show()

### Evaluate best model on validation set

In [None]:
# candidate 1: MLP v1.2 (2 hidden layers, dropout 0.3, unweighted loss)
set_seed(SEED)
model_v1_2.eval()
with torch.no_grad():
    val_logits_v1_2 = model_v1_2(X_validation_tensor)
    val_probs_v1_2 = torch.sigmoid(val_logits_v1_2).numpy().ravel()
aps = average_precision_score(y_validation_np, val_probs_v1_2)
print(f"Validation APS for MLP_v1.2_without_pos_weights: {aps:.4f}")

# candidate 2: MLP v2.3 (3 hidden layers, dropout 0.3, sqrt(pos_weight) / 10)
set_seed(SEED)
model_v2_3.eval()
with torch.no_grad():
    val_logits_v2_3 = model_v2_3(X_validation_tensor)
    val_probs_v2_3 = torch.sigmoid(val_logits_v2_3).numpy().ravel()
aps = average_precision_score(y_validation_np, val_probs_v2_3)
# label fixed: this model was trained WITH a (scaled) pos_weight, so the printed
# name now matches the experiment name it was registered under
print(f"Validation APS for MLP_v2.3_with_one_tenth_sqrt_pos_weights: {aps:.4f}")

### Compare against ML baselines

In [None]:
# load ml baseline scores for the validation split; the comparison cell below
# reads the columns y_true, V14, Logistic_Regression, Random_Forest and XGBoost
ml_baseline_scores = pd.read_csv("../results/ml_baselines/validation_scores.csv")

In [None]:
# calculate precision-recall curves
# precision_recall_curve returns (precision, recall, thresholds), in that order
validation_precision_v14, validation_recall_v14, _ = precision_recall_curve(
    ml_baseline_scores["y_true"], ml_baseline_scores["V14"]
)
validation_precision_lr, validation_recall_lr, _ = precision_recall_curve(
    ml_baseline_scores["y_true"], ml_baseline_scores["Logistic_Regression"]
)
validation_precision_rf, validation_recall_rf, _ = precision_recall_curve(
    ml_baseline_scores["y_true"], ml_baseline_scores["Random_Forest"]
)
validation_precision_xgb, validation_recall_xgb, _ = precision_recall_curve(
    ml_baseline_scores["y_true"], ml_baseline_scores["XGBoost"]
)
validation_precision_model_v1_2, validation_recall_model_v1_2, _ = (
    precision_recall_curve(y_validation_np, val_probs_v1_2)
)
# calculate average precision scores
validation_aps_v14 = average_precision_score(
    ml_baseline_scores["y_true"], ml_baseline_scores["V14"]
)
validation_aps_lr = average_precision_score(
    ml_baseline_scores["y_true"], ml_baseline_scores["Logistic_Regression"]
)
validation_aps_rf = average_precision_score(
    ml_baseline_scores["y_true"], ml_baseline_scores["Random_Forest"]
)
validation_aps_xgb = average_precision_score(
    ml_baseline_scores["y_true"], ml_baseline_scores["XGBoost"]
)
validation_aps_model_v1_2 = average_precision_score(y_validation_np, val_probs_v1_2)

# plot precision-recall curves
plt.figure(figsize=(6, 5))

# (legend label, recall, precision, APS) for each model, in legend order
validation_pr_curves = [
    ("V14", validation_recall_v14, validation_precision_v14, validation_aps_v14),
    ("LR", validation_recall_lr, validation_precision_lr, validation_aps_lr),
    ("RF", validation_recall_rf, validation_precision_rf, validation_aps_rf),
    ("XGB", validation_recall_xgb, validation_precision_xgb, validation_aps_xgb),
    (
        "MLP v1.2",
        validation_recall_model_v1_2,
        validation_precision_model_v1_2,
        validation_aps_model_v1_2,
    ),
]
for curve_label, curve_recall, curve_precision, curve_aps in validation_pr_curves:
    # recall on x and precision on y, matching the axis labels set below
    # (the previous version passed precision as x and recall as y)
    plt.plot(
        curve_recall,
        curve_precision,
        lw=2,
        label=f"{curve_label} | APS = {curve_aps:.3f}",
    )
    plt.fill_between(curve_recall, curve_precision, alpha=0.2)

plt.xlabel("Recall (frauds caught)")
plt.ylabel("Precision (alerts correct)")
plt.title(
    "Precision-Recall Curve | Validation Set | V14 vs Classic ML Baselines vs Deep Learning"
)
plt.legend()
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.grid()
plt.tight_layout()
plt.show()

In [None]:
# validation APS per model, in display order (baselines first, then the MLP)
validation_aps_by_model = {
    "V14": validation_aps_v14,
    "Logistic Regression": validation_aps_lr,
    "Random Forest": validation_aps_rf,
    "XGBoost": validation_aps_xgb,
    "MLP v1.2": validation_aps_model_v1_2,
}
model_names = list(validation_aps_by_model.keys())
validation_aps_scores = list(validation_aps_by_model.values())

plt.figure(figsize=(8, 4))
# hue repeats the x variable so that each bar takes its own palette colour
ax = sns.barplot(
    x=model_names,
    y=validation_aps_scores,
    hue=model_names,
    palette=["gray", "blue", "green", "orange", "red"],
)

# write each score above its bar
for container in ax.containers:
    bars = cast(BarContainer, container)
    ax.bar_label(bars, fmt="%.4f", label_type="edge", padding=2, fontsize=8)

ax.set(
    ylabel="Average Precision Score (APS)",
    title="Model Comparison on Validation Set",
    ylim=(0, 1),
)
plt.tight_layout()
plt.show()

### Final evaluation on test set

In [None]:
# score the selected model (MLP v1.2) on the held-out test set
set_seed(SEED)
model_v1_2.eval()
with torch.no_grad():
    test_logits_v1_2 = model_v1_2(X_test_tensor)
    # logits -> probabilities -> flat NumPy vector
    test_probs_v1_2 = test_logits_v1_2.sigmoid().numpy().reshape(-1)

aps = average_precision_score(y_test_np, test_probs_v1_2)
print(f"Test APS for MLP_v1.2_without_pos_weights: {aps:.4f}")

In [None]:
# load ml baseline scores for the test split
# NOTE: this rebinds ml_baseline_scores, which held the validation scores until
# now; re-running the validation comparison cells after this one would silently
# use the test scores
ml_baseline_scores = pd.read_csv("../results/ml_baselines/test_scores.csv")

In [None]:
# calculate precision-recall curves
# precision_recall_curve returns (precision, recall, thresholds), in that order
test_precision_v14, test_recall_v14, _ = precision_recall_curve(
    ml_baseline_scores["y_true"], ml_baseline_scores["V14"]
)
test_precision_lr, test_recall_lr, _ = precision_recall_curve(
    ml_baseline_scores["y_true"], ml_baseline_scores["Logistic_Regression"]
)
test_precision_rf, test_recall_rf, _ = precision_recall_curve(
    ml_baseline_scores["y_true"], ml_baseline_scores["Random_Forest"]
)
test_precision_xgb, test_recall_xgb, _ = precision_recall_curve(
    ml_baseline_scores["y_true"], ml_baseline_scores["XGBoost"]
)
test_precision_model_v1_2, test_recall_model_v1_2, _ = precision_recall_curve(
    y_test_np, test_probs_v1_2
)
# calculate average precision scores
test_aps_v14 = average_precision_score(
    ml_baseline_scores["y_true"], ml_baseline_scores["V14"]
)
test_aps_lr = average_precision_score(
    ml_baseline_scores["y_true"], ml_baseline_scores["Logistic_Regression"]
)
test_aps_rf = average_precision_score(
    ml_baseline_scores["y_true"], ml_baseline_scores["Random_Forest"]
)
test_aps_xgb = average_precision_score(
    ml_baseline_scores["y_true"], ml_baseline_scores["XGBoost"]
)
test_aps_model_v1_2 = average_precision_score(y_test_np, test_probs_v1_2)

# plot precision-recall curves
plt.figure(figsize=(6, 5))

# (legend label, recall, precision, APS) for each model, in legend order
test_pr_curves = [
    ("V14", test_recall_v14, test_precision_v14, test_aps_v14),
    ("LR", test_recall_lr, test_precision_lr, test_aps_lr),
    ("RF", test_recall_rf, test_precision_rf, test_aps_rf),
    ("XGB", test_recall_xgb, test_precision_xgb, test_aps_xgb),
    ("MLP v1.2", test_recall_model_v1_2, test_precision_model_v1_2, test_aps_model_v1_2),
]
for curve_label, curve_recall, curve_precision, curve_aps in test_pr_curves:
    # recall on x and precision on y, matching the axis labels set below
    # (the previous version passed precision as x and recall as y)
    plt.plot(
        curve_recall,
        curve_precision,
        lw=2,
        label=f"{curve_label} | APS = {curve_aps:.3f}",
    )
    plt.fill_between(curve_recall, curve_precision, alpha=0.2)

plt.xlabel("Recall (frauds caught)")
plt.ylabel("Precision (alerts correct)")
plt.title(
    "Precision-Recall Curve | Test Set | V14 vs Classic ML Baselines vs Deep Learning"
)
plt.legend()
plt.xlim(0, 1)
plt.ylim(0, 1)
plt.grid()
plt.tight_layout()
plt.show()

In [None]:
# test APS per model, in display order (baselines first, then the MLP)
test_aps_by_model = {
    "V14": test_aps_v14,
    "Logistic Regression": test_aps_lr,
    "Random Forest": test_aps_rf,
    "XGBoost": test_aps_xgb,
    "MLP v1.2": test_aps_model_v1_2,
}
model_names = list(test_aps_by_model.keys())
test_aps_scores = list(test_aps_by_model.values())

plt.figure(figsize=(8, 4))
# hue repeats the x variable so that each bar takes its own palette colour
ax = sns.barplot(
    x=model_names,
    y=test_aps_scores,
    hue=model_names,
    palette=["gray", "blue", "green", "orange", "red"],
)

# write each score above its bar
for container in ax.containers:
    bars = cast(BarContainer, container)
    ax.bar_label(bars, fmt="%.4f", label_type="edge", padding=2, fontsize=8)

ax.set(
    ylabel="Average Precision Score (APS)",
    title="Model Comparison on Test Set",
    ylim=(0, 1),
)
plt.tight_layout()
plt.show()