# ü§ñ Deep Learning Models for Credit Card Fraud Detection

## MLZoomcamp Capstone Project - Model Development

---

### üìã Overview

This notebook implements and compares multiple deep learning architectures for fraud detection:
1. **Baseline Autoencoder** - Simple reconstruction-based anomaly detection
2. **Variational Autoencoder (VAE)** - Probabilistic approach with KL divergence
3. **Deep SVDD** - Sequential pattern learning
4. **Ensemble Model** - Combining multiple approaches

### üéØ Objectives

- Train models on normal transactions only (semi-supervised)
- Use reconstruction error as anomaly score
- Compare performance across architectures
- Optimize thresholds for business metrics
- Save best models for deployment

---

**Added in this update:** Baseline **IsolationForest**, **Deep SVDD**, and removal of  (sequence length is 1 in this dataset).


## 1. Environment Setup

In [34]:
# Core libraries
import os
import sys
import warnings
import json
import pickle
import time
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Tuple, Optional
warnings.filterwarnings('ignore')

# Data manipulation
import numpy as np
import pandas as pd
from scipy import stats

# Visualization
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# PyTorch
import platform
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import Adam, AdamW
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR, OneCycleLR
from torch.nn.utils import clip_grad_norm_

# Scikit-learn
from sklearn.metrics import (
    roc_auc_score, average_precision_score, precision_recall_curve,
    roc_curve, confusion_matrix, classification_report,
    f1_score, precision_score, recall_score
)

# Set device
def get_device():
    """Get the best available device for M4 Mac"""
    if torch.backends.mps.is_available():
        # Metal Performance Shaders for Apple Silicon
        device = torch.device("mps")
        print(f"üéØ Using Metal Performance Shaders (MPS) on {platform.processor()}")
        
        # Optional: Set memory fraction to avoid OOM errors
        # torch.mps.set_per_process_memory_fraction(0.0)  # 0.0 means no limit
        
        return device
    elif torch.cuda.is_available():
        # Fallback to CUDA if available (unlikely on Mac)
        device = torch.device("cuda")
        print(f"üî• Using CUDA GPU")
        return device
    else:
        # CPU fallback
        device = torch.device("cpu")
        print(f"üíª Using CPU (No MPS or CUDA available)")
        return device

device = get_device()

# Set random seeds
def set_seed(seed=42):
    """
    Set random seeds for reproducibility across different devices.
    Works with CPU, CUDA, and MPS (Apple Silicon).
    """
    # Python random
    import random
    random.seed(seed)
    
    # Numpy
    np.random.seed(seed)
    
    # PyTorch
    torch.manual_seed(seed)
    
    # CUDA specific (only if available)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)  # For multi-GPU
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False
    
    # Set Python hash seed for reproducibility
    os.environ['PYTHONHASHSEED'] = str(seed)
    
    print(f"‚úÖ Random seed set to {seed} for all devices")

# Set the seed
set_seed(42)

# Custom colors
COLORS = {
    'normal': '#2E7D32',
    'fraud': '#C62828',
    'primary': '#1565C0',
    'secondary': '#FF6F00'
}

print("‚úÖ Environment setup complete")

üéØ Using Metal Performance Shaders (MPS) on arm
‚úÖ Random seed set to 42 for all devices
‚úÖ Environment setup complete


## 2. Load Preprocessed Data

In [35]:
# Load preprocessed data
artifacts_dir = Path('artifacts')

# Load arrays
X_train_scaled = np.load(artifacts_dir / 'X_train_scaled.npy')
X_val_scaled = np.load(artifacts_dir / 'X_val_scaled.npy')
X_test_scaled = np.load(artifacts_dir / 'X_test_scaled.npy')
y_train = np.load(artifacts_dir / 'y_train.npy')
y_val = np.load(artifacts_dir / 'y_val.npy')
y_test = np.load(artifacts_dir / 'y_test.npy')

# Load configuration
with open(artifacts_dir / 'config.json', 'r') as f:
    config = json.load(f)

n_features = config['n_features']
batch_size = config['batch_size']

print("‚úÖ Data loaded successfully")
print(f"\nüìä Data shapes:")
print(f"  X_train: {X_train_scaled.shape}")
print(f"  X_val: {X_val_scaled.shape}")
print(f"  X_test: {X_test_scaled.shape}")
print(f"\n‚öôÔ∏è Configuration:")
print(f"  Number of features: {n_features}")
print(f"  Batch size: {batch_size}")

‚úÖ Data loaded successfully

üìä Data shapes:
  X_train: (199364, 88)
  X_val: (42721, 88)
  X_test: (42722, 88)

‚öôÔ∏è Configuration:
  Number of features: 88
  Batch size: 256


## 2.1 Baseline: IsolationForest (normal-only training)

IsolationForest provides a fast unsupervised baseline. We fit it on **normal train** only and score validation/test.


In [36]:
from sklearn.ensemble import IsolationForest
from sklearn.metrics import roc_auc_score, average_precision_score

# Fit on NORMAL train only
normal_idx = (y_train == 0)
X_train_normal = X_train_scaled[normal_idx]

iso = IsolationForest(n_estimators=200, contamination="auto", random_state=42, n_jobs=-1)
iso.fit(X_train_normal)

def iso_score(X):
    return -iso.decision_function(X)  # higher => more anomalous

val_scores_iso = iso_score(X_val_scaled)
test_scores_iso = iso_score(X_test_scaled)

iso_val_auroc = roc_auc_score(y_val, val_scores_iso)
iso_val_auprc = average_precision_score(y_val, val_scores_iso)
iso_test_auroc = roc_auc_score(y_test, test_scores_iso)
iso_test_auprc = average_precision_score(y_test, test_scores_iso)

print(f"IsolationForest VAL  AUROC={iso_val_auroc:.4f} AUPRC={iso_val_auprc:.4f}")
print(f"IsolationForest TEST AUROC={iso_test_auroc:.4f} AUPRC={iso_test_auprc:.4f}")


IsolationForest VAL  AUROC=0.9456 AUPRC=0.0241
IsolationForest TEST AUROC=0.9486 AUPRC=0.0355


In [37]:
# Create PyTorch datasets
class FraudDataset(Dataset):
    def __init__(self, X, y=None):
        self.X = torch.FloatTensor(X)
        self.y = torch.FloatTensor(y) if y is not None else None
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        if self.y is not None:
            return self.X[idx], self.y[idx]
        return self.X[idx]

# Create datasets
train_dataset = FraudDataset(X_train_scaled, y_train)
val_dataset = FraudDataset(X_val_scaled, y_val)
test_dataset = FraudDataset(X_test_scaled, y_test)

# Create dataset with only normal samples for training
normal_idx = y_train == 0
X_train_normal = X_train_scaled[normal_idx]
y_train_normal = y_train[normal_idx]
train_dataset_normal = FraudDataset(X_train_normal, y_train_normal)

# Create data loaders
train_loader_normal = DataLoader(train_dataset_normal, batch_size=batch_size, shuffle=True, num_workers=0)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)

print(f"\nüì¶ DataLoaders created:")
print(f"  Train (normal only): {len(train_loader_normal)} batches, {len(train_dataset_normal)} samples")
print(f"  Train (all): {len(train_loader)} batches, {len(train_dataset)} samples")
print(f"  Validation: {len(val_loader)} batches, {len(val_dataset)} samples")
print(f"  Test: {len(test_loader)} batches, {len(test_dataset)} samples")


üì¶ DataLoaders created:
  Train (normal only): 778 batches, 198980 samples
  Train (all): 779 batches, 199364 samples
  Validation: 167 batches, 42721 samples
  Test: 167 batches, 42722 samples


## 3. Model Architectures

### 3.1 Baseline Autoencoder

In [38]:
class BaselineAutoencoder(nn.Module):
    """Baseline autoencoder for anomaly detection"""
    
    def __init__(self, input_dim, encoding_dim=32, hidden_dims=[64, 48]):
        super(BaselineAutoencoder, self).__init__()
        
        self.input_dim = input_dim
        self.encoding_dim = encoding_dim
        
        # Build encoder
        encoder_layers = []
        prev_dim = input_dim
        
        for hidden_dim in hidden_dims:
            encoder_layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.2)
            ])
            prev_dim = hidden_dim
        
        encoder_layers.append(nn.Linear(prev_dim, encoding_dim))
        self.encoder = nn.Sequential(*encoder_layers)
        
        # Build decoder (mirror of encoder)
        decoder_layers = []
        prev_dim = encoding_dim
        
        for hidden_dim in reversed(hidden_dims):
            decoder_layers.extend([
                nn.Linear(prev_dim, hidden_dim),
                nn.BatchNorm1d(hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.2)
            ])
            prev_dim = hidden_dim
        
        decoder_layers.append(nn.Linear(prev_dim, input_dim))
        self.decoder = nn.Sequential(*decoder_layers)
        
    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded
    
    def encode(self, x):
        return self.encoder(x)
    
    def decode(self, z):
        return self.decoder(z)

# Initialize baseline model
baseline_model = BaselineAutoencoder(
    input_dim=n_features,
    encoding_dim=32,
    hidden_dims=[64, 48]
).to(device)

print("‚úÖ Baseline Autoencoder initialized")
print(f"   Parameters: {sum(p.numel() for p in baseline_model.parameters()):,}")

‚úÖ Baseline Autoencoder initialized
   Parameters: 21,272


### 3.2 Variational Autoencoder (VAE)

In [39]:
class VAE(nn.Module):
    """Variational Autoencoder for anomaly detection"""
    
    def __init__(self, input_dim, latent_dim=20, hidden_dim=64):
        super(VAE, self).__init__()
        
        self.input_dim = input_dim
        self.latent_dim = latent_dim
        
        # Encoder
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.bn1 = nn.BatchNorm1d(hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim//2)
        self.bn2 = nn.BatchNorm1d(hidden_dim//2)
        
        # Latent space
        self.fc_mu = nn.Linear(hidden_dim//2, latent_dim)
        self.fc_logvar = nn.Linear(hidden_dim//2, latent_dim)
        
        # Decoder
        self.fc3 = nn.Linear(latent_dim, hidden_dim//2)
        self.bn3 = nn.BatchNorm1d(hidden_dim//2)
        self.fc4 = nn.Linear(hidden_dim//2, hidden_dim)
        self.bn4 = nn.BatchNorm1d(hidden_dim)
        self.fc5 = nn.Linear(hidden_dim, input_dim)
        
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.2)
        
    def encode(self, x):
        h = self.relu(self.bn1(self.fc1(x)))
        h = self.dropout(h)
        h = self.relu(self.bn2(self.fc2(h)))
        h = self.dropout(h)
        
        mu = self.fc_mu(h)
        logvar = self.fc_logvar(h)
        
        # Clamp logvar to prevent explosion
        logvar = torch.clamp(logvar, min=-20, max=2)
        
        return mu, logvar
    
    def reparameterize(self, mu, logvar):
        std = torch.exp(0.5 * logvar)
        eps = torch.randn_like(std)
        return mu + eps * std
    
    def decode(self, z):
        h = self.relu(self.bn3(self.fc3(z)))
        h = self.dropout(h)
        h = self.relu(self.bn4(self.fc4(h)))
        h = self.dropout(h)
        return self.fc5(h)
    
    def forward(self, x):
        mu, logvar = self.encode(x)
        z = self.reparameterize(mu, logvar)
        recon = self.decode(z)
        return recon, mu, logvar

# Initialize VAE
vae_model = VAE(
    input_dim=n_features,
    latent_dim=20,
    hidden_dim=64
).to(device)

print("‚úÖ VAE initialized")
print(f"   Parameters: {sum(p.numel() for p in vae_model.parameters()):,}")

‚úÖ VAE initialized
   Parameters: 17,984


### 3.3 Deep SVDD (replacement for LSTM)

Deep SVDD learns a representation that keeps **normal** samples close to a learned center **c**. The anomaly score is the squared distance to **c**.


In [40]:
class DeepSVDD(nn.Module):
    def __init__(
        self,
        input_dim: int,
        hidden_dims: list = [128, 64],
        rep_dim: int = 16,
        activation: str = "relu",
        dropout_rate: float = 0.0,
    ):
        super().__init__()
        act = nn.ReLU() if activation == "relu" else nn.LeakyReLU(0.1)

        layers = []
        prev = input_dim
        for h in hidden_dims:
            layers += [nn.Linear(prev, h), act]
            if dropout_rate > 0:
                layers += [nn.Dropout(dropout_rate)]
            prev = h
        layers += [nn.Linear(prev, rep_dim)]
        self.net = nn.Sequential(*layers)

        # IMPORTANT: buffer for center
        self.register_buffer("center_c", torch.zeros(rep_dim))

    def forward(self, x):
        return self.net(x)


In [41]:
# Initialize Deep SVDD model
svdd_model = DeepSVDD(
    input_dim=n_features,
    hidden_dims=[128, 64],
    rep_dim=16,
    activation='relu',
    dropout_rate=0.0
).to(device)

print("‚úÖ Deep SVDD initialized")
print(f"   Parameters: {sum(p.numel() for p in svdd_model.parameters()):,}")


‚úÖ Deep SVDD initialized
   Parameters: 20,688


## 4. Training Utilities

In [42]:
class EarlyStopping:
    def __init__(self, patience=10, verbose=False, path=None):
        self.patience = patience
        self.verbose = verbose
        self.path = path

        self.counter = 0
        self.best_score = None
        self.early_stop = False

    def __call__(self, score, model):
        if self.best_score is None:
            self.best_score = score
            self._save_checkpoint(model)
            return

        if score <= self.best_score:
            self.counter += 1
            if self.verbose:
                print(f"EarlyStopping counter: {self.counter}/{self.patience}")
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self._save_checkpoint(model)
            self.counter = 0

    def _save_checkpoint(self, model):
        if self.path is not None:
            torch.save(model.state_dict(), self.path)
            if self.verbose:
                print(f"Saved best model to {self.path}")


In [43]:
class ModelTrainer:
    """Unified trainer for Autoencoder / VAE / Deep SVDD (tabular anomaly detection)."""

    def __init__(self, model, model_type='autoencoder', learning_rate=1e-3, device='cpu'):
        self.model = model.to(device)
        self.model_type = model_type  # 'autoencoder' | 'vae' | 'deep_svdd'
        self.device = device

        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=learning_rate, weight_decay=1e-5)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            self.optimizer, mode='min', patience=5, factor=0.5
        )

        self.history = {'train_loss': [], 'val_loss': [], 'val_auroc': [], 'val_auprc': []}
        self.best_model_state = None
        self.best_auprc = 0.0  # default: maximize AUPRC for fraud
        self.best_auroc = 0.0

    @torch.no_grad()
    def init_svdd_center(self, train_loader, eps: float = 1e-3):
        """Initialize DeepSVDD center c using mean representation on normal train."""
        self.model.eval()
        reps = []
        for batch in train_loader:
            x = batch[0] if isinstance(batch, (list, tuple)) else batch
            x = x.to(self.device)
            z = self.model(x)
            reps.append(z.detach().cpu())
        reps = torch.cat(reps, dim=0)
        c = reps.mean(dim=0)
        # avoid exactly-0 dims
        c[(c.abs() < eps)] = eps * c[(c.abs() < eps)].sign().clamp(min=1)
        # center_c is a registered buffer on DeepSVDD
        self.model.center_c.data = c.to(self.device)

    def train_epoch(self, train_loader):
        self.model.train()
        total_loss, nb = 0.0, 0

        for batch in train_loader:
            x = batch[0] if isinstance(batch, (list, tuple)) else batch
            x = x.to(self.device)

            self.optimizer.zero_grad()

            if self.model_type == 'vae':
                recon, mu, logvar = self.model(x)
                recon_loss = F.mse_loss(recon, x, reduction='sum')
                kld = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp().clamp(max=1e10))
                loss = (recon_loss + kld) / x.size(0)

            elif self.model_type == 'deep_svdd':
                z = self.model(x)
                loss = torch.mean(torch.sum((z - self.model.center_c) ** 2, dim=1))

            else:  # autoencoder
                recon = self.model(x)
                loss = F.mse_loss(recon, x)

            if torch.isnan(loss) or torch.isinf(loss):
                continue

            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
            self.optimizer.step()

            total_loss += float(loss.item())
            nb += 1

            if isinstance(self.device, torch.device) and self.device.type == 'mps' and nb % 50 == 0:
                torch.mps.empty_cache()

        return total_loss / max(nb, 1)

    @torch.no_grad()
    def anomaly_scores(self, x):
        """Return per-sample anomaly scores (higher = more anomalous)."""
        if self.model_type == 'vae':
            recon, _, _ = self.model(x)
            return F.mse_loss(recon, x, reduction='none').mean(dim=1)

        if self.model_type == 'deep_svdd':
            z = self.model(x)
            return torch.sum((z - self.model.center_c) ** 2, dim=1)

        recon = self.model(x)
        return F.mse_loss(recon, x, reduction='none').mean(dim=1)

    def evaluate(self, data_loader):
        self.model.eval()
        all_scores, all_labels = [], []
        total_loss, nb = 0.0, 0

        for x, y in data_loader:
            x = x.to(self.device)
            scores = self.anomaly_scores(x)

            scores_np = scores.detach().cpu().numpy()
            scores_np = np.nan_to_num(scores_np, nan=1000.0, posinf=1000.0, neginf=0.0)

            all_scores.append(scores_np)
            all_labels.append(y.numpy())

            total_loss += float(np.mean(scores_np))
            nb += 1

        scores_all = np.concatenate(all_scores, axis=0)
        labels_all = np.concatenate(all_labels, axis=0).astype(int)

        auroc = roc_auc_score(labels_all, scores_all) if len(np.unique(labels_all)) > 1 else 0.0
        auprc = average_precision_score(labels_all, scores_all) if len(np.unique(labels_all)) > 1 else 0.0

        return (total_loss / max(nb, 1)), auroc, auprc, scores_all, labels_all

    def train(self, train_loader, val_loader, epochs=50, early_stopping_patience=10, model_name='model', optimize_metric='auprc'):
        """Train model, selecting best checkpoint by AUROC/AUPRC."""
        # DeepSVDD requires center initialization
        if self.model_type == 'deep_svdd' and hasattr(self.model, 'center_c'):
            self.init_svdd_center(train_loader)

        stopper = EarlyStopping(patience=early_stopping_patience, verbose=True, path=f'best_{model_name}.pth')
        best_value = -1.0

        for epoch in range(epochs):
            train_loss = self.train_epoch(train_loader)
            val_loss, val_auroc, val_auprc, _, _ = self.evaluate(val_loader)
            self.scheduler.step(val_loss)

            self.history['train_loss'].append(train_loss)
            self.history['val_loss'].append(val_loss)
            self.history['val_auroc'].append(val_auroc)
            self.history['val_auprc'].append(val_auprc)

            current = val_auprc if optimize_metric == 'auprc' else val_auroc
            if current > best_value:
                best_value = current
                self.best_auprc = max(self.best_auprc, val_auprc)
                self.best_auroc = max(self.best_auroc, val_auroc)
                self.best_model_state = {k: v.detach().cpu().clone() for k, v in self.model.state_dict().items()}

            stopper(val_loss, self.model)

            if (epoch + 1) % 10 == 0 or epoch == 0:
                print(f"Epoch {epoch+1:03d}/{epochs} | train={train_loss:.6f} val={val_loss:.6f} "
                      f"AUROC={val_auroc:.4f} AUPRC={val_auprc:.4f}")

            if stopper.early_stop:
                print("Early stopping triggered.")
                break

        if self.best_model_state is not None:
            self.model.load_state_dict(self.best_model_state)

        return self.history


## 5. Model Training

### 5.1 Train Baseline Autoencoder

In [44]:
print("üöÄ Training Baseline Autoencoder...")
print("=" * 60)

baseline_trainer = ModelTrainer(
    baseline_model,
    model_type='autoencoder',
    learning_rate=1e-3,
    device=device
)

baseline_history = baseline_trainer.train(
    train_loader_normal,
    val_loader,
    epochs=50,
    early_stopping_patience=10,
    model_name='baseline_autoencoder'
)

print("\n‚úÖ Baseline Autoencoder training complete!")

üöÄ Training Baseline Autoencoder...
Saved best model to best_baseline_autoencoder.pth
Epoch 001/50 | train=298.099930 val=19.884135 AUROC=0.9475 AUPRC=0.0695
EarlyStopping counter: 1/10
Saved best model to best_baseline_autoencoder.pth
EarlyStopping counter: 1/10
EarlyStopping counter: 2/10
EarlyStopping counter: 3/10
EarlyStopping counter: 4/10
EarlyStopping counter: 5/10
EarlyStopping counter: 6/10
EarlyStopping counter: 7/10
Epoch 010/50 | train=243.611402 val=16.631667 AUROC=0.9494 AUPRC=0.0681
EarlyStopping counter: 8/10
EarlyStopping counter: 9/10
EarlyStopping counter: 10/10
Early stopping triggered.

‚úÖ Baseline Autoencoder training complete!


### 5.2 Train VAE

In [18]:
print("\nüöÄ Training Variational Autoencoder (VAE)...")
print("=" * 60)

vae_trainer = ModelTrainer(
    vae_model,
    model_type='vae',
    learning_rate=1e-3,
    device=device
)

vae_history = vae_trainer.train(
    train_loader_normal,
    val_loader,
    epochs=50,
    early_stopping_patience=10,
    model_name='vae'
)

print("\n‚úÖ VAE training complete!")


üöÄ Training Variational Autoencoder (VAE)...
Saved best model to best_vae.pth
Epoch 001/50 | train=26609.285890 val=14.615551 AUROC=0.9552 AUPRC=0.1004
EarlyStopping counter: 1/10
EarlyStopping counter: 2/10
Saved best model to best_vae.pth
EarlyStopping counter: 1/10
EarlyStopping counter: 2/10
Saved best model to best_vae.pth
EarlyStopping counter: 1/10
EarlyStopping counter: 2/10
EarlyStopping counter: 3/10
Epoch 010/50 | train=21894.463884 val=9.892212 AUROC=0.9501 AUPRC=0.0827
EarlyStopping counter: 4/10
EarlyStopping counter: 5/10
EarlyStopping counter: 6/10
EarlyStopping counter: 7/10
EarlyStopping counter: 8/10
EarlyStopping counter: 9/10
EarlyStopping counter: 10/10
Early stopping triggered.

‚úÖ VAE training complete!


### 5.3 Train Deep SVDD

Deep SVDD is trained on **normal** transactions only. The anomaly score is the **distance to the learned center** in representation space.


In [19]:
print("\nüöÄ Training Deep SVDD...")
print("=" * 60)

svdd_trainer = ModelTrainer(
    svdd_model,
    model_type='deep_svdd',
    learning_rate=1e-3,
    device=device
)

svdd_history = svdd_trainer.train(
    train_loader_normal,
    val_loader,
    epochs=50,
    early_stopping_patience=10,
    model_name='deep_svdd',
    optimize_metric='auprc'
)

print("\n‚úÖ Deep SVDD training complete!")



üöÄ Training Deep SVDD...
Saved best model to best_deep_svdd.pth
Epoch 001/50 | train=0.276364 val=0.001614 AUROC=0.8760 AUPRC=0.0870
EarlyStopping counter: 1/10
EarlyStopping counter: 2/10
EarlyStopping counter: 3/10
EarlyStopping counter: 4/10
EarlyStopping counter: 5/10
EarlyStopping counter: 6/10
EarlyStopping counter: 7/10
EarlyStopping counter: 8/10
EarlyStopping counter: 9/10
Epoch 010/50 | train=0.000000 val=0.000000 AUROC=0.1264 AUPRC=0.0188
EarlyStopping counter: 10/10
Early stopping triggered.

‚úÖ Deep SVDD training complete!


## 6. Training Visualization

In [20]:
# Visualize training history
from plotly.subplots import make_subplots
import plotly.graph_objects as go

def plot_training_history(histories, model_names):
    fig = make_subplots(
        rows=1, cols=3,
        subplot_titles=('Training Loss', 'Validation Loss', 'Validation AUROC'),
    )

    for history, name in zip(histories, model_names):
        fig.add_trace(
            go.Scatter(y=history['train_loss'], mode='lines', name=f'{name} Train'),
            row=1, col=1
        )
        fig.add_trace(
            go.Scatter(y=history['val_loss'], mode='lines', name=f'{name} Val', line=dict(dash='dash')),
            row=1, col=2
        )
        fig.add_trace(
            go.Scatter(y=history['val_auroc'], mode='lines', name=f'{name} AUROC'),
            row=1, col=3
        )

    fig.update_layout(height=400, width=1100, title_text="Training History Comparison")
    fig.show()

# Plot training histories
plot_training_history(
    histories=[baseline_history, vae_history, svdd_history],
    model_names=['Baseline AE', 'VAE', 'Deep SVDD']
)


## 7. Model Evaluation

In [21]:
from sklearn.metrics import roc_curve, precision_recall_curve

def evaluate_trainer(trainer, loader, name):
    test_loss, auroc, auprc, scores, labels = trainer.evaluate(loader)
    print(f"{name:15s} | loss={test_loss:.6f} AUROC={auroc:.4f} AUPRC={auprc:.4f}")
    return {"scores": scores, "labels": labels, "auroc": auroc, "auprc": auprc}

print("\nüìä Model Evaluation (Test Set)")
print("=" * 60)

results = {}

# IsolationForest baseline (already computed on scaled arrays)
auroc_iso = roc_auc_score(y_test, test_scores_iso)
auprc_iso = average_precision_score(y_test, test_scores_iso)
print(f"{'IsolationForest':15s} | AUROC={auroc_iso:.4f} AUPRC={auprc_iso:.4f}")
results["IsolationForest"] = {"scores": test_scores_iso, "labels": y_test, "auroc": auroc_iso, "auprc": auprc_iso}

# Deep models
results["Baseline AE"] = evaluate_trainer(baseline_trainer, test_loader, "Baseline AE")
results["VAE"] = evaluate_trainer(vae_trainer, test_loader, "VAE")
results["Deep SVDD"] = evaluate_trainer(svdd_trainer, test_loader, "Deep SVDD")

# Summary table
results_df = pd.DataFrame(
    [{"Model": k, "AUROC": v["auroc"], "AUPRC": v["auprc"]} for k, v in results.items()]
).sort_values("AUPRC", ascending=False)

print("\nüèÜ Performance Summary")
print(results_df.to_string(index=False))



üìä Model Evaluation (Test Set)
IsolationForest | AUROC=0.9486 AUPRC=0.0355
Baseline AE     | loss=9.648925 AUROC=0.9578 AUPRC=0.1248
VAE             | loss=7.924065 AUROC=0.9574 AUPRC=0.1223
Deep SVDD       | loss=0.001791 AUROC=0.8811 AUPRC=0.0934

üèÜ Performance Summary
          Model    AUROC    AUPRC
    Baseline AE 0.957794 0.124829
            VAE 0.957416 0.122275
      Deep SVDD 0.881085 0.093359
IsolationForest 0.948558 0.035489


## 8. Performance Visualization

In [29]:
# ROC Curves
fig = go.Figure()

for name, data in results.items():
    fpr, tpr, _ = roc_curve(data['labels'], data['scores'])
    fig.add_trace(go.Scatter(
        x=fpr, y=tpr,
        mode='lines',
        name=f"{name} (AUROC={data['auroc']:.3f})",
        line=dict(width=2)
    ))

# Add diagonal line
fig.add_trace(go.Scatter(
    x=[0, 1], y=[0, 1],
    mode='lines',
    name='Random',
    line=dict(dash='dash', color='gray')
))

fig.update_layout(
    title='ROC Curves - Model Comparison',
    xaxis_title='False Positive Rate',
    yaxis_title='True Positive Rate',
    height=500,
    template='plotly_white'
)
fig.show()

In [30]:
# Precision-Recall Curves
fig = go.Figure()

baseline_precision = y_test.mean()  # Random classifier baseline

for name, data in results.items():
    precision, recall, _ = precision_recall_curve(data['labels'], data['scores'])
    fig.add_trace(go.Scatter(
        x=recall, y=precision,
        mode='lines',
        name=f"{name} (AUPRC={data['auprc']:.3f})",
        line=dict(width=2)
    ))

# Add baseline
fig.add_trace(go.Scatter(
    x=[0, 1], y=[baseline_precision, baseline_precision],
    mode='lines',
    name=f'Baseline ({baseline_precision:.3f})',
    line=dict(dash='dash', color='gray')
))

fig.update_layout(
    title='Precision-Recall Curves - Model Comparison',
    xaxis_title='Recall',
    yaxis_title='Precision',
    height=500,
    template='plotly_white'
)
fig.show()

## 9. Ensemble Model

In [31]:
# Simple ensemble of anomaly scores
def minmax(x):
    x = np.asarray(x, dtype=float)
    mn, mx = x.min(), x.max()
    return (x - mn) / (mx - mn) if mx > mn else np.zeros_like(x)

# Collect test scores from each model
scores_iso = test_scores_iso
scores_ae = results["Baseline AE"]["scores"]
scores_vae = results["VAE"]["scores"]
scores_svdd = results["Deep SVDD"]["scores"]

S = np.column_stack([minmax(scores_iso), minmax(scores_ae), minmax(scores_vae), minmax(scores_svdd)])

# Ensemble strategies
ensemble_avg = S.mean(axis=1)
ensemble_max = S.max(axis=1)
weights = np.array([0.2, 0.25, 0.30, 0.25])  # a reasonable starting point
ensemble_weighted = S @ (weights / weights.sum())

# Evaluate
def report(name, s):
    auroc = roc_auc_score(y_test, s)
    auprc = average_precision_score(y_test, s)
    print(f"{name:15s} | AUROC={auroc:.4f} AUPRC={auprc:.4f}")
    return auroc, auprc

print("\nü§ù Ensemble Performance (Test Set)")
print("=" * 60)
ens_metrics = {}
ens_metrics["Ensemble_Average"] = report("Average", ensemble_avg)
ens_metrics["Ensemble_Max"] = report("Max", ensemble_max)
ens_metrics["Ensemble_Weighted"] = report("Weighted", ensemble_weighted)




ü§ù Ensemble Performance (Test Set)
Average         | AUROC=0.9491 AUPRC=0.0390
Max             | AUROC=0.9485 AUPRC=0.0353
Weighted        | AUROC=0.9493 AUPRC=0.0403
