In [1]:
"""
🔬 DenseNet-121 GridSearch - Fair Comparison with Pretrained Weights

EXACT SAME PARAMETERS AS RESNET-101 & EEGNET:
1. ✅ Optimizer: Adam, AdamW, Adagrad
2. ✅ Activation: ReLU, LeakyReLU
3. ✅ L1: [0] (same as others)
4. ✅ L2: [0, 1e-4, 1e-3]
5. ✅ Early Stopping: patience=10
6. ✅ LR Scheduler: CosineAnnealingLR
7. ✅ Loss: SoftFocalLoss (gamma=3.0)
8. ✅ Data: Hybrid loading
9. ✅ CV: 3-fold
10. ✅ Pretrained: ImageNet weights

Total: 18 configs × 3 folds = 54 runs (~10 hours)
"""

import os
from pathlib import Path
import random
import time
import gc
import json
import warnings
from datetime import datetime
from itertools import product
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, WeightedRandomSampler

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score


In [2]:
# CELL 1: Setup & Imports

print("="*80)
print(" DenseNet-121 GridSearch - Pretrained ".center(80, "="))
print("="*80)
print(f"\nStarted: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

def seed_everything(seed=42):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy's legacy global RNG, PyTorch's CPU RNG and
    the RNGs of all CUDA devices with the same value, then switches cuDNN to
    deterministic mode with autotuning (benchmark) turned off.

    Args:
        seed: Integer seed applied to all generators.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic, cudnn.benchmark = True, False

# Seed all RNGs once, before any data splitting or model construction happens.
seed_everything(42)
print("‚úÖ Seed: 42")

# Use the first CUDA device when CUDA is available, otherwise run on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"‚úÖ Device: {device}")
if torch.cuda.is_available():
    print(f"   GPU: {torch.cuda.get_device_name(0)}")
    # Return cached allocator blocks to the driver before the long run starts.
    torch.cuda.empty_cache()

# Locations are relative to the notebook's working directory.
# DATA_PKG: metadata CSV and label archive; SPEC_DIR: per-recording "<eeg_id>_hr.npz" files;
# RESULTS_DIR: JSON outputs of the grid search (created here if missing).
DATA_PKG = Path("data_package")
SPEC_DIR = Path("spec_hr_out")
RESULTS_DIR = Path("densenet_gridsearch_results")
RESULTS_DIR.mkdir(exist_ok=True)

print(f"\n‚úÖ Results: {RESULTS_DIR}")



Started: 2026-01-25 23:52:28
‚úÖ Seed: 42
‚úÖ Device: cuda:0
   GPU: NVIDIA GeForce RTX 5060 Ti

‚úÖ Results: densenet_gridsearch_results


In [3]:
# CELL 2: Load Data

# Per-sample metadata; the CV split below is computed over its rows.
meta_use = pd.read_csv(DATA_PKG / "meta_use.csv")
# SECURITY NOTE(review): allow_pickle=True lets np.load unpickle object arrays, which can
# execute arbitrary code. Only load labels.npz from a trusted source.
# NOTE(review): the returned archive object is never closed explicitly.
lbl = np.load(DATA_PKG / "labels.npz", allow_pickle=True)
# Soft label rows; indexed as a 2-D (sample, class) array by the code below.
y_soft = lbl["y_soft"]
# Per-sample weights (presumably label confidence, judging by the name — TODO confirm).
w_conf = lbl["w_conf"]
classes = [str(c) for c in lbl["classes"]]
# Hard label = index of the largest soft-label entry in each row.
y_hard = y_soft.argmax(axis=1)

print("‚úÖ Data loaded")
print(f"   Samples: {len(y_hard)}")
print(f"   Classes: {classes}")

N_FOLDS = 3
# Stratified on the hard labels; the fixed random_state keeps the split reproducible.
skf = StratifiedKFold(n_splits=N_FOLDS, shuffle=True, random_state=42)
# Materialise the (train_idx, val_idx) pairs once; create_hybrid_loader reads folds[fold].
folds = list(skf.split(meta_use, y_hard))
print(f"\n‚úÖ Created {N_FOLDS}-fold CV")


‚úÖ Data loaded
   Samples: 17089
   Classes: ['seizure', 'lpd', 'gpd', 'lrda', 'grda', 'other']

‚úÖ Created 3-fold CV


In [4]:
# CELL 3: Dataset Class

class SpecDataset(Dataset):
    """Dataset that serves one spectrogram tensor per metadata row.

    For every row, ``<root_dir>/<eeg_id>_hr.npz`` is opened and its ``"x"``
    array is read, center-cropped / zero-padded to ``(F_target, T_target)`` on
    its last two axes, resized to 224x224 and returned with the row's soft
    label and sample weight.

    Args:
        df: DataFrame with an ``eeg_id`` column. Its index is reset, so labels
            are looked up by row position.
        root_dir: Directory holding the ``*_hr.npz`` files.
        y_soft: Array of soft labels, indexed by row position of ``df``.
        w_conf: Array of per-sample weights, indexed by row position of ``df``.
        F_target: Target size of axis 1 before resizing.
        T_target: Target size of axis 2 before resizing.
    """

    def __init__(self, df, root_dir, y_soft, w_conf, F_target=81, T_target=600):
        self.df = df.reset_index(drop=True)
        self.root = Path(root_dir)
        self.y_soft = y_soft
        self.w_conf = w_conf
        self.F_target = F_target
        self.T_target = T_target

    def __len__(self):
        return len(self.df)

    @staticmethod
    def _fit_axis(x, axis, target):
        """Center-crop ``x`` along ``axis`` to ``target``, or zero-pad it symmetrically."""
        size = x.shape[axis]
        if size >= target:
            start = (size - target) // 2
            index = [slice(None)] * x.ndim
            index[axis] = slice(start, start + target)
            return x[tuple(index)]
        missing = target - size
        widths = [(0, 0)] * x.ndim
        # Any odd leftover element goes to the trailing side.
        widths[axis] = (missing // 2, missing - missing // 2)
        return np.pad(x, widths, mode="constant")

    def _center_crop_pad(self, x):
        """Return a copy of the 3-D array ``x`` with axes 1 and 2 fitted to the targets.

        Raises:
            ValueError: If ``x`` is not three-dimensional.
        """
        if x.ndim != 3:
            raise ValueError(f"expected a 3-D array, got shape {x.shape}")
        x = self._fit_axis(x, 1, self.F_target)
        x = self._fit_axis(x, 2, self.T_target)
        # Copy so the result never aliases the loaded array.
        return x.copy()

    def __getitem__(self, idx):
        """Return ``(x, y, w)``: image tensor, soft-label tensor, scalar weight tensor."""
        row = self.df.iloc[idx]
        eid = int(row.eeg_id)

        # np.load on an .npz keeps the file open until the archive is closed;
        # the context manager releases the handle for every sample.
        with np.load(self.root / f"{eid}_hr.npz") as npz:
            x = npz["x"]
        x = self._center_crop_pad(x)
        x = torch.from_numpy(x).float()

        # Resize to the 224x224 input size used for the DenseNet backbone.
        x = F.interpolate(x.unsqueeze(0), size=(224, 224),
                          mode="bilinear", align_corners=False).squeeze(0)

        if x.size(0) == 4:
            # NOTE(review): the first three channels are averaged and the mean is
            # replicated three times; the fourth channel is discarded. Kept as-is
            # to preserve behavior — confirm this is the intended channel mapping.
            x = x[:3].mean(0, keepdim=True).repeat(3, 1, 1)

        # After reset_index the index value equals the row position.
        label_pos = self.df.index[idx]
        y = torch.from_numpy(self.y_soft[label_pos]).float()
        w = torch.tensor(self.w_conf[label_pos], dtype=torch.float32)

        return x, y, w

print("‚úÖ Dataset ready")


‚úÖ Dataset ready


In [5]:
# CELL 4: DenseNet-121 Model (Pretrained)

from torchvision import models

class DenseNet121_Pretrained(nn.Module):
    """DenseNet-121 with pretrained ImageNet weights and a custom classification head.

    The torchvision classifier is replaced by ``nn.Identity`` so the backbone
    returns its feature vector; the head is
    Dropout(0.5) -> Linear(in_features, 512) -> BatchNorm1d -> activation ->
    Dropout(0.3) -> Linear(512, n_classes).

    Args:
        n_classes: Number of output logits.
        activation: ``'relu'`` or ``'leakyrelu'``. Any other value silently
            falls back to ReLU (see ``get_activation``).
        freeze_backbone: If True, ``backbone.features`` parameters get
            ``requires_grad = False``.
    """

    def __init__(self, n_classes=6, activation='relu', freeze_backbone=False):
        super().__init__()
        self.activation_name = activation

        # torchvision >= 0.13 replaced the deprecated `pretrained=True` flag with
        # the `weights=` enum API; IMAGENET1K_V1 is what `pretrained=True` maps to.
        # Older torchvision builds without the enum keep using the legacy call.
        weights_enum = getattr(models, "DenseNet121_Weights", None)
        if weights_enum is not None:
            self.backbone = models.densenet121(weights=weights_enum.IMAGENET1K_V1)
        else:
            self.backbone = models.densenet121(pretrained=True)

        if freeze_backbone:
            for param in self.backbone.features.parameters():
                param.requires_grad = False

        in_features = self.backbone.classifier.in_features
        # Drop the ImageNet classifier; the backbone now outputs pooled features.
        self.backbone.classifier = nn.Identity()

        # The activation is applied functionally in forward(), between the
        # BatchNorm (index 2) and the second Dropout (index 3).
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(in_features, 512),
            nn.BatchNorm1d(512),
            nn.Dropout(0.3),
            nn.Linear(512, n_classes)
        )

    def get_activation(self):
        """Return the activation callable selected by ``activation_name``.

        Unknown names fall back to ReLU rather than raising.
        """
        if self.activation_name == 'leakyrelu':
            return lambda x: F.leaky_relu(x, negative_slope=0.01)
        return F.relu

    def forward(self, x):
        """Map an input batch to ``(batch, n_classes)`` logits."""
        features = self.backbone(x)

        # Unpack the head by role instead of indexing it positionally.
        drop_in, fc_hidden, norm, drop_out, fc_out = self.classifier
        x = norm(fc_hidden(drop_in(features)))
        x = self.get_activation()(x)
        return fc_out(drop_out(x))

print("‚úÖ DenseNet-121 model ready")
print("   Pretrained: ImageNet weights")
print("   Supports: ReLU, LeakyReLU")


‚úÖ DenseNet-121 model ready
   Pretrained: ImageNet weights
   Supports: ReLU, LeakyReLU


In [6]:
# CELL 5: SoftFocalLoss

class SoftFocalLoss(nn.Module):
    """Focal-weighted cross-entropy against soft (probability-vector) targets.

    Per sample, the soft-target cross-entropy is scaled by
    ``(1 - p_t) ** gamma``, where ``p_t`` is the predicted probability of the
    target's arg-max class. Optional per-class ``alpha`` and per-sample weights
    are multiplied in before the plain mean over the batch is taken.

    Args:
        alpha: Optional 1-D tensor of class weights, indexed by the arg-max class.
        gamma: Focusing exponent.
    """

    def __init__(self, alpha=None, gamma=3.0):
        super().__init__()
        self.gamma = gamma
        self.alpha = alpha

    def forward(self, logits, soft_targets, sample_weights=None):
        """Return the scalar loss for a batch of logits and soft targets."""
        dominant = soft_targets.argmax(dim=1)

        # Predicted probability of each sample's dominant target class.
        p_dominant = (
            F.softmax(logits, dim=1)
            .gather(1, dominant.unsqueeze(1))
            .squeeze(1)
        )

        # Cross-entropy against the full soft distribution, one value per sample.
        log_probs = F.log_softmax(logits, dim=1)
        soft_ce = -(soft_targets * log_probs).sum(dim=1)

        per_sample = ((1 - p_dominant) ** self.gamma) * soft_ce

        if self.alpha is not None:
            per_sample = self.alpha[dominant] * per_sample

        if sample_weights is not None:
            per_sample = per_sample * sample_weights

        return per_sample.mean()

print("‚úÖ SoftFocalLoss ready")


‚úÖ SoftFocalLoss ready


In [7]:
# CELL 6: Hybrid Data Loader

def create_hybrid_loader(fold=0, target_ratio=0.4, weight_power=3.0, batch_size=16):
    """Build the train/validation loaders for one CV fold.

    "Hybrid" balancing is applied to the training split only:
      1. every class with fewer than ``int(max_class_count * target_ratio)``
         samples is oversampled (with replacement) up to that count;
      2. a ``WeightedRandomSampler`` then draws from the oversampled set with
         per-class weights ``(N / (count + 1)) ** weight_power``, normalised to
         sum to the number of classes.

    Reads the notebook globals ``folds``, ``meta_use``, ``y_soft``, ``w_conf``
    and ``SPEC_DIR``, and draws from NumPy's global RNG.

    Args:
        fold: Index into ``folds``.
        target_ratio: Oversampling target as a fraction of the largest class.
        weight_power: Exponent applied to the inverse-frequency class weights.
        batch_size: Batch size for both loaders.

    Returns:
        ``(dl_tr, dl_va, weights)`` where ``weights`` is the float32 tensor of
        per-class weights (one entry per class).
    """
    tr_idx, va_idx = folds[fold]
    df_tr = meta_use.iloc[tr_idx]
    y_soft_tr, w_conf_tr = y_soft[tr_idx], w_conf[tr_idx]

    # Derive the class count from the label matrix instead of hard-coding 6.
    n_classes = y_soft.shape[1]

    y_hard_tr = y_soft_tr.argmax(axis=1)
    counts = np.bincount(y_hard_tr, minlength=n_classes)
    target = int(counts.max() * target_ratio)

    extra = []
    for cls in range(n_classes):
        cls_idx = np.flatnonzero(y_hard_tr == cls)
        n_add = target - len(cls_idx)
        # Skip classes already at/above target, and classes absent from this
        # fold (np.random.choice cannot draw from an empty array).
        if n_add <= 0 or len(cls_idx) == 0:
            continue
        extra.append(np.random.choice(cls_idx, n_add, replace=True))

    # Force an integer dtype: with nothing to add, concatenating a bare empty
    # list would promote the index array to float64 and break the indexing below.
    all_idx = np.concatenate([np.arange(len(y_hard_tr))] + extra).astype(np.int64)
    np.random.shuffle(all_idx)

    df_tr_over = df_tr.iloc[all_idx].reset_index(drop=True)
    y_soft_over, w_conf_over = y_soft_tr[all_idx], w_conf_tr[all_idx]

    y_hard_over = y_soft_over.argmax(axis=1)
    counts_over = np.bincount(y_hard_over, minlength=n_classes)

    # +1 keeps the weight finite for classes with no samples.
    weights = (len(y_hard_over) / (counts_over + 1)) ** weight_power
    weights = torch.FloatTensor(weights / weights.sum() * n_classes)

    sample_weights = weights[y_hard_over].numpy()
    sampler = WeightedRandomSampler(
        weights=sample_weights,
        num_samples=len(sample_weights),
        replacement=True
    )

    ds_tr = SpecDataset(df_tr_over, SPEC_DIR, y_soft_over, w_conf_over)
    dl_tr = DataLoader(ds_tr, batch_size=batch_size, sampler=sampler, num_workers=0)

    # Validation data is left untouched: no oversampling, no shuffling.
    ds_va = SpecDataset(meta_use.iloc[va_idx], SPEC_DIR, y_soft[va_idx], w_conf[va_idx])
    dl_va = DataLoader(ds_va, batch_size=batch_size, shuffle=False, num_workers=0)

    return dl_tr, dl_va, weights

print("‚úÖ Hybrid loader ready (batch_size=16 for DenseNet-121)")


‚úÖ Hybrid loader ready (batch_size=16 for DenseNet-121)


In [8]:
# CELL 7: Evaluation

@torch.no_grad()
def evaluate_full(model, loader):
    """Score ``model`` on every batch of ``loader`` and return summary metrics.

    The model is put in eval mode. Predictions are the arg-max of the logits;
    ground truth is the arg-max of each soft-label row. Sample weights yielded
    by the loader are ignored.

    Returns:
        Dict with ``accuracy`` and macro-averaged ``precision``, ``recall`` and
        ``f1`` (classes without predictions count as 0 instead of warning).
    """
    model.eval()
    pred_chunks = []
    true_chunks = []

    for inputs, soft_labels, _ in loader:
        scores = model(inputs.to(device))
        pred_chunks.append(scores.argmax(1).cpu().numpy())
        true_chunks.append(soft_labels.argmax(1).cpu().numpy())

    y_pred = np.concatenate(pred_chunks)
    y_true = np.concatenate(true_chunks)

    macro = dict(average='macro', zero_division=0)
    metrics = {'accuracy': accuracy_score(y_true, y_pred)}
    metrics['precision'] = precision_score(y_true, y_pred, **macro)
    metrics['recall'] = recall_score(y_true, y_pred, **macro)
    metrics['f1'] = f1_score(y_true, y_pred, **macro)
    return metrics

print("‚úÖ Evaluation ready")


‚úÖ Evaluation ready


In [9]:
# CELL 8: Training Function

def train_one_config(fold, optimizer_name, activation, l1_lambda, l2_lambda,
                     lr=3e-4, batch_size=16, epochs=30, patience=10):
    """Train one DenseNet-121 on one CV fold and return its validation metrics.

    Steps: build the hybrid loaders, create a fresh pretrained model, create the
    optimizer (L2 is passed as ``weight_decay``), train with SoftFocalLoss
    (gamma=3.0, class weights from the loader) under a cosine-annealed learning
    rate, early-stop on validation macro-F1, restore the best weights and
    evaluate once more on the validation loader.

    Args:
        fold: Index of the CV fold passed to ``create_hybrid_loader``.
        optimizer_name: ``'adam'``, ``'adamw'`` or ``'adagrad'``.
        activation: Activation name forwarded to ``DenseNet121_Pretrained``.
        l1_lambda: Coefficient of the L1 penalty over all model parameters;
            the penalty is skipped when it is not > 0.
        l2_lambda: Value passed as the optimizer's ``weight_decay``.
        lr: Initial learning rate.
        batch_size: Batch size for both loaders.
        epochs: Maximum number of epochs (also the scheduler's ``T_max``).
        patience: Number of consecutive non-improving epochs before stopping.

    Returns:
        The dict produced by ``evaluate_full`` for the restored best model.

    Raises:
        ValueError: If ``optimizer_name`` is not one of the supported names.

    NOTE(review): the checkpoint is selected on the same validation fold that the
    returned metrics are computed on, so the reported scores are optimistic.
    """

    print(f"      [1/5] Data...", end=" ", flush=True)
    t0 = time.time()
    dl_tr, dl_va, class_weights = create_hybrid_loader(fold=fold, batch_size=batch_size)
    print(f"‚úì ({time.time()-t0:.1f}s)", flush=True)

    print(f"      [2/5] Model (DenseNet-121, {activation})...", end=" ", flush=True)
    t0 = time.time()
    # A fresh model per fold/config; the backbone is fine-tuned (not frozen).
    model = DenseNet121_Pretrained(
        n_classes=6,
        activation=activation,
        freeze_backbone=False
    ).to(device)
    print(f"‚úì ({time.time()-t0:.1f}s)", flush=True)

    print(f"      [3/5] Optimizer ({optimizer_name}, L2={l2_lambda:.0e})...", end=" ", flush=True)
    if optimizer_name == 'adam':
        optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2_lambda)
    elif optimizer_name == 'adamw':
        optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=l2_lambda)
    elif optimizer_name == 'adagrad':
        optimizer = torch.optim.Adagrad(model.parameters(), lr=lr, weight_decay=l2_lambda)
    else:
        raise ValueError(f"Unknown optimizer: {optimizer_name}")
    print("‚úì", flush=True)

    print(f"      [4/5] Loss & Scheduler...", end=" ", flush=True)
    # The per-class weights returned by the loader double as the focal-loss alpha.
    criterion = SoftFocalLoss(alpha=class_weights.to(device), gamma=3.0)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    print("‚úì", flush=True)

    print(f"      [5/5] Training (patience={patience}, L1={l1_lambda:.0e})...", flush=True)
    # best_state stays None until some epoch reaches a validation F1 above 0.0.
    best_f1, best_state, no_improve = 0.0, None, 0

    for epoch in range(1, epochs + 1):
        model.train()
        train_loss, n = 0.0, 0

        for x, y, w in dl_tr:
            x, y, w = x.to(device), y.to(device), w.to(device)
            optimizer.zero_grad()
            logits = model(x)
            loss = criterion(logits, y, w)

            if l1_lambda > 0:
                # L1 penalty over every parameter tensor of the model.
                l1_norm = sum(p.abs().sum() for p in model.parameters())
                loss = loss + l1_lambda * l1_norm

            loss.backward()
            # Clip the global gradient norm to 1.0 before the update.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()

            # Accumulate a sample-weighted sum so the epoch loss is a per-sample mean.
            train_loss += loss.item() * x.size(0)
            n += x.size(0)

        train_loss /= max(n, 1)
        val_results = evaluate_full(model, dl_va)
        # The scheduler is stepped once per epoch.
        scheduler.step()

        if val_results['f1'] > best_f1:
            best_f1 = val_results['f1']
            # Snapshot on the CPU so the checkpoint does not hold GPU memory.
            best_state = {k: v.cpu().clone() for k, v in model.state_dict().items()}
            no_improve = 0
        else:
            no_improve += 1
            if no_improve >= patience:
                print(f"        Early stop at epoch {epoch}", flush=True)
                break

        if epoch % 5 == 0 or epoch == 1:
            print(f"        Epoch {epoch:2d}: F1={val_results['f1']:.4f}, Loss={train_loss:.4f}", flush=True)

        if epoch % 5 == 0:
            gc.collect()
            if torch.cuda.is_available():
                torch.cuda.empty_cache()

    # Restore the best checkpoint, if one was recorded.
    if best_state:
        model.load_state_dict(best_state)

    final_results = evaluate_full(model, dl_va)

    # Drop references and free cached GPU memory before the next fold/config.
    del model, optimizer, scheduler, dl_tr, dl_va
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return final_results

print("‚úÖ Training function ready")
print("   Optimizers: Adam, AdamW, Adagrad")
print("   Activations: ReLU, LeakyReLU")
print("   Early stopping: patience=10")


‚úÖ Training function ready
   Optimizers: Adam, AdamW, Adagrad
   Activations: ReLU, LeakyReLU
   Early stopping: patience=10


In [10]:
# CELL 9: Grid Configuration (SAME AS RESNET & EEGNET)

print("\n" + "="*80)
print(" DENSENET-121 GRIDSEARCH - SAME AS RESNET-101 ".center(80, "="))
print("="*80)

# Hyperparameters that are searched: the grid is the Cartesian product of these lists.
param_grid = {
    'optimizer': ['adam', 'adamw', 'adagrad'],
    'activation': ['relu', 'leakyrelu'],
    'l1_lambda': [0],
    'l2_lambda': [0, 1e-4, 1e-3],
}

# Hyperparameters held constant; unpacked as keyword arguments into train_one_config.
fixed_params = {
    'lr': 3e-4,
    'batch_size': 16,
    'epochs': 30,
    'patience': 10,
}

# keys and values share the dict's ordering, so dict(zip(keys, combo)) rebuilds
# a named parameter set from each tuple in `combinations`.
keys = list(param_grid.keys())
values = list(param_grid.values())
combinations = list(product(*values))

print("\nüìã HYPERPARAMETER GRID:")
print("-"*80)
print(f"  Optimizer:   {param_grid['optimizer']}")
print(f"  Activation:  {param_grid['activation']}")
print(f"  L1 lambda:   {param_grid['l1_lambda']}")
print(f"  L2 lambda:   {param_grid['l2_lambda']}")

print("\nüìä GRIDSEARCH STATISTICS:")
print("-"*80)
print(f"  Total combinations: {len(combinations)}")
print(f"  Folds per config:   {N_FOLDS}")
print(f"  Total trainings:    {len(combinations) * N_FOLDS}")

# Paths of the JSON files written by the grid-search cell.
print("\nüíæ AUTO-SAVE:")
print("-"*80)
print(f"  {RESULTS_DIR}/densenet_gridsearch_progress.json")
print(f"  {RESULTS_DIR}/densenet_gridsearch_final.json")




üìã HYPERPARAMETER GRID:
--------------------------------------------------------------------------------
  Optimizer:   ['adam', 'adamw', 'adagrad']
  Activation:  ['relu', 'leakyrelu']
  L1 lambda:   [0]
  L2 lambda:   [0, 0.0001, 0.001]

üìä GRIDSEARCH STATISTICS:
--------------------------------------------------------------------------------
  Total combinations: 18
  Folds per config:   3
  Total trainings:    54

üíæ AUTO-SAVE:
--------------------------------------------------------------------------------
  densenet_gridsearch_results/densenet_gridsearch_progress.json
  densenet_gridsearch_results/densenet_gridsearch_final.json


In [11]:
# CELL 10: Run GridSearch
#
# Trains every parameter combination on every CV fold, averages the fold metrics
# per combination, and rewrites the progress JSON after each combination.
# The run always starts from an empty result list; an existing progress file is
# overwritten, not resumed.

print("\n" + "="*80)
print(" STARTING GRIDSEARCH ".center(80, "="))
print("="*80)
print(f"Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

all_results = []
start_time = time.time()

for combo_idx, combo in enumerate(combinations, 1):
    # Re-attach parameter names to the positional tuple produced by product().
    params = dict(zip(keys, combo))

    print("\n" + "="*80)
    print(f" CONFIG {combo_idx}/{len(combinations)} ".center(80, "="))
    print("="*80)
    print(f"  Optimizer: {params['optimizer']}")
    print(f"  Activation: {params['activation']}")
    print(f"  L1: {params['l1_lambda']:.0e}")
    print(f"  L2: {params['l2_lambda']:.0e}")
    print("-"*80)

    fold_results = []

    for fold in range(N_FOLDS):
        print(f"\n    Fold {fold+1}/{N_FOLDS}...", flush=True)
        fold_start = time.time()

        try:
            result = train_one_config(
                fold=fold,
                optimizer_name=params['optimizer'],
                activation=params['activation'],
                l1_lambda=params['l1_lambda'],
                l2_lambda=params['l2_lambda'],
                **fixed_params
            )
            fold_results.append(result)
            print(f"\n    ‚úì Fold {fold+1}: F1={result['f1']:.4f} ({(time.time()-fold_start)/60:.1f} min)", flush=True)
        except Exception as e:
            # Best-effort: a failing fold does not abort the whole search.
            # NOTE(review): the failure is stored as all-zero metrics and is then averaged
            # into mean_metrics below, so a crashed fold is indistinguishable from a
            # genuinely bad one in the saved results. Consider recording the error and
            # excluding failed folds from the mean.
            print(f"\n    ‚úó Error: {e}", flush=True)
            fold_results.append({'f1': 0.0, 'accuracy': 0.0, 'precision': 0.0, 'recall': 0.0})

    # Plain (unweighted) mean over folds; f1_std is the population std (np.std default).
    mean_metrics = {
        'f1': float(np.mean([r['f1'] for r in fold_results])),
        'accuracy': float(np.mean([r['accuracy'] for r in fold_results])),
        'precision': float(np.mean([r['precision'] for r in fold_results])),
        'recall': float(np.mean([r['recall'] for r in fold_results])),
        'f1_std': float(np.std([r['f1'] for r in fold_results])),
    }

    result_entry = {
        'config_id': combo_idx,
        'params': params,
        'mean_metrics': mean_metrics,
        'fold_results': fold_results,
        'timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
    }
    all_results.append(result_entry)

    print(f"\n  Mean F1:  {mean_metrics['f1']:.4f} ¬± {mean_metrics['f1_std']:.4f}")
    print(f"  Mean Acc: {mean_metrics['accuracy']:.4f}")
    print(f"  Mean Prec:{mean_metrics['precision']:.4f}")
    print(f"  Mean Rec: {mean_metrics['recall']:.4f}")

    # Rewrite the full progress file after every combination so partial results
    # survive an interrupted run. default=str stringifies non-JSON-native values.
    with open(RESULTS_DIR / 'densenet_gridsearch_progress.json', 'w') as f:
        json.dump(all_results, f, indent=2, default=str)
    print("  üíæ Saved", flush=True)

# Final artefact: all results plus the grid definition and total wall-clock time.
with open(RESULTS_DIR / 'densenet_gridsearch_final.json', 'w') as f:
    json.dump({
        'all_results': all_results,
        'param_grid': param_grid,
        'fixed_params': fixed_params,
        'total_time_hours': (time.time() - start_time) / 3600,
    }, f, indent=2, default=str)

print("\n" + "="*80)
print(" GRIDSEARCH COMPLETE ".center(80, "="))
print("="*80)
print(f"Total time: {(time.time()-start_time)/3600:.2f} hours")



Started: 2026-01-25 23:53:26


  Optimizer: adam
  Activation: relu
  L1: 0e+00
  L2: 0e+00
--------------------------------------------------------------------------------

    Fold 1/3...
      [1/5] Data... ‚úì (0.0s)
      [2/5] Model (DenseNet-121, relu)... ‚úì (0.5s)
      [3/5] Optimizer (adam, L2=0e+00)... ‚úì
      [4/5] Loss & Scheduler... ‚úì
      [5/5] Training (patience=10, L1=0e+00)...
        Epoch  1: F1=0.3353, Loss=0.8246
        Epoch  5: F1=0.3313, Loss=0.3102
        Epoch 10: F1=0.3510, Loss=0.1582
        Early stop at epoch 14

    ‚úì Fold 1: F1=0.3609 (35.7 min)

    Fold 2/3...
      [1/5] Data... ‚úì (0.0s)
      [2/5] Model (DenseNet-121, relu)... ‚úì (0.2s)
      [3/5] Optimizer (adam, L2=0e+00)... ‚úì
      [4/5] Loss & Scheduler... ‚úì
      [5/5] Training (patience=10, L1=0e+00)...
        Epoch  1: F1=0.3182, Loss=0.8092
        Epoch  5: F1=0.3574, Loss=0.3178
        Epoch 10: F1=0.3426, Loss=0.1542
        Epoch 15: F1=0.3621, Loss=0.1027
        

In [12]:
# CELL 11: Analyze Results (F1 + Acc + Precision + Recall)

# Rank configurations by their mean macro-F1 across folds, best first.
# Reads the in-memory `all_results` list, not the saved JSON files.
sorted_results = sorted(all_results, key=lambda x: x['mean_metrics']['f1'], reverse=True)

print("\n" + "="*80)
print(" DENSENET-121 GRIDSEARCH RESULTS ".center(80, "="))
print("="*80)

print("\nüèÜ TOP 10 CONFIGURATIONS:")
print("="*80)
print(f"{'Rank':<6} {'Optimizer':>10} {'Activation':>12} {'L1':>8} {'L2':>8} "
      f"{'F1':>10} {'Acc':>8} {'Prec':>8} {'Rec':>8}")
print("-"*80)

# The slice simply yields fewer rows when fewer than 10 configurations exist.
for i, result in enumerate(sorted_results[:10], 1):
    p = result['params']
    m = result['mean_metrics']
    print(f"{i:<6} {p['optimizer']:>10} {p['activation']:>12} {p['l1_lambda']:>8.0e} "
          f"{p['l2_lambda']:>8.0e} {m['f1']:>10.4f} {m['accuracy']:>8.4f} "
          f"{m['precision']:>8.4f} {m['recall']:>8.4f}")

# Raises IndexError when all_results is empty (no configuration finished).
best = sorted_results[0]

print("\n" + "="*80)
print(" BEST DENSENET-121 CONFIGURATION ".center(80, "="))
print("="*80)
print(f"  Optimizer:  {best['params']['optimizer']}")
print(f"  Activation: {best['params']['activation']}")
print(f"  L1:         {best['params']['l1_lambda']:.0e}")
print(f"  L2:         {best['params']['l2_lambda']:.0e}")

print(f"\n  F1:        {best['mean_metrics']['f1']:.4f} ¬± {best['mean_metrics']['f1_std']:.4f}")
print(f"  Accuracy:  {best['mean_metrics']['accuracy']:.4f}")
print(f"  Precision: {best['mean_metrics']['precision']:.4f}")
print(f"  Recall:    {best['mean_metrics']['recall']:.4f}")

print("\nüíæ Results saved to:")
print(f"   {RESULTS_DIR}/densenet_gridsearch_final.json")




üèÜ TOP 10 CONFIGURATIONS:
Rank    Optimizer   Activation       L1       L2         F1      Acc     Prec      Rec
--------------------------------------------------------------------------------
1           adamw    leakyrelu    0e+00    0e+00     0.4931   0.5289   0.4853   0.5198
2           adamw         relu    0e+00    0e+00     0.4929   0.5257   0.4843   0.5197
3           adamw         relu    0e+00    1e-03     0.4896   0.5319   0.4821   0.5154
4            adam    leakyrelu    0e+00    0e+00     0.4574   0.4770   0.4346   0.5116
5           adamw    leakyrelu    0e+00    1e-04     0.4539   0.4777   0.4340   0.5106
6           adamw         relu    0e+00    1e-04     0.4534   0.4794   0.4305   0.5103
7            adam         relu    0e+00    0e+00     0.4532   0.4767   0.4261   0.5128
8           adamw    leakyrelu    0e+00    1e-03     0.4521   0.4742   0.4258   0.5068
9            adam    leakyrelu    0e+00    1e-04     0.4328   0.4449   0.4093   0.4960
10           adam  