# MNIST Publication Experiments (Kaggle Notebook)

This notebook runs a publication-quality MNIST experiment suite:
- 5 optimizers × N seeds
- 10 epochs per run (configurable)
- Saves per-run CSVs and a statistical comparison CSV

Tips:
- Enable GPU (Notebook Settings → Accelerator: GPU)
- Enable Internet so that torchvision can download the MNIST dataset

## Parameters
Adjust seeds, epochs, and batch size here. Set `QUICK=True` for a short run (3 seeds, 3 epochs).

In [None]:
# Parameters: edit the values below to configure the whole notebook.
QUICK = False  # if True, overrides to seeds=[1,2,3], epochs=3
BATCH_SIZE = 128
RESULTS_DIR = 'results'

# Pick the run length in one place: a short smoke-test configuration when
# QUICK is set, otherwise the full configuration (seeds 1..10, 10 epochs).
if QUICK:
    SEEDS, EPOCHS = [1, 2, 3], 3
else:
    SEEDS, EPOCHS = list(range(1, 11)), 10

# Echo the effective configuration so it is recorded in the cell output.
print('Seeds:', SEEDS)
print('Epochs:', EPOCHS)
print('Batch size:', BATCH_SIZE)
print('Results dir:', RESULTS_DIR)

## Imports and Helpers
Model, data loaders, training, and evaluation utilities.

In [None]:
import os, time, math, random
from pathlib import Path
import numpy as np
import pandas as pd
from tqdm import tqdm
from scipy import stats

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

def set_seed(seed: int):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy's global RNG, and PyTorch's CPU and
    CUDA generators with the same value, then switches cuDNN to
    deterministic mode with autotuning (benchmark) disabled.

    Args:
        seed: Value passed unchanged to each generator.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for seed_rng in seeders:
        seed_rng(seed)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

class SimpleMLP(nn.Module):
    """Fully connected classifier: 784 -> 256 -> 128 -> 10 with ReLU activations.

    The input is flattened per sample, so any tensor whose trailing
    dimensions multiply to 784 (e.g. ``(batch, 1, 28, 28)``) is accepted.
    The output is the raw 10-way logits (no softmax applied).
    """

    def __init__(self):
        super().__init__()
        flattened_pixels = 28 * 28
        # Layers are created in fc1, fc2, fc3 order so that seeded weight
        # initialisation stays reproducible.
        self.fc1 = nn.Linear(flattened_pixels, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)

    def forward(self, x):
        """Return class logits of shape ``(batch, 10)`` for input batch ``x``."""
        flat = x.view(x.shape[0], -1)
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

def get_data_loaders(batch_size: int, num_workers: int = 2, pin_memory: bool = True):
    """Build the MNIST train and test ``DataLoader`` objects.

    Images are converted to tensors and normalised with mean 0.1307 and
    std 0.3081. The dataset is stored under ``/kaggle/working/data`` when
    ``/kaggle/working`` exists, otherwise under ``./data``; torchvision is
    asked to download it (``download=True``).

    Args:
        batch_size: Batch size used by both loaders.
        num_workers: Worker process count passed to both loaders.
        pin_memory: ``pin_memory`` flag passed to both loaders.

    Returns:
        ``(train_loader, test_loader)``; only the training loader shuffles.
    """
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ])

    # Prefer Kaggle's writable working directory when running there.
    on_kaggle = os.path.exists('/kaggle/working')
    data_root = '/kaggle/working/data' if on_kaggle else './data'

    splits = {
        is_train: datasets.MNIST(root=data_root, train=is_train, download=True,
                                 transform=preprocessing)
        for is_train in (True, False)
    }

    shared_options = dict(batch_size=batch_size, num_workers=num_workers,
                          pin_memory=pin_memory)
    train_loader = DataLoader(splits[True], shuffle=True, **shared_options)
    test_loader = DataLoader(splits[False], shuffle=False, **shared_options)
    return train_loader, test_loader

def train_one_epoch(model, loader, optimizer, device):
    """Run one optimisation pass over ``loader`` and report training metrics.

    Puts the model in training mode, then for each ``(data, target)`` batch
    moves it to ``device``, computes cross-entropy loss, backpropagates and
    steps the optimizer.

    Args:
        model: Module mapping a data batch to class logits.
        loader: Iterable of ``(data, target)`` batches.
        optimizer: Optimizer bound to ``model``'s parameters.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy)`` averaged over all samples seen. Loss and
        predictions are taken from the forward pass made before each step.
    """
    model.train()
    loss_sum, hits, seen = 0.0, 0, 0
    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        n_samples = inputs.shape[0]

        optimizer.zero_grad()
        logits = model(inputs)
        batch_loss = F.cross_entropy(logits, labels)
        batch_loss.backward()
        optimizer.step()

        # cross_entropy returns the batch mean; weight it by the batch size
        # so a smaller final batch does not skew the epoch average.
        loss_sum += batch_loss.item() * n_samples
        hits += logits.argmax(dim=1).eq(labels).sum().item()
        seen += n_samples
    return loss_sum / seen, hits / seen

def evaluate(model, loader, device):
    """Measure cross-entropy loss and accuracy of ``model`` over ``loader``.

    Puts the model in eval mode and runs every batch under
    ``torch.no_grad()``; no parameters are updated.

    Args:
        model: Module mapping a data batch to class logits.
        loader: Iterable of ``(data, target)`` batches.
        device: Device the batches are moved to.

    Returns:
        ``(mean_loss, accuracy)`` averaged over all samples seen.
    """
    model.eval()
    loss_sum = 0.0
    hits = 0
    seen = 0
    with torch.no_grad():
        for inputs, labels in loader:
            inputs = inputs.to(device)
            labels = labels.to(device)
            n_samples = inputs.shape[0]

            logits = model(inputs)
            # Re-weight the per-batch mean loss by batch size so the result
            # is a true per-sample average.
            loss_sum += F.cross_entropy(logits, labels).item() * n_samples
            hits += logits.argmax(dim=1).eq(labels).sum().item()
            seen += n_samples
    return loss_sum / seen, hits / seen

## Run Experiments
Runs 5 optimizers × N seeds and saves per-run CSVs to `RESULTS_DIR`.

In [None]:
def run_single_experiment(optimizer_name: str, seed: int, lr: float, epochs: int, batch_size: int, results_dir: Path):
    """Train and evaluate one SimpleMLP on MNIST and save its per-epoch metrics.

    Seeds the RNGs, builds the data loaders and a fresh model, trains for
    ``epochs`` epochs with the requested optimizer (evaluating on the test
    set after every epoch), and writes the history to a CSV in
    ``results_dir``.

    Args:
        optimizer_name: One of 'SGD', 'SGD_Momentum', 'Adam', 'AdamW', 'AMSGrad'.
        seed: Seed passed to ``set_seed``; also embedded in the CSV file name.
        lr: Learning rate; also embedded in the CSV file name.
        epochs: Number of train/evaluate rounds.
        batch_size: Batch size for both loaders.
        results_dir: Output directory, created if missing.

    Returns:
        ``(history_df, elapsed_seconds)``; the elapsed time covers only the
        train/evaluate loop, not data loading or the CSV write.

    Raises:
        ValueError: If ``optimizer_name`` is not recognised.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    set_seed(seed)
    train_loader, test_loader = get_data_loaders(batch_size)
    model = SimpleMLP().to(device)

    # Map each supported name to a constructor taking the parameter iterable.
    optimizer_factories = {
        'SGD': lambda params: optim.SGD(params, lr=lr),
        'SGD_Momentum': lambda params: optim.SGD(params, lr=lr, momentum=0.9),
        'Adam': lambda params: optim.Adam(params, lr=lr),
        'AdamW': lambda params: optim.AdamW(params, lr=lr, weight_decay=1e-4),
        'AMSGrad': lambda params: optim.Adam(params, lr=lr, amsgrad=True),
    }
    if optimizer_name not in optimizer_factories:
        raise ValueError(f'Unknown optimizer: {optimizer_name}')
    optimizer = optimizer_factories[optimizer_name](model.parameters())

    records = []
    started_at = time.time()
    for epoch in range(1, epochs + 1):
        train_loss, train_acc = train_one_epoch(model, train_loader, optimizer, device)
        test_loss, test_acc = evaluate(model, test_loader, device)
        records.append({
            'epoch': epoch,
            'train_loss': train_loss,
            'train_acc': train_acc,
            'test_loss': test_loss,
            'test_acc': test_acc,
        })
        tqdm.write(
            f'Seed {seed} | {optimizer_name} | Epoch {epoch}/{epochs} | '
            f'train_loss={train_loss:.4f}, train_acc={train_acc:.2%}, test_loss={test_loss:.4f}, test_acc={test_acc:.2%}'
        )
    elapsed = time.time() - started_at

    df = pd.DataFrame(records)
    out_dir = Path(results_dir)
    out_dir.mkdir(parents=True, exist_ok=True)
    out_name = f'NN_SimpleMLP_MNIST_{optimizer_name}_lr{lr}_seed{seed}_publication.csv'
    df.to_csv(out_dir / out_name, index=False)
    return df, elapsed

def run_suite(seeds, epochs, batch_size, results_dir: str):
    """Run every (optimizer, seed) combination and report a timing summary.

    Five optimizer configurations, each with a fixed learning rate, are run
    once per seed via ``run_single_experiment``. A failing run is reported
    and skipped so that the remaining runs still execute.

    Args:
        seeds: Sized iterable of integer seeds.
        epochs: Epochs per run.
        batch_size: Batch size per run.
        results_dir: Directory that receives the per-run CSV files.
    """
    optimizers = [('SGD', 0.01), ('SGD_Momentum', 0.05), ('Adam', 0.001), ('AdamW', 0.001), ('AMSGrad', 0.001)]
    total_runs = len(optimizers) * len(seeds)
    print(f'Total experiments to run: {total_runs}')
    completed = 0
    attempted = 0
    durations = []
    for opt_name, lr in optimizers:
        for seed in seeds:
            attempted += 1
            # Announce the run first so a later error line can be attributed to it.
            tqdm.write(f'\n[{attempted}/{total_runs}] Running {opt_name} (lr={lr}, seed={seed})')
            try:
                _, dur = run_single_experiment(opt_name, seed, lr, epochs, batch_size, Path(results_dir))
            except Exception as e:
                # Deliberate best-effort: one failed run must not abort the suite.
                tqdm.write(f'❌ Error: {e}')
            else:
                durations.append(dur)
                completed += 1
    print(f'\nCompleted {completed}/{total_runs} experiments')
    if durations:
        avg_min = np.mean(durations) / 60.0
        print(f'Avg time per run: {avg_min:.2f} min')

## Statistical Comparison
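Pairwise optimizer comparisons on final-epoch test loss: a paired t-test when both samples pass a Shapiro-Wilk normality check, otherwise a Wilcoxon signed-rank test, with Holm-Bonferroni correction (saves a CSV to the results dir).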

In [None]:
def compute_statistics(results_dir: str):
    """Compare optimizers pairwise on final-epoch test loss and save a CSV.

    Reads the per-run CSVs named
    ``NN_SimpleMLP_MNIST_<optimizer>_lr<lr>_seed<seed>_publication.csv`` in
    ``results_dir``, takes the last row's ``test_loss`` for each seed, and
    for each predefined optimizer pair with at least three common seeds runs
    a paired test: a paired t-test (effect size: Cohen's d of the
    differences) when both samples have Shapiro-Wilk p > 0.05, otherwise a
    Wilcoxon signed-rank test (effect size: magnitude of the matched-pairs
    rank-biserial correlation). Significance is flagged with the
    Holm-Bonferroni step-down procedure at alpha = 0.05.

    NOTE(review): normality is checked on each sample separately, whereas
    the paired t-test's assumption concerns the differences - confirm this
    selection rule is intended.

    Args:
        results_dir: Directory holding the per-run CSVs; the comparison CSV
            ``mnist_statistical_comparisons_publication.csv`` is written here.

    Returns:
        The comparison ``DataFrame``, or ``None`` when no pair has at least
        three common seeds.
    """
    import glob
    import re

    optimizer_names = ['SGD', 'SGD_Momentum', 'Adam', 'AdamW', 'AMSGrad']
    # '_lr' directly after the optimizer name keeps 'SGD' from matching
    # 'SGD_Momentum' files and 'Adam' from matching 'AdamW' files.
    patterns = {
        opt: str(Path(results_dir) / f'NN_SimpleMLP_MNIST_{opt}_lr*_seed*_publication.csv')
        for opt in optimizer_names
    }
    # Match the seed in the file name only, so a directory that happens to
    # contain 'seed<digits>' cannot be mistaken for it.
    seed_re = re.compile(r'_seed(\d+)_publication\.csv$')

    data = {}
    for opt, pattern in patterns.items():
        vals = {}
        for csv_path in glob.glob(pattern):
            m = seed_re.search(Path(csv_path).name)
            if not m:
                continue
            run = pd.read_csv(csv_path)
            if run.empty:
                continue  # no epochs recorded, so there is no final row
            vals[int(m.group(1))] = run.iloc[-1]['test_loss']
        data[opt] = vals

    comparisons = [
        ('Adam', 'SGD'), ('AdamW', 'Adam'), ('AMSGrad', 'Adam'), ('SGD_Momentum', 'SGD'),
        ('AdamW', 'SGD'), ('AMSGrad', 'SGD'), ('AMSGrad', 'AdamW'), ('SGD_Momentum', 'Adam'),
    ]
    rows = []
    for A, B in comparisons:
        common = sorted(set(data.get(A, {})) & set(data.get(B, {})))
        if len(common) < 3:
            continue
        vals_A = np.array([data[A][s] for s in common])
        vals_B = np.array([data[B][s] for s in common])
        diffs = vals_A - vals_B
        if not np.any(diffs):
            # Every pair is identical: neither test is defined, and there is
            # no difference to detect.
            stat_name, p, d = 'None (identical values)', 1.0, 0.0
        else:
            _, pA = stats.shapiro(vals_A)
            _, pB = stats.shapiro(vals_B)
            if pA > 0.05 and pB > 0.05:
                stat_name = 'Paired t-test'
                _, p = stats.ttest_rel(vals_A, vals_B)
                d = diffs.mean() / diffs.std(ddof=1)
            else:
                stat_name = 'Wilcoxon'
                W, p = stats.wilcoxon(vals_A, vals_B)
                # W is the smaller signed-rank sum and the total rank sum is
                # n(n+1)/2 over the non-zero differences, so the
                # rank-biserial magnitude is 1 - 2W / (n(n+1)/2).
                n = int(np.count_nonzero(diffs))
                d = 1 - (4 * W) / (n * (n + 1))
        rows.append({
            'Optimizer A': A,
            'Optimizer B': B,
            'n_common_seeds': len(common),
            'Mean A': float(vals_A.mean()),
            'Std A': float(vals_A.std(ddof=1)),
            'Mean B': float(vals_B.mean()),
            'Std B': float(vals_B.std(ddof=1)),
            'Test': stat_name,
            'p-value': float(p),
            'Effect size (d or r)': float(d),
        })

    df = pd.DataFrame(rows)
    if df.empty:
        print('No comparisons could be computed (need >=3 common seeds per pair).')
        return None

    # Holm-Bonferroni step-down: test p-values in ascending order against
    # alpha / (m - k) and stop at the first one that fails.
    p_values = df['p-value'].to_numpy()
    m = len(p_values)
    holm_sig = np.zeros(m, dtype=bool)
    alpha = 0.05
    for k, idx in enumerate(np.argsort(p_values)):
        if p_values[idx] < alpha / (m - k):
            holm_sig[idx] = True
        else:
            break
    df['Significant (Holm-Bonferroni)'] = holm_sig

    out = Path(results_dir) / 'mnist_statistical_comparisons_publication.csv'
    Path(results_dir).mkdir(parents=True, exist_ok=True)
    df.to_csv(out, index=False)
    print(f'Saved statistical comparisons to: {out}')
    return df

## Execute Suite
Runs all experiments and computes statistics.

In [None]:
# Banner: title framed by two 60-character rules.
print('='*60, 'MNIST Publication Experiments (Notebook)', '='*60, sep='\n')

# Echo the effective configuration so it is recorded next to the results.
for label, value in (
    ('Device:', 'cuda' if torch.cuda.is_available() else 'cpu'),
    ('Seeds:', SEEDS),
    ('Epochs:', EPOCHS),
    ('Batch size:', BATCH_SIZE),
    ('Results dir:', RESULTS_DIR),
):
    print(label, value)
print('='*60)

# Train every optimizer/seed combination, then compare them statistically.
run_suite(SEEDS, EPOCHS, BATCH_SIZE, RESULTS_DIR)
compute_statistics(RESULTS_DIR)

print('Done.')

## Inspect Outputs
Lists generated files.

In [None]:
import glob

# Per-run result files, sorted by name; only the first ten are listed.
csv_pattern = f'{RESULTS_DIR}/NN_SimpleMLP_MNIST_*_publication.csv'
files = sorted(glob.glob(csv_pattern))
print(f'Per-run CSVs: {len(files)} files')
for csv_file in files[:10]:
    print(' -', csv_file)
remaining = len(files) - 10
if remaining > 0:
    print(f' ... and {len(files)-10} more')

# Report whether the statistical comparison CSV was produced.
stat_path = Path(RESULTS_DIR) / 'mnist_statistical_comparisons_publication.csv'
print('Stats CSV exists:', stat_path.exists(), '|', stat_path)