# Learning Rate Scheduler Sweep

This notebook compares learning-rate (LR) schedulers on the central baseline to find the one that performs best.

**Schedulers tested:**
1. Cosine Annealing (current default)
2. Step LR (decay every N epochs)
3. Exponential LR
4. ReduceLROnPlateau
5. Constant LR (no scheduling)

**Estimated runtime: ~1 hour on a T4 GPU** (5 schedulers × 20 epochs each, evaluating every 2 epochs)

In [None]:
import os
import sys

if not os.path.exists('AML-Project-2') and not os.path.exists('src'):
    !git clone https://github.com/emanueleR3/AML-Project-2.git
    %cd AML-Project-2

!pip install -q -r requirements.txt

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import (
    CosineAnnealingLR,
    StepLR,
    ExponentialLR,
    ReduceLROnPlateau,
    CosineAnnealingWarmRestarts
)
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from src.utils import set_seed, get_device, ensure_dir, save_metrics_json
from src.data import load_cifar100, create_dataloader
from src.model import build_model
from src.train import train_one_epoch, evaluate

# Every artifact written by this notebook (per-scheduler JSON, summary, CSV,
# figures) goes under this directory.
OUTPUT_DIR = 'output/scheduler_sweep'
ensure_dir(OUTPUT_DIR)  # presumably creates the directory if it is missing
device = get_device()
print(f"Device: {device}")

# Seed once before any data splitting or model construction; 42 is the same
# value used for the split generator and the model config in later cells.
set_seed(42)

In [None]:
# CIFAR-100: keep the test set as returned by load_cifar100 and carve a
# 90/10 train/validation split out of the training set.
train_full, test_data = load_cifar100(data_dir='./data', download=True)

# Dedicated, fixed-seed generator so the split does not depend on global RNG state.
split_rng = torch.Generator().manual_seed(42)
train_size = int(0.9 * len(train_full))
val_size = len(train_full) - train_size
train_data, val_data = torch.utils.data.random_split(
    train_full, [train_size, val_size], generator=split_rng
)

# Only the training loader shuffles; evaluation loaders keep dataset order.
train_loader = create_dataloader(train_data, batch_size=64, shuffle=True)
val_loader, test_loader = (
    create_dataloader(split, batch_size=64, shuffle=False)
    for split in (val_data, test_data)
)

print(f"Train: {len(train_data)}, Val: {len(val_data)}, Test: {len(test_data)}")

In [None]:
# Model config: handed as a single dict to build_model() for every run.
config = {
    'model_name': 'dino_vits16',
    'num_classes': 100,  # CIFAR-100
    'freeze_policy': 'head_only',  # presumably only the classifier head is trained — confirm in src.model
    'dropout': 0.1,
    'device': device,
    'seed': 42
}

# Training config: shared by every scheduler so that runs differ only in the schedule.
NUM_EPOCHS = 20
BASE_LR = 0.001  # initial learning rate given to AdamW
WEIGHT_DECAY = 1e-4  # weight decay given to AdamW
EVAL_FREQ = 2  # Evaluate every 2 epochs

In [None]:
def get_scheduler(name, optimizer, num_epochs):
    """Create the learning-rate scheduler registered under ``name``.

    Args:
        name: Scheduler key. One of 'cosine', 'step', 'exponential',
            'plateau', 'constant' or 'warmup_cosine'.
        optimizer: Optimizer the scheduler is constructed for.
        num_epochs: Total number of training epochs. Only the 'cosine'
            schedule uses it (as ``T_max``).

    Returns:
        The scheduler instance, or ``None`` when no scheduling should be
        applied. ``None`` is returned for 'constant' and, for backward
        compatibility, for unknown names; the latter additionally emits a
        ``UserWarning`` so that a typo does not silently turn a run into a
        constant-LR run.
    """
    import warnings

    # Factories are lazy: only the requested scheduler is ever constructed.
    factories = {
        'cosine': lambda: CosineAnnealingLR(optimizer, T_max=num_epochs),
        'step': lambda: StepLR(optimizer, step_size=5, gamma=0.5),
        'exponential': lambda: ExponentialLR(optimizer, gamma=0.9),
        # mode='max' because the monitored metric is an accuracy.
        'plateau': lambda: ReduceLROnPlateau(optimizer, mode='max', patience=3, factor=0.5),
        'constant': lambda: None,
        # NOTE: despite the key, this is cosine annealing with warm *restarts*;
        # it does not add a warm-up phase.
        'warmup_cosine': lambda: CosineAnnealingWarmRestarts(optimizer, T_0=5, T_mult=2),
    }

    factory = factories.get(name)
    if factory is None:
        warnings.warn(
            f"Unknown scheduler {name!r}; falling back to a constant learning rate. "
            f"Known schedulers: {sorted(factories)}",
            stacklevel=2,
        )
        return None
    return factory()


def train_with_scheduler(scheduler_name, num_epochs=NUM_EPOCHS, eval_freq=EVAL_FREQ):
    """Train a freshly built central-baseline model under one LR scheduler.

    Every call re-seeds the RNGs, builds a new model from ``config`` and a
    new AdamW optimizer, so that runs differ only in the schedule.

    Args:
        scheduler_name: Key understood by ``get_scheduler``.
        num_epochs: Number of training epochs.
        eval_freq: Evaluate on the validation and test loaders every
            ``eval_freq`` epochs, and always after the last epoch.

    Returns:
        dict with per-epoch lists 'epoch', 'train_loss', 'train_acc',
        'val_acc', 'test_acc' and 'lr' (the two evaluation accuracies are
        NaN on epochs without evaluation), plus the scalars 'best_val_acc'
        (highest validation accuracy seen) and 'final_test_acc' (test
        accuracy of the last evaluation).
    """
    print(f"\nTraining with: {scheduler_name}")
    print("-" * 40)

    # Start every run from the same RNG state. Without this only the first
    # run of a sweep would start from the notebook-level seed, and the
    # comparison would mix scheduler effects with RNG differences.
    set_seed(config['seed'])

    model = build_model(config)
    model.to(device)

    optimizer = optim.AdamW(model.parameters(), lr=BASE_LR, weight_decay=WEIGHT_DECAY)
    scheduler = get_scheduler(scheduler_name, optimizer, num_epochs)
    criterion = nn.CrossEntropyLoss()

    history = {
        'epoch': [], 'train_loss': [], 'train_acc': [],
        'val_acc': [], 'test_acc': [], 'lr': []
    }

    best_val_acc = 0
    last_test_acc = 0

    for epoch in range(1, num_epochs + 1):
        # LR in effect during this epoch, read before the scheduler advances.
        current_lr = optimizer.param_groups[0]['lr']

        # Train - NOTE: order is (model, loader, optimizer, criterion, device)
        train_loss, train_acc = train_one_epoch(
            model, train_loader, optimizer, criterion, device, show_progress=False
        )

        # Evaluate every eval_freq epochs or at the end
        evaluated = epoch % eval_freq == 0 or epoch == num_epochs
        if evaluated:
            _, val_acc = evaluate(model, val_loader, criterion, device, show_progress=False)
            _, test_acc = evaluate(model, test_loader, criterion, device, show_progress=False)
            last_test_acc = test_acc

            if val_acc > best_val_acc:
                best_val_acc = val_acc

            print(f"Epoch {epoch:2d} | LR: {current_lr:.6f} | Train: {train_acc:.1f}% | Val: {val_acc:.1f}% | Test: {test_acc:.1f}%")
        else:
            # NaN marks "not evaluated"; the plotting code filters these out.
            val_acc = float('nan')
            test_acc = float('nan')
            print(f"Epoch {epoch:2d} | LR: {current_lr:.6f} | Train: {train_acc:.1f}%")

        # Update scheduler
        if scheduler is not None:
            if isinstance(scheduler, ReduceLROnPlateau):
                # Step only on a fresh validation metric. Feeding the stale
                # value on non-evaluated epochs would count each of them as
                # "no improvement" and shorten the effective patience.
                if evaluated:
                    scheduler.step(val_acc)
            else:
                scheduler.step()

        history['epoch'].append(epoch)
        history['train_loss'].append(train_loss)
        history['train_acc'].append(train_acc)
        history['val_acc'].append(val_acc)
        history['test_acc'].append(test_acc)
        history['lr'].append(current_lr)

    history['best_val_acc'] = best_val_acc
    history['final_test_acc'] = last_test_acc

    return history

## Run Scheduler Sweep

In [None]:
SCHEDULERS = ['cosine', 'step', 'exponential', 'plateau', 'constant']

# One full training run per scheduler. Each history is kept in memory for the
# summary and plots below, and is also written to its own JSON file.
results = {}
for sched_name in SCHEDULERS:
    print(f"\n{'='*50}")
    print(f"Scheduler: {sched_name.upper()}")
    print(f"{'='*50}")

    history = results[sched_name] = train_with_scheduler(sched_name)

    metrics_path = os.path.join(OUTPUT_DIR, f'scheduler_{sched_name}.json')
    save_metrics_json(metrics_path, history)

    print(f"\n→ Best Val: {history['best_val_acc']:.2f}% | Final Test: {history['final_test_acc']:.2f}%")

## Results Summary

In [None]:
print("\n" + "="*60)
print("SCHEDULER COMPARISON RESULTS")
print("="*60)

# One table row per scheduler, built straight from the run histories.
summary_data = [
    {
        'Scheduler': name.upper(),
        'Best Val Acc (%)': hist['best_val_acc'],
        'Final Test Acc (%)': hist['final_test_acc'],
    }
    for name, hist in results.items()
]

# Rank by validation accuracy so that the first row is the selected scheduler.
df = pd.DataFrame(summary_data)
df = df.sort_values('Best Val Acc (%)', ascending=False)
print(df.to_string(index=False))

best = df.iloc[0]
print(f"\n★ BEST SCHEDULER: {best['Scheduler']} ({best['Best Val Acc (%)']:.2f}% val, {best['Final Test Acc (%)']:.2f}% test)")

In [None]:
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
ax1, ax2, ax3 = axes

colors = {'cosine': '#2ecc71', 'step': '#3498db', 'exponential': '#e74c3c',
          'plateau': '#9b59b6', 'constant': '#95a5a6'}

# Panels 1 and 2 are drawn in a single pass over the runs.
for name, hist in results.items():
    line_color = colors.get(name, 'gray')

    # Panel 1: validation accuracy. Epochs without evaluation hold NaN and
    # are dropped so that the line connects only real measurements.
    epochs = np.array(hist['epoch'])
    val_acc = np.array(hist['val_acc'])
    valid = ~np.isnan(val_acc)
    ax1.plot(epochs[valid], val_acc[valid], 'o-', label=name, color=line_color, linewidth=2)

    # Panel 2: learning rate that was in effect during each epoch.
    ax2.plot(hist['epoch'], hist['lr'], label=name, color=line_color, linewidth=2)

ax1.set(xlabel='Epoch', ylabel='Validation Accuracy (%)', title='Validation Accuracy by Scheduler')
ax1.legend()
ax1.grid(alpha=0.3)

ax2.set(xlabel='Epoch', ylabel='Learning Rate', title='Learning Rate Schedule')
ax2.legend()
ax2.grid(alpha=0.3)
ax2.set_yscale('log')

# Panel 3: final test accuracy per scheduler, with the value printed above each bar.
names = list(results)
test_accs = [results[n]['final_test_acc'] for n in names]
bar_colors = [colors.get(n, 'gray') for n in names]
bars = ax3.bar(names, test_accs, color=bar_colors)
ax3.set(ylabel='Test Accuracy (%)', title='Final Test Accuracy')
ax3.grid(axis='y', alpha=0.3)
for bar, acc in zip(bars, test_accs):
    label_x = bar.get_x() + bar.get_width() / 2
    label_y = bar.get_height() + 0.3
    ax3.text(label_x, label_y, f'{acc:.1f}', ha='center', fontsize=10)

plt.tight_layout()
# Saved twice: once as PDF and once as a 150-dpi PNG.
plt.savefig(os.path.join(OUTPUT_DIR, 'scheduler_comparison.pdf'), bbox_inches='tight')
plt.savefig(os.path.join(OUTPUT_DIR, 'scheduler_comparison.png'), dpi=150, bbox_inches='tight')
plt.show()

In [None]:
# df is sorted by best validation accuracy (descending), so its first row is
# the selected scheduler. 'best_test_acc' is that scheduler's final test
# accuracy, not the maximum test accuracy over all runs.
top_row = df.iloc[0]
per_scheduler = {
    name: {'val': h['best_val_acc'], 'test': h['final_test_acc']}
    for name, h in results.items()
}
summary = {
    'best_scheduler': top_row['Scheduler'].lower(),
    'best_val_acc': top_row['Best Val Acc (%)'],
    'best_test_acc': top_row['Final Test Acc (%)'],
    'all_results': per_scheduler,
}

save_metrics_json(os.path.join(OUTPUT_DIR, 'summary.json'), summary)
df.to_csv(os.path.join(OUTPUT_DIR, 'scheduler_comparison.csv'), index=False)

print(f"\nResults saved to {OUTPUT_DIR}/")