In [3]:
import os
import numpy as np
import pandas as pd
import cv2
import torch
import torch.nn as nn
import torch.optim as optim
import timm
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')

print("="*80)
print("NOTEBOOK 7: HYPERPARAMETER TUNING - 5 MODELS, 3 SETS")
print("="*80)

# ========== CONFIG ==========
TRAIN_DIR = '/kaggle/input/split-dataset/train'
VAL_DIR = '/kaggle/input/split-dataset/val'
TEST_DIR = '/kaggle/input/split-dataset/test'
OUTPUT_DIR = '/kaggle/working'
IMG_SIZE = 224
NUM_EPOCHS = 10  # reduced for quick tuning; increase if needed
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"\n✓ Device: {DEVICE}")
print(f"✓ Image size: {IMG_SIZE}")
print(f"✓ Epochs per run: {NUM_EPOCHS}")

# ========== CUSTOM DATASET ==========
class OCTDataset(Dataset):
    def __init__(self, root_dir, img_size=224):
        self.img_size = img_size
        self.images = []
        self.labels = []
        self.class_names = sorted([d for d in os.listdir(root_dir) if os.path.isdir(os.path.join(root_dir, d))])
        
        for label, cls_name in enumerate(self.class_names):
            cls_path = os.path.join(root_dir, cls_name)
            for img_file in os.listdir(cls_path):
                if img_file.lower().endswith(('.png', '.jpg', '.jpeg')):
                    self.images.append(os.path.join(cls_path, img_file))
                    self.labels.append(label)
    
    def __len__(self):
        return len(self.images)
    
    def __getitem__(self, idx):
        img_path = self.images[idx]
        img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
        if img is None:
            img = np.zeros((self.img_size, self.img_size), dtype=np.uint8)
        img = cv2.resize(img, (self.img_size, self.img_size))
        img_rgb = cv2.cvtColor(img, cv2.COLOR_GRAY2RGB)
        img_norm = img_rgb.astype(np.float32) / 255.0
        img_norm = (img_norm - 0.5) / 0.5
        img_tensor = torch.from_numpy(img_norm).permute(2, 0, 1)
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return img_tensor, label

# Load datasets once
train_dataset = OCTDataset(TRAIN_DIR, IMG_SIZE)
val_dataset = OCTDataset(VAL_DIR, IMG_SIZE)
test_dataset = OCTDataset(TEST_DIR, IMG_SIZE)

print(f"\n✓ Train: {len(train_dataset)}, Val: {len(val_dataset)}, Test: {len(test_dataset)}")

# ========== CUSTOM HEAD FOR CNNS ==========
class CustomHead(nn.Module):
    def __init__(self, in_features, num_classes=4):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(in_features, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Linear(256, num_classes)
        )
    def forward(self, x):
        return self.fc(x)

# ========== HYPERPARAMETER SETS ==========
hyperparam_sets = [
    {
        "name": "Set1_Adam_lr1e-3_bs32",
        "optimizer": "adam",
        "lr": 1e-3,
        "batch_size": 32,
        "weight_decay": 1e-4
    },
    {
        "name": "Set2_Adam_lr5e-4_bs16",
        "optimizer": "adam",
        "lr": 5e-4,
        "batch_size": 16,
        "weight_decay": 1e-4
    },
    {
        "name": "Set3_SGD_lr1e-2_bs32",
        "optimizer": "sgd",
        "lr": 1e-2,
        "batch_size": 32,
        "weight_decay": 5e-4,
        "momentum": 0.9
    }
]

print("\n✓ Setup complete")


NOTEBOOK 7: HYPERPARAMETER TUNING - 5 MODELS, 3 SETS

✓ Device: cuda
✓ Image size: 224
✓ Epochs per run: 10

✓ Train: 15967, Val: 2000, Test: 2002

✓ Setup complete


In [None]:
print("\n" + "="*80)
print("CELL 1: RESNET50 HYPERPARAMETER TUNING")
print("="*80)

MODEL_NAME = "resnet50"
results_resnet50 = []

for cfg in hyperparam_sets:
    print(f"\n{'='*60}")
    print(f"Running: {MODEL_NAME} - {cfg['name']}")
    print(f"{'='*60}")
    
    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=cfg['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=cfg['batch_size'], shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=cfg['batch_size'], shuffle=False)
    
    # Build model
    model = timm.create_model('resnet50', pretrained=False, num_classes=4)
    model.fc = CustomHead(2048, num_classes=4)
    model = model.to(DEVICE)
    model.train()
    
    # Build optimizer
    if cfg['optimizer'] == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=cfg['lr'], weight_decay=cfg['weight_decay'])
    else:  # SGD
        optimizer = optim.SGD(model.parameters(), lr=cfg['lr'], momentum=cfg['momentum'], 
                             weight_decay=cfg['weight_decay'])
    
    criterion = nn.CrossEntropyLoss()
    
    best_val_acc = 0.0
    best_state = None
    history = {"train_loss": [], "val_loss": [], "val_acc": []}
    
    # Training loop
    for epoch in range(NUM_EPOCHS):
        # --- Train ---
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0
        
        for x, y in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} (train)", leave=False):
            x, y = x.to(DEVICE), y.to(DEVICE)
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * x.size(0)
            preds = out.argmax(1)
            train_correct += (preds == y).sum().item()
            train_total += y.size(0)
        
        train_loss /= train_total
        train_acc = train_correct / train_total
        
        # --- Validate ---
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for x, y in tqdm(val_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} (val)", leave=False):
                x, y = x.to(DEVICE), y.to(DEVICE)
                out = model(x)
                loss = criterion(out, y)
                val_loss += loss.item() * x.size(0)
                preds = out.argmax(1)
                val_correct += (preds == y).sum().item()
                val_total += y.size(0)
        
        val_loss /= val_total
        val_acc = val_correct / val_total
        
        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)
        
        print(f"Epoch {epoch+1}/{NUM_EPOCHS} | Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = model.state_dict().copy()
    
    # Load best model
    model.load_state_dict(best_state)
    
    # --- Test Evaluation ---
    model.eval()
    test_correct = 0
    test_total = 0
    
    with torch.no_grad():
        for x, y in tqdm(test_loader, desc="Testing", leave=False):
            x, y = x.to(DEVICE), y.to(DEVICE)
            out = model(x)
            preds = out.argmax(1)
            test_correct += (preds == y).sum().item()
            test_total += y.size(0)
    
    test_acc = test_correct / test_total
    
    results_resnet50.append({
        "Model": "ResNet50",
        "Hyperparam Set": cfg['name'],
        "LR": cfg['lr'],
        "Batch Size": cfg['batch_size'],
        "Optimizer": cfg['optimizer'],
        "Best Val Acc": f"{best_val_acc:.4f}",
        "Test Acc": f"{test_acc:.4f}"
    })
    
    print(f"\n✓ {cfg['name']} completed | Best Val Acc: {best_val_acc:.4f}, Test Acc: {test_acc:.4f}")

# Display results
print("\n" + "="*80)
print("RESNET50 RESULTS")
print("="*80)
df_resnet50 = pd.DataFrame(results_resnet50)
print(df_resnet50.to_string(index=False))



CELL 1: RESNET50 HYPERPARAMETER TUNING

Running: resnet50 - Set1_Adam_lr1e-3_bs32


                                                                     

Epoch 1/10 | Train Loss: 0.9978, Val Loss: 0.9351, Val Acc: 0.6070


                                                                     

Epoch 2/10 | Train Loss: 0.8714, Val Loss: 1.1127, Val Acc: 0.5700


                                                                     

Epoch 3/10 | Train Loss: 0.8275, Val Loss: 0.8615, Val Acc: 0.6555


                                                                     

Epoch 4/10 | Train Loss: 0.7863, Val Loss: 0.9794, Val Acc: 0.6460


                                                                     

Epoch 5/10 | Train Loss: 0.7532, Val Loss: 0.8176, Val Acc: 0.6785


                                                                     

Epoch 6/10 | Train Loss: 0.7205, Val Loss: 0.8809, Val Acc: 0.6490


                                                                     

Epoch 7/10 | Train Loss: 0.6872, Val Loss: 0.7083, Val Acc: 0.7190


                                                                     

Epoch 8/10 | Train Loss: 0.6713, Val Loss: 0.6719, Val Acc: 0.7270


                                                                     

Epoch 9/10 | Train Loss: 0.6549, Val Loss: 0.6344, Val Acc: 0.7425


                                                                      

Epoch 10/10 | Train Loss: 0.6461, Val Loss: 0.6165, Val Acc: 0.7455


                                                        


✓ Set1_Adam_lr1e-3_bs32 completed | Best Val Acc: 0.7455, Test Acc: 0.7248

Running: resnet50 - Set2_Adam_lr5e-4_bs16


                                                                     

Epoch 1/10 | Train Loss: 1.0252, Val Loss: 0.8889, Val Acc: 0.6590


                                                                     

Epoch 2/10 | Train Loss: 0.8887, Val Loss: 0.8621, Val Acc: 0.6480


                                                                     

Epoch 3/10 | Train Loss: 0.8423, Val Loss: 0.8714, Val Acc: 0.6455


                                                                     

Epoch 4/10 | Train Loss: 0.7975, Val Loss: 0.7502, Val Acc: 0.6935


                                                                     

Epoch 5/10 | Train Loss: 0.7511, Val Loss: 0.9374, Val Acc: 0.5630


                                                                     

Epoch 6/10 | Train Loss: 0.7140, Val Loss: 0.6336, Val Acc: 0.7455


                                                                     

Epoch 7/10 | Train Loss: 0.6941, Val Loss: 1.2693, Val Acc: 0.5075


                                                                     

Epoch 8/10 | Train Loss: 0.6761, Val Loss: 0.6662, Val Acc: 0.7285


                                                                     

Epoch 9/10 | Train Loss: 0.6629, Val Loss: 0.6081, Val Acc: 0.7480


                                                                      

Epoch 10/10 | Train Loss: 0.6485, Val Loss: 0.7103, Val Acc: 0.6965


                                                          


✓ Set2_Adam_lr5e-4_bs16 completed | Best Val Acc: 0.7480, Test Acc: 0.6983

Running: resnet50 - Set3_SGD_lr1e-2_bs32


                                                                     

Epoch 1/10 | Train Loss: 1.1679, Val Loss: 1.1934, Val Acc: 0.5420


                                                                     

Epoch 2/10 | Train Loss: 0.9764, Val Loss: 1.9266, Val Acc: 0.4275


                                                                     

Epoch 3/10 | Train Loss: 0.9170, Val Loss: 1.0837, Val Acc: 0.5420


                                                                     

Epoch 4/10 | Train Loss: 0.8829, Val Loss: 1.1164, Val Acc: 0.5250


                                                                     

Epoch 5/10 | Train Loss: 0.8328, Val Loss: 1.2597, Val Acc: 0.5310


Epoch 6/10 (train):  97%|█████████▋| 485/499 [03:20<00:05,  2.37it/s]

In [None]:
print("\n" + "="*80)
print("CELL 2: MOBILENETV2 HYPERPARAMETER TUNING")
print("="*80)

MODEL_NAME = "mobilenetv2"
results_mobilenetv2 = []

for cfg in hyperparam_sets:
    print(f"\n{'='*60}")
    print(f"Running: {MODEL_NAME} - {cfg['name']}")
    print(f"{'='*60}")
    
    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=cfg['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=cfg['batch_size'], shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=cfg['batch_size'], shuffle=False)
    
    # Build model
    model = timm.create_model('mobilenetv2_100', pretrained=False, num_classes=4)
    model.classifier = CustomHead(1280, num_classes=4)
    model = model.to(DEVICE)
    model.train()
    
    # Build optimizer
    if cfg['optimizer'] == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=cfg['lr'], weight_decay=cfg['weight_decay'])
    else:  # SGD
        optimizer = optim.SGD(model.parameters(), lr=cfg['lr'], momentum=cfg['momentum'], 
                             weight_decay=cfg['weight_decay'])
    
    criterion = nn.CrossEntropyLoss()
    
    best_val_acc = 0.0
    best_state = None
    history = {"train_loss": [], "val_loss": [], "val_acc": []}
    
    # Training loop
    for epoch in range(NUM_EPOCHS):
        # --- Train ---
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0
        
        for x, y in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} (train)", leave=False):
            x, y = x.to(DEVICE), y.to(DEVICE)
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * x.size(0)
            preds = out.argmax(1)
            train_correct += (preds == y).sum().item()
            train_total += y.size(0)
        
        train_loss /= train_total
        train_acc = train_correct / train_total
        
        # --- Validate ---
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for x, y in tqdm(val_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} (val)", leave=False):
                x, y = x.to(DEVICE), y.to(DEVICE)
                out = model(x)
                loss = criterion(out, y)
                val_loss += loss.item() * x.size(0)
                preds = out.argmax(1)
                val_correct += (preds == y).sum().item()
                val_total += y.size(0)
        
        val_loss /= val_total
        val_acc = val_correct / val_total
        
        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)
        
        print(f"Epoch {epoch+1}/{NUM_EPOCHS} | Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = model.state_dict().copy()
    
    # Load best model
    model.load_state_dict(best_state)
    
    # --- Test Evaluation ---
    model.eval()
    test_correct = 0
    test_total = 0
    
    with torch.no_grad():
        for x, y in tqdm(test_loader, desc="Testing", leave=False):
            x, y = x.to(DEVICE), y.to(DEVICE)
            out = model(x)
            preds = out.argmax(1)
            test_correct += (preds == y).sum().item()
            test_total += y.size(0)
    
    test_acc = test_correct / test_total
    
    results_mobilenetv2.append({
        "Model": "MobileNetV2",
        "Hyperparam Set": cfg['name'],
        "LR": cfg['lr'],
        "Batch Size": cfg['batch_size'],
        "Optimizer": cfg['optimizer'],
        "Best Val Acc": f"{best_val_acc:.4f}",
        "Test Acc": f"{test_acc:.4f}"
    })
    
    print(f"\n✓ {cfg['name']} completed | Best Val Acc: {best_val_acc:.4f}, Test Acc: {test_acc:.4f}")

# Display results
print("\n" + "="*80)
print("MOBILENETV2 RESULTS")
print("="*80)
df_mobilenetv2 = pd.DataFrame(results_mobilenetv2)
print(df_mobilenetv2.to_string(index=False))


In [None]:
print("\n" + "="*80)
print("CELL 3: EFFICIENTNETB0 HYPERPARAMETER TUNING")
print("="*80)

MODEL_NAME = "efficientnetb0"
results_efficientnetb0 = []

for cfg in hyperparam_sets:
    print(f"\n{'='*60}")
    print(f"Running: {MODEL_NAME} - {cfg['name']}")
    print(f"{'='*60}")
    
    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=cfg['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=cfg['batch_size'], shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=cfg['batch_size'], shuffle=False)
    
    # Build model
    model = timm.create_model('efficientnet_b0', pretrained=False, num_classes=4)
    model.classifier = CustomHead(1280, num_classes=4)
    model = model.to(DEVICE)
    model.train()
    
    # Build optimizer
    if cfg['optimizer'] == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=cfg['lr'], weight_decay=cfg['weight_decay'])
    else:  # SGD
        optimizer = optim.SGD(model.parameters(), lr=cfg['lr'], momentum=cfg['momentum'], 
                             weight_decay=cfg['weight_decay'])
    
    criterion = nn.CrossEntropyLoss()
    
    best_val_acc = 0.0
    best_state = None
    history = {"train_loss": [], "val_loss": [], "val_acc": []}
    
    # Training loop
    for epoch in range(NUM_EPOCHS):
        # --- Train ---
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0
        
        for x, y in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} (train)", leave=False):
            x, y = x.to(DEVICE), y.to(DEVICE)
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * x.size(0)
            preds = out.argmax(1)
            train_correct += (preds == y).sum().item()
            train_total += y.size(0)
        
        train_loss /= train_total
        train_acc = train_correct / train_total
        
        # --- Validate ---
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for x, y in tqdm(val_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} (val)", leave=False):
                x, y = x.to(DEVICE), y.to(DEVICE)
                out = model(x)
                loss = criterion(out, y)
                val_loss += loss.item() * x.size(0)
                preds = out.argmax(1)
                val_correct += (preds == y).sum().item()
                val_total += y.size(0)
        
        val_loss /= val_total
        val_acc = val_correct / val_total
        
        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)
        
        print(f"Epoch {epoch+1}/{NUM_EPOCHS} | Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = model.state_dict().copy()
    
    # Load best model
    model.load_state_dict(best_state)
    
    # --- Test Evaluation ---
    model.eval()
    test_correct = 0
    test_total = 0
    
    with torch.no_grad():
        for x, y in tqdm(test_loader, desc="Testing", leave=False):
            x, y = x.to(DEVICE), y.to(DEVICE)
            out = model(x)
            preds = out.argmax(1)
            test_correct += (preds == y).sum().item()
            test_total += y.size(0)
    
    test_acc = test_correct / test_total
    
    results_efficientnetb0.append({
        "Model": "EfficientNetB0",
        "Hyperparam Set": cfg['name'],
        "LR": cfg['lr'],
        "Batch Size": cfg['batch_size'],
        "Optimizer": cfg['optimizer'],
        "Best Val Acc": f"{best_val_acc:.4f}",
        "Test Acc": f"{test_acc:.4f}"
    })
    
    print(f"\n✓ {cfg['name']} completed | Best Val Acc: {best_val_acc:.4f}, Test Acc: {test_acc:.4f}")

# Display results
print("\n" + "="*80)
print("EFFICIENTNETB0 RESULTS")
print("="*80)
df_efficientnetb0 = pd.DataFrame(results_efficientnetb0)
print(df_efficientnetb0.to_string(index=False))


In [None]:
print("\n" + "="*80)
print("CELL 4: SWIN HYPERPARAMETER TUNING")
print("="*80)

MODEL_NAME = "swin"
results_swin = []

for cfg in hyperparam_sets:
    print(f"\n{'='*60}")
    print(f"Running: {MODEL_NAME} - {cfg['name']}")
    print(f"{'='*60}")
    
    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=cfg['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=cfg['batch_size'], shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=cfg['batch_size'], shuffle=False)
    
    # Build model
    model = timm.create_model('swin_tiny_patch4_window7_224', pretrained=False, num_classes=4)
    model = model.to(DEVICE)
    model.train()
    
    # Build optimizer
    if cfg['optimizer'] == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=cfg['lr'], weight_decay=cfg['weight_decay'])
    else:  # SGD
        optimizer = optim.SGD(model.parameters(), lr=cfg['lr'], momentum=cfg['momentum'], 
                             weight_decay=cfg['weight_decay'])
    
    criterion = nn.CrossEntropyLoss()
    
    best_val_acc = 0.0
    best_state = None
    history = {"train_loss": [], "val_loss": [], "val_acc": []}
    
    # Training loop
    for epoch in range(NUM_EPOCHS):
        # --- Train ---
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0
        
        for x, y in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} (train)", leave=False):
            x, y = x.to(DEVICE), y.to(DEVICE)
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * x.size(0)
            preds = out.argmax(1)
            train_correct += (preds == y).sum().item()
            train_total += y.size(0)
        
        train_loss /= train_total
        train_acc = train_correct / train_total
        
        # --- Validate ---
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for x, y in tqdm(val_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} (val)", leave=False):
                x, y = x.to(DEVICE), y.to(DEVICE)
                out = model(x)
                loss = criterion(out, y)
                val_loss += loss.item() * x.size(0)
                preds = out.argmax(1)
                val_correct += (preds == y).sum().item()
                val_total += y.size(0)
        
        val_loss /= val_total
        val_acc = val_correct / val_total
        
        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)
        
        print(f"Epoch {epoch+1}/{NUM_EPOCHS} | Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = model.state_dict().copy()
    
    # Load best model
    model.load_state_dict(best_state)
    
    # --- Test Evaluation ---
    model.eval()
    test_correct = 0
    test_total = 0
    
    with torch.no_grad():
        for x, y in tqdm(test_loader, desc="Testing", leave=False):
            x, y = x.to(DEVICE), y.to(DEVICE)
            out = model(x)
            preds = out.argmax(1)
            test_correct += (preds == y).sum().item()
            test_total += y.size(0)
    
    test_acc = test_correct / test_total
    
    results_swin.append({
        "Model": "Swin",
        "Hyperparam Set": cfg['name'],
        "LR": cfg['lr'],
        "Batch Size": cfg['batch_size'],
        "Optimizer": cfg['optimizer'],
        "Best Val Acc": f"{best_val_acc:.4f}",
        "Test Acc": f"{test_acc:.4f}"
    })
    
    print(f"\n✓ {cfg['name']} completed | Best Val Acc: {best_val_acc:.4f}, Test Acc: {test_acc:.4f}")

# Display results
print("\n" + "="*80)
print("SWIN RESULTS")
print("="*80)
df_swin = pd.DataFrame(results_swin)
print(df_swin.to_string(index=False))


In [None]:
print("\n" + "="*80)
print("CELL 5: VIT HYPERPARAMETER TUNING")
print("="*80)

MODEL_NAME = "vit"
results_vit = []

for cfg in hyperparam_sets:
    print(f"\n{'='*60}")
    print(f"Running: {MODEL_NAME} - {cfg['name']}")
    print(f"{'='*60}")
    
    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=cfg['batch_size'], shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=cfg['batch_size'], shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=cfg['batch_size'], shuffle=False)
    
    # Build model
    model = timm.create_model('vit_base_patch16_224', pretrained=False, num_classes=4)
    model = model.to(DEVICE)
    model.train()
    
    # Build optimizer
    if cfg['optimizer'] == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=cfg['lr'], weight_decay=cfg['weight_decay'])
    else:  # SGD
        optimizer = optim.SGD(model.parameters(), lr=cfg['lr'], momentum=cfg['momentum'], 
                             weight_decay=cfg['weight_decay'])
    
    criterion = nn.CrossEntropyLoss()
    
    best_val_acc = 0.0
    best_state = None
    history = {"train_loss": [], "val_loss": [], "val_acc": []}
    
    # Training loop
    for epoch in range(NUM_EPOCHS):
        # --- Train ---
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0
        
        for x, y in tqdm(train_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} (train)", leave=False):
            x, y = x.to(DEVICE), y.to(DEVICE)
            optimizer.zero_grad()
            out = model(x)
            loss = criterion(out, y)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item() * x.size(0)
            preds = out.argmax(1)
            train_correct += (preds == y).sum().item()
            train_total += y.size(0)
        
        train_loss /= train_total
        train_acc = train_correct / train_total
        
        # --- Validate ---
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for x, y in tqdm(val_loader, desc=f"Epoch {epoch+1}/{NUM_EPOCHS} (val)", leave=False):
                x, y = x.to(DEVICE), y.to(DEVICE)
                out = model(x)
                loss = criterion(out, y)
                val_loss += loss.item() * x.size(0)
                preds = out.argmax(1)
                val_correct += (preds == y).sum().item()
                val_total += y.size(0)
        
        val_loss /= val_total
        val_acc = val_correct / val_total
        
        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)
        
        print(f"Epoch {epoch+1}/{NUM_EPOCHS} | Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
        if val_acc > best_val_acc:
            best_val_acc = val_acc
            best_state = model.state_dict().copy()
    
    # Load best model
    model.load_state_dict(best_state)
    
    # --- Test Evaluation ---
    model.eval()
    test_correct = 0
    test_total = 0
    
    with torch.no_grad():
        for x, y in tqdm(test_loader, desc="Testing", leave=False):
            x, y = x.to(DEVICE), y.to(DEVICE)
            out = model(x)
            preds = out.argmax(1)
            test_correct += (preds == y).sum().item()
            test_total += y.size(0)
    
    test_acc = test_correct / test_total
    
    results_vit.append({
        "Model": "ViT",
        "Hyperparam Set": cfg['name'],
        "LR": cfg['lr'],
        "Batch Size": cfg['batch_size'],
        "Optimizer": cfg['optimizer'],
        "Best Val Acc": f"{best_val_acc:.4f}",
        "Test Acc": f"{test_acc:.4f}"
    })
    
    print(f"\n✓ {cfg['name']} completed | Best Val Acc: {best_val_acc:.4f}, Test Acc: {test_acc:.4f}")

# Display results
print("\n" + "="*80)
print("VIT RESULTS")
print("="*80)
df_vit = pd.DataFrame(results_vit)
print(df_vit.to_string(index=False))


In [None]:
print("\n" + "="*80)
print("COMBINED HYPERPARAMETER TUNING SUMMARY")
print("="*80)

# Combine all results
all_results = results_resnet50 + results_mobilenetv2 + results_efficientnetb0 + results_swin + results_vit
df_all = pd.DataFrame(all_results)

print("\nAll Models - All Hyperparameter Sets:")
print(df_all.to_string(index=False))

# Save to CSV
csv_path = os.path.join(OUTPUT_DIR, 'hyperparameter_tuning_results.csv')
df_all.to_csv(csv_path, index=False)
print(f"\n✓ Results saved to: {csv_path}")

# Best per model
print("\n" + "="*80)
print("BEST HYPERPARAMETER SET PER MODEL")
print("="*80)
for model in ["ResNet50", "MobileNetV2", "EfficientNetB0", "Swin", "ViT"]:
    model_data = df_all[df_all["Model"] == model]
    best_idx = model_data['Test Acc'].str.replace("0.", "").astype(float).idxmax()
    best_row = df_all.loc[best_idx]
    print(f"\n{model}:")
    print(f"  Best Set: {best_row['Hyperparam Set']}")
    print(f"  Val Acc: {best_row['Best Val Acc']}, Test Acc: {best_row['Test Acc']}")

print("\n" + "="*80)
print("HYPERPARAMETER TUNING COMPLETE")
print("="*80)
