In [15]:
import os, json
from datetime import datetime

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torchvision import datasets, transforms
from sklearn.metrics import confusion_matrix

import wandb

In [16]:
# -------------------------
# 0) Setup
# -------------------------
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Root folder for everything this notebook writes (TensorBoard runs, checkpoints).
OUT_DIR = "./outputs"
os.makedirs(OUT_DIR, exist_ok=True)

# Weights & Biases project the training run is logged under.
wandb_project = "cinic10-final-run"

# Human-readable class labels.
# NOTE(review): presumably in the same (alphabetical) order in which ImageFolder
# assigns class indices -- confirm wherever this list is used for reporting.
CLASS_NAMES = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]

In [17]:
# -------------------------
# 1) Data transforms
# -------------------------
def _tensor_and_normalize():
    """Return fresh ToTensor + Normalize steps that end every pipeline.

    Normalize uses mean 0.5 and std 0.5 on each of the three channels.
    """
    channel_stat = (0.5, 0.5, 0.5)
    return [
        transforms.ToTensor(),
        transforms.Normalize(channel_stat, channel_stat),
    ]


# Training pipeline: random flip and padded random crop as augmentation,
# followed by the common tensor conversion / normalization.
train_transform = transforms.Compose(
    [
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(32, padding=4),
        *_tensor_and_normalize(),
    ]
)

# Validation / test pipeline: no augmentation, only the common steps.
test_transform = transforms.Compose(_tensor_and_normalize())

In [18]:
# -------------------------
# 2) Load datasets
# -------------------------
DATA_DIR = "./data"   # <-- adjust path


def _split_dataset(split, transform):
    """Build the ImageFolder dataset rooted at DATA_DIR/<split>."""
    return datasets.ImageFolder(os.path.join(DATA_DIR, split), transform=transform)


def _split_loader(dataset, shuffle):
    """Wrap a dataset in a DataLoader using the settings shared by all splits."""
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=128,
        shuffle=shuffle,
        num_workers=4,
    )


# Only the training split is augmented; valid/test use the plain transform.
train_dataset = _split_dataset("train", train_transform)
val_dataset = _split_dataset("valid", test_transform)
test_dataset = _split_dataset("test", test_transform)

# Only the training loader shuffles; evaluation order stays fixed.
train_loader = _split_loader(train_dataset, shuffle=True)
val_loader = _split_loader(val_dataset, shuffle=False)
test_loader = _split_loader(test_dataset, shuffle=False)


In [19]:
# -------------------------
# 3) Model definition
# -------------------------
class FeedForwardMLP(nn.Module):
    """Fully connected classifier applied to flattened inputs.

    The network is a stack of ``Linear -> ReLU -> Dropout`` groups (one per
    entry in ``hidden_sizes``) followed by a final ``Linear`` layer that emits
    ``num_classes`` logits. All layers live in ``self.net`` (an
    ``nn.Sequential``), so state-dict keys have the form ``net.<index>.*``.

    Args:
        hidden_sizes: Iterable of hidden layer widths, in order.
        dropout: Dropout probability used after every hidden activation.
        num_classes: Number of output logits.
        input_dim: Number of features per sample after flattening. Defaults
            to ``3*32*32`` (three-channel 32x32 images).
    """

    def __init__(self, hidden_sizes=(2048, 1024, 512, 256), dropout=0.5, num_classes=10,
                 input_dim=3*32*32):
        super().__init__()
        # The default is an immutable tuple so it cannot be shared and mutated
        # across instances; any iterable of ints is still accepted.
        layers, in_dim = [], input_dim
        for h in hidden_sizes:
            layers.append(nn.Linear(in_dim, h))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            in_dim = h
        layers.append(nn.Linear(in_dim, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten every dimension after the batch axis and return the logits."""
        # reshape (unlike view) also handles non-contiguous inputs, e.g. a
        # permuted NHWC -> NCHW tensor, by copying only when it has to.
        x = x.reshape(x.size(0), -1)
        return self.net(x)

In [20]:

# -------------------------
# 4) Training function
# -------------------------
def l1_penalty(model):
    """Return the sum of absolute values over every parameter of ``model``.

    All tensors yielded by ``model.parameters()`` are included. For a model
    without parameters the result is the plain integer ``0``.
    """
    total = 0
    for param in model.parameters():
        total = total + torch.sum(torch.abs(param))
    return total

def run_training(model, train_loader, val_loader, cfg, writer=None):
    """Train ``model`` with early stopping and return it at its best validation loss.

    Args:
        model: ``nn.Module`` producing class logits; it is moved to the
            module-level ``device``.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        cfg: Dict of hyper-parameters. Required keys: ``lr``,
            ``weight_decay``, ``epochs``, ``patience``, plus ``momentum``
            unless ``cfg["optimizer"] == "AdamW"``. Optional keys:
            ``optimizer`` (anything other than ``"AdamW"`` selects SGD) and
            ``l1_lambda`` (L1 penalty weight, default 0.0 = disabled).
        writer: Optional TensorBoard ``SummaryWriter``; per-epoch scalars are
            written to it when given.

    Returns:
        ``(model, history)``. ``model`` carries the weights from the epoch
        with the lowest validation loss (or its current weights if no epoch
        improved on ``inf``). ``history`` maps ``train_loss``, ``val_loss``,
        ``train_acc``, ``val_acc`` and ``lr`` to per-epoch lists.

    Notes:
        * When ``l1_lambda > 0`` the reported training loss includes the L1
          term, whereas the validation loss is plain cross-entropy.
        * Metrics are sent to Weights & Biases only while a run is active.
    """
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()

    if cfg.get("optimizer", "SGD") == "AdamW":
        optimizer = optim.AdamW(model.parameters(), lr=cfg["lr"], weight_decay=cfg["weight_decay"])
    else:
        optimizer = optim.SGD(model.parameters(), lr=cfg["lr"], momentum=cfg["momentum"], weight_decay=cfg["weight_decay"])

    # Halve the learning rate every 10 epochs.
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.5)

    # Loop-invariant: look the L1 weight up once instead of once per batch.
    l1_lambda = cfg.get("l1_lambda", 0.0)

    history = {"train_loss": [], "val_loss": [], "train_acc": [], "val_acc": [], "lr": []}
    best_val_loss, patience_counter = float("inf"), 0
    # Snapshot of the best weights; stays None until some epoch improves.
    best_state = None

    for epoch in range(cfg["epochs"]):
        # Record the LR that is actually used for this epoch's updates
        # (reading it after scheduler.step() would report the next epoch's LR).
        lr_curr = optimizer.param_groups[0]["lr"]

        # Train
        model.train()
        train_loss, correct, total = 0.0, 0, 0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            preds = model(xb)
            loss = criterion(preds, yb)
            if l1_lambda > 0.0:
                loss = loss + l1_lambda * l1_penalty(model)
            loss.backward()
            optimizer.step()
            # Weight by batch size so the epoch figure is a per-sample mean.
            train_loss += loss.item() * xb.size(0)
            _, predicted = preds.max(1)
            correct += predicted.eq(yb).sum().item()
            total += yb.size(0)
        train_loss /= total
        train_acc = correct / total

        # Validate
        model.eval()
        val_loss, correct, total = 0.0, 0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                preds = model(xb)
                loss = criterion(preds, yb)
                val_loss += loss.item() * xb.size(0)
                _, predicted = preds.max(1)
                correct += predicted.eq(yb).sum().item()
                total += yb.size(0)
        val_loss /= total
        val_acc = correct / total

        scheduler.step()

        history["train_loss"].append(train_loss)
        history["val_loss"].append(val_loss)
        history["train_acc"].append(train_acc)
        history["val_acc"].append(val_acc)
        history["lr"].append(lr_curr)

        if writer is not None:
            writer.add_scalar("Loss/train", train_loss, epoch)
            writer.add_scalar("Loss/val", val_loss, epoch)
            writer.add_scalar("Accuracy/train", train_acc, epoch)
            writer.add_scalar("Accuracy/val", val_acc, epoch)
            writer.add_scalar("LR", lr_curr, epoch)

        # Only log to W&B while a run is active, so the function also works
        # without wandb.init() (mirrors the optional TensorBoard writer).
        if wandb.run is not None:
            wandb.log({
                "epoch": epoch,
                "train_loss": train_loss,
                "val_loss": val_loss,
                "train_acc": train_acc,
                "val_acc": val_acc,
                "lr": lr_curr,
            })

        print(f"Epoch {epoch+1}/{cfg['epochs']} Train loss={train_loss:.4f}, acc={train_acc:.4f} Val loss={val_loss:.4f}, acc={val_acc:.4f} lr={lr_curr:.5f}")

        if val_loss < best_val_loss:
            best_val_loss, patience_counter = val_loss, 0
            # state_dict() returns references to the live parameter tensors,
            # which later optimizer steps overwrite in place. Clone them so
            # the snapshot really preserves this epoch's weights.
            best_state = {name: tensor.detach().clone() for name, tensor in model.state_dict().items()}
        else:
            patience_counter += 1
            if patience_counter >= cfg["patience"]:
                print("Early stopping triggered")
                break

    # best_state is None when no epoch ran or validation loss never improved
    # (e.g. NaN from the start); keep the current weights in that case.
    if best_state is not None:
        model.load_state_dict(best_state)
    return model, history

In [21]:
# -------------------------
# 5) Final Training (train=90k, val=90k, test=90k)
# -------------------------
print("Final training on TRAIN (90,000 samples only, with augmentation). Validation and testing on held-out sets.")

# Hyper-parameters consumed by run_training.
final_cfg = {
    "optimizer": "SGD",   # explicit so the choice is recorded with the run (SGD is also run_training's fallback)
    "lr": 0.01,
    "weight_decay": 5e-4,
    "momentum": 0.9,
    "epochs": 50,
    "l1_lambda": 0.0,
    "patience": 8
}

# One timestamped name shared by the TensorBoard log dir and the W&B run.
tb_name = f"final_train_{datetime.now().strftime('%Y%m%d%H%M%S')}"
writer = SummaryWriter(log_dir=os.path.join(OUT_DIR, "runs", tb_name))

wandb.init(
    project=wandb_project,
    name=tb_name,
    config={
        **final_cfg,
        # Log the batch size the training loader really uses; the previously
        # hard-coded 64 did not match the loader's configuration.
        "batch_size": train_loader.batch_size,
        "model": "FeedForwardMLP",
        "hidden_sizes": [2048,1024,512,256],
        "dropout": 0.5,
    }
)



Final training on TRAIN (90,000 samples only, with augmentation). Validation and testing on held-out sets.


[34m[1mwandb[0m: Currently logged in as: [33minnocent-ics-2025[0m ([33mnimbus-neuron[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Build the final model and train it; run_training hands back the model
# together with its per-epoch history.
final_model = FeedForwardMLP(hidden_sizes=[2048,1024,512,256], dropout=0.5)
try:
    final_model, final_history = run_training(final_model, train_loader, val_loader, final_cfg, writer=writer)
finally:
    # Close the TensorBoard writer even if training fails or is interrupted,
    # so buffered events are flushed and the file handle is released.
    writer.close()

Epoch 1/50 Train loss=2.1285, acc=0.1960 Val loss=1.9423, acc=0.2799 lr=0.01000


In [5]:
# -------------------------
# 6) Evaluate on Test Set
# -------------------------
criterion = nn.CrossEntropyLoss()


def evaluate(model, criterion, dataloader):
    """Compute per-sample mean loss and accuracy of ``model`` over ``dataloader``.

    The model is switched to eval mode and gradients are disabled. Each batch
    is moved to the module-level ``device`` before the forward pass.

    Args:
        model: Module producing class logits.
        criterion: Loss callable taking ``(logits, targets)``.
        dataloader: Iterable of ``(inputs, targets)`` batches.

    Returns:
        Tuple ``(mean_loss, accuracy)`` averaged over all samples seen.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    with torch.no_grad():
        for inputs, targets in dataloader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            logits = model(inputs)
            # Scale the batch loss by the batch size so the final division
            # gives a per-sample average.
            loss_sum += criterion(logits, targets).item() * inputs.size(0)
            predicted = logits.max(1)[1]
            n_correct += (predicted == targets).sum().item()
            n_seen += targets.size(0)
    return loss_sum / n_seen, n_correct / n_seen

# Score the trained model once on the held-out test loader.
test_metrics = evaluate(final_model, criterion, test_loader)
test_loss, test_acc = test_metrics
print(f"Held-out TEST set metrics -> loss:{test_loss:.4f} acc:{test_acc:.4f}")

# Persist the weights (state_dict) under the output directory.
checkpoint_path = os.path.join(OUT_DIR, "final_model_train_only.pth")
torch.save(final_model.state_dict(), checkpoint_path)