In [1]:
# Setting up
import os, random, math, time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms, models

from sklearn.metrics import confusion_matrix, classification_report

def seed_everything(seed=42):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = False
    torch.backends.cudnn.benchmark = True

seed_everything(42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device


device(type='cuda')

In [2]:
# Cell 2: CIFAR-10 data and train/val split
import numpy as np
from collections import defaultdict
from torch.utils.data import Subset, DataLoader
from torchvision import datasets, transforms

# CIFAR 10 classes
classes = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
print("Classes:", classes)

# Normalization
mean, std = (0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)

# Baseline transformerr
tfm_base_train = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])
tfm_base_test = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Load CIFAR 10
root = "./data"
train_full = datasets.CIFAR10(root=root, train=True, download=True, transform=tfm_base_train)
test_set   = datasets.CIFAR10(root=root, train=False, download=True, transform=tfm_base_test)

print(f"Full train: {len(train_full)}, Test: {len(test_set)}")

# Stratified split
def stratified_split(dataset, val_frac=0.1, seed=42):
    rng = np.random.default_rng(seed)
    buckets = defaultdict(list)
    for idx in range(len(dataset)):
        _, y = dataset[idx]
        buckets[int(y)].append(idx)

    train_idx, val_idx = [], []
    for y, idxs in buckets.items():
        idxs = np.array(idxs)
        rng.shuffle(idxs)
        n_val = int(len(idxs) * val_frac)
        val_idx.extend(idxs[:n_val].tolist())
        train_idx.extend(idxs[n_val:].tolist())

    return train_idx, val_idx

train_idx, val_idx = stratified_split(train_full)
print(f"Train indices: {len(train_idx)}, Val indices: {len(val_idx)}")
print("Data is ready")


Classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 170M/170M [00:06<00:00, 25.4MB/s]


Full train: 50000, Test: 10000
Train indices: 45000, Val indices: 5000
âœ… Data ready!


In [3]:
# Dataloaders
import torch
from torch.utils.data import DataLoader, Subset


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

def seed_everything(seed=42):
    import random
    import numpy as np
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

seed_everything(42)

# Creating dataloaders
def make_loaders(train_full, test_set, train_idx, val_idx, batch_size=128):
    train_set = Subset(train_full, train_idx)
    val_set   = Subset(train_full, val_idx)

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True,
                             num_workers=2, pin_memory=True)
    val_loader   = DataLoader(val_set, batch_size=batch_size, shuffle=False,
                             num_workers=2, pin_memory=True)
    test_loader  = DataLoader(test_set, batch_size=batch_size, shuffle=False,
                             num_workers=2, pin_memory=True)
    return train_loader, val_loader, test_loader

# Baseline dataloaders
train_loader, val_loader, test_loader = make_loaders(train_full, test_set, train_idx, val_idx)

print(f"Dataloaders are ready")
print(f"Train batches: {len(train_loader)}, Val: {len(val_loader)}, Test: {len(test_loader)}")
print(f"Batch shape:")
x, y = next(iter(train_loader))
print(f"First batch - x: {x.shape}, y: {y.shape}")


Using device: cuda
âœ… Dataloaders ready!
Train batches: 352, Val: 40, Test: 79
Batch shape: let's check first batch
First batch - x: torch.Size([128, 3, 32, 32]), y: torch.Size([128])


In [4]:

import torch.nn as nn
import torch.nn.functional as F

# Simple CNN - 3 conv blocks
class SmallCNN(nn.Module):
    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 32, 3, padding=1), nn.ReLU(),
            nn.Conv2d(32, 32, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2)  # 32x32 -> 16x16
        )
        self.conv2 = nn.Sequential(
            nn.Conv2d(32, 64, 3, padding=1), nn.ReLU(),
            nn.Conv2d(64, 64, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2)  # 16x16 -> 8x8
        )
        self.conv3 = nn.Sequential(
            nn.Conv2d(64, 128, 3, padding=1), nn.ReLU(),
            nn.MaxPool2d(2)  # 8x8 -> 4x4
        )
        self.head = nn.Sequential(
            nn.Flatten(),
            nn.Linear(128 * 4 * 4, 256), nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.head(x)
        return x

# Training util
@torch.no_grad()
def evaluate(model, loader):
    model.eval()
    total, correct = 0, 0
    all_y, all_p = [], []
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        logits = model(x)
        p = logits.argmax(1)
        total += y.size(0)
        correct += (p == y).sum().item()
        all_y.append(y.cpu().numpy())
        all_p.append(p.cpu().numpy())
    acc = correct / total
    y_true = np.concatenate(all_y)
    y_pred = np.concatenate(all_p)
    return acc, y_true, y_pred

def train_one_epoch(model, loader, optimizer, criterion):
    model.train()
    running_loss = 0.0
    total, correct = 0, 0
    for x, y in loader:
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad(set_to_none=True)
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        optimizer.step()
        running_loss += loss.item() * y.size(0)
        total += y.size(0)
        correct += (logits.argmax(1) == y).sum().item()
    return running_loss / total, correct / total

print(" Baseline model and utilities ready")
print(f"Model params: {sum(p.numel() for p in SmallCNN().parameters()):,}")


âœ… Baseline model + utilities ready!
Model params: 666,538


In [5]:
# Train Baseline SmallCNN - 15 epochs
import torch.optim as optim
import numpy as np

# Initialization
baseline = SmallCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(baseline.parameters(), lr=1e-3, weight_decay=1e-4)

# Track best validation accuracy
best_val_acc = 0.0
best_state = None

print("Starting baseline training...")
print("-" * 60)

for epoch in range(1, 16):
    # Train
    train_loss, train_acc = train_one_epoch(baseline, train_loader, optimizer, criterion)

    # Validate
    val_acc, _, _ = evaluate(baseline, val_loader)

    # Save best model
    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_state = {k: v.cpu().clone() for k, v in baseline.state_dict().items()}

    print(f"Epoch {epoch:2d} | Loss: {train_loss:.4f} | Train: {train_acc:.3f} | Val: {val_acc:.3f}")

# Load best model
baseline.load_state_dict(best_state)
print("\n Baseline training complete")
print(f"Best val accuracy: {best_val_acc:.3f}")


Starting baseline training...
------------------------------------------------------------
Epoch  1 | Loss: 1.5260 | Train: 0.436 | Val: 0.576
Epoch  2 | Loss: 1.0917 | Train: 0.609 | Val: 0.669
Epoch  3 | Loss: 0.8691 | Train: 0.696 | Val: 0.716
Epoch  4 | Loss: 0.7354 | Train: 0.745 | Val: 0.760
Epoch  5 | Loss: 0.6318 | Train: 0.778 | Val: 0.777
Epoch  6 | Loss: 0.5516 | Train: 0.806 | Val: 0.782
Epoch  7 | Loss: 0.4805 | Train: 0.830 | Val: 0.785
Epoch  8 | Loss: 0.4151 | Train: 0.852 | Val: 0.790
Epoch  9 | Loss: 0.3619 | Train: 0.871 | Val: 0.793
Epoch 10 | Loss: 0.3064 | Train: 0.891 | Val: 0.789
Epoch 11 | Loss: 0.2728 | Train: 0.901 | Val: 0.797
Epoch 12 | Loss: 0.2314 | Train: 0.917 | Val: 0.794
Epoch 13 | Loss: 0.2129 | Train: 0.923 | Val: 0.789
Epoch 14 | Loss: 0.1863 | Train: 0.934 | Val: 0.801
Epoch 15 | Loss: 0.1798 | Train: 0.936 | Val: 0.788

âœ… Baseline training complete!
Best val accuracy: 0.801


In [6]:
# Baseline test evaluation + confusion matrix
from sklearn.metrics import confusion_matrix, classification_report
import numpy as np

# Test evaluation
test_acc, y_true, y_pred = evaluate(baseline, test_loader)
print(f" Baseline TEST accuracy: {test_acc:.4f} ({test_acc*100:.1f}%)")

# Confusion matrix and classification report
cm = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix (rows=true, cols=pred):")
print(cm)

print("\nDetailed Classification Report:")
print(classification_report(y_true, y_pred, target_names=classes, digits=4))

# Save baseline results
baseline_results = {
    "test_accuracy": float(test_acc),
    "confusion_matrix": cm.tolist()
}
print(f"\n Baseline complete Test acc: {test_acc:.1%}")


âœ… Baseline TEST accuracy: 0.7873 (78.7%)

Confusion Matrix (rows=true, cols=pred):
[[822  10  48  13  18   1   8   7  42  31]
 [ 16 884   1   4   3   1   7   0  14  70]
 [ 54   5 695  44  70  53  55  10   4  10]
 [ 22   4  55 605  73 123  71  18   9  20]
 [ 10   2  41  41 813  20  44  19   4   6]
 [  9   2  32 155  42 692  25  33   2   8]
 [  2   3  32  39  32  18 862   3   6   3]
 [ 16   3  35  48  76  51   7 753   1  10]
 [ 56  20   8   7   8   5   7   4 857  28]
 [ 28  49   2   8   1   2   6   5   9 890]]

Detailed Classification Report:
              precision    recall  f1-score   support

    airplane     0.7942    0.8220    0.8079      1000
  automobile     0.9002    0.8840    0.8920      1000
        bird     0.7323    0.6950    0.7132      1000
         cat     0.6276    0.6050    0.6161      1000
        deer     0.7157    0.8130    0.7612      1000
         dog     0.7164    0.6920    0.7040      1000
        frog     0.7894    0.8620    0.8241      1000
       horse     0

In [7]:
# ResNet18 transfer learning and augmentation
import torchvision.models as models
from torchvision import transforms

print("Setting up ResNet18 (ImageNet pretrained)...")

# ResNet transforms
mean_imagenet = (0.485, 0.456, 0.406)
std_imagenet  = (0.229, 0.224, 0.225)


tfm_strong_train = transforms.Compose([
    transforms.Resize(256),
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean_imagenet, std_imagenet),
])

# Test transforms without augmentation
tfm_strong_test = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean_imagenet, std_imagenet),
])

# Reload datasets with the new transforms
train_full_strong = datasets.CIFAR10(root="./data", train=True, download=False, transform=tfm_strong_train)
test_set_strong   = datasets.CIFAR10(root="./data", train=False, download=False, transform=tfm_strong_test)

# Same stratified split indices work
strong_train_loader, strong_val_loader, strong_test_loader = make_loaders(
    train_full_strong, test_set_strong, train_idx, val_idx, batch_size=64
)

print(f"ResNet dataloaders ready Batch size: 64")
print(f"Train batches: {len(strong_train_loader)}, Val: {len(strong_val_loader)}")


Setting up ResNet18 (ImageNet pretrained)...
ResNet dataloaders ready Batch size: 64
Train batches: 704, Val: 79


In [8]:
#Train ResNet18 (frozen backbone --- full fine-tune)
import torch.optim as optim
import torch.nn as nn

# Load pretrained ResNet18, replace classifier
resnet18 = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
resnet18.fc = nn.Linear(resnet18.fc.in_features, 10)  # CIFAR-10 classes
resnet18 = resnet18.to(device)

print(f"ResNet18 total params: {sum(p.numel() for p in resnet18.parameters()):,}")
print("Phase 1: Freeze backbone, train classifier only")

# Phase 1: Freeze backbone, train classifier
for name, param in resnet18.named_parameters():
    if not name.startswith('fc.'):
        param.requires_grad = False

criterion = nn.CrossEntropyLoss(label_smoothing=0.1)
optimizer = optim.AdamW([p for p in resnet18.parameters() if p.requires_grad],
                       lr=3e-3, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=8)

best_val_acc = 0.0
best_state = None

for epoch in range(1, 9):
    train_loss, train_acc = train_one_epoch(resnet18, strong_train_loader, optimizer, criterion)
    val_acc, _, _ = evaluate(resnet18, strong_val_loader)
    scheduler.step()

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_state = {k: v.cpu().clone() for k, v in resnet18.state_dict().items()}

    print(f"Frozen [{epoch:2d}/8] | Loss: {train_loss:.3f} | Train: {train_acc:.3f} | Val: {val_acc:.3f}")

resnet18.load_state_dict(best_state)
print(f"\n Phase 1 complete! Best val: {best_val_acc:.1%}")

print("\nPhase 2: Unfreeze all, fine-tune (lr=1e-4)...")

# Phase 2: Unfreeze everything, lower LR
for param in resnet18.parameters():
    param.requires_grad = True

optimizer = optim.AdamW(resnet18.parameters(), lr=1e-4, weight_decay=1e-4)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=12)

for epoch in range(1, 13):
    train_loss, train_acc = train_one_epoch(resnet18, strong_train_loader, optimizer, criterion)
    val_acc, _, _ = evaluate(resnet18, strong_val_loader)
    scheduler.step()

    if val_acc > best_val_acc:
        best_val_acc = val_acc
        best_state = {k: v.cpu().clone() for k, v in resnet18.state_dict().items()}

    print(f"Fine-tune [{epoch:2d}/12] | Loss: {train_loss:.3f} | Train: {train_acc:.3f} | Val: {val_acc:.3f}")

resnet18.load_state_dict(best_state)
print(f"\n ResNet18 training COMPLETE! Final best val: {best_val_acc:.1%}")


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 44.7M/44.7M [00:00<00:00, 86.9MB/s]


ResNet18 total params: 11,181,642
Phase 1: Freeze backbone, train classifier only...
Frozen [ 1/8] | Loss: 1.194 | Train: 0.713 | Val: 0.744
Frozen [ 2/8] | Loss: 1.119 | Train: 0.750 | Val: 0.743
Frozen [ 3/8] | Loss: 1.106 | Train: 0.757 | Val: 0.764
Frozen [ 4/8] | Loss: 1.086 | Train: 0.765 | Val: 0.777
Frozen [ 5/8] | Loss: 1.072 | Train: 0.771 | Val: 0.790
Frozen [ 6/8] | Loss: 1.055 | Train: 0.781 | Val: 0.774
Frozen [ 7/8] | Loss: 1.040 | Train: 0.786 | Val: 0.785
Frozen [ 8/8] | Loss: 1.032 | Train: 0.789 | Val: 0.785

âœ… Phase 1 complete! Best val: 79.0%

Phase 2: Unfreeze all, fine-tune (lr=1e-4)...
Fine-tune [ 1/12] | Loss: 0.809 | Train: 0.893 | Val: 0.933
Fine-tune [ 2/12] | Loss: 0.679 | Train: 0.947 | Val: 0.938
Fine-tune [ 3/12] | Loss: 0.629 | Train: 0.967 | Val: 0.953
Fine-tune [ 4/12] | Loss: 0.597 | Train: 0.979 | Val: 0.955
Fine-tune [ 5/12] | Loss: 0.578 | Train: 0.985 | Val: 0.956
Fine-tune [ 6/12] | Loss: 0.559 | Train: 0.992 | Val: 0.959
Fine-tune [ 7/12] | L

In [9]:
#Final test and  comparison
from sklearn.metrics import confusion_matrix, classification_report
import pandas as pd
import json

# Test ResNet18
resnet_test_acc, y_true_res, y_pred_res = evaluate(resnet18, strong_test_loader)
print(f"ResNet18 TEST accuracy: {resnet_test_acc:.4f} ({resnet_test_acc*100:.1f}%)")

# ResNet confusion matrix and report
cm_res = confusion_matrix(y_true_res, y_pred_res)
print("\nResNet Confusion Matrix (first 5 rows):")
print(cm_res[:5])
print("\nResNet Classification Report (top 5):")
report_res = classification_report(y_true_res, y_pred_res, target_names=classes, digits=4, output_dict=True)
for cls in classes[:5]:
    print(f"{cls}: {report_res[cls]['f1-score']:.4f}")

# Comparing
baseline_test_acc = 0.7873
baseline_val_acc = 0.801
resnet_val_acc = 0.968

comparison = pd.DataFrame({
    "Model": ["Baseline CNN", "ResNet18 TL"],
    "Test Acc": [f"{baseline_test_acc:.1%}", f"{resnet_test_acc:.1%}"],
    "Val Acc": [f"{baseline_val_acc:.1%}", f"{resnet_val_acc:.1%}"],
    "Improvement": ["-", f"+{((resnet_test_acc/baseline_test_acc-1)*100):.0f}%"]
})
print("\nModel Comparison:")
print(comparison.to_markdown(index=False))

# Saving results JSON for readme
final_results = {
    "baseline_test_acc": float(baseline_test_acc),
    "resnet_test_acc": float(resnet_test_acc),
    "classes": classes
}
print("\nResults saved! Copy final_results to metrics.json")
print(json.dumps(final_results, indent=2))


ResNet18 TEST accuracy: 0.9640 (96.4%)

ResNet Confusion Matrix (first 5 rows):
[[973   0   5   1   0   1   1   2  13   4]
 [  1 982   0   1   0   0   0   0   3  13]
 [  6   0 961   8   9   7   5   3   1   0]
 [  3   0   9 922  14  40   5   2   4   1]
 [  1   0   4   9 976   1   2   7   0   0]]

ResNet Classification Report (top 5):
airplane: 0.9725
automobile: 0.9796
bird: 0.9639
cat: 0.9160
deer: 0.9654

Model Comparison:
| Model        | Test Acc   | Val Acc   | Improvement   |
|:-------------|:-----------|:----------|:--------------|
| Baseline CNN | 78.7%      | 80.1%     | -             |
| ResNet18 TL  | 96.4%      | 96.8%     | +22%          |

Results saved! Copy final_results to metrics.json
{
  "baseline_test_acc": 0.7873,
  "resnet_test_acc": 0.964,
  "classes": [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck"
  ]
}
