# Iteration 1
This notebook demonstrates the `train` and `test` functions on the Tiny ImageNet dataset. The focus is to set up a baseline model that reaches good accuracy.

In [7]:
import torch
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from models.resnet50 import Net
from train_test.train import train, train_losses, train_acc,AlbumentationsWrapper

from train_test.train import train_aug,train_transforms, test_transforms,test_aug,resume_if_available,load_checkpoint,get_latest_checkpoint,save_checkpoint
from train_test.test import test, test_losses
from torchsummary import summary

In [8]:
import os, shutil
from pathlib import Path

def build_tiny_imagenet_val_per_class(root):
    """
    Reorganise the Tiny ImageNet validation split into one folder per class.

    Reads ``<root>/val/val_annotations.txt`` (tab-separated; the first two
    fields of every line are the image file name and its class id) and copies
    each listed image from ``<root>/val/images`` to
    ``<root>/val_per_class/<class>/images/<image>``.

    The tree is assembled in a sibling ``val_per_class.partial`` directory and
    renamed into place only once every image has been copied, so a failed or
    interrupted run never leaves a half-filled ``val_per_class`` that later
    calls would mistake for a finished one.

    Args:
        root: Path (str or Path) of the extracted dataset directory.

    Returns:
        str: Path of the ``val_per_class`` directory. If it already exists it
        is returned as-is and nothing is copied.

    Raises:
        FileNotFoundError: if the annotation file or a listed image is missing.
        ValueError: if a non-blank annotation line has fewer than two fields.
    """
    val_dir = Path(root) / "val"
    images_dir = val_dir / "images"
    ann_path = val_dir / "val_annotations.txt"
    out_root = Path(root) / "val_per_class"

    if out_root.exists():
        return str(out_root)

    # Parse the annotations BEFORE touching the filesystem: if the file is
    # missing we must not leave an empty output directory behind.
    mapping = {}
    with open(ann_path) as f:
        for line_no, line in enumerate(f, start=1):
            fields = line.strip().split("\t")
            if fields == [""]:
                continue  # tolerate blank lines (e.g. a trailing newline)
            if len(fields) < 2:
                raise ValueError(
                    f"{ann_path}:{line_no}: expected '<image>\\t<class>\\t...', got {line!r}"
                )
            mapping[fields[0]] = fields[1]

    # Build into a staging directory; discard leftovers of an earlier failed run.
    staging = out_root.with_name(out_root.name + ".partial")
    if staging.exists():
        shutil.rmtree(staging)
    staging.mkdir(parents=True)

    # Create each class directory once instead of once per image.
    for cls in set(mapping.values()):
        (staging / cls / "images").mkdir(parents=True, exist_ok=True)

    for img_name, cls in mapping.items():
        # use hardlink/symlink to save space if you prefer
        shutil.copy2(images_dir / img_name, staging / cls / "images" / img_name)

    # Same parent directory, so this is a plain rename on one filesystem.
    staging.rename(out_root)
    return str(out_root)

In [9]:
# Tiny ImageNet is a much smaller ImageNet subset (200 classes, 64x64 images)
import wget
import zipfile
import os

def download_tiny_imagenet(data_dir='./data'):
    """
    Download Tiny ImageNet (200 classes, 64x64 images)
    Total size: ~240MB

    The archive is fetched into ``data_dir``, extracted there, and then
    deleted. If ``<data_dir>/tiny-imagenet-200`` already exists nothing is
    downloaded.

    On any failure (including a keyboard interrupt) the partially extracted
    directory and the archive are removed before the error propagates, so the
    next call starts from scratch instead of silently reusing a broken dataset.

    Args:
        data_dir: Directory that will contain the ``tiny-imagenet-200`` folder.

    Returns:
        str: Path of the extracted ``tiny-imagenet-200`` directory.

    Raises:
        zipfile.BadZipFile: if the downloaded file is not a valid zip archive.
        Exception: whatever ``wget.download`` raises on a failed download.
    """
    import shutil  # local import: only needed for failure clean-up

    url = 'http://cs231n.stanford.edu/tiny-imagenet-200.zip'
    zip_path = os.path.join(data_dir, 'tiny-imagenet-200.zip')
    extract_path = os.path.join(data_dir, 'tiny-imagenet-200')

    if os.path.exists(extract_path):
        return extract_path

    print("Downloading Tiny ImageNet...")
    os.makedirs(data_dir, exist_ok=True)

    # A stale archive left by an earlier aborted run must not be reused.
    if os.path.exists(zip_path):
        os.remove(zip_path)

    try:
        # Download
        wget.download(url, zip_path)

        # Extract
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(data_dir)
    except BaseException:
        # extract_path did not exist on entry, so anything there now is a
        # partial extraction made by this call and is safe to discard.
        shutil.rmtree(extract_path, ignore_errors=True)
        raise
    finally:
        # Clean up zip file whether or not extraction succeeded
        if os.path.exists(zip_path):
            os.remove(zip_path)

    print(f"\nTiny ImageNet downloaded to {extract_path}")
    return extract_path

# Fetch the dataset (skipped when already extracted) and regroup the
# validation images into one folder per class so ImageFolder can read them.
tiny_imagenet_path = download_tiny_imagenet()
val_root = build_tiny_imagenet_val_per_class(tiny_imagenet_path)

# Create datasets. No transform is attached here; the datasets are wrapped
# with the augmentation pipelines in a later cell.
train_dataset = datasets.ImageFolder(os.path.join(tiny_imagenet_path, 'train'), transform=None)
val_dataset = datasets.ImageFolder(val_root, transform=None)

# Quick sanity check of the split sizes and class count.
print(
    f"Train samples: {len(train_dataset)}\n"
    f"Val samples: {len(val_dataset)}\n"
    f"Number of classes: {len(train_dataset.classes)}"
)

Train samples: 100000
Val samples: 10000
Number of classes: 200


In [10]:
# Dataset and DataLoader
# Pair each raw ImageFolder dataset with its augmentation pipeline.
train_ds = AlbumentationsWrapper(train_dataset, train_aug)
test_ds = AlbumentationsWrapper(val_dataset, test_aug)

# Only the training loader shuffles; both load in the main process (num_workers=0).
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=64,
    shuffle=True,
    num_workers=0,
)
test_loader = DataLoader(
    dataset=test_ds,
    batch_size=64,
    shuffle=False,
    num_workers=0,
)


In [11]:
import numpy as np
import torch
import matplotlib.pyplot as plt

# CIFAR-10 stats used in your Normalize
# NOTE(review): this notebook trains on Tiny ImageNet; confirm these values
# match the Normalize step inside train_aug/test_aug, otherwise the preview
# colours will be off.
MEAN = (0.4914, 0.4822, 0.4465)
STD  = (0.2470, 0.2435, 0.2616)

def show_batch(tensors, labels=None, n=20, mean=MEAN, std=STD, cols=10):
    """
    Display up to ``n`` images from a normalized batch in a grid.

    Args:
        tensors: (B, C, H, W) batch, or a single (C, H, W) image, normalized
            by ``mean``/``std``. A NumPy array is converted to a tensor.
        labels:  (B,) optional; ``int(labels[i])`` is used as the title of
            image ``i``. A single non-list label is accepted for a single image.
        n: Maximum number of images to draw.
        mean: Per-channel mean used to undo the normalization (default MEAN).
        std: Per-channel std used to undo the normalization (default STD).
        cols: Number of grid columns.

    Returns:
        None. Nothing is drawn when there are no images to show
        (empty batch or ``n <= 0``).
    """
    if isinstance(tensors, np.ndarray):
        tensors = torch.from_numpy(tensors)
    if tensors.dim() == 3:
        # Promote a single image to a batch of one (and its label to a list).
        tensors = tensors.unsqueeze(0)
        if labels is not None and not isinstance(labels, (list, tuple)):
            labels = [labels]

    b = min(n, tensors.size(0))
    if b <= 0:
        # Zero rows would mean a zero-height figure; there is nothing to draw.
        return

    mean_t = torch.tensor(mean, device=tensors.device).view(1, -1, 1, 1)
    std_t = torch.tensor(std, device=tensors.device).view(1, -1, 1, 1)

    # de-normalize to [0,1] range
    imgs = tensors * std_t + mean_t
    imgs = imgs.clamp(0, 1)  # safe for display

    rows = (b + cols - 1) // cols  # ceil(b / cols)
    plt.figure(figsize=(1.6 * cols, 1.6 * rows))
    for i in range(b):
        plt.subplot(rows, cols, i + 1)
        img = imgs[i].permute(1, 2, 0).detach().cpu().numpy()  # HWC
        plt.imshow(img)
        if labels is not None:
            plt.title(int(labels[i]))
        plt.axis('off')
    plt.tight_layout()
    plt.show()

# Let us visualize a few sample images


#batch_data, batch_label = next(iter(train_loader))
#print(batch_data.shape)  # Should be (batch_size, 3, H, W)
#show_batch(batch_data, batch_label, n=20)

#@show_batch(batch_data, batch_label, n=20)
#fig = plt.figure(figsize=(15, 10))  # Increased figure size




In [12]:
from torch.optim.lr_scheduler import StepLR
# Model, device, optimizer
# Train on the GPU when one is visible, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda") if use_cuda else torch.device("cpu")

# 200 output classes, matching the class count of the Tiny ImageNet datasets.
model = Net(num_classes=200)
model = model.to(device)

# SGD with momentum and L2 weight decay.
optimizer = optim.SGD(
    model.parameters(),
    lr=0.1,
    momentum=0.9,
    weight_decay=5e-4,
)

# Layer-by-layer summary for a 3x244x244 input.
# NOTE(review): 244 may be a typo for the usual 224 - confirm against the
# resize used in train_aug/test_aug.
summary(model, input_size=(3, 244, 244))  # Adjust input size for ImageNet

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 122, 122]           9,408
       BatchNorm2d-2         [-1, 64, 122, 122]             128
              ReLU-3         [-1, 64, 122, 122]               0
           Dropout-4         [-1, 64, 122, 122]               0
         MaxPool2d-5           [-1, 64, 61, 61]               0
            Conv2d-6           [-1, 64, 61, 61]           4,096
       BatchNorm2d-7           [-1, 64, 61, 61]             128
              ReLU-8           [-1, 64, 61, 61]               0
           Dropout-9           [-1, 64, 61, 61]               0
           Conv2d-10           [-1, 64, 61, 61]          36,864
      BatchNorm2d-11           [-1, 64, 61, 61]             128
             ReLU-12           [-1, 64, 61, 61]               0
          Dropout-13           [-1, 64, 61, 61]               0
           Conv2d-14          [-1, 256,

In [None]:
from torch.optim.lr_scheduler import StepLR

def run(model, optimizer, train_loader, val_loader, epochs=50, ckpt_dir="./checkpoints",
        device="cuda", scheduler=None):
    """
    Train and validate ``model`` for up to ``epochs`` epochs with checkpointing.

    Each epoch calls ``train`` and ``test``, steps the LR scheduler, writes a
    regular checkpoint through ``save_checkpoint`` and, whenever validation
    accuracy improves, also writes ``<ckpt_dir>/best.pth``.

    If ``get_latest_checkpoint`` finds a checkpoint in ``ckpt_dir``, the
    previous ``best_val_acc`` is read from it and ``resume_if_available``
    supplies the epoch to start from.

    Args:
        model: Network to train; moved to ``device``.
        optimizer: Optimizer bound to ``model``'s parameters.
        train_loader: Loader passed to ``train``.
        val_loader: Loader passed to ``test``.
        epochs: Exclusive upper bound of the epoch range.
        ckpt_dir: Directory for checkpoints (created if missing).
        device: Device passed to ``model.to``, ``train`` and ``test``.
        scheduler: LR scheduler; defaults to ``StepLR(step_size=10, gamma=0.1)``.

    Returns:
        None.
    """
    os.makedirs(ckpt_dir, exist_ok=True)
    model.to(device)

    # default if none provided
    if scheduler is None:
        scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

    # resume + carry over previous best
    best_val_acc = 0.0
    latest = get_latest_checkpoint(ckpt_dir)
    if latest is not None:
        try:
            # NOTE: torch.load unpickles the file; only point ckpt_dir at
            # checkpoints this project produced.
            chk = torch.load(latest, map_location="cpu")
            best_val_acc = float(chk.get("best_val_acc", 0.0))
        except Exception as exc:
            # Best-effort: keep going, but say so instead of silently resetting
            # the best score (which would let a worse model replace best.pth).
            print(f"Warning: could not read best_val_acc from {latest}: {exc!r}; using 0.0")

    start_epoch = resume_if_available(ckpt_dir, model, optimizer, scheduler)

    for epoch in range(start_epoch, epochs):
        print(f"\nEPOCH: {epoch}")

        train_loss, train_acc_epoch = train(model, device, train_loader, optimizer, epoch)
        val_loss, val_acc = test(model, device, val_loader)

        # step LR (use val_loss if your scheduler needs a metric)
        try:
            scheduler.step()
        except TypeError:
            scheduler.step(val_loss)

        # Update the running best BEFORE writing the regular checkpoint, so the
        # value restored on resume already includes this epoch's result.
        is_new_best = val_acc > best_val_acc
        if is_new_best:
            best_val_acc = float(val_acc)

        save_checkpoint(
            ckpt_dir, model, optimizer, epoch, scheduler,
            train_loss=float(train_loss),
            train_acc=float(train_acc_epoch),
            val_loss=float(val_loss),
            val_acc=float(val_acc),
            best_val_acc=float(best_val_acc),
        )

        if is_new_best:
            torch.save(
                {
                    "epoch": epoch,
                    "model_state": model.state_dict(),
                    "optimizer_state": optimizer.state_dict(),
                    "scheduler_state": scheduler.state_dict(),
                    "val_acc": best_val_acc,
                    "best_val_acc": best_val_acc,
                },
                os.path.join(ckpt_dir, "best.pth"),
            )

        print(
            f"[Epoch {epoch}] "
            f"train_loss={train_loss:.4f}  train_acc={train_acc_epoch:.2f}%  "
            f"val_loss={val_loss:.4f}    val_acc={val_acc:.2f}%  "
            f"best={best_val_acc:.2f}%"
        )

EPOCH: 0


Loss=9.436583518981934 Batch_id=1 Accuracy=0.00:   0%|          | 2/1563 [03:18<43:31:38, 100.38s/it]

In [None]:
# Launch training: 50 epochs with the default StepLR schedule, resuming from
# ./checkpoints when a checkpoint is present.
run(model, optimizer, train_loader, test_loader,
    epochs=50, ckpt_dir="./checkpoints", device=device)

In [None]:
# Plotting results
import matplotlib.pyplot as plt
# `test_acc` is plotted below but was never imported at the top of the
# notebook (only `test` and `test_losses` are), so this cell raised NameError
# on a fresh kernel.
# NOTE(review): assumes train_test.test exposes `test_acc` next to
# `test_losses`, mirroring train_test.train - confirm.
from train_test.test import test_acc

fig, axs = plt.subplots(2, 2, figsize=(15, 10))

# Left column: training curves. The loss entries are converted with .item().
axs[0, 0].plot([t.item() for t in train_losses])
axs[0, 0].set_title("Training Loss")
axs[1, 0].plot(train_acc)
axs[1, 0].set_title("Training Accuracy")

# Right column: evaluation curves.
axs[0, 1].plot(test_losses)
axs[0, 1].set_title("Test Loss")
axs[1, 1].plot(test_acc)
axs[1, 1].set_title("Test Accuracy")
plt.show()