In [None]:
import gc
import os
import random
import shutil
import urllib.request
import zipfile

import cv2
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import wandb
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms
from tqdm import tqdm

# Pick the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Default floating-point dtype constant for the notebook.
DTYPE = torch.float32

print(f"Using device: {device}\n")


In [None]:
# --- Dataset location -------------------------------------------------------
# Remote archive, the local file name it is saved under, and the directory
# the archive is extracted into.
DATA_URL = "https://storage.googleapis.com/wandb_dataset/nature_12K.zip"
ZIP_FILE = "nature_12K.zip"
DATA_FOLDER = "nature_12K"

# --- Image / label configuration --------------------------------------------
# Size handed to transforms.Resize and used to build the model's dummy input.
IMAGE_SIZE = (224, 224)

# Display names for the class labels; the length also sizes the model's output
# layer.
# NOTE(review): entry i is used as the name of label index i in the report;
# presumably this matches ImageFolder's class ordering -- confirm.
CLASS_NAMES = [
    "Amphibia",
    "Animalia",
    "Arachnida",
    "Aves",
    "Fungi",
    "Insecta",
    "Mammalia",
    "Mollusca",
    "Plantae",
    "Reptilia",
]

In [None]:
def fetch_and_extract(url: str, zip_name: str, extract_to: str) -> None:
    """
    Download a zip file from `url` and extract it to `extract_to` directory.

    The call is a no-op when `extract_to` already exists as a directory.
    Otherwise the archive is downloaded (unless `zip_name` is already on
    disk), extracted, and the archive file is deleted once extraction has
    succeeded.

    Both steps are staged so that an interrupted run cannot leave behind a
    state that a later call would mistake for a finished one: the download is
    written to `<zip_name>.part` and renamed when complete, and the archive is
    extracted into `<extract_to>.partial` and renamed when complete.

    Args:
        url: Location of the zip archive (any scheme `urllib` can open).
        zip_name: Local path the archive is stored under while it is needed.
        extract_to: Directory that will hold the extracted contents.

    Raises:
        urllib.error.URLError: The download failed.
        zipfile.BadZipFile: `zip_name` is not a valid zip archive.
        OSError: A filesystem operation failed.
    """
    if os.path.isdir(extract_to):
        return

    # Normalise so a trailing separator does not end up inside the staging name.
    target_dir = os.path.normpath(extract_to)

    if not os.path.isfile(zip_name):
        partial_zip = f"{zip_name}.part"
        try:
            with urllib.request.urlopen(url) as response, open(partial_zip, "wb") as sink:
                shutil.copyfileobj(response, sink)
            # Only a fully written download is ever visible under `zip_name`.
            os.replace(partial_zip, zip_name)
        finally:
            if os.path.exists(partial_zip):
                os.remove(partial_zip)

    staging_dir = f"{target_dir}.partial"
    # Drop leftovers from a previous interrupted extraction.
    shutil.rmtree(staging_dir, ignore_errors=True)
    try:
        os.makedirs(staging_dir)
        with zipfile.ZipFile(zip_name) as archive:
            archive.extractall(staging_dir)
        # Publish the result in one step so `extract_to` is never half-filled.
        os.replace(staging_dir, target_dir)
    except BaseException:
        shutil.rmtree(staging_dir, ignore_errors=True)
        raise

    # The archive is removed only after a successful extraction.
    os.remove(zip_name)

In [None]:
def get_data_loaders(
    data_dir: str,
    batch_size: int,
    val_split: float = 0.2,
    test_split: float = 0.1,
    seed: int = 42,
    num_workers: int = 0
):
    """
    Prepare DataLoaders for training, validation, and testing.

    The images under `data_dir` are loaded with `ImageFolder`, resized to
    `IMAGE_SIZE`, converted to tensors and normalised, then split at random
    into three disjoint subsets.

    Args:
        data_dir: Root directory handed to `datasets.ImageFolder`.
        batch_size: Batch size used by all three loaders.
        val_split: Fraction of the whole dataset used for validation.
        test_split: Fraction of the whole dataset used for testing.
        seed: Seed for both index splits and for the shuffling order of the
            training loader.
        num_workers: Worker processes per DataLoader (0 loads in the main
            process).

    Returns:
        A `(train_loader, val_loader, test_loader)` tuple. Only the training
        loader shuffles.

    Raises:
        ValueError: If the split fractions are not each in (0, 1) or do not
            leave any data for training.
    """
    if not (0.0 < val_split < 1.0 and 0.0 < test_split < 1.0):
        raise ValueError(
            f"val_split and test_split must each be in (0, 1); "
            f"got val_split={val_split}, test_split={test_split}"
        )
    if val_split + test_split >= 1.0:
        raise ValueError(
            f"val_split + test_split must be < 1 to leave training data; "
            f"got {val_split + test_split}"
        )

    transform = transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    dataset = datasets.ImageFolder(data_dir, transform=transform)
    indices = list(range(len(dataset)))

    # First carve off the test set, then split the remainder. The second
    # fraction is rescaled so validation is `val_split` of the WHOLE dataset.
    train_val_idx, test_idx = train_test_split(
        indices, test_size=test_split, random_state=seed
    )
    train_idx, val_idx = train_test_split(
        train_val_idx,
        test_size=val_split / (1 - test_split),
        random_state=seed
    )

    # Dedicated generator so the training shuffle order follows `seed`
    # instead of the global torch RNG state.
    shuffle_rng = torch.Generator()
    shuffle_rng.manual_seed(seed)

    return (
        DataLoader(
            Subset(dataset, train_idx),
            batch_size=batch_size,
            shuffle=True,
            num_workers=num_workers,
            generator=shuffle_rng
        ),
        DataLoader(
            Subset(dataset, val_idx),
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers
        ),
        DataLoader(
            Subset(dataset, test_idx),
            batch_size=batch_size,
            shuffle=False,
            num_workers=num_workers
        )
    )

In [None]:
class SimpleCNN(nn.Module):
    """
    Small convolutional classifier.

    The feature extractor is a stack of `Conv2d -> ReLU -> MaxPool2d(2)`
    stages, one per entry of `conv_channels` / `conv_kernels`. Its flattened
    output feeds a two-layer fully connected head with dropout.
    """

    def __init__(
        self,
        conv_channels: list,
        conv_kernels: list,
        hidden_dim: int,
        dropout_rate: float = 0.5,
        num_classes: int = None,
        input_size: tuple = None,
        in_channels: int = 3
    ):
        """
        Args:
            conv_channels: Output channel count of each convolution stage.
            conv_kernels: Kernel size of each convolution stage; must have the
                same length as `conv_channels`.
            hidden_dim: Width of the hidden fully connected layer.
            dropout_rate: Dropout probability applied after the hidden layer.
            num_classes: Number of output logits. Defaults to
                `len(CLASS_NAMES)`.
            input_size: Spatial size of the input images, used to size the
                first linear layer. Defaults to `IMAGE_SIZE`.
            in_channels: Channel count of the input images.

        Raises:
            ValueError: If `conv_channels` and `conv_kernels` differ in length
                (previously the extra entries were silently ignored).
        """
        super().__init__()
        if len(conv_channels) != len(conv_kernels):
            raise ValueError(
                f"conv_channels and conv_kernels must have the same length; "
                f"got {len(conv_channels)} and {len(conv_kernels)}"
            )
        # Resolve the module-level defaults lazily so explicit arguments never
        # depend on them.
        if num_classes is None:
            num_classes = len(CLASS_NAMES)
        if input_size is None:
            input_size = IMAGE_SIZE

        layers = []
        stage_in = in_channels
        for out_ch, kernel in zip(conv_channels, conv_kernels):
            layers += [
                nn.Conv2d(stage_in, out_ch, kernel_size=kernel),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2)
            ]
            stage_in = out_ch
        self.features = nn.Sequential(*layers)

        # Push one dummy image through the feature extractor to learn how many
        # values reach the classifier (batch size is 1, so numel() is the
        # per-sample feature count).
        with torch.no_grad():
            dummy = torch.zeros(1, in_channels, *input_size)
            flat_size = self.features(dummy).numel()

        self.classifier = nn.Sequential(
            nn.Flatten(),
            nn.Linear(flat_size, hidden_dim),
            nn.ReLU(inplace=True),
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the class logits for a batch of images."""
        x = self.features(x)
        return self.classifier(x)


In [None]:
def train_one_epoch(
    model: nn.Module,
    dataloader: DataLoader,
    criterion,
    optimizer
):
    """
    Run a single optimisation pass over `dataloader`.

    Returns a `(mean_batch_loss, accuracy)` pair, where the loss is the sum of
    the per-batch losses divided by the number of batches and the accuracy is
    computed over every sample seen during the pass.
    """
    model.train()
    running_loss = 0.0
    y_pred, y_true = [], []

    progress = tqdm(dataloader, desc="Train", leave=False)
    for batch_images, batch_labels in progress:
        batch_images = batch_images.to(device)
        batch_labels = batch_labels.to(device)

        # Standard step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        logits = model(batch_images)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        running_loss += batch_loss.item()
        y_pred.extend(logits.argmax(1).cpu().tolist())
        y_true.extend(batch_labels.cpu().tolist())

    epoch_acc = accuracy_score(y_true, y_pred)
    epoch_loss = running_loss / len(dataloader)
    return epoch_loss, epoch_acc

def validate(
    model: nn.Module,
    dataloader: DataLoader,
    criterion
):
    """
    Score `model` on `dataloader` without updating its weights.

    Returns a `(mean_batch_loss, accuracy)` pair, where the loss is the sum of
    the per-batch losses divided by the number of batches.
    """
    model.eval()
    running_loss = 0.0
    y_pred, y_true = [], []

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        progress = tqdm(dataloader, desc="Validate", leave=False)
        for batch_images, batch_labels in progress:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            logits = model(batch_images)
            batch_loss = criterion(logits, batch_labels)

            running_loss += batch_loss.item()
            y_pred.extend(logits.argmax(1).cpu().tolist())
            y_true.extend(batch_labels.cpu().tolist())

    epoch_acc = accuracy_score(y_true, y_pred)
    epoch_loss = running_loss / len(dataloader)
    return epoch_loss, epoch_acc

def test_performance(
    model: nn.Module,
    dataloader: DataLoader
):
    """
    Generate classification report and confusion matrix on test set.

    The model is switched to eval mode, predictions are collected for every
    batch in `dataloader`, and both summaries are printed. Nothing is returned.

    The full list of label indices is passed to scikit-learn explicitly, so
    the report always has one row per entry in `CLASS_NAMES` and the confusion
    matrix is always `len(CLASS_NAMES)` square, even when a class appears in
    neither the targets nor the predictions. Without it,
    `classification_report` raises because the number of observed labels no
    longer matches `target_names`.
    """
    model.eval()
    preds, targets = [], []
    with torch.no_grad():
        for images, labels in dataloader:
            outputs = model(images.to(device))
            preds.extend(outputs.argmax(1).cpu().tolist())
            # Labels are not moved to the device here, so no .cpu() is needed.
            targets.extend(labels.tolist())

    label_ids = list(range(len(CLASS_NAMES)))
    print("Classification Report:")
    print(
        classification_report(
            targets, preds, labels=label_ids, target_names=CLASS_NAMES
        )
    )
    print("Confusion Matrix:")
    print(confusion_matrix(targets, preds, labels=label_ids))


In [None]:
def main():
    """
    Run the whole experiment: fetch data, train, validate, and test.

    Steps:
      1. Start a Weights & Biases run with the default hyperparameters below.
      2. Seed Python, NumPy and PyTorch so weight initialisation and dropout
         are repeatable.
      3. Download/extract the dataset and build the three data loaders from
         its `train` folder.
      4. Train `SimpleCNN` with Adam and cross-entropy for `epochs` epochs,
         logging loss and accuracy for each one.
      5. Print the classification report and confusion matrix on the test
         split.

    The run is used as a context manager so it is closed when this function
    exits, including when an exception interrupts training; otherwise the run
    would stay open in a long-lived notebook kernel.
    """
    default_config = {
        "batch_size": 32,
        "learning_rate": 1e-4,
        "conv_channels": [16, 32, 64],
        "conv_kernels": [3, 3, 3],
        "hidden_dim": 256,
        "dropout_rate": 0.5,
        "epochs": 10,
        "seed": 42
    }
    with wandb.init(
        project="Deep_Learning_Assignment2_cs24m022",
        config=default_config
    ) as run:
        cfg = run.config

        # Seed every RNG in play before any weights are created.
        random.seed(cfg.seed)
        np.random.seed(cfg.seed)
        torch.manual_seed(cfg.seed)

        fetch_and_extract(DATA_URL, ZIP_FILE, DATA_FOLDER)
        train_loader, val_loader, test_loader = get_data_loaders(
            os.path.join(DATA_FOLDER, "train"), cfg.batch_size, seed=cfg.seed
        )
        model = SimpleCNN(
            cfg.conv_channels,
            cfg.conv_kernels,
            cfg.hidden_dim,
            cfg.dropout_rate
        ).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=cfg.learning_rate)

        for epoch in range(cfg.epochs):
            train_loss, train_acc = train_one_epoch(
                model, train_loader, criterion, optimizer
            )
            val_loss, val_acc = validate(model, val_loader, criterion)
            # The logged epoch is 0-based; the printed one below is 1-based.
            run.log({
                "epoch": epoch,
                "train_loss": train_loss,
                "train_acc": train_acc,
                "val_loss": val_loss,
                "val_acc": val_acc
            })
            print(
                f"Epoch {epoch+1}/{cfg.epochs}: "
                f"train_loss={train_loss:.4f}, train_acc={train_acc:.4f}, "
                f"val_loss={val_loss:.4f}, val_acc={val_acc:.4f}"
            )

        test_performance(model, test_loader)
    print("Training and evaluation completed.")

# === Entry Point ===
# Run the full pipeline only when this code is executed directly
# (__name__ == "__main__"), not when it is imported as a module.
if __name__ == "__main__":
    main()