## Downloading datasets

In [None]:
# Fetch the nature_12K archive from Google Cloud Storage, then extract it
# into the working directory (-q suppresses unzip's per-file listing).
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip
!unzip -q nature_12K.zip

--2025-04-19 04:04:38--  https://storage.googleapis.com/wandb_datasets/nature_12K.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 74.125.143.207, 173.194.69.207, 173.194.79.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|74.125.143.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3816687935 (3.6G) [application/zip]
Saving to: ‘nature_12K.zip’


2025-04-19 04:06:11 (39.3 MB/s) - ‘nature_12K.zip’ saved [3816687935/3816687935]



# PART A : Training CNN from Scratch

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split
import wandb

# === Utility ===
def get_activation(name):
    """Build a fresh activation module from its (case-insensitive) name.

    Recognised names are 'relu', 'gelu', 'silu' and 'mish'. Any other name
    falls back to ``nn.ReLU``.
    """
    factories = {
        'relu': nn.ReLU,
        'gelu': nn.GELU,
        'silu': nn.SiLU,
        'mish': nn.Mish,
    }
    # Look up the class first, then instantiate only the one that was asked for.
    factory = factories.get(name.lower(), nn.ReLU)
    return factory()

## Question 1: Building the CNN Model

## Explanation:

*   This implements a flexible CNN with configurable:
  *   Number of convolutional layers (conv_layers)
  *   Filters per layer (filters_per_layer)
  *   Kernel size (kernel_size)
  *   Activation functions (activation)
  *   Dense layer neurons (dense_neurons)

*   Each conv block contains:
  *   Convolutional layer with padding to maintain spatial dimensions
  *   Activation function (ReLU, GELU, SiLU, or Mish)
  *   Max pooling with 2x2 window

*   The model ends with:
  *   Flattening layer

  *   One dense layer with ReLU activation

  *   Output layer with neurons equal to number of classes

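To answer the question about computations and parameters (stride-1 convolutions, biases included):

1. Total computations = Sum of (operations in each conv layer) + (operations in the dense layers). A conv layer with a k×k kernel, C_in input channels, C_out filters and an H×W output map performs about k·k·C_in·C_out·H·W multiply-accumulates; a dense layer with n_in inputs and n_out outputs performs about n_in·n_out.

2. Total parameters = Sum of (weights and biases in conv layers) + (weights and biases in dense layers). A conv layer holds (k·k·C_in + 1)·C_out parameters; a dense layer holds (n_in + 1)·n_out.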

In [None]:
class CustomCNN(nn.Module):
    """Configurable CNN classifier.

    The network is a stack of ``conv -> activation -> 2x2 max-pool`` blocks,
    followed by one hidden dense layer (ReLU) and a linear output layer that
    produces ``num_classes`` logits.

    Args:
        input_channels: Number of channels in the input images.
        num_classes: Number of output logits.
        conv_layers: Number of conv blocks to build.
        filters_per_layer: Output channels of each conv block; indexed from 0
            to ``conv_layers - 1``, so it needs at least that many entries.
        kernel_size: Convolution kernel size, either an int or an
            ``(height, width)`` pair.
        activation: Activation name for the conv blocks, resolved by
            ``get_activation``.
        dense_neurons: Width of the hidden dense layer.
        input_size: ``(height, width)`` of the input images; used to size the
            first dense layer.
    """

    def __init__(
        self,
        input_channels=3,
        num_classes=10,
        conv_layers=3,
        filters_per_layer=(32, 64, 128),
        kernel_size=3,
        activation='relu',
        dense_neurons=256,
        input_size=(64, 64)
    ):
        super().__init__()

        # Accept either an int or an (h, w) pair, as nn.Conv2d does.
        if isinstance(kernel_size, int):
            kernel_h = kernel_w = kernel_size
        else:
            kernel_h, kernel_w = kernel_size
        # Derive the padding from the kernel: a fixed padding of 1 keeps the
        # spatial size only for 3x3 kernels, which made the flattened-size
        # bookkeeping below wrong for every other kernel size.
        pad_h, pad_w = kernel_h // 2, kernel_w // 2

        self.conv_blocks = nn.Sequential()
        in_channels = input_channels
        height, width = input_size

        for i in range(conv_layers):
            out_channels = filters_per_layer[i]
            self.conv_blocks.append(
                nn.Conv2d(in_channels, out_channels, kernel_size, padding=(pad_h, pad_w))
            )
            self.conv_blocks.append(get_activation(activation))
            self.conv_blocks.append(nn.MaxPool2d(kernel_size=2))
            in_channels = out_channels
            # Exact stride-1 conv output size, then floor-halved by the 2x2 pool.
            height = (height + 2 * pad_h - kernel_h + 1) // 2
            width = (width + 2 * pad_w - kernel_w + 1) // 2

        # Number of features entering the first dense layer.
        self.flattened_size = in_channels * height * width
        self.fc1 = nn.Linear(self.flattened_size, dense_neurons)
        self.fc_out = nn.Linear(dense_neurons, num_classes)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for image batch ``x``."""
        x = self.conv_blocks(x)
        # Collapse everything except the batch dimension.
        x = torch.flatten(x, 1)
        x = F.relu(self.fc1(x))
        return self.fc_out(x)

## Question 2: Training and Hyperparameter Tuning

## Explanation:

*   Training process:
  *   Batches are loaded and sent to the appropriate device (CPU/GPU)
  *   Forward pass, loss calculation, backpropagation
  *   Metrics logged to Weights & Biases (wandb)

*   Validation:
  *   Model set to evaluation mode
  *   No gradient calculation for faster inference
  *   Accuracy and loss calculated on validation set

*   Hyperparameters being tuned:
  *   Number of filters (32, 64, 128)
  *   Activation functions (ReLU, GELU, SiLU, Mish)
  *   Filter organization (same, doubling, halving)
  *   Batch normalization (present in enhanced version)
  *   Dropout (present in enhanced version)

In [None]:
def train_model(model, train_loader, val_loader, optimizer, criterion, device, epochs=10):
    """Train ``model`` and validate it after every epoch, logging metrics to W&B.

    Args:
        model: Network to train; it is moved to ``device`` in place.
        train_loader: Iterable of ``(images, labels)`` training batches. It
            must support ``len()`` and expose ``.dataset`` (as a ``DataLoader``
            does), because those are used to average the metrics.
        val_loader: Validation batches, with the same requirements.
        optimizer: Optimizer stepped once per training batch.
        criterion: Loss callable applied as ``criterion(outputs, labels)``.
        device: Device the model and every batch are moved to.
        epochs: Number of passes over ``train_loader``.

    Returns:
        None. Metrics are sent to ``wandb.log`` and the validation accuracy is
        printed once per epoch.
    """
    model.to(device)
    for epoch in range(epochs):
        # ---- Training pass ----
        model.train()
        running_loss, correct = 0.0, 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            correct += (outputs.argmax(1) == labels).sum().item()

        train_loss = running_loss / len(train_loader)
        train_acc = correct / len(train_loader.dataset)

        # ---- Validation pass (no gradients, eval-mode layers) ----
        model.eval()
        val_loss, val_correct = 0.0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                val_loss += criterion(outputs, labels).item()
                val_correct += (outputs.argmax(1) == labels).sum().item()

        val_acc = val_correct / len(val_loader.dataset)

        # Log everything for this epoch in a single call: each wandb.log call
        # advances the run's step, so separate calls would place the train and
        # validation metrics of one epoch on different steps.
        wandb.log({
            "train_loss": train_loss,
            "train_acc": train_acc,
            "val_loss": val_loss / len(val_loader),
            "val_acc": val_acc,
            "epoch": epoch,
        })
        print(f"Epoch {epoch+1}, Val Acc: {val_acc:.4f}")

## Question 3: Observations from Plots

Code that generates the plots:
The metrics logged through the `wandb.log()` calls during training are what the W&B dashboard uses to build:

*   Accuracy vs epoch plots
*   Loss vs epoch plots
*   Parallel coordinates plots
*   Correlation summary tables

Key observations to make:
1. How different activation functions affect training
2. Impact of increasing filter numbers
3. Effect of different filter organization patterns
4. How batch normalization affects training stability
5. Effect of dropout on overfitting

## Question 4: Testing the Best Model

## Explanation:

*   After hyperparameter tuning, select the best model configuration
*   Evaluate on the untouched test set
*   Generate 10×3 grid of sample images with predictions
*   Visualize filters from first convolutional layer

In [None]:
# In the enhanced version, this would be added:
def evaluate_test_set(model, test_loader, device):
    """Return the classification accuracy of ``model`` over ``test_loader``.

    Args:
        model: Network to evaluate; it is moved to ``device`` and switched to
            evaluation mode.
        test_loader: Iterable of ``(images, labels)`` batches.
        device: Device the model and every batch are moved to.

    Returns:
        Fraction of correctly classified samples, or ``0.0`` when the loader
        yields no samples.
    """
    # Keep the model and the batches on the same device, as train_model does.
    model.to(device)
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            # Predicted class = index of the largest logit.
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # An empty loader would otherwise raise ZeroDivisionError.
    if total == 0:
        return 0.0
    return correct / total

# And to visualize predictions:
# def visualize_predictions(model, test_loader, class_names, num_samples=10):
    # Implementation as shown in enhanced version

## Question 5: GitHub Submission

## Done ✅

## Main Execution

## Explanation:

1. Initializes wandb for experiment tracking
2. Sets up data transformations and loading
3. Creates 80/20 train/validation split
4. Initializes model with configurable parameters
5. Sets up loss function and optimizer
6. Starts training process

In [None]:
def run():
    """Train the baseline ``CustomCNN`` on the iNaturalist training folder.

    Starts a W&B run with the default hyperparameters, loads
    ``./inaturalist_12K/train`` as an ``ImageFolder``, holds out 20% of it for
    validation, builds the model from the run config and trains it with Adam
    and cross-entropy loss.
    """
    wandb.init(project="da6401-assignment2", config={
        "conv_layers": 3,
        "filters": [32, 64, 128],
        "kernel_size": 3,
        "activation": "relu",
        "dense_neurons": 256,
        "batch_size": 64,
        "epochs": 10,
        "lr": 1e-3
    })
    config = wandb.config

    # Single source of truth for the resolution, shared by the resize
    # transform and the model's dense-layer sizing.
    image_size = (64, 64)
    transform = transforms.Compose([
        transforms.Resize(image_size),
        transforms.ToTensor(),
    ])

    dataset = datasets.ImageFolder("./inaturalist_12K/train", transform=transform)
    # 80/20 train/validation split.
    val_size = int(0.2 * len(dataset))
    train_size = len(dataset) - val_size
    train_set, val_set = random_split(dataset, [train_size, val_size])

    train_loader = DataLoader(train_set, batch_size=config.batch_size, shuffle=True)
    val_loader = DataLoader(val_set, batch_size=config.batch_size)

    model = CustomCNN(
        # Size the output layer from the class folders actually found on disk
        # instead of relying on the constructor default.
        num_classes=len(dataset.classes),
        conv_layers=config.conv_layers,
        filters_per_layer=config.filters,
        kernel_size=config.kernel_size,
        activation=config.activation,
        dense_neurons=config.dense_neurons,
        input_size=image_size
    )

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    train_model(model, train_loader, val_loader, optimizer, criterion, device, config.epochs)

# Start training only when this code runs as the main program (a script or a
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    run()

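# NOTE(review): a bare 40-character hex string was pasted here; it looked like a credential (possibly a W&B API key) and has been redacted. If it was a key, rotate it and supply credentials via `wandb login` or the WANDB_API_KEY environment variable instead.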

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33myogesh084arya[0m ([33myogesh084arya-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Epoch 1, Val Acc: 0.2316
Epoch 2, Val Acc: 0.2481
Epoch 3, Val Acc: 0.2596
Epoch 4, Val Acc: 0.3002
Epoch 5, Val Acc: 0.3112
Epoch 6, Val Acc: 0.2911
Epoch 7, Val Acc: 0.3132
Epoch 8, Val Acc: 0.3252
Epoch 9, Val Acc: 0.3222
Epoch 10, Val Acc: 0.3137
