**DA6401-Assignment2-PartA**

Imports and Setup

In [None]:
# Install Weights & Biases quietly, import it, and authenticate this session.
# NOTE(review): the package version is not pinned, so a rerun may install a different release.
!pip install -q wandb
import wandb
# Prompts for an API key when no stored credentials are found (see the cell output below).
wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mna21b075[0m ([33mna21b075-indian-institute-of-technology-madras[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

**QUESTION 1**

In [None]:
#Import necessary libraries for deep learning, data handling, and utilities
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, Subset
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import numpy as np
import random

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Seed every random number generator used in this notebook
def set_seed(seed=42):
    """Seed the PyTorch, NumPy and Python random number generators.

    Args:
        seed: Integer seed handed to each generator (default 42).
    """
    # Same order as before: PyTorch first, then NumPy, then the stdlib.
    for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
        seed_fn(seed)
# Seed all generators once with the default seed (42) before any data or model is created.
set_seed()

In [None]:
#Function to create train and validation dataloaders with optional data augmentation
def get_dataloaders(batch_size=32, aug=False, data_dir='inaturalist_12K/train'):
    """Build stratified train/validation dataloaders from an image folder.

    The folder is split 80/20 with a fixed random state, stratified by class.
    Random augmentation (horizontal flip, rotation of up to 15 degrees) is
    applied to the training split only; validation images always go through
    the deterministic resize/normalise pipeline so that validation metrics are
    comparable between epochs and between runs.

    Args:
        batch_size: Number of samples per batch for both loaders.
        aug: When True, add random flip/rotation to the training transform.
        data_dir: Root directory in the layout expected by ``ImageFolder``
            (one sub-directory per class).

    Returns:
        Tuple ``(train_loader, val_loader, num_classes)``.
    """
    # Deterministic preprocessing shared by both splits
    # (224x224 resize, tensor conversion, ImageNet channel statistics).
    base_transforms = [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406],
                             [0.229, 0.224, 0.225])
    ]

    # Random augmentation is prepended for the training split only.
    aug_transforms = []
    if aug:
        aug_transforms = [
            transforms.RandomHorizontalFlip(),
            transforms.RandomRotation(15),
        ]

    # Two views of the same folder: identical sample order, different transforms.
    train_dataset = ImageFolder(data_dir, transform=transforms.Compose(aug_transforms + base_transforms))
    val_dataset = ImageFolder(data_dir, transform=transforms.Compose(base_transforms))

    # ImageFolder already stores the class index of every sample, so there is
    # no need to decode and transform each image just to read its label.
    labels = train_dataset.targets

    # Stratified split of the sample indices into training and validation sets.
    train_idx, val_idx = train_test_split(
        list(range(len(train_dataset))),
        test_size=0.2,
        stratify=labels,
        random_state=42
    )

    train_loader = DataLoader(Subset(train_dataset, train_idx), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(Subset(val_dataset, val_idx), batch_size=batch_size, shuffle=False)

    return train_loader, val_loader, len(train_dataset.classes)


In [None]:
#A custom CNN model class with configurable architecture
class CustomCNN(nn.Module):
    """Five-block convolutional classifier whose layout is driven by a config object.

    The network is one input conv block followed by four intermediate conv
    blocks (conv -> optional BatchNorm2d -> activation -> 2x2 max-pool), then
    a dense layer, a BatchNorm1d or Dropout layer, the activation, and the
    output layer.

    Attributes read from ``config``:
        activ: Activation name: 'relu', 'silu', 'gelu' or 'mish'.
        num_filters: Output channels of the input conv layer.
        ker_size_input: Kernel size of the input conv layer. Padding is fixed
            at 1, so kernels larger than 3 shrink the feature map.
        filter_org: 'double' doubles the filter count per block, 'half' halves
            it (never below 16); any other value keeps it constant.
        bn: Truthy to add BatchNorm2d in the four intermediate blocks.
        bn_vs_dp: 'bn' puts BatchNorm1d after the dense layer; any other value
            uses Dropout with probability ``config.dropout``.
        dropout: Dropout probability (used only when ``bn_vs_dp`` is not 'bn').
        num_nodes: Width of the dense layer.

    Args:
        config: Object exposing the attributes listed above.
        num_classes: Number of output logits.
        input_size: Spatial size of the input images, either an int (square)
            or a ``(height, width)`` pair. Defaults to 224.
    """

    def __init__(self, config, num_classes, input_size=224):
        super().__init__()
        self.config = config
        # Build the activation once; it is stateless and shared by all blocks.
        self.act = self.get_activation(config.activ)
        self.layers = nn.ModuleList()

        in_channels = 3
        filters = config.num_filters

        # Input Conv Layer
        self.layers.append(nn.Conv2d(in_channels, filters, config.ker_size_input, padding=1))
        self.layers.append(self.act)
        self.layers.append(nn.MaxPool2d(kernel_size=2))

        # Intermediate Conv Blocks
        for _ in range(4):
            # Remember the previous block's width: it is this block's input
            # channel count whichever way the filter count evolves.
            prev_filters = filters
            if config.filter_org == 'double':
                filters *= 2
            elif config.filter_org == 'half':
                filters = max(16, filters // 2)
            self.layers.append(nn.Conv2d(in_channels=prev_filters,
                                         out_channels=filters,
                                         kernel_size=3, padding=1))
            if config.bn:
                self.layers.append(nn.BatchNorm2d(filters))
            self.layers.append(self.act)
            self.layers.append(nn.MaxPool2d(kernel_size=2))

        self.conv_out = nn.Sequential(*self.layers)

        # Compute the flattened feature size with a probe forward pass.
        # The probe runs in eval mode so BatchNorm running statistics are not
        # updated by the dummy tensor; training mode is restored afterwards.
        if isinstance(input_size, int):
            height, width = input_size, input_size
        else:
            height, width = input_size
        dummy = torch.zeros(1, 3, height, width)
        self.conv_out.eval()
        with torch.no_grad():
            dummy_out = self.conv_out(dummy)
        self.conv_out.train()
        flatten_size = dummy_out.view(1, -1).shape[1]

        self.fc1 = nn.Linear(flatten_size, config.num_nodes)
        self.bn_or_dp = nn.BatchNorm1d(config.num_nodes) if config.bn_vs_dp == 'bn' else nn.Dropout(config.dropout)
        self.output = nn.Linear(config.num_nodes, num_classes)

    #Helper function to retrieve activation function by name
    def get_activation(self, name):
        """Return a new activation module for ``name``.

        Raises:
            KeyError: If ``name`` is not 'relu', 'silu', 'gelu' or 'mish'.
        """
        activations = {
            'relu': nn.ReLU,
            'silu': nn.SiLU,
            'gelu': nn.GELU,
            'mish': nn.Mish
        }
        # Only the requested activation is instantiated.
        return activations[name]()

    #Forward pass through the network
    def forward(self, x):
        """Map a batch of images ``(N, 3, H, W)`` to class logits ``(N, num_classes)``."""
        x = self.conv_out(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = self.bn_or_dp(x)
        # Reuse the activation built in __init__ instead of rebuilding it per call.
        x = self.act(x)
        return self.output(x)


**QUESTION 2**

20% of the training data is held out as validation data for hyperparameter tuning.

In [None]:
#Function to train the model using provided dataloaders and configuration
def train(model, train_loader, val_loader, config):
    """Train ``model`` and log per-epoch metrics to Weights & Biases.

    Args:
        model: Network to optimise; it is moved to the module-level ``device``.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches,
            passed to ``evaluate`` after every epoch.
        config: Object exposing ``learning_rate``, ``optimizer``
            ('adam', 'sgd' or 'rmsprop') and ``epochs``.

    Raises:
        KeyError: If ``config.optimizer`` is not one of the supported names.
    """
    model = model.to(device)
    # loss function
    criterion = nn.CrossEntropyLoss()
    # Optimizer factories: only the one selected by the configuration is built.
    optimizer_factories = {
        'adam': lambda params: torch.optim.Adam(params, lr=config.learning_rate),
        'sgd': lambda params: torch.optim.SGD(params, lr=config.learning_rate, momentum=0.9),
        'rmsprop': lambda params: torch.optim.RMSprop(params, lr=config.learning_rate)
    }
    optimizer = optimizer_factories[config.optimizer](model.parameters())

    #Training loop over epochs
    for epoch in range(config.epochs):
        model.train()
        loss_sum = 0.0
        correct = 0
        total = 0

        #Loop over training data batches
        for inputs, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}"):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # criterion returns the batch mean; weight it by the batch size so
            # a smaller final batch does not skew the epoch average.
            batch_count = labels.size(0)
            loss_sum += loss.item() * batch_count
            _, predicted = torch.max(outputs, 1) #batch accuracy
            total += batch_count
            correct += (predicted == labels).sum().item()

        train_acc = correct / total #training accuracy for the epoch
        val_acc, val_loss = evaluate(model, val_loader) # model evaluation on validation data

        wandb.log({
            "epoch": epoch + 1,
            "train_loss": loss_sum / total,
            "train_accuracy": train_acc,
            "val_loss": val_loss,
            "val_accuracy": val_acc
        })
# Function to evaluate model performance on a given dataset
def evaluate(model, loader):
    """Compute accuracy and mean cross-entropy loss of ``model`` over ``loader``.

    The model is switched to eval mode and left in that mode; gradients are
    disabled during the pass. Batches are moved to the module-level ``device``.

    Args:
        model: Network to evaluate.
        loader: Iterable of ``(inputs, labels)`` batches.

    Returns:
        Tuple ``(accuracy, loss)`` where ``loss`` is the per-sample average
        over the whole dataset.
    """
    model.eval()
    # Sum the per-sample losses so the final average is taken over samples,
    # not over batches (the last batch may be smaller than the others).
    criterion = nn.CrossEntropyLoss(reduction='sum')
    correct = 0
    total = 0
    loss_sum = 0.0

    with torch.no_grad():
        for inputs, labels in loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            loss_sum += criterion(outputs, labels).item()
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    return correct / total, loss_sum / total #overall accuracy and average loss over the dataset


In [None]:
# Random-search sweep that maximises validation accuracy.
# Each hyperparameter is listed with its candidate values and wrapped in the
# {'values': [...]} form expected by the sweep configuration.
sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        param_name: {'values': candidates}
        for param_name, candidates in {
            'learning_rate': [1e-3, 1e-4],
            'activ': ['relu'],
            'bn': [0, 1],
            'num_filters': [32, 64],
            'filter_org': ['same', 'double'],
            'epochs': [5],
            'dropout': [0.2, 0.3],
            'data_aug': [0, 1],
            'optimizer': ['adam', 'sgd', 'rmsprop'],
            'batch_size': [32, 64],
            'ker_size_input': [3, 5],
            'bn_vs_dp': ['bn', 'dp'],
            'num_nodes': [512, 1024],
        }.items()
    },
}


In [None]:
def sweep_train(config=None):
    """Run one sweep trial: open a wandb run, build data and model, train.

    Args:
        config: Optional initial configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config):
        # Hyperparameters for this trial are read from the active run's config.
        config = wandb.config
        use_augmentation = bool(config.data_aug)
        loaders_and_classes = get_dataloaders(config.batch_size, use_augmentation)
        train_loader, val_loader, num_classes = loaders_and_classes
        network = CustomCNN(config, num_classes)
        train(network, train_loader, val_loader, config)

# Register the sweep under the "asgn2_q1_torch" project and run 2 trials with this agent.
sweep_id = wandb.sweep(sweep_config, project="asgn2_q1_torch")
wandb.agent(sweep_id, function=sweep_train, count=2)


Create sweep with ID: 98dgdtoy
Sweep URL: https://wandb.ai/na21b075-indian-institute-of-technology-madras/asgn2_q1_torch/sweeps/98dgdtoy


[34m[1mwandb[0m: Agent Starting Run: hx7w58me with config:
[34m[1mwandb[0m: 	activ: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bn: 1
[34m[1mwandb[0m: 	bn_vs_dp: bn
[34m[1mwandb[0m: 	data_aug: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	filter_org: double
[34m[1mwandb[0m: 	ker_size_input: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	num_nodes: 1024
[34m[1mwandb[0m: 	optimizer: sgd


Epoch 1: 100%|██████████| 141/141 [02:26<00:00,  1.04s/it]
Epoch 2: 100%|██████████| 141/141 [02:22<00:00,  1.01s/it]
Epoch 3: 100%|██████████| 141/141 [02:22<00:00,  1.01s/it]
Epoch 4: 100%|██████████| 141/141 [02:24<00:00,  1.02s/it]
Epoch 5: 100%|██████████| 141/141 [02:22<00:00,  1.01s/it]


0,1
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▄▆▆█
val_loss,█▅▃▂▁

0,1
train_accuracy,0.38527
train_loss,1.78192
val_accuracy,0.364
val_loss,1.84424


[34m[1mwandb[0m: Agent Starting Run: e094i7y1 with config:
[34m[1mwandb[0m: 	activ: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bn: 0
[34m[1mwandb[0m: 	bn_vs_dp: bn
[34m[1mwandb[0m: 	data_aug: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	filter_org: double
[34m[1mwandb[0m: 	ker_size_input: 3
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: 32
[34m[1mwandb[0m: 	num_nodes: 512
[34m[1mwandb[0m: 	optimizer: sgd


Epoch 1: 100%|██████████| 282/282 [02:25<00:00,  1.94it/s]
Epoch 2: 100%|██████████| 282/282 [02:21<00:00,  1.99it/s]
Epoch 3: 100%|██████████| 282/282 [02:21<00:00,  1.99it/s]
Epoch 4: 100%|██████████| 282/282 [02:20<00:00,  2.00it/s]
Epoch 5: 100%|██████████| 282/282 [02:23<00:00,  1.97it/s]


0,1
train_accuracy,▁▅▆▇█
train_loss,█▄▃▂▁
val_accuracy,▁▅▅▆█
val_loss,█▆▃▅▁

0,1
train_accuracy,0.3736
train_loss,1.80046
val_accuracy,0.351
val_loss,1.86412


In [None]:
# Random-search sweep that maximises validation accuracy; this variant also
# samples the number of epochs (5 or 10).
# Each hyperparameter is listed with its candidate values and wrapped in the
# {'values': [...]} form expected by the sweep configuration.
sweep_config = {
    'method': 'random',
    'metric': {'name': 'val_accuracy', 'goal': 'maximize'},
    'parameters': {
        param_name: {'values': candidates}
        for param_name, candidates in {
            'learning_rate': [1e-3, 1e-4],
            'activ': ['relu'],
            'bn': [0, 1],
            'num_filters': [32, 64],
            'filter_org': ['same', 'double'],
            'epochs': [5, 10],
            'dropout': [0.2, 0.3],
            'data_aug': [0, 1],
            'optimizer': ['adam', 'sgd', 'rmsprop'],
            'batch_size': [32, 64],
            'ker_size_input': [3, 5],
            'bn_vs_dp': ['bn', 'dp'],
            'num_nodes': [512, 1024],
        }.items()
    },
}
def sweep_train(config=None):
    """Run one sweep trial: open a wandb run, build data and model, train.

    Args:
        config: Optional initial configuration forwarded to ``wandb.init``.
    """
    with wandb.init(config=config):
        # Hyperparameters for this trial are read from the active run's config.
        config = wandb.config
        use_augmentation = bool(config.data_aug)
        loaders_and_classes = get_dataloaders(config.batch_size, use_augmentation)
        train_loader, val_loader, num_classes = loaders_and_classes
        network = CustomCNN(config, num_classes)
        train(network, train_loader, val_loader, config)

# Register a new sweep under the "asgn2_q1_torch" project with the configuration above.
sweep_id = wandb.sweep(sweep_config, project="asgn2_q1_torch")
wandb.agent(sweep_id, function=sweep_train, count=5)  # run 5 experiments

Create sweep with ID: wl9lsblt
Sweep URL: https://wandb.ai/na21b075-indian-institute-of-technology-madras/asgn2_q1_torch/sweeps/wl9lsblt


[34m[1mwandb[0m: Agent Starting Run: 23z0xufy with config:
[34m[1mwandb[0m: 	activ: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bn: 1
[34m[1mwandb[0m: 	bn_vs_dp: dp
[34m[1mwandb[0m: 	data_aug: 0
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	filter_org: double
[34m[1mwandb[0m: 	ker_size_input: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	num_nodes: 1024
[34m[1mwandb[0m: 	optimizer: sgd


Epoch 1: 100%|██████████| 141/141 [02:31<00:00,  1.08s/it]
Epoch 2: 100%|██████████| 141/141 [02:30<00:00,  1.07s/it]
Epoch 3: 100%|██████████| 141/141 [02:31<00:00,  1.08s/it]
Epoch 4: 100%|██████████| 141/141 [02:30<00:00,  1.07s/it]
Epoch 5: 100%|██████████| 141/141 [02:29<00:00,  1.06s/it]


0,1
train_accuracy,▁▄▆▇█
train_loss,█▅▄▂▁
val_accuracy,▁▄▃█▇
val_loss,▇▄█▁▂

0,1
train_accuracy,0.4146
train_loss,1.66267
val_accuracy,0.365
val_loss,1.83237


[34m[1mwandb[0m: Agent Starting Run: 1uwxyej5 with config:
[34m[1mwandb[0m: 	activ: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bn: 1
[34m[1mwandb[0m: 	bn_vs_dp: dp
[34m[1mwandb[0m: 	data_aug: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_org: double
[34m[1mwandb[0m: 	ker_size_input: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	num_nodes: 512
[34m[1mwandb[0m: 	optimizer: rmsprop


Epoch 1: 100%|██████████| 282/282 [02:45<00:00,  1.71it/s]
Epoch 2: 100%|██████████| 282/282 [02:44<00:00,  1.72it/s]
Epoch 3: 100%|██████████| 282/282 [02:45<00:00,  1.71it/s]
Epoch 4: 100%|██████████| 282/282 [02:46<00:00,  1.69it/s]
Epoch 5: 100%|██████████| 282/282 [02:47<00:00,  1.69it/s]
Epoch 6: 100%|██████████| 282/282 [02:46<00:00,  1.69it/s]
Epoch 7: 100%|██████████| 282/282 [02:47<00:00,  1.69it/s]
Epoch 8: 100%|██████████| 282/282 [02:46<00:00,  1.69it/s]
Epoch 9: 100%|██████████| 282/282 [02:47<00:00,  1.68it/s]
Epoch 10: 100%|██████████| 282/282 [02:46<00:00,  1.69it/s]


0,1
train_accuracy,█▃▃▂▁▃▃▂▂▃
train_loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,▁█████████
val_loss,█▁▁▁▁▁▁▁▁▁

0,1
train_accuracy,0.09412
train_loss,2.31221
val_accuracy,0.1
val_loss,2.30259


[34m[1mwandb[0m: Agent Starting Run: 78dxzk2b with config:
[34m[1mwandb[0m: 	activ: relu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	bn: 0
[34m[1mwandb[0m: 	bn_vs_dp: bn
[34m[1mwandb[0m: 	data_aug: 1
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_org: double
[34m[1mwandb[0m: 	ker_size_input: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	num_nodes: 512
[34m[1mwandb[0m: 	optimizer: rmsprop


Epoch 1: 100%|██████████| 282/282 [02:41<00:00,  1.74it/s]
Epoch 2: 100%|██████████| 282/282 [02:41<00:00,  1.75it/s]
Epoch 3: 100%|██████████| 282/282 [02:43<00:00,  1.73it/s]
Epoch 4: 100%|██████████| 282/282 [02:44<00:00,  1.72it/s]
Epoch 5: 100%|██████████| 282/282 [02:40<00:00,  1.75it/s]
Epoch 6: 100%|██████████| 282/282 [02:42<00:00,  1.73it/s]
Epoch 7: 100%|██████████| 282/282 [02:42<00:00,  1.73it/s]
Epoch 8: 100%|██████████| 282/282 [02:43<00:00,  1.72it/s]
Epoch 9: 100%|██████████| 282/282 [02:41<00:00,  1.74it/s]
Epoch 10: 100%|██████████| 282/282 [02:42<00:00,  1.73it/s]


0,1
train_accuracy,▁▃▄▅▅▆▆▇▇█
train_loss,█▆▅▄▄▃▃▂▂▁
val_accuracy,▂▂▁▄▅▅▆▄▇█
val_loss,▃▄█▄▂▂▂▃▂▁

0,1
train_accuracy,0.43683
train_loss,1.61812
val_accuracy,0.365
val_loss,1.80449


[34m[1mwandb[0m: Agent Starting Run: ton43quk with config:
[34m[1mwandb[0m: 	activ: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bn: 1
[34m[1mwandb[0m: 	bn_vs_dp: dp
[34m[1mwandb[0m: 	data_aug: 0
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_org: same
[34m[1mwandb[0m: 	ker_size_input: 3
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	num_nodes: 512
[34m[1mwandb[0m: 	optimizer: sgd


Epoch 1: 100%|██████████| 141/141 [02:05<00:00,  1.13it/s]
Epoch 2: 100%|██████████| 141/141 [02:03<00:00,  1.14it/s]
Epoch 3: 100%|██████████| 141/141 [02:02<00:00,  1.15it/s]
Epoch 4: 100%|██████████| 141/141 [02:03<00:00,  1.14it/s]
Epoch 5: 100%|██████████| 141/141 [02:03<00:00,  1.14it/s]
Epoch 6: 100%|██████████| 141/141 [02:01<00:00,  1.16it/s]
Epoch 7: 100%|██████████| 141/141 [02:03<00:00,  1.14it/s]
Epoch 8: 100%|██████████| 141/141 [02:02<00:00,  1.15it/s]
Epoch 9: 100%|██████████| 141/141 [02:04<00:00,  1.14it/s]
Epoch 10: 100%|██████████| 141/141 [02:02<00:00,  1.15it/s]


0,1
train_accuracy,▁▃▄▅▅▆▆▇██
train_loss,█▆▅▅▄▃▃▂▂▁
val_accuracy,▁▃▃▄▆▇▅▅█▆
val_loss,█▆▆▄▂▁▄▇▁▃

0,1
train_accuracy,0.4955
train_loss,1.46586
val_accuracy,0.365
val_loss,1.87584


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: os8y4pf4 with config:
[34m[1mwandb[0m: 	activ: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	bn: 0
[34m[1mwandb[0m: 	bn_vs_dp: dp
[34m[1mwandb[0m: 	data_aug: 1
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	filter_org: double
[34m[1mwandb[0m: 	ker_size_input: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	num_filters: 64
[34m[1mwandb[0m: 	num_nodes: 1024
[34m[1mwandb[0m: 	optimizer: rmsprop


Epoch 1: 100%|██████████| 141/141 [02:45<00:00,  1.17s/it]
Epoch 2: 100%|██████████| 141/141 [02:45<00:00,  1.17s/it]
Epoch 3: 100%|██████████| 141/141 [02:45<00:00,  1.17s/it]
Epoch 4: 100%|██████████| 141/141 [02:52<00:00,  1.22s/it]
Epoch 5: 100%|██████████| 141/141 [02:50<00:00,  1.21s/it]
Epoch 6: 100%|██████████| 141/141 [02:50<00:00,  1.21s/it]
Epoch 7: 100%|██████████| 141/141 [02:52<00:00,  1.23s/it]
Epoch 8: 100%|██████████| 141/141 [02:53<00:00,  1.23s/it]
Epoch 9: 100%|██████████| 141/141 [02:50<00:00,  1.21s/it]
Epoch 10: 100%|██████████| 141/141 [02:50<00:00,  1.21s/it]


0,1
train_accuracy,▁▃▄▅▆▆▇▇▇█
train_loss,█▆▅▄▃▃▂▂▁▁
val_accuracy,▁▂▆▅▇█▇███
val_loss,▇█▄▄▃▂▂▁▁▂

0,1
train_accuracy,0.42549
train_loss,1.65964
val_accuracy,0.384
val_loss,1.82406
