In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, random_split


# Data Preprocessing

In [2]:
# We resize the input images, convert them to tensors and normalize them to [-1,1]
transform = transforms.Compose([
    transforms.Resize((128, 128)),   # Resize images to 128x128
    transforms.ToTensor(),           # Convert images to PyTorch tensors
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))  # Normalize to [-1, 1]
])

# Augmented transform
augmented_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

In [3]:
train_dir = '/kaggle/input/inaturalist-1/inaturalist_12K/train'
test_dir = '/kaggle/input/inaturalist-1/inaturalist_12K/val'

train_dataset = ImageFolder(root=train_dir, transform=transform)
test_dataset = ImageFolder(root=test_dir, transform=transform)



In [4]:
# Load the full training dataset
full_train_dataset = ImageFolder(root=train_dir, transform=transform)

# Calculate split sizes
val_size = int(0.2 * len(full_train_dataset))
train_size = len(full_train_dataset) - val_size

# Split the dataset
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

# Loaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, num_workers=2)

# Load the test set
test_dataset = ImageFolder(root=test_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

In [5]:
#Make sure every class is represented equally in validation data

from collections import Counter
import matplotlib.pyplot as plt

# Get original dataset labels (from the subset indices)
val_targets = [full_train_dataset.targets[i] for i in val_dataset.indices]

# Count frequency of each class
val_class_counts = Counter(val_targets)

print(sorted(val_class_counts.items()))

[(0, 203), (1, 185), (2, 213), (3, 200), (4, 194), (5, 189), (6, 186), (7, 228), (8, 205), (9, 196)]


In [6]:
num_classes = len(full_train_dataset.classes)

class CNN(nn.Module):
    def __init__(self, num_classes, conv_filters=[96, 256, 384, 384, 256], kernel_sizes=[3, 3, 3, 3, 3], activation_fn=F.relu, fc_units=[1024],dropout=[0.0],
        use_batchnorm=False):
        super(CNN, self).__init__()
        
        
        assert len(conv_filters) == len(kernel_sizes), "conv_filters and kernel_sizes must be the same length"
        
        self.activation_fn = activation_fn
        self.use_batchnorm = use_batchnorm

        self.pool = nn.MaxPool2d(2, 2)
        
        
        self.conv_layers = nn.ModuleList()
        self.batchnorm_layers = nn.ModuleList()


        in_channels = 3  
        for out_channels, kernel_size in zip(conv_filters, kernel_sizes):
            self.conv_layers.append(nn.Conv2d(in_channels, out_channels, kernel_size, padding=kernel_size//2))
            
            if use_batchnorm:
                self.batchnorm_layers.append(nn.BatchNorm2d(out_channels))
            
            in_channels = out_channels  
        
        
        # Dynamically compute the flattened size 
        with torch.no_grad():
            dummy_input = torch.zeros(1, 3, 128, 128)  
            x = dummy_input
            for idx, conv in enumerate(self.conv_layers):
                x = conv(x)
                if self.use_batchnorm:
                    x = self.batchnorm_layers[idx](x)
                x = self.activation_fn(x)
                x = self.pool(x)
            self.flattened_size = x.view(1, -1).size(1)


        # Only one dense layer
        self.fc = nn.Linear(self.flattened_size, fc_units[0])
        self.dropout = nn.Dropout(dropout)

        # Final output layer
        self.fc_out = nn.Linear(fc_units[0], num_classes)
    
    def forward(self, x):
        for idx, conv in enumerate(self.conv_layers):
            x = conv(x)
            if self.use_batchnorm:
                x = self.batchnorm_layers[idx](x)
            x = self.activation_fn(x)
            x = self.pool(x)

        x = torch.flatten(x, 1)
        
        x = self.activation_fn(self.fc(x))
        x = self.dropout(x)
        x = self.fc_out(x)
        return x

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#model = CNN(num_classes, conv_filters=[64, 128, 256, 256, 128], kernel_sizes=[5, 3, 3, 3, 3], activation_fn=F.leaky_relu, fc_units=[512, 256]).to(device)

In [8]:
"""
# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(5):
    model.train()
    running_loss = 0.0
    correct_train = 0
    total_train = 0

    # Training phase
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Forward pass
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Calculate training metrics
        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total_train += labels.size(0)
        correct_train += (predicted == labels).sum().item()

    # Compute training loss and accuracy for the epoch
    train_loss = running_loss / len(train_loader)
    train_acc = 100 * correct_train / total_train

    # --- Validation phase ---
    model.eval()  # Set model to evaluation mode
    correct_val = 0
    total_val = 0

    with torch.no_grad():  # Disable gradient computation
        for val_inputs, val_labels in val_loader:
            val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)
            val_outputs = model(val_inputs)
            _, val_predicted = torch.max(val_outputs.data, 1)
            total_val += val_labels.size(0)
            correct_val += (val_predicted == val_labels).sum().item()

    # Compute validation accuracy for the epoch
    val_acc = 100 * correct_val / total_val  
    model.train()  # Switch back to training mode

    # Print epoch-wise training and validation metrics
    print(f"Epoch {epoch+1}/{15}")
    print(f"  Training Loss: {train_loss:.4f} | Training Accuracy: {train_acc:.2f}%")
    print(f"  Validation Accuracy: {val_acc:.2f}%\n")


# --- Test phase (after training) ---
model.eval()
correct_test = 0
total_test = 0
with torch.no_grad():
    for test_inputs, test_labels in test_loader:
        test_inputs, test_labels = test_inputs.to(device), test_labels.to(device)
        test_outputs = model(test_inputs)
        _, test_predicted = torch.max(test_outputs.data, 1)
        total_test += test_labels.size(0)
        correct_test += (test_predicted == test_labels).sum().item()

test_acc = 100 * correct_test / total_test
print(f"Final Test Accuracy: {test_acc:.2f}%")
"""

'\n# Loss function and optimizer\ncriterion = nn.CrossEntropyLoss()\noptimizer = torch.optim.Adam(model.parameters(), lr=0.001)\n\n# Training loop\nfor epoch in range(5):\n    model.train()\n    running_loss = 0.0\n    correct_train = 0\n    total_train = 0\n\n    # Training phase\n    for inputs, labels in train_loader:\n        inputs, labels = inputs.to(device), labels.to(device)\n\n        # Forward pass\n        optimizer.zero_grad()\n        outputs = model(inputs)\n        loss = criterion(outputs, labels)\n\n        # Backward pass and optimize\n        loss.backward()\n        optimizer.step()\n\n        # Calculate training metrics\n        running_loss += loss.item()\n        _, predicted = torch.max(outputs.data, 1)\n        total_train += labels.size(0)\n        correct_train += (predicted == labels).sum().item()\n\n    # Compute training loss and accuracy for the epoch\n    train_loss = running_loss / len(train_loader)\n    train_acc = 100 * correct_train / total_train\n\

In [9]:
import wandb


In [10]:
activation_map = {
    'relu': F.relu,
    'gelu': F.gelu,
    'silu': F.silu,
    'mish': F.mish
}

In [11]:
def train(config=None):
    with wandb.init(config=config) as run:
        config = wandb.config
        
        run.name = f"filters={config.conv_filters}_act={config.activation_fn}_aug={config.use_augmentation}_bn={config.use_batchnorm}_dropout={config.dropout}"
        run.save()

        # Choose transform for training
        if config.use_augmentation:
            train_transform = augmented_transform
        else:
            train_transform = transform

        # Load full dataset with chosen transform
        full_train_dataset = ImageFolder(train_dir, transform=train_transform)

        # Split into train and val
        val_size = int(0.2 * len(full_train_dataset))
        train_size = len(full_train_dataset) - val_size
        train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

        # Set val transform to basic (no augmentation)
        val_dataset.dataset.transform = transform

        # Data loaders
        train_loader = DataLoader(train_dataset, batch_size=config.batch_size, shuffle=True, num_workers=2)
        val_loader = DataLoader(val_dataset, batch_size=config.batch_size, shuffle=False, num_workers=2)


        # Model instantiation 
        model = CNN(
                  num_classes=num_classes,
                  conv_filters=config.conv_filters,
                  kernel_sizes=[3] * len(config.conv_filters),
                  activation_fn=activation_map[config.activation_fn],
                  fc_units=config.fc_units,
                  dropout=config.dropout,
                  use_batchnorm=config.use_batchnorm
              ).to(device)

        # Loss and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=config.lr)

        for epoch in range(config.epochs):
            model.train()
            running_loss = 0.0
            correct_train = 0
            total_train = 0

            for inputs, labels in train_loader:
                inputs, labels = inputs.to(device), labels.to(device)

                optimizer.zero_grad()
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()

                running_loss += loss.item()
                _, predicted = torch.max(outputs.data, 1)
                total_train += labels.size(0)
                correct_train += (predicted == labels).sum().item()

            train_loss = running_loss / len(train_loader)
            train_acc = 100 * correct_train / total_train

            # Validation phase
            model.eval()
            correct_val = 0
            total_val = 0
            with torch.no_grad():
                for val_inputs, val_labels in val_loader:
                    val_inputs, val_labels = val_inputs.to(device), val_labels.to(device)
                    val_outputs = model(val_inputs)
                    _, val_predicted = torch.max(val_outputs.data, 1)
                    total_val += val_labels.size(0)
                    correct_val += (val_predicted == val_labels).sum().item()

            val_acc = 100 * correct_val / total_val
            model.train()

            print(f"Epoch {epoch+1}/{config.epochs}")
            print(f"  Training Loss: {train_loss:.4f} | Training Accuracy: {train_acc:.2f}%")
            print(f"  Validation Accuracy: {val_acc:.2f}%\n")

            # Log metrics to W&B
            wandb.log({
                "train_loss": train_loss,
                "train_acc": train_acc,
                "val_acc": val_acc
            })


In [12]:
sweep_config = {
    'method': 'bayes',  
    'metric': {
        'name': 'val_acc',
        'goal': 'maximize'
    },
    'parameters': {
        'lr': {
            'values': [0.001, 0.0005, 0.0001]
        },
        'epochs': {
            'values': [5,10]
        },
        'conv_filters': {
            'values': [
                [32, 64, 128, 256, 512],
                [64, 128, 256, 512, 1024],
                [32, 64, 64, 128, 128],
                [128, 128, 128, 128, 128],
                [1024,512,256,128,64]
            ]
        },
        'activation_fn': {
            'values': ['relu', 'gelu', 'silu', 'mish']
        },
        'fc_units': {
            'values': [[512], [1024], [512, 256]]
        },
        'batch_size': {'values': [32, 64]},
        'use_batchnorm': {'values': [True, False]},
        'use_augmentation': {'values': [True, False]},
        'dropout': {'values': [0,0.2,0.3,0.5]}

    }
}


In [13]:
wandb.login(key='af7d7cf29d8954a13afb06c7a0d0c196c36ac51b')

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mma24m003[0m ([33mma24m003-iit-madras[0m). Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
# Create sweep
sweep_id = wandb.sweep(sweep_config, project="inaturalist-hyperparam-tuning")

# Launch sweep agents
wandb.agent(sweep_id, function=train, count=10)  # runs 10 experiments


Create sweep with ID: ascb14im
Sweep URL: https://wandb.ai/ma24m003-iit-madras/inaturalist-hyperparam-tuning/sweeps/ascb14im


[34m[1mwandb[0m: Agent Starting Run: toun9m1s with config:
[34m[1mwandb[0m: 	activation_fn: mish
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [1024, 512, 256, 128, 64]
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	fc_units: [1024]
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batchnorm: False




Epoch 1/5
  Training Loss: 2.1606 | Training Accuracy: 20.77%
  Validation Accuracy: 24.56%

Epoch 2/5
  Training Loss: 2.0051 | Training Accuracy: 28.84%
  Validation Accuracy: 27.96%

Epoch 3/5
  Training Loss: 1.8824 | Training Accuracy: 33.26%
  Validation Accuracy: 31.42%

Epoch 4/5
  Training Loss: 1.7846 | Training Accuracy: 37.24%
  Validation Accuracy: 33.27%

Epoch 5/5
  Training Loss: 1.6892 | Training Accuracy: 41.00%
  Validation Accuracy: 34.17%



0,1
train_acc,▁▄▅▇█
train_loss,█▆▄▂▁
val_acc,▁▃▆▇█

0,1
train_acc,41.0
train_loss,1.68922
val_acc,34.16708


[34m[1mwandb[0m: Agent Starting Run: amu3z2kn with config:
[34m[1mwandb[0m: 	activation_fn: gelu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [32, 64, 64, 128, 128]
[34m[1mwandb[0m: 	dropout: 0.2
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	fc_units: [512]
[34m[1mwandb[0m: 	lr: 0.0005
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batchnorm: False


Epoch 1/10
  Training Loss: 2.1841 | Training Accuracy: 19.69%
  Validation Accuracy: 26.96%

Epoch 2/10
  Training Loss: 2.0147 | Training Accuracy: 28.55%
  Validation Accuracy: 28.66%

Epoch 3/10
  Training Loss: 1.9171 | Training Accuracy: 32.12%
  Validation Accuracy: 30.77%

Epoch 4/10
  Training Loss: 1.8434 | Training Accuracy: 35.16%
  Validation Accuracy: 32.67%

Epoch 5/10
  Training Loss: 1.7688 | Training Accuracy: 37.52%
  Validation Accuracy: 34.12%

Epoch 6/10
  Training Loss: 1.6684 | Training Accuracy: 41.30%
  Validation Accuracy: 34.52%

Epoch 7/10
  Training Loss: 1.5299 | Training Accuracy: 46.26%
  Validation Accuracy: 34.97%

Epoch 8/10
  Training Loss: 1.3432 | Training Accuracy: 53.51%
  Validation Accuracy: 35.77%

Epoch 9/10
  Training Loss: 1.1026 | Training Accuracy: 61.27%
  Validation Accuracy: 34.42%

Epoch 10/10
  Training Loss: 0.8137 | Training Accuracy: 72.62%
  Validation Accuracy: 34.12%



0,1
train_acc,▁▂▃▃▃▄▅▅▆█
train_loss,█▇▇▆▆▅▅▄▂▁
val_acc,▁▂▄▆▇▇▇█▇▇

0,1
train_acc,72.625
train_loss,0.81369
val_acc,34.11706


[34m[1mwandb[0m: Agent Starting Run: zoq72ymd with config:
[34m[1mwandb[0m: 	activation_fn: relu
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	conv_filters: [128, 128, 128, 128, 128]
[34m[1mwandb[0m: 	dropout: 0
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	fc_units: [1024]
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	use_augmentation: True
[34m[1mwandb[0m: 	use_batchnorm: True


Epoch 1/10
  Training Loss: 2.0499 | Training Accuracy: 26.38%
  Validation Accuracy: 29.61%

Epoch 2/10
  Training Loss: 1.8539 | Training Accuracy: 34.52%
  Validation Accuracy: 28.31%

Epoch 3/10
  Training Loss: 1.7249 | Training Accuracy: 40.08%
  Validation Accuracy: 34.87%

Epoch 4/10
  Training Loss: 1.6088 | Training Accuracy: 44.21%
  Validation Accuracy: 33.47%

Epoch 5/10
  Training Loss: 1.4835 | Training Accuracy: 49.01%
  Validation Accuracy: 34.62%

Epoch 6/10
  Training Loss: 1.3505 | Training Accuracy: 54.31%
  Validation Accuracy: 38.57%

Epoch 7/10
  Training Loss: 1.2017 | Training Accuracy: 59.96%
  Validation Accuracy: 35.92%

Epoch 8/10
  Training Loss: 1.0141 | Training Accuracy: 67.38%
  Validation Accuracy: 34.22%

Epoch 9/10
  Training Loss: 0.8089 | Training Accuracy: 75.40%
  Validation Accuracy: 37.77%

Epoch 10/10
  Training Loss: 0.6180 | Training Accuracy: 82.56%
  Validation Accuracy: 36.92%



0,1
train_acc,▁▂▃▃▄▄▅▆▇█
train_loss,█▇▆▆▅▅▄▃▂▁
val_acc,▂▁▅▅▅█▆▅▇▇

0,1
train_acc,82.5625
train_loss,0.61804
val_acc,36.91846


[34m[1mwandb[0m: Agent Starting Run: sis3m5vs with config:
[34m[1mwandb[0m: 	activation_fn: gelu
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	conv_filters: [32, 64, 64, 128, 128]
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	fc_units: [512, 256]
[34m[1mwandb[0m: 	lr: 0.0001
[34m[1mwandb[0m: 	use_augmentation: False
[34m[1mwandb[0m: 	use_batchnorm: True
