<a href="https://colab.research.google.com/github/atharvasatishchaudhari/First_Model/blob/main/Advanced_Convolutions%2C_Data_Augmentation_and_Visualization.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# model.py

from __future__ import print_function
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import CIFAR10
from PIL import Image
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torchsummary import summary
from tqdm import tqdm

# =============================================================================
# Set random seed for reproducibility and check for CUDA
# =============================================================================
SEED = 1
# Seed the default (CPU) generator first, then the CUDA generator when a GPU is present.
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print("Using device:", device)

# =============================================================================
# Define Albumentations-based CIFAR10 Dataset wrapper
# =============================================================================
class AlbumentationsCIFAR10(CIFAR10):
    """
    CIFAR10 dataset that augments its images with an albumentations pipeline.

    The parent class is never given an image transform; instead ``__getitem__``
    reads the raw image from ``self.data`` (a numpy array in CIFAR10), and feeds
    it to the albumentations pipeline through the ``image=`` keyword.

    Args:
        root: Directory where the dataset is stored / downloaded to.
        train: Select the training split (True) or the test split (False).
        transform: Optional albumentations pipeline, called as
            ``transform(image=array)`` and expected to return a mapping
            with an ``'image'`` entry.
        **kwargs: Extra keyword arguments forwarded to ``CIFAR10``
            (for example ``download`` or ``target_transform``).
            ``download`` defaults to True when not given.
    """
    def __init__(self, root, train=True, transform=None, **kwargs):
        # Keep the historical default of downloading the data, but let callers
        # override it instead of silently discarding their keyword arguments.
        kwargs.setdefault("download", True)
        # Do not pass a transform to the parent class – we will handle it here.
        super().__init__(root, train=train, transform=None, **kwargs)
        self.albu_transform = transform

    def __getitem__(self, index):
        """Return ``(image, label)`` for ``index`` with the transforms applied."""
        # np.array() copies, so the pipeline never works on the stored dataset array.
        image = np.array(self.data[index])
        label = int(self.targets[index])
        # Compare against None explicitly rather than relying on the pipeline's truthiness.
        if self.albu_transform is not None:
            image = self.albu_transform(image=image)['image']
        # Honour a target_transform forwarded to the parent through **kwargs.
        if self.target_transform is not None:
            label = self.target_transform(label)
        return image, label

# =============================================================================
# Define Albumentations transformations for training and testing
# =============================================================================
# CIFAR10 mean and std (for normalization)
CIFAR10_MEAN = (0.4914, 0.4822, 0.4465)
CIFAR10_STD  = (0.2023, 0.1994, 0.2010)


def _make_coarse_dropout(hole_size=16, p=0.5):
    """
    Build a CoarseDropout that cuts exactly one ``hole_size`` x ``hole_size`` square
    filled with the dataset mean colour (on the 0-255 scale).

    The keyword names are picked from the installed CoarseDropout signature.
    NOTE(review): newer albumentations releases are understood to replace
    ``max_holes``/``max_height``/``max_width``/``fill_value`` with
    ``num_holes_range``/``hole_height_range``/``hole_width_range``/``fill`` –
    confirm against the API reference of the pinned version.
    If the signature cannot be inspected, or exposes none of the newer names,
    the legacy call is used unchanged.
    """
    import inspect

    try:
        params = inspect.signature(A.CoarseDropout.__init__).parameters
    except (TypeError, ValueError):
        params = {}

    if "num_holes_range" in params:
        # A plain tuple of floats instead of a numpy array for the fill colour.
        fill = tuple(channel_mean * 255 for channel_mean in CIFAR10_MEAN)
        fill_key = "fill" if "fill" in params else "fill_value"
        return A.CoarseDropout(
            num_holes_range=(1, 1),
            hole_height_range=(hole_size, hole_size),
            hole_width_range=(hole_size, hole_size),
            p=p,
            **{fill_key: fill}
        )

    # Legacy keyword names.
    return A.CoarseDropout(max_holes=1, max_height=hole_size, max_width=hole_size, min_holes=1,
                           min_height=hole_size, min_width=hole_size,
                           fill_value=np.array(CIFAR10_MEAN)*255, p=p)


# Training transform: horizontal flip, shiftScaleRotate, coarse dropout, normalization, and conversion to tensor
train_transform = A.Compose([
    A.HorizontalFlip(p=0.5),
    A.ShiftScaleRotate(shift_limit=0.0625, scale_limit=0.1, rotate_limit=15, p=0.5),
    _make_coarse_dropout(hole_size=16, p=0.5),
    A.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD),
    ToTensorV2()
])

# Test transform: only normalization and conversion to tensor
test_transform = A.Compose([
    A.Normalize(mean=CIFAR10_MEAN, std=CIFAR10_STD),
    ToTensorV2()
])

# =============================================================================
# Define the custom network architecture meeting the assignment criteria
# =============================================================================
class Net(nn.Module):
    """
    Small CNN: four convolutional stages, global average pooling and a linear
    classifier that emits log-probabilities over 10 classes.

    Every convolution is bias-free and followed by BatchNorm2d and ReLU.
    """

    def __init__(self):
        super().__init__()

        # Block C1: two standard 7x7 convolutions (3 -> 16 -> 16 channels).
        # padding=3 keeps the spatial size unchanged.
        self.block1 = nn.Sequential(
            *self._conv_bn_relu(3, 16, kernel_size=7, stride=1, padding=3),
            *self._conv_bn_relu(16, 16, kernel_size=7, stride=1, padding=3),
        )

        # Block C2: depthwise separable convolution.
        # A 3x3 depthwise conv (groups == input channels) followed by a
        # 1x1 pointwise conv that widens 16 -> 32 channels.
        self.block2 = nn.Sequential(
            *self._conv_bn_relu(16, 16, kernel_size=3, stride=1, padding=1, groups=16),
            *self._conv_bn_relu(16, 32, kernel_size=1, stride=1, padding=0),
        )

        # Block C3: three 3x3 convolutions with dilation=4 (32 -> 32 channels).
        # padding equals the dilation so the feature-map size is preserved.
        dilated_layers = []
        for _ in range(3):
            dilated_layers.extend(
                self._conv_bn_relu(32, 32, kernel_size=3, stride=1, padding=4, dilation=4)
            )
        self.block3 = nn.Sequential(*dilated_layers)

        # Block C40: a 7x7 convolution with stride=2 downsamples instead of
        # max-pooling and produces 40 channels.
        self.block4 = nn.Sequential(
            *self._conv_bn_relu(32, 40, kernel_size=7, stride=2, padding=3),
        )

        # Head: global average pooling to one value per channel, then a
        # fully connected layer mapping the 40 features to 10 classes.
        self.gap = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(40, 10)

    @staticmethod
    def _conv_bn_relu(in_channels, out_channels, **conv_kwargs):
        """Return the [Conv2d (no bias), BatchNorm2d, ReLU] triple used by every block."""
        return [
            nn.Conv2d(in_channels=in_channels, out_channels=out_channels, bias=False, **conv_kwargs),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(),
        ]

    def forward(self, x):
        """Run the four blocks in order and return log-softmax scores of shape [batch, 10]."""
        for stage in (self.block1, self.block2, self.block3, self.block4):
            x = stage(x)
        pooled = self.gap(x)                # [batch, 40, 1, 1]
        features = torch.flatten(pooled, 1)  # [batch, 40]
        logits = self.fc(features)
        return F.log_softmax(logits, dim=1)

# =============================================================================
# Instantiate the network, print a summary, and prepare DataLoaders
# =============================================================================
model = Net().to(device)
print("Model summary:")
summary(model, input_size=(3, 32, 32))  # Prints the torchsummary of the model

# Define DataLoader arguments (use a larger batch if CUDA is available)
dataloader_args = dict(shuffle=True, batch_size=128, num_workers=4, pin_memory=True) if torch.cuda.is_available() else dict(shuffle=True, batch_size=64)

# Create training and test datasets using our Albumentations wrapper
train_dataset = AlbumentationsCIFAR10(root='./data', train=True, transform=train_transform)
test_dataset  = AlbumentationsCIFAR10(root='./data', train=False, transform=test_transform)

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, **dataloader_args)
# Evaluation reuses the same settings but walks the test set in a fixed order:
# shuffling only matters for optimisation, not for measuring loss/accuracy.
test_loader  = DataLoader(test_dataset, **dict(dataloader_args, shuffle=False))

# =============================================================================
# Training and Testing Functions
# =============================================================================
def train(model, device, train_loader, optimizer, epoch):
    """
    Run one optimisation pass over ``train_loader``.

    The model is put in training mode and updated once per batch using the
    negative log-likelihood loss. A tqdm bar shows the latest batch loss and
    the running accuracy.

    Returns:
        (epoch_loss, epoch_acc): the per-sample mean loss and the accuracy in
        percent, both accumulated over the samples seen in this pass.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    progress = tqdm(train_loader, desc=f"Epoch {epoch} [Train]")
    for inputs, labels in progress:
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()                        # clear gradients from the previous step
        log_probs = model(inputs)                    # forward pass
        batch_loss = F.nll_loss(log_probs, labels)   # mean NLL over the batch
        batch_loss.backward()                        # backpropagation
        optimizer.step()                             # weight update

        batch_size = inputs.size(0)
        batch_loss_value = batch_loss.item()
        # Weight the batch mean by the batch size so the epoch figure is a per-sample mean.
        loss_sum += batch_loss_value * batch_size
        n_correct += (log_probs.argmax(dim=1) == labels).sum().item()
        n_seen += batch_size
        progress.set_postfix(loss=batch_loss_value, accuracy=100. * n_correct / n_seen)

    epoch_loss = loss_sum / n_seen
    epoch_acc = 100. * n_correct / n_seen
    print(f"Epoch {epoch} Train Loss: {epoch_loss:.4f} Accuracy: {epoch_acc:.2f}%")
    return epoch_loss, epoch_acc

def test(model, device, test_loader, epoch):
    """
    Evaluate ``model`` on ``test_loader`` without tracking gradients.

    Args:
        model: Network returning log-probabilities (it is scored with ``F.nll_loss``).
        device: Device the batches are moved to before the forward pass.
        test_loader: Iterable yielding ``(data, target)`` batches.
        epoch: Epoch number, used only in the printed summary line.

    Returns:
        (test_loss, test_acc): the per-sample mean loss and the accuracy in
        percent, averaged over the samples actually evaluated. Both are 0 when
        the loader yields no samples.
    """
    model.eval()
    total_loss = 0.0
    correct = 0
    processed = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            total_loss += F.nll_loss(output, target, reduction='sum').item()  # Sum losses over batch
            pred = output.argmax(dim=1)
            correct += pred.eq(target).sum().item()
            processed += data.size(0)
    # Average over the samples actually seen (as train() does) rather than
    # len(test_loader.dataset): the two differ when the loader uses a sampler,
    # a subset or drop_last. max(..., 1) avoids dividing by zero on an empty loader.
    denominator = max(processed, 1)
    test_loss = total_loss / denominator
    test_acc = 100. * correct / denominator
    print(f"Epoch {epoch} Test Loss: {test_loss:.4f} Accuracy: {test_acc:.2f}%\n")
    return test_loss, test_acc

# =============================================================================
# Main Training Loop
# =============================================================================
# Plain SGD with momentum over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
EPOCHS = 20  # Number of train/evaluate rounds; raise it if the accuracy target is not reached

# Per-epoch history, one entry appended per epoch.
train_losses, train_accuracies = [], []
test_losses, test_accuracies = [], []

for epoch in range(1, EPOCHS + 1):
    # One optimisation pass, recorded straight away ...
    train_loss, train_acc = train(model, device, train_loader, optimizer, epoch)
    train_losses.append(train_loss)
    train_accuracies.append(train_acc)

    # ... followed by an evaluation pass on the held-out split.
    test_loss, test_acc = test(model, device, test_loader, epoch)
    test_losses.append(test_loss)
    test_accuracies.append(test_acc)

# =============================================================================
# (For assignment QnA)
# =============================================================================
# 1. Model code from model.py file: (see above)
# 2. Torch summary output: The output printed above by torchsummary.
# 3. Albumentations transformation code: (see train_transform and test_transform above)
# 4. Training log: The training & testing losses/accuracy printed each epoch.
# 5. README.md: Please see the GitHub repository README at:
#    https://github.com/yourusername/S9-Assignment-Solution


Using device: cuda


  original_init(self, **validated_kwargs)
  A.CoarseDropout(max_holes=1, max_height=16, max_width=16, min_holes=1,


Model summary:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 32, 32]           2,352
       BatchNorm2d-2           [-1, 16, 32, 32]              32
              ReLU-3           [-1, 16, 32, 32]               0
            Conv2d-4           [-1, 16, 32, 32]          12,544
       BatchNorm2d-5           [-1, 16, 32, 32]              32
              ReLU-6           [-1, 16, 32, 32]               0
            Conv2d-7           [-1, 16, 32, 32]             144
       BatchNorm2d-8           [-1, 16, 32, 32]              32
              ReLU-9           [-1, 16, 32, 32]               0
           Conv2d-10           [-1, 32, 32, 32]             512
      BatchNorm2d-11           [-1, 32, 32, 32]              64
             ReLU-12           [-1, 32, 32, 32]               0
           Conv2d-13           [-1, 32, 32, 32]           9,216
      BatchNorm2d-14    

100%|██████████| 170M/170M [00:05<00:00, 28.6MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


Epoch 1 [Train]: 100%|██████████| 391/391 [00:29<00:00, 13.16it/s, accuracy=37.8, loss=1.42]

Epoch 1 Train Loss: 1.6767 Accuracy: 37.80%





Epoch 1 Test Loss: 1.5883 Accuracy: 40.15%



Epoch 2 [Train]: 100%|██████████| 391/391 [00:21<00:00, 18.56it/s, accuracy=52.4, loss=0.98]

Epoch 2 Train Loss: 1.3104 Accuracy: 52.44%





Epoch 2 Test Loss: 1.3916 Accuracy: 51.76%



Epoch 3 [Train]: 100%|██████████| 391/391 [00:21<00:00, 18.58it/s, accuracy=58.9, loss=0.862]

Epoch 3 Train Loss: 1.1393 Accuracy: 58.92%





Epoch 3 Test Loss: 1.1390 Accuracy: 58.44%



Epoch 4 [Train]: 100%|██████████| 391/391 [00:19<00:00, 20.05it/s, accuracy=63, loss=0.942]

Epoch 4 Train Loss: 1.0302 Accuracy: 63.00%





Epoch 4 Test Loss: 1.0689 Accuracy: 62.19%



Epoch 5 [Train]: 100%|██████████| 391/391 [00:20<00:00, 18.73it/s, accuracy=65.8, loss=0.908]

Epoch 5 Train Loss: 0.9608 Accuracy: 65.77%





Epoch 5 Test Loss: 0.9909 Accuracy: 64.46%



Epoch 6 [Train]: 100%|██████████| 391/391 [00:19<00:00, 19.70it/s, accuracy=67.5, loss=0.846]

Epoch 6 Train Loss: 0.9110 Accuracy: 67.52%





Epoch 6 Test Loss: 0.8918 Accuracy: 68.35%



Epoch 7 [Train]: 100%|██████████| 391/391 [00:19<00:00, 19.93it/s, accuracy=69.8, loss=0.919]

Epoch 7 Train Loss: 0.8603 Accuracy: 69.80%





Epoch 7 Test Loss: 0.9218 Accuracy: 67.75%



Epoch 8 [Train]: 100%|██████████| 391/391 [00:20<00:00, 18.94it/s, accuracy=70.9, loss=0.823]

Epoch 8 Train Loss: 0.8208 Accuracy: 70.93%





Epoch 8 Test Loss: 0.8377 Accuracy: 69.94%



Epoch 9 [Train]: 100%|██████████| 391/391 [00:19<00:00, 20.14it/s, accuracy=72, loss=0.774]

Epoch 9 Train Loss: 0.7931 Accuracy: 72.01%





Epoch 9 Test Loss: 0.8593 Accuracy: 69.94%



Epoch 10 [Train]: 100%|██████████| 391/391 [00:19<00:00, 19.77it/s, accuracy=73.3, loss=0.698]

Epoch 10 Train Loss: 0.7616 Accuracy: 73.35%





Epoch 10 Test Loss: 0.8748 Accuracy: 69.52%



Epoch 11 [Train]: 100%|██████████| 391/391 [00:20<00:00, 19.04it/s, accuracy=74.2, loss=0.859]

Epoch 11 Train Loss: 0.7384 Accuracy: 74.20%





Epoch 11 Test Loss: 0.7129 Accuracy: 75.11%



Epoch 12 [Train]: 100%|██████████| 391/391 [00:19<00:00, 19.77it/s, accuracy=74.9, loss=0.673]

Epoch 12 Train Loss: 0.7159 Accuracy: 74.85%





Epoch 12 Test Loss: 0.7461 Accuracy: 74.40%



Epoch 13 [Train]: 100%|██████████| 391/391 [00:20<00:00, 18.71it/s, accuracy=75.4, loss=0.737]

Epoch 13 Train Loss: 0.6955 Accuracy: 75.45%





Epoch 13 Test Loss: 0.7370 Accuracy: 74.80%



Epoch 14 [Train]: 100%|██████████| 391/391 [00:20<00:00, 18.93it/s, accuracy=76.1, loss=0.695]

Epoch 14 Train Loss: 0.6814 Accuracy: 76.13%





Epoch 14 Test Loss: 0.8075 Accuracy: 72.52%



Epoch 15 [Train]: 100%|██████████| 391/391 [00:19<00:00, 19.78it/s, accuracy=76.5, loss=0.795]

Epoch 15 Train Loss: 0.6684 Accuracy: 76.47%





Epoch 15 Test Loss: 0.6569 Accuracy: 77.42%



Epoch 16 [Train]: 100%|██████████| 391/391 [00:20<00:00, 18.86it/s, accuracy=77.2, loss=0.511]

Epoch 16 Train Loss: 0.6522 Accuracy: 77.21%





Epoch 16 Test Loss: 0.6603 Accuracy: 77.00%



Epoch 17 [Train]: 100%|██████████| 391/391 [00:20<00:00, 19.23it/s, accuracy=77.6, loss=0.724]

Epoch 17 Train Loss: 0.6384 Accuracy: 77.60%





Epoch 17 Test Loss: 0.6926 Accuracy: 76.23%



Epoch 18 [Train]: 100%|██████████| 391/391 [00:20<00:00, 19.45it/s, accuracy=78.2, loss=0.559]

Epoch 18 Train Loss: 0.6268 Accuracy: 78.21%





Epoch 18 Test Loss: 0.6834 Accuracy: 76.17%



Epoch 19 [Train]: 100%|██████████| 391/391 [00:20<00:00, 18.74it/s, accuracy=78.5, loss=0.587]

Epoch 19 Train Loss: 0.6174 Accuracy: 78.50%





Epoch 19 Test Loss: 0.6760 Accuracy: 76.65%



Epoch 20 [Train]: 100%|██████████| 391/391 [00:20<00:00, 19.49it/s, accuracy=78.8, loss=0.676]

Epoch 20 Train Loss: 0.6079 Accuracy: 78.79%





Epoch 20 Test Loss: 0.7708 Accuracy: 73.33%

