1. The code such that it uses GPU and
2. The architecture to C1C2C3C40 (No MaxPooling, but 3 3x3 layers with stride of 2 instead) 
3. Total RF must be more than 44
4. One of the layers must use Depthwise Separable Convolution
5. One of the layers must use Dilated Convolution

**Note** -: use GAP (compulsory):- add FC after GAP to target #of classes (optional)

use albumentation library and apply:

a. Horizontal flip

b. ShiftScaleRotate

c. CoarseDropout (max_holes = 1, max_height=16px, max_width=1, min_holes = 1, min_height=16px, min_width=16px, fill_value=(mean of your dataset), mask_fill_value = None)


d. Achieve 85% accuracy, as many epochs as you want. Total Params to be less than 200k.

# Import Libraries

In [26]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
import albumentations as A
from albumentations.pytorch import ToTensorV2
import numpy as np 

In [27]:
class Cifar10SearchDataset(torchvision.datasets.CIFAR10):
    def __init__(self, root="~/data/cifar10", train=True, download=True, transform=None):

        super().__init__(root=root, train=train, download=download, transform=transform)

    def __getitem__(self, index):

        image, label = self.data[index], self.targets[index]

        if self.transform is not None:
            transformed = self.transform(image=image)
            image = transformed["image"]
        return image, label

class args():

    def __init__(self,device = 'cpu' ,use_cuda = False) -> None:

        self.batch_size = 128

        self.device = device

        self.use_cuda = use_cuda

        self.kwargs = {'num_workers': 1, 'pin_memory': True} if self.use_cuda else {}

## Data Transformations

We first start with defining our data transformations. We need to think what our data is and how can we augment it to correct represent images which it might not see otherwise. 


In [44]:
train_transforms = A.Compose(
    [
        A.HorizontalFlip(p=0.5),
            A.ShiftScaleRotate(
               shift_limit=0.0625, scale_limit=0.1, 
                rotate_limit=45, interpolation=1, 
                border_mode=4, p=0.2
            ),
            A.CoarseDropout(
                max_holes=2, max_height=8, 
                max_width=8, p=0.1
            ),
        A.Normalize(
            mean = (0.491, 0.482, 0.447),
            std = (0.247, 0.243, 0.262)           
        ),
        ToTensorV2()
    ]
)

test_transforms = A.Compose(
    [
        A.Normalize(
            mean = (0.491, 0.482, 0.447),
            std = (0.247, 0.243, 0.262)
        ),
        ToTensorV2()
    ]
)

# Dataset and Creating Train/Test Split

In [29]:
SEED = 1

# CUDA?
cuda = torch.cuda.is_available()
print("CUDA Available?", cuda)

# For reproducibility
torch.manual_seed(SEED)

if cuda:
    torch.cuda.manual_seed(SEED)

trainset = Cifar10SearchDataset(root='./data', train=True, download=True, transform=train_transforms)

testset = Cifar10SearchDataset(root='./data', train=False,download=True, transform=test_transforms)

CUDA Available? True
Files already downloaded and verified
Files already downloaded and verified


# Dataloader Arguments & Test/Train Dataloaders


In [30]:
trainloader = torch.utils.data.DataLoader(trainset, batch_size=args().batch_size, shuffle=True, **args().kwargs)

testloader = torch.utils.data.DataLoader(testset, batch_size=args().batch_size, shuffle=False, **args().kwargs)     

classes = ('plane', 'car', 'bird', 'cat','deer', 'dog', 'frog', 'horse', 'ship', 'truck')

Import Model

In [45]:
import model

#Creating instance of all the 3 Models by passing Normalization Type as a Parameter

In [38]:
!pip install torchsummary
from torchsummary import summary
#import model
#from model import Net
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model = Net().to(device)
summary(model, input_size=(3, 32, 32))

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 10, 30, 30]             280
       BatchNorm2d-2           [-1, 10, 30, 30]              20
              ReLU-3           [-1, 10, 30, 30]               0
            Conv2d-4           [-1, 32, 28, 28]           2,912
       BatchNorm2d-5           [-1, 32, 28, 28]              64
              ReLU-6           [-1, 32, 28, 28]               0
            Conv2d-7           [-1, 64, 26, 26]          18,496
       BatchNorm2d-8           [-1, 64, 26, 26]             128
              ReLU-9           [-1, 64, 26, 26]               0
           Conv2d-10           [-1, 64, 22, 22]          36,928
      BatchNorm2d-11           [-1, 64, 22, 22]             128
             ReLU-12           [-1, 64, 22, 22]               0

# Training 
Let's write train function

In [19]:
def train(model, device, train_loader, optimizer, epoch, L1):
    model.train()
    epoch_loss = 0
    correct = 0

    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)

        if L1:
          L1_loss = nn.L1Loss(size_average=None, reduce=None, reduction='mean')
          reg_loss = 0 
          for param in model.parameters():
            zero_vector = torch.rand_like(param) * 0
            reg_loss += L1_loss(param,zero_vector)
          loss += .001 * reg_loss

        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()
        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

    print(f'Train set: Average loss: {loss.item():.4f}, Accuracy: {100. * correct/len(train_loader.dataset):.2f}')
    train_loss = epoch_loss / len(train_loader)
    train_acc=100.*correct/len(train_loader.dataset)
    return train_loss, train_acc

# Testing
Let's write testing function

In [20]:
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    test_pred = torch.LongTensor()
    target_pred = torch.LongTensor()
    target_data = torch.LongTensor()

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            pred_cpu = output.cpu().data.max(dim=1, keepdim=True)[1]
            correct += pred.eq(target.view_as(pred)).sum().item()
            test_pred = torch.cat((test_pred, pred_cpu), dim=0)
            target_pred = torch.cat((target_pred, target.cpu()), dim=0)
            target_data = torch.cat((target_data, data.cpu()), dim=0)


    test_loss /= len(test_loader.dataset)
    test_acc = 100.*correct/len(test_loader.dataset)
    print(f'\nTest set: Average loss: {test_loss:.3f}, Accuracy: {100. * correct/len(test_loader.dataset):.2f}')
    return test_loss, test_acc, test_pred, target_pred, target_data

# A main function calling test and train functions

#Input Params

*   EPOCHS
*   model
*   device
*   train_loader
*   test_loader
*   optimizer
*   L1 (Lasso Regression is true or false)

#OutPut Params
* train_loss_values
* test_loss_values
* train_acc_values
* test_acc_values
* test_pred
* target_pred
* target_data


In [22]:
def main(EPOCHS, model, device, train_loader, test_loader, optimizer, L1):
  train_loss_values = []
  test_loss_values = []
  train_acc_values = []
  test_acc_values = []

  for epoch in range(EPOCHS):
      print('\nEpoch {} : '.format(epoch))
      # train the model
      train_loss, train_acc = train(model, device, train_loader, optimizer, epoch, L1)
      test_loss, test_acc, test_pred, target_pred, target_data  = test(model, device, test_loader)

      train_loss_values.append(train_loss)
      test_loss_values.append(test_loss)

      train_acc_values.append(train_acc)
      test_acc_values.append(test_acc)

  return train_loss_values, test_loss_values, train_acc_values, test_acc_values, test_pred, target_pred, target_data

# Calling the Model with 75 Epochs

In [25]:
EPOCHS = 75
model =  Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.04, momentum=0.9)
l1reg = False
train_loss, test_loss, train_acc, test_acc , test_pred, target_pred, target_data = main(EPOCHS, model, device, trainloader, testloader, optimizer, l1reg)



Epoch 0 : 
Train set: Average loss: 1.6668, Accuracy: 36.99

Test set: Average loss: 1.439, Accuracy: 47.88

Epoch 1 : 
Train set: Average loss: 1.1627, Accuracy: 50.12

Test set: Average loss: 1.317, Accuracy: 53.22

Epoch 2 : 
Train set: Average loss: 1.3828, Accuracy: 57.03

Test set: Average loss: 1.080, Accuracy: 61.90

Epoch 3 : 
Train set: Average loss: 1.0919, Accuracy: 61.69

Test set: Average loss: 1.049, Accuracy: 63.08

Epoch 4 : 
Train set: Average loss: 0.8020, Accuracy: 64.39

Test set: Average loss: 0.951, Accuracy: 66.01

Epoch 5 : 
Train set: Average loss: 0.9759, Accuracy: 66.70

Test set: Average loss: 0.868, Accuracy: 70.25

Epoch 6 : 
Train set: Average loss: 0.9657, Accuracy: 68.48

Test set: Average loss: 0.838, Accuracy: 71.01

Epoch 7 : 
Train set: Average loss: 0.8676, Accuracy: 70.01

Test set: Average loss: 0.787, Accuracy: 72.48

Epoch 8 : 
Train set: Average loss: 0.7883, Accuracy: 71.41

Test set: Average loss: 0.805, Accuracy: 71.62

Epoch 9 : 
Train s