<a href="https://colab.research.google.com/github/ckgpeace/EVA5B2/blob/main/S4/Session4_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Importing the required libraries

In [1]:
# Importing the required libraries

from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

## Defining the model architecture


1. The convolution layers are grouped into `nn.Sequential` blocks.
2. Only one MaxPool layer is used.
3. BatchNorm, GAP (global average pooling) and Dropout are used. Please check their positions; you can experiment with their positions to understand how the accuracy changes.
4. Check all the dimensions before and after the convolution layers.
5. Pay special attention to the last block: no ReLU, no BatchNorm and no Dropout before the prediction layer.



In [2]:
# Model 6:

class Net(nn.Module):
    """Small fully-convolutional classifier for 1x28x28 inputs with 10 output classes.

    Three sequential convolution blocks are followed by an average pool that
    collapses the remaining 4x4 feature map, so there is no fully connected
    layer. Every 3x3 convolution is followed by ReLU and then BatchNorm; the
    final 1x1 convolution is deliberately left without ReLU, BatchNorm or
    Dropout so the raw class scores reach ``log_softmax`` untouched.

    Spatial sizes below assume a 28x28 input.
    """

    def __init__(self):
        super().__init__()

        # Convolution Block 1: padded 3x3 convs keep 28x28, MaxPool halves it.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 8, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(8),

            nn.Conv2d(8, 8, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(8),

            nn.Conv2d(8, 16, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.MaxPool2d(2, 2),                 # the only MaxPool layer
            nn.Dropout(0.25)
        )  # in = 28, out = 14, RF = 8

        # Convolution Block 2: three unpadded 3x3 convs, each trims 2 pixels.
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 16, 3),
            nn.ReLU(),
            nn.BatchNorm2d(16),

            nn.Conv2d(16, 16, 3),
            nn.ReLU(),
            nn.BatchNorm2d(16),

            nn.Conv2d(16, 16, 3),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(0.25)
        )  # in = 14, out = 8, RF = 20

        # Convolution Block 3: two unpadded 3x3 convs, then a 1x1 conv that
        # maps 32 channels to the 10 class scores (no activation afterwards).
        self.conv3 = nn.Sequential(
            nn.Conv2d(16, 16, 3),
            nn.ReLU(),
            nn.BatchNorm2d(16),

            nn.Conv2d(16, 32, 3),
            nn.ReLU(),
            nn.BatchNorm2d(32),

            nn.Conv2d(32, 10, 1)
        )  # in = 8, out = 4, RF = 28

        # GAP layer: averages the 4x4 map down to 1x1 per class channel.
        self.gap = nn.AvgPool2d(4)

    def forward(self, x):
        """Return per-class log-probabilities of shape (batch, 10).

        Args:
            x: input batch of shape (batch, 1, 28, 28).
        """
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.gap(x)
        x = x.view(-1, 10)
        # dim must be explicit: the implicit-dimension form is deprecated and
        # warns on every call. dim=1 normalises over the 10 class scores.
        return F.log_softmax(x, dim=1)

## Model Summary

In [3]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
              ReLU-2            [-1, 8, 28, 28]               0
       BatchNorm2d-3            [-1, 8, 28, 28]              16
            Conv2d-4            [-1, 8, 28, 28]             584
              ReLU-5            [-1, 8, 28, 28]               0
       BatchNorm2d-6            [-1, 8, 28, 28]              16
            Conv2d-7           [-1, 16, 28, 28]           1,168
              ReLU-8           [-1, 16, 28, 28]               0
       BatchNorm2d-9           [-1, 16, 28, 28]              32
        MaxPool2d-10           [-1, 16, 14, 14]               0
          Dropout-11           [-1, 16, 14, 14]               0
           Conv2d-12           [-1, 16, 12, 12]           2,320
             ReLU-13           [-1, 16, 12, 12]               0
      BatchNorm2d-14           [-1, 16,



### Data loader - MNIST dataset

In [4]:
# Seed the global torch RNG before any loader is iterated.
torch.manual_seed(1)
batch_size = 128

# Worker / pinned-memory options are only passed when CUDA is in use.
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}
else:
    kwargs = {}

# Identical preprocessing for both splits: convert to tensor, then normalise.
# (0.1307, 0.3081) are presumably the MNIST mean / std -- TODO confirm.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# Training split; this call is the one that downloads the data if missing.
train_set = datasets.MNIST('../data', train=True, download=True,
                           transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, **kwargs)

# Test split; relies on the files already being present under '../data'.
test_set = datasets.MNIST('../data', train=False, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=True, **kwargs)


### TRAIN and TEST functions to evaluate the model
Negative log-likelihood loss (`F.nll_loss`) is used as the loss function.

In [5]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    """Run one pass over ``train_loader``, updating ``model`` in place.

    Args:
        model: network whose output is fed to ``F.nll_loss``.
        device: device each batch is moved to before the forward pass.
        train_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimizer stepped once per batch.
        epoch: accepted for the caller's convenience; not used in the body.
    """
    model.train()
    progress = tqdm(train_loader)
    for step, (inputs, labels) in enumerate(progress):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Clear gradients left over from the previous batch.
        optimizer.zero_grad()
        batch_loss = F.nll_loss(model(inputs), labels)
        batch_loss.backward()
        optimizer.step()

        # Show the latest batch loss on the progress bar.
        progress.set_description(desc=f'loss={batch_loss.item()} batch_id={step}')


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

Finally!! Training the model and measuring loss and accuracy on test data

In [6]:
# Start from a freshly initialised network and an SGD optimizer with momentum.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Epochs are numbered 1..19, i.e. 19 passes over the training set,
# each followed by an evaluation on the test set.
NUM_EPOCHS = 19
for epoch in range(1, NUM_EPOCHS + 1):
    train(model=model, device=device, train_loader=train_loader,
          optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=test_loader)

loss=0.06360013037919998 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 31.00it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0592, Accuracy: 9831/10000 (98%)



loss=0.02867789752781391 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.51it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0371, Accuracy: 9889/10000 (99%)



loss=0.027006983757019043 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.03it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0283, Accuracy: 9911/10000 (99%)



loss=0.025313591584563255 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 29.33it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0257, Accuracy: 9920/10000 (99%)



loss=0.010297711007297039 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.24it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0227, Accuracy: 9927/10000 (99%)



loss=0.02375417947769165 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 32.10it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0207, Accuracy: 9935/10000 (99%)



loss=0.06187198683619499 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.58it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0212, Accuracy: 9934/10000 (99%)



loss=0.003474951721727848 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.46it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0210, Accuracy: 9921/10000 (99%)



loss=0.09437204152345657 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.73it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0159, Accuracy: 9946/10000 (99%)



loss=0.010342560708522797 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.48it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0169, Accuracy: 9948/10000 (99%)



loss=0.031973764300346375 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 31.27it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0195, Accuracy: 9933/10000 (99%)



loss=0.06317823380231857 batch_id=468: 100%|██████████| 469/469 [00:16<00:00, 29.31it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0186, Accuracy: 9943/10000 (99%)



loss=0.01414125319570303 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.32it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0165, Accuracy: 9945/10000 (99%)



loss=0.0018174051074311137 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.08it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0166, Accuracy: 9943/10000 (99%)



loss=0.003661229507997632 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 29.47it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0191, Accuracy: 9943/10000 (99%)



loss=0.002132546389475465 batch_id=468: 100%|██████████| 469/469 [00:14<00:00, 31.96it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0153, Accuracy: 9949/10000 (99%)



loss=0.005010238382965326 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 29.74it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0187, Accuracy: 9940/10000 (99%)



loss=0.004511484410613775 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.02it/s]
  0%|          | 0/469 [00:00<?, ?it/s]


Test set: Average loss: 0.0162, Accuracy: 9950/10000 (100%)



loss=0.005666978191584349 batch_id=468: 100%|██████████| 469/469 [00:15<00:00, 30.76it/s]



Test set: Average loss: 0.0172, Accuracy: 9950/10000 (100%)

