# Assignment 6 - Part B

#### 1. Less than 20K parameters
#### 2. Less than 20 epochs
#### 3. At least 99.4% accuracy
#### 4. Have used batch normalization and dropout

In [1]:
#Import all necessary libraries
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

## Neural network architecture



1.   1st Convolution layer with 1 input and 16 output channels
2.   This is followed by a batch normalization and a max pool layer
3. A dropout layer is added after this
4. Another block of exact same layers (convolution, batch norm, max pool and dropout). This has 16 channels as input and 32 channels as output
5. Third convolution layer with 32 input channels and 32 output channels. This helps in increasing our receptive field while using small number of parameters
6. Final convolution layer/output layer, which has 32 inputs and 10 output channels (corresponding to our 10 classes)
7. We use a ReLU activation after each of our convolulation layer
8. We unroll the final output into a 1X10 output array and apply log_softmax to get out final output



In [15]:
class Net(nn.Module):
  def __init__(self):
    super(Net,self).__init__()
    self.conv1 = nn.Conv2d(1, 16, 3)
    self.norm1 = nn.BatchNorm2d(16)
    self.pool1 = nn.MaxPool2d(2, 2)
    self.drop1 = nn.Dropout(0.10)
    self.conv2 = nn.Conv2d(16, 32, 3)
    self.norm2 = nn.BatchNorm2d(32)
    self.pool2 = nn.MaxPool2d(2, 2)
    self.drop2 = nn.Dropout(0.10)
    self.conv3 = nn.Conv2d(32, 32, 3)
    self.conv4 = nn.Conv2d(32, 10, 3)

  def forward(self, x):
    # After 1st conv -> n_in = 28, p = 0, s = 1, k = 3, n_out = 26, j_in = 1, j_out = 1, r_in = 1, r_out = 3
    # After 1st Max Pool -> n_in = 26, p = 0, s = 2, k = 2, n_out = 13, j_in = 1, j_out = 2, r_in = 3, r_out = 4
    x = self.drop1(self.pool1(self.norm1(F.relu(self.conv1(x)))))
    # After 2nd conv -> n_in = 13, p = 0, s = 1, k = 3, n_out = 11, j_in = 2, j_out = 2, r_in = 4, r_out = 8
    # After 2nd Max Pool -> n_in = 11, p = 0, s = 2, k = 2, n_out = 5, j_in = 2, j_out = 4, r_in = 8, r_out = 10
    x = self.drop2(self.pool2(self.norm2(F.relu(self.conv2(x)))))
    # After 3rd conv -> n_in = 5, p = 0, s = 1, k = 3, n_out = 3, j_in = 4, j_out = 4, r_in = 10, r_out = 18
    # After 4th conv -> n_in = 13, p = 0, s = 1, k = 3, n_out = 11, j_in = 2, j_out = 2, r_in = 4, r_out = 26
    x = F.relu(self.conv4(F.relu(self.conv3(x))))
    x = x.view(-1,10)
    return F.log_softmax(x, dim=1)




In [25]:
#Creating model summary
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             160
       BatchNorm2d-2           [-1, 16, 26, 26]              32
         MaxPool2d-3           [-1, 16, 13, 13]               0
           Dropout-4           [-1, 16, 13, 13]               0
            Conv2d-5           [-1, 32, 11, 11]           4,640
       BatchNorm2d-6           [-1, 32, 11, 11]              64
         MaxPool2d-7             [-1, 32, 5, 5]               0
           Dropout-8             [-1, 32, 5, 5]               0
            Conv2d-9             [-1, 32, 3, 3]           9,248
           Conv2d-10             [-1, 10, 1, 1]           2,890
Total params: 17,034
Trainable params: 17,034
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.28
Params size (MB): 0.06
Estimated Tot

## Creating Dataloader object for training and testing model

In [26]:
torch.manual_seed(1)
batch_size = 128

kwargs = {'num_workers': 2, 'pin_memory': True} if use_cuda else {}
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.RandomAffine(degrees=20, translate=(0.1,0.1), scale=(0.9, 1.1)),
                        transforms.ColorJitter(brightness=0.2, contrast=0.2),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)


## Creating training and testing methods

In [27]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad() #Preventing gradient accumulation
        output = model(data)
        loss = F.nll_loss(output, target) #Negative log likelihood loss
        loss.backward()
        optimizer.step()
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

## Train and test the model

In [28]:
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=15, gamma=0.1, verbose=True)

for epoch in range(1, 19):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)
    scheduler.step()

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.3436274230480194 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.38it/s]



Test set: Average loss: 0.0529, Accuracy: 9829/10000 (98.29%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.14382390677928925 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.57it/s]



Test set: Average loss: 0.0419, Accuracy: 9873/10000 (98.73%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.14485183358192444 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.55it/s]



Test set: Average loss: 0.0369, Accuracy: 9878/10000 (98.78%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.0814553052186966 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.45it/s]



Test set: Average loss: 0.0331, Accuracy: 9890/10000 (98.90%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.08535409718751907 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.48it/s]



Test set: Average loss: 0.0311, Accuracy: 9894/10000 (98.94%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.01914999820291996 batch_id=468: 100%|██████████| 469/469 [00:56<00:00,  8.35it/s]



Test set: Average loss: 0.0259, Accuracy: 9912/10000 (99.12%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.1687787026166916 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.60it/s]



Test set: Average loss: 0.0270, Accuracy: 9914/10000 (99.14%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.1027824878692627 batch_id=468: 100%|██████████| 469/469 [00:53<00:00,  8.69it/s]



Test set: Average loss: 0.0245, Accuracy: 9922/10000 (99.22%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.07965689897537231 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.38it/s]



Test set: Average loss: 0.0269, Accuracy: 9919/10000 (99.19%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.07354914397001266 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.59it/s]



Test set: Average loss: 0.0233, Accuracy: 9926/10000 (99.26%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.0401674248278141 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.46it/s]



Test set: Average loss: 0.0205, Accuracy: 9934/10000 (99.34%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.04188725724816322 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.62it/s]



Test set: Average loss: 0.0237, Accuracy: 9923/10000 (99.23%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.04236917197704315 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.58it/s]



Test set: Average loss: 0.0228, Accuracy: 9928/10000 (99.28%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.07118179649114609 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.43it/s]



Test set: Average loss: 0.0196, Accuracy: 9938/10000 (99.38%)

Adjusting learning rate of group 0 to 1.0000e-02.


loss=0.10333048552274704 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.52it/s]



Test set: Average loss: 0.0233, Accuracy: 9932/10000 (99.32%)

Adjusting learning rate of group 0 to 1.0000e-03.


loss=0.025090403854846954 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.50it/s]



Test set: Average loss: 0.0185, Accuracy: 9943/10000 (99.43%)

Adjusting learning rate of group 0 to 1.0000e-03.


loss=0.08566814661026001 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.57it/s]



Test set: Average loss: 0.0182, Accuracy: 9943/10000 (99.43%)

Adjusting learning rate of group 0 to 1.0000e-03.


loss=0.07856596261262894 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.53it/s]



Test set: Average loss: 0.0180, Accuracy: 9945/10000 (99.45%)

Adjusting learning rate of group 0 to 1.0000e-03.
