# Assignment 6 - Part B

#### 1. Less than 20K parameters
#### 2. Less than 20 epochs
#### 3. At least 99.4% accuracy
#### 4. Have used batch normalization and dropout

In [1]:
#Import all necessary libraries
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

## Neural network architecture



1.   1st Convolution layer with 1 input and 16 output channels
2.   This is followed by a batch normalization and a max pool layer
3. A dropout layer is added after this
4. Another block of exact same layers (convolution, batch norm, max pool and dropout). This has 16 channels as input and 32 channels as output
5. Third convolution layer with 32 input channels and 32 output channels. This helps in increasing our receptive field while using small number of parameters
6. Final convolution layer/output layer, which has 32 inputs and 10 output channels (corresponding to our 10 classes)
7. We use a ReLU activation after each of our convolulation layer
8. We unroll the final output into a 1X10 output array and apply log_softmax to get out final output



In [2]:
class Net(nn.Module):
  def __init__(self):
    super(Net,self).__init__()
    self.conv1 = nn.Conv2d(1, 16, 3)
    self.norm1 = nn.BatchNorm2d(16)
    self.pool1 = nn.MaxPool2d(2, 2)
    self.drop1 = nn.Dropout(0.10)
    self.conv2 = nn.Conv2d(16, 32, 3)
    self.norm2 = nn.BatchNorm2d(32)
    self.pool2 = nn.MaxPool2d(2, 2)
    self.drop2 = nn.Dropout(0.10)
    self.conv3 = nn.Conv2d(32, 32, 3)
    self.conv4 = nn.Conv2d(32, 10, 3)

  def forward(self, x):
    # After 1st conv -> n_in = 28, p = 0, s = 1, k = 3, n_out = 26, j_in = 1, j_out = 1, r_in = 1, r_out = 3
    # After 1st Max Pool -> n_in = 26, p = 0, s = 2, k = 2, n_out = 13, j_in = 1, j_out = 2, r_in = 3, r_out = 4
    x = self.drop1(self.pool1(self.norm1(F.relu(self.conv1(x)))))
    # After 2nd conv -> n_in = 13, p = 0, s = 1, k = 3, n_out = 11, j_in = 2, j_out = 2, r_in = 4, r_out = 8
    # After 2nd Max Pool -> n_in = 11, p = 0, s = 2, k = 2, n_out = 5, j_in = 2, j_out = 4, r_in = 8, r_out = 10
    x = self.drop2(self.pool2(self.norm2(F.relu(self.conv2(x)))))
    # After 3rd conv -> n_in = 5, p = 0, s = 1, k = 3, n_out = 3, j_in = 4, j_out = 4, r_in = 10, r_out = 18
    # After 4th conv -> n_in = 13, p = 0, s = 1, k = 3, n_out = 11, j_in = 2, j_out = 2, r_in = 4, r_out = 26
    x = F.relu(self.conv4(F.relu(self.conv3(x))))
    x = x.view(-1,10)
    return F.log_softmax(x, dim=1)




In [3]:
#Creating model summary
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available() #Check if CUDA is available or not
device = torch.device("cuda" if use_cuda else "cpu") #Use CUDA if available
model = Net().to(device) #Load model to device
summary(model, input_size=(1, 28, 28)) #Create summary if input image is 28X28

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             160
       BatchNorm2d-2           [-1, 16, 26, 26]              32
         MaxPool2d-3           [-1, 16, 13, 13]               0
           Dropout-4           [-1, 16, 13, 13]               0
            Conv2d-5           [-1, 32, 11, 11]           4,640
       BatchNorm2d-6           [-1, 32, 11, 11]              64
         MaxPool2d-7             [-1, 32, 5, 5]               0
           Dropout-8             [-1, 32, 5, 5]               0
            Conv2d-9             [-1, 32, 3, 3]           9,248
           Conv2d-10             [-1, 10, 1, 1]           2,890
Total params: 17,034
Trainable params: 17,034
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.28
Params size (MB): 0.06
Estimated Tot

## Creating Dataloader object for training and testing model

In [4]:
torch.manual_seed(1) #Set seed for random number generator. Makes sure to generate the same numbers given the same input. Helps to keep code reproducible.
batch_size = 128 #Set batch size. Number of images which will be processed in one go.

kwargs = {'num_workers': 2, 'pin_memory': True} if use_cuda else {} #Set number of threads and optimize file copy operations
train_loader = torch.utils.data.DataLoader( #Create data loader objects and apply different compositions. Normalize all images.
    datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.RandomAffine(degrees=20, translate=(0.1,0.1), scale=(0.9, 1.1)),
                        transforms.ColorJitter(brightness=0.2, contrast=0.2),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader( #Dataloader object for test dataset. Normalize all images.
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ])),
    batch_size=batch_size, shuffle=True, **kwargs)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 73139630.96it/s]


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 87715926.01it/s]


Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 24516960.77it/s]


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 16933803.35it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw






## Creating training and testing methods

In [5]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train() #Set model to train mode
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad() #Preventing gradient accumulation
        output = model(data)
        loss = F.nll_loss(output, target) #Negative log likelihood loss
        loss.backward() #Backpropagation. Weight calculation.
        optimizer.step() #Update parameter values
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')


def test(model, device, test_loader):
    model.eval() #Set model to test
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

## Train and test the model

In [6]:
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) #Using SGD optimizer

for epoch in range(1, 19): #Run for 18 epochs
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

loss=0.33171555399894714 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.44it/s]



Test set: Average loss: 0.0588, Accuracy: 9807/10000 (98.07%)



loss=0.14228419959545135 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.58it/s]



Test set: Average loss: 0.0472, Accuracy: 9852/10000 (98.52%)



loss=0.1556026041507721 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.55it/s]



Test set: Average loss: 0.0372, Accuracy: 9878/10000 (98.78%)



loss=0.07359858602285385 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.59it/s]



Test set: Average loss: 0.0344, Accuracy: 9883/10000 (98.83%)



loss=0.04077104106545448 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.43it/s]



Test set: Average loss: 0.0340, Accuracy: 9885/10000 (98.85%)



loss=0.015617698431015015 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.57it/s]



Test set: Average loss: 0.0277, Accuracy: 9906/10000 (99.06%)



loss=0.17869256436824799 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.59it/s]



Test set: Average loss: 0.0299, Accuracy: 9898/10000 (98.98%)



loss=0.09060642123222351 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.45it/s]



Test set: Average loss: 0.0249, Accuracy: 9924/10000 (99.24%)



loss=0.06253983825445175 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.53it/s]



Test set: Average loss: 0.0275, Accuracy: 9912/10000 (99.12%)



loss=0.0598558634519577 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.47it/s]



Test set: Average loss: 0.0246, Accuracy: 9917/10000 (99.17%)



loss=0.051030904054641724 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.58it/s]



Test set: Average loss: 0.0212, Accuracy: 9929/10000 (99.29%)



loss=0.030841229483485222 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.56it/s]



Test set: Average loss: 0.0234, Accuracy: 9924/10000 (99.24%)



loss=0.029306868091225624 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.44it/s]



Test set: Average loss: 0.0233, Accuracy: 9929/10000 (99.29%)



loss=0.061530470848083496 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.58it/s]



Test set: Average loss: 0.0209, Accuracy: 9936/10000 (99.36%)



loss=0.09780261665582657 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.55it/s]



Test set: Average loss: 0.0216, Accuracy: 9940/10000 (99.40%)



loss=0.06268691271543503 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.50it/s]



Test set: Average loss: 0.0220, Accuracy: 9925/10000 (99.25%)



loss=0.1130494698882103 batch_id=468: 100%|██████████| 469/469 [00:54<00:00,  8.55it/s]



Test set: Average loss: 0.0220, Accuracy: 9927/10000 (99.27%)



loss=0.09432690590620041 batch_id=468: 100%|██████████| 469/469 [00:55<00:00,  8.46it/s]



Test set: Average loss: 0.0217, Accuracy: 9935/10000 (99.35%)

