## EVA5 Session 4: Neural Architecture Basics
#### Group Assignment
### Training on the MNIST digit dataset under model-parameter and epoch restrictions.

In [16]:
# Importing supporting modules/libraries
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [17]:
class Net(nn.Module):
    """Small CNN classifier for single-channel 28x28 digit images.

    The network is made of two convolution blocks joined by a transition
    (max-pool followed by a 1x1 channel-reducing convolution) and ends with
    global average pooling over a 10-channel map, so there is no fully
    connected layer. It holds 19,778 trainable parameters.

    ``forward`` returns per-class log-probabilities of shape ``(N, 10)``,
    which is the input expected by ``F.nll_loss``.
    """

    def __init__(self):
        """Build the convolution blocks, the transition and the GAP head."""
        super().__init__()

        # Shape comments are H*W*C for a 28*28*1 input; RF is the receptive
        # field of one output pixel with respect to the input image.
        self.convblock1 = nn.Sequential(
            # 28*28*1 -> 28*28*8 (padding keeps the size), RF 3*3
            nn.Conv2d(1, 8, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(8),
            # 28*28*8 -> 28*28*16, RF 5*5
            nn.Conv2d(8, 16, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(0.1),
            # 28*28*16 -> 28*28*16, RF 7*7
            nn.Conv2d(16, 16, 3, padding=1),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(0.1),
            # 28*28*16 -> 26*26*32 (no padding), RF 9*9
            nn.Conv2d(16, 32, 3),
            nn.ReLU(),
            nn.BatchNorm2d(32)
        )
        self.transition1 = nn.Sequential(
            # 26*26*32 -> 13*13*32, RF 10*10 (stride 2 doubles later RF growth)
            nn.MaxPool2d(2, 2),
            # 13*13*32 -> 13*13*8, RF 10*10 (1x1 conv only mixes channels)
            nn.Conv2d(32, 8, 1),
            nn.ReLU(),
            nn.BatchNorm2d(8)
        )
        self.convblock2 = nn.Sequential(
            # 13*13*8 -> 11*11*16, RF 14*14
            nn.Conv2d(8, 16, 3),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(0.1),
            # 11*11*16 -> 9*9*16, RF 18*18
            nn.Conv2d(16, 16, 3),
            nn.ReLU(),
            nn.BatchNorm2d(16),
            nn.Dropout(0.1),
            # 9*9*16 -> 7*7*32, RF 22*22
            nn.Conv2d(16, 32, 3),
            nn.ReLU(),
            nn.BatchNorm2d(32),
            # 7*7*32 -> 5*5*10, RF 26*26 (one channel per digit class)
            nn.Conv2d(32, 10, 3)
        )
        # 5*5*10 -> 1*1*10: average each class map down to a single score
        self.gap_output = nn.AdaptiveAvgPool2d((1, 1))

        # Alternative (inactive) configuration kept for reference. It was
        # reported to reach 99.46 validation accuracy in 12 epochs with
        # 17.4k model parameters.
        #
        # self.convblock1 = nn.Sequential(
        #             nn.Conv2d(1, 8, 3, padding=1),
        #             nn.ReLU(),
        #             nn.BatchNorm2d(8),
        #             nn.Conv2d(8, 16, 3, padding=1),
        #             nn.ReLU(),
        #             nn.BatchNorm2d(16),
        #             nn.Dropout(0.1),
        #             nn.Conv2d(16, 32, 3),
        #             nn.ReLU(),
        #             nn.BatchNorm2d(32),
        #             nn.Dropout(0.1)
        # )
        # self.transition1 = nn.Sequential(
        #             nn.MaxPool2d(2, 2),
        #             nn.Conv2d(32, 8, 1),
        #             nn.ReLU(),
        #             nn.BatchNorm2d(8)
        # )
        # self.convblock2 = nn.Sequential(
        #             nn.Conv2d(8, 16, 3),
        #             nn.ReLU(),
        #             nn.BatchNorm2d(16),
        #             nn.Dropout(0.1),
        #             nn.Conv2d(16, 16, 3),
        #             nn.ReLU(),
        #             nn.BatchNorm2d(16),
        #             nn.Dropout(0.1),
        #             nn.Conv2d(16, 32, 3),
        #             nn.ReLU(),
        #             nn.BatchNorm2d(32),
        #             nn.Conv2d(32, 10, 3)
        # )

    def forward(self, x):
        """Return log-probabilities of shape ``(N, 10)`` for a batch ``x``.

        ``x`` is a float tensor of shape ``(N, 1, H, W)``; the shape comments
        in ``__init__`` assume ``H == W == 28``.
        """
        x = self.convblock1(x)
        x = self.transition1(x)
        x = self.convblock2(x)
        x = self.gap_output(x)  # Classification layer: (N, 10, 1, 1)
        x = x.view(-1, 10)      # Drop the two singleton spatial dimensions
        # Normalise across the class dimension. Passing ``dim`` explicitly
        # avoids the deprecated implicit-dimension behaviour and its warning.
        return F.log_softmax(x, dim=1)

In [18]:
# Installing libraries for getting model informations
!pip install torchsummary
from torchsummary import summary

# Checking for GPU avalibility
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Transferring the model run to GPU instance
model = Net().to(device)

# Getting the model summary
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
              ReLU-2            [-1, 8, 28, 28]               0
       BatchNorm2d-3            [-1, 8, 28, 28]              16
            Conv2d-4           [-1, 16, 28, 28]           1,168
              ReLU-5           [-1, 16, 28, 28]               0
       BatchNorm2d-6           [-1, 16, 28, 28]              32
           Dropout-7           [-1, 16, 28, 28]               0
            Conv2d-8           [-1, 16, 28, 28]           2,320
              ReLU-9           [-1, 16, 28, 28]               0
      BatchNorm2d-10           [-1, 16, 28, 28]              32
          Dropout-11           [-1, 16, 28, 28]               0
           Conv2d-12           [-1, 32, 26, 26]           4,640
             ReLU-13           [-1, 32, 26, 26]               0
      BatchNorm2d-14           [-1, 32,



In [19]:
# Seed the global RNG so repeated runs behave consistently
torch.manual_seed(1)

# Number of images processed in a single forward pass
batch_size = 512

# Worker / pinned-memory options are only passed when running on a GPU
kwargs = dict(num_workers=1, pin_memory=True) if use_cuda else {}

# Shared preprocessing: convert each image to a tensor, then normalise it
# with the fixed train-dataset mean and std (the same values are applied to
# the test images).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# MNIST training split (downloaded to ../data if missing) and its loader
train_set = datasets.MNIST(
    '../data', train=True, download=True, transform=mnist_transform)
train_loader = torch.utils.data.DataLoader(
    train_set, batch_size=batch_size, shuffle=True, **kwargs)

# MNIST test split and its loader.
# NOTE(review): the test loader also shuffles. test() only accumulates sums
# over the whole set, so ordering does not change the reported metrics;
# shuffle=True is kept here to leave the run's behaviour untouched.
test_set = datasets.MNIST(
    '../data', train=False, transform=mnist_transform)
test_loader = torch.utils.data.DataLoader(
    test_set, batch_size=batch_size, shuffle=True, **kwargs)


In [20]:
from tqdm import tqdm

# Function defination for train the model
def train(model, device, train_loader, optimizer, epoch):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Network whose output is fed to ``F.nll_loss`` (i.e. it is
            expected to produce log-probabilities).
        device: Device the input and target batches are moved to.
        train_loader: Iterable yielding ``(data, target)`` batches.
        optimizer: Optimizer that updates ``model``'s parameters.
        epoch: Current epoch number. Not used inside the function; kept so
            existing call sites keep working.

    Returns:
        None. Progress is reported through the tqdm progress bar only.
    """
    model.train()  # Enable training behaviour (dropout, batch-norm updates)
    pbar = tqdm(train_loader)
    # Iterate over the dataset one batch at a time
    for batch_idx, (data, target) in enumerate(pbar):
        # Move the batch of images and labels to the target device
        data, target = data.to(device), target.to(device)
        # Clear gradients left over from the previous step
        optimizer.zero_grad()
        # Forward pass
        output = model(data)
        # Negative log-likelihood loss averaged over the batch
        loss = F.nll_loss(output, target)
        # Backpropagate and apply the parameter update
        loss.backward()
        optimizer.step()
        # Show the latest batch loss in the progress bar
        pbar.set_description(desc= f'loss={loss.item()} batch_id={batch_idx}')
    
def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            # Loading test image and label to GPU 
            data, target = data.to(device), target.to(device)
            output = model(data)
            # Summing up batch loss over all test dataset
            test_loss += F.nll_loss(output, target, reduction='sum').item()  
            # Get the index of the max log-probability
            pred = output.argmax(dim=1, keepdim=True)  
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.5f}, Accuracy: {}/{} ({:.3f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))

In [21]:
# Create a fresh network and place it on the selected device
model = Net().to(device)

# Stochastic gradient descent optimiser with learning rate and momentum
optimizer = optim.SGD(model.parameters(), lr=0.05, momentum=0.9)

# Train for the allowed number of epochs, evaluating after each one
NUM_EPOCHS = 12
for epoch in range(1, NUM_EPOCHS + 1):
    print("EPOCH: ", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

  0%|          | 0/118 [00:00<?, ?it/s]

EPOCH:  1


loss=0.033397164195775986 batch_id=117: 100%|██████████| 118/118 [00:16<00:00,  7.29it/s]
  0%|          | 0/118 [00:00<?, ?it/s]


Test set: Average loss: 0.05837, Accuracy: 9814/10000 (98.140%)

EPOCH:  2


loss=0.016654614359140396 batch_id=117: 100%|██████████| 118/118 [00:16<00:00,  7.27it/s]
  0%|          | 0/118 [00:00<?, ?it/s]


Test set: Average loss: 0.03876, Accuracy: 9873/10000 (98.730%)

EPOCH:  3


loss=0.049862828105688095 batch_id=117: 100%|██████████| 118/118 [00:16<00:00,  6.96it/s]
  0%|          | 0/118 [00:00<?, ?it/s]


Test set: Average loss: 0.04037, Accuracy: 9868/10000 (98.680%)

EPOCH:  4


loss=0.01443264540284872 batch_id=117: 100%|██████████| 118/118 [00:16<00:00,  7.31it/s]
  0%|          | 0/118 [00:00<?, ?it/s]


Test set: Average loss: 0.02869, Accuracy: 9909/10000 (99.090%)

EPOCH:  5


loss=0.031058140099048615 batch_id=117: 100%|██████████| 118/118 [00:16<00:00,  7.34it/s]
  0%|          | 0/118 [00:00<?, ?it/s]


Test set: Average loss: 0.02694, Accuracy: 9905/10000 (99.050%)

EPOCH:  6


loss=0.00789621938019991 batch_id=117: 100%|██████████| 118/118 [00:16<00:00,  7.35it/s]
  0%|          | 0/118 [00:00<?, ?it/s]


Test set: Average loss: 0.02448, Accuracy: 9914/10000 (99.140%)

EPOCH:  7


loss=0.06326224654912949 batch_id=117: 100%|██████████| 118/118 [00:15<00:00,  7.45it/s]
  0%|          | 0/118 [00:00<?, ?it/s]


Test set: Average loss: 0.02205, Accuracy: 9929/10000 (99.290%)

EPOCH:  8


loss=0.007270460948348045 batch_id=117: 100%|██████████| 118/118 [00:16<00:00,  7.22it/s]
  0%|          | 0/118 [00:00<?, ?it/s]


Test set: Average loss: 0.01939, Accuracy: 9940/10000 (99.400%)

EPOCH:  9


loss=0.03460037335753441 batch_id=117: 100%|██████████| 118/118 [00:16<00:00,  7.32it/s]
  0%|          | 0/118 [00:00<?, ?it/s]


Test set: Average loss: 0.02055, Accuracy: 9926/10000 (99.260%)

EPOCH:  10


loss=0.034471478313207626 batch_id=117: 100%|██████████| 118/118 [00:16<00:00,  7.17it/s]
  0%|          | 0/118 [00:00<?, ?it/s]


Test set: Average loss: 0.02443, Accuracy: 9927/10000 (99.270%)

EPOCH:  11


loss=0.007330236490815878 batch_id=117: 100%|██████████| 118/118 [00:15<00:00,  7.40it/s]
  0%|          | 0/118 [00:00<?, ?it/s]


Test set: Average loss: 0.01932, Accuracy: 9933/10000 (99.330%)

EPOCH:  12


loss=0.004413722548633814 batch_id=117: 100%|██████████| 118/118 [00:16<00:00,  7.32it/s]



Test set: Average loss: 0.01805, Accuracy: 9941/10000 (99.410%)



## Reached the 99.4% validation-accuracy mark twice within 12 epochs, using under 20k (~19.77K) model parameters.
#### - 99.40% validation accuracy in epoch 8
#### - 99.41% validation accuracy in epoch 12