# Import Libraries

Let's first import all the necessary libraries

In [12]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

# Let's visualize some of the images
%matplotlib inline
import matplotlib.pyplot as plt

# Defining Model
 Create a CNN Model Skeleton

In [25]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Input Block
        self.convblock1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU()
        ) # output_size = 26  RF 3 Jout - 1

        # CONVOLUTION BLOCK 1
        self.convblock2 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU()
        ) # output_size = 24 RF 5 Jout -1

        self.convblock3 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU()
        ) # output_size = 22 RF 7 Jout - 1

        # TRANSITION BLOCK 1
        self.pool1 = nn.MaxPool2d(2, 2) # output_size = 11  RF 8 Jout - 2

        self.convblock4 = nn.Sequential(
            nn.Conv2d(in_channels=64, out_channels=32, kernel_size=(1, 1), padding=0, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU()
        ) # output_size = 11  RF 8 Jout- 2

        # CONVOLUTION BLOCK 2
        self.convblock5 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU()
        ) # output_size = 9  RF - 12 ,Jout - 2
        self.convblock6 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=32, kernel_size=(3, 3), padding=0, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU()
        ) # output_size = 7 RF - 16  Jout - 2

        self.convblock7 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=10, kernel_size=(1, 1), padding=0, bias=False),
            nn.BatchNorm2d(10),
            nn.ReLU()
        ) # output_size = 7  RF - 16  Jout - 2  
        self.gap = nn.Sequential(
            nn.AvgPool2d(kernel_size=7))

         # Fully Connected layer - Used 1x1 
        self.convblock8 = nn.Sequential(

            # 1x1 convolution
            nn.Conv2d(10, 10, 1,bias=False), # 11x11 output - 9x9 RF 18x18
            nn.ReLU(),
            nn.BatchNorm2d(10),
            nn.Dropout2d(0.1), 

            nn.Conv2d(10, 10, 1,bias=False), # 11x11 output - 9x9 RF 18x18
        )
        
                
        
        self.dropout = nn.Dropout(0.10)
        #self.convblock8 = nn.Sequential(
           # nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(7, 7), padding=0, bias=False),
           
         # output_size = 1  RF -28  Jout - 2 

    def forward(self, x):
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.convblock3(x)
        x = self.dropout(x)
        x = self.pool1(x)
        x = self.convblock4(x)
        x = self.convblock5(x)
        x = self.convblock6(x)
        x = self.dropout(x)
        x = self.convblock7(x)
        x = self.gap(x)
        x = self.convblock8(x)
        x = x.view(-1, 10)
        return F.log_softmax(x, dim=-1)

# Model Summary
 To view and to understand Model Trainable parameteres

In [26]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 16, 26, 26]             144
       BatchNorm2d-2           [-1, 16, 26, 26]              32
              ReLU-3           [-1, 16, 26, 26]               0
            Conv2d-4           [-1, 32, 24, 24]           4,608
       BatchNorm2d-5           [-1, 32, 24, 24]              64
              ReLU-6           [-1, 32, 24, 24]               0
            Conv2d-7           [-1, 64, 22, 22]          18,432
       BatchNorm2d-8           [-1, 64, 22, 22]             128
              ReLU-9           [-1, 64, 22, 22]               0
          Dropout-10           [-1, 64, 22, 22]               0
        MaxPool2d-11           [-1, 64, 11, 11]               0
           Conv2d-12           [-1, 32, 11, 11]           2,048
      BatchNorm2d-13           [-1, 32, 11, 11]              64
             ReLU-14           [-1, 32,

# The Model


In [27]:
model.eval()

Net(
  (convblock1): Sequential(
    (0): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (convblock2): Sequential(
    (0): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (convblock3): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (convblock4): Sequential(
    (0): Conv2d(64, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
  )
  (convblock5): Sequential(
    (0): Conv2d(32, 32, kernel_size=(3, 3), stride

## Load and Prepare Dataset

MNIST contains 70,000 images of handwritten digits: 60,000 for training and 10,000 for testing. The images are grayscale, 28x28 pixels

We load the PIL images using torchvision.datasets.MNIST, while loading the image we transform he data to tensor and normalize the images with mean and std deviation of MNIST images.

In [28]:
torch.manual_seed(1)
batch_size = 128

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train = datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))

test = datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, **kwargs)


# Training & Testing Functions
 Creating Training and Testing functions.

In [29]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    epoch_loss=0
    correct = 0
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()

        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

        pbar.set_description(desc= f'epoch={epoch} Loss={loss.item()} batch_id={batch_idx:05d}')


    train_loss = epoch_loss / len(train_loader.dataset)

    print('Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        train_loss, correct, len(train_loader.dataset),
        100. * correct / len(train_loader.dataset)))
    train_acc=100.*correct/len(train_loader.dataset)
    return train_loss,train_acc


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    test_acc=100. * correct / len(test_loader.dataset)
    return test_loss,test_acc

# Train & Test our Model
 Let's train and test our model

In [30]:
model =  Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
EPOCHS = 15
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

  0%|          | 0/469 [00:00<?, ?it/s]

EPOCH: 0


epoch=0 Loss=0.1149832233786583 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 42.58it/s]

Train set: Average loss: 0.0035, Accuracy: 52654/60000 (87.76%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0934, Accuracy: 9746/10000 (97.46%)

EPOCH: 1


epoch=1 Loss=0.11349499225616455 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 41.15it/s]

Train set: Average loss: 0.0010, Accuracy: 57817/60000 (96.36%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0742, Accuracy: 9776/10000 (97.76%)

EPOCH: 2


epoch=2 Loss=0.1092948243021965 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 41.27it/s]

Train set: Average loss: 0.0007, Accuracy: 58335/60000 (97.22%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0538, Accuracy: 9823/10000 (98.23%)

EPOCH: 3


epoch=3 Loss=0.06279542297124863 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 41.76it/s]

Train set: Average loss: 0.0006, Accuracy: 58643/60000 (97.74%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0368, Accuracy: 9892/10000 (98.92%)

EPOCH: 4


epoch=4 Loss=0.04463322460651398 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 42.31it/s]

Train set: Average loss: 0.0005, Accuracy: 58799/60000 (98.00%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0295, Accuracy: 9911/10000 (99.11%)

EPOCH: 5


epoch=5 Loss=0.050039708614349365 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 40.69it/s]

Train set: Average loss: 0.0004, Accuracy: 59047/60000 (98.41%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0298, Accuracy: 9919/10000 (99.19%)

EPOCH: 6


epoch=6 Loss=0.06695207208395004 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 40.88it/s]

Train set: Average loss: 0.0004, Accuracy: 59071/60000 (98.45%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0321, Accuracy: 9906/10000 (99.06%)

EPOCH: 7


epoch=7 Loss=0.03722727671265602 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 40.80it/s]

Train set: Average loss: 0.0004, Accuracy: 59157/60000 (98.59%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0243, Accuracy: 9929/10000 (99.29%)

EPOCH: 8


epoch=8 Loss=0.09088233858346939 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 42.38it/s]

Train set: Average loss: 0.0003, Accuracy: 59203/60000 (98.67%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0381, Accuracy: 9893/10000 (98.93%)

EPOCH: 9


epoch=9 Loss=0.03412767872214317 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 40.76it/s]

Train set: Average loss: 0.0003, Accuracy: 59249/60000 (98.75%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0202, Accuracy: 9938/10000 (99.38%)

EPOCH: 10


epoch=10 Loss=0.03591694310307503 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 40.48it/s]

Train set: Average loss: 0.0003, Accuracy: 59315/60000 (98.86%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0235, Accuracy: 9923/10000 (99.23%)

EPOCH: 11


epoch=11 Loss=0.012825402431190014 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 41.70it/s]

Train set: Average loss: 0.0003, Accuracy: 59322/60000 (98.87%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0229, Accuracy: 9920/10000 (99.20%)

EPOCH: 12


epoch=12 Loss=0.04044414684176445 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 41.49it/s]

Train set: Average loss: 0.0003, Accuracy: 59394/60000 (98.99%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0211, Accuracy: 9938/10000 (99.38%)

EPOCH: 13


epoch=13 Loss=0.05121802166104317 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 40.61it/s]

Train set: Average loss: 0.0002, Accuracy: 59440/60000 (99.07%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0263, Accuracy: 9923/10000 (99.23%)

EPOCH: 14


epoch=14 Loss=0.03827768191695213 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 40.36it/s]

Train set: Average loss: 0.0002, Accuracy: 59410/60000 (99.02%)






Test set: Average loss: 0.0242, Accuracy: 9928/10000 (99.28%)

