Target:

MNIST Basic Neural Network model included with Batch Normalization and GAP layers.

Results:



*   Parameters: 5,088
*   Best Train Accuracy: 99.03%
*   Best Test Accuracy: 99.04%

Analysis:

# Import Libraries

Let's first import all the necessary libraries

In [2]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

# Let's visualize some of the images
%matplotlib inline
import matplotlib.pyplot as plt

# Defining Model
 Create a CNN Model Skeleton

In [23]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()


        #Block 1
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 8, 3,padding=0,bias=False),  # 28x28 output 28x28 RF : 3x3
            nn.BatchNorm2d(8),
            nn.ReLU(),

            nn.Conv2d(8, 16, 3,padding=0,bias=False), # 28x28 output 28x28 RF : 5x5
            nn.BatchNorm2d(16),
            nn.ReLU(),

                    
        )

        #Transition Block (MaxPool + 1x1)
        self.trans1 = nn.Sequential(
            nn.MaxPool2d(2, 2),
            # 1x1 convolution
            nn.Conv2d(16, 8, 1,bias=False), # 26x26 output - 26x26 RF 14x14
            nn.BatchNorm2d(8),
            nn.ReLU(),

              # 26x26 output - 13x13 RF 14x14

        )

        #Block 2
        self.conv2 =  nn.Sequential(

            nn.Conv2d(8, 10, 3,padding=0, bias=False), # 13x13 output - 11x11 RF 16x16
            nn.BatchNorm2d(10),
            nn.ReLU(),

            nn.Conv2d(10, 16, 3,padding=0, bias=False),  # 11x11 output - 9x9 RF 18x18
            nn.BatchNorm2d(16),
            nn.ReLU(),

            nn.Conv2d(16, 10, 3,padding=0, bias=False), # 9x9 output - 7x7 RF 20x20
            nn.BatchNorm2d(10),
            nn.ReLU(),

            
            
        )
        self.avgpool2d = nn.AvgPool2d(kernel_size=6)

        

    def forward(self, x):
        x = self.conv1(x)
        x = self.trans1(x)
        x = self.conv2(x)
        x = self.avgpool2d(x)
        #x = self.conv3(x)
        x = x.view(-1,10)

        return F.log_softmax(x,dim=1)

# Model Summary
 To view and to understand Model Trainable parameteres

In [24]:
!pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
model = Net().to(device)
summary(model, input_size=(1, 28, 28))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 26, 26]              72
       BatchNorm2d-2            [-1, 8, 26, 26]              16
              ReLU-3            [-1, 8, 26, 26]               0
            Conv2d-4           [-1, 16, 24, 24]           1,152
       BatchNorm2d-5           [-1, 16, 24, 24]              32
              ReLU-6           [-1, 16, 24, 24]               0
         MaxPool2d-7           [-1, 16, 12, 12]               0
            Conv2d-8            [-1, 8, 12, 12]             128
       BatchNorm2d-9            [-1, 8, 12, 12]              16
             ReLU-10            [-1, 8, 12, 12]               0
           Conv2d-11           [-1, 10, 10, 10]             720
      BatchNorm2d-12           [-1, 10, 10, 10]              20
             ReLU-13           [-1, 10, 10, 10]               0
           Conv2d-14             [-1, 1

# The Model


In [25]:
model.eval()

Net(
  (conv1): Sequential(
    (0): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU()
  )
  (trans1): Sequential(
    (0): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (1): Conv2d(16, 8, kernel_size=(1, 1), stride=(1, 1), bias=False)
    (2): BatchNorm2d(8, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
  )
  (conv2): Sequential(
    (0): Conv2d(8, 10, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (1): BatchNorm2d(10, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU()
    (3): Conv2d(10, 16, kernel_size=(3, 3), stride=(1, 1), bias=False)
    (4): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, tr

## Load and Prepare Dataset

MNIST contains 70,000 images of handwritten digits: 60,000 for training and 10,000 for testing. The images are grayscale, 28x28 pixels

We load the PIL images using torchvision.datasets.MNIST, while loading the image we transform he data to tensor and normalize the images with mean and std deviation of MNIST images.

In [26]:
torch.manual_seed(1)
batch_size = 128

kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}
train = datasets.MNIST('../data', train=True, download=True,
                    transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))

test = datasets.MNIST('../data', train=False, transform=transforms.Compose([
                        transforms.ToTensor(),
                        transforms.Normalize((0.1307,), (0.3081,))
                    ]))
train_loader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=False, **kwargs)


# Training & Testing Functions
 Creating Training and Testing functions.

In [27]:
from tqdm import tqdm
def train(model, device, train_loader, optimizer, epoch):
    model.train()
    epoch_loss=0
    correct = 0
    pbar = tqdm(train_loader)
    for batch_idx, (data, target) in enumerate(pbar):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        epoch_loss += loss.item()
        loss.backward()
        optimizer.step()

        pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
        correct += pred.eq(target.view_as(pred)).sum().item()

        pbar.set_description(desc= f'epoch={epoch} Loss={loss.item()} batch_id={batch_idx:05d}')


    train_loss = epoch_loss / len(train_loader.dataset)

    print('Train set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        train_loss, correct, len(train_loader.dataset),
        100. * correct / len(train_loader.dataset)))
    train_acc=100.*correct/len(train_loader.dataset)
    return train_loss,train_acc


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()  # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('Test set: Average loss: {:.4f}, Accuracy: {}/{} ({:.2f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))
    test_acc=100. * correct / len(test_loader.dataset)
    return test_loss,test_acc

# Train & Test our Model
 Let's train and test our model

In [28]:
model =  Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
EPOCHS = 15
for epoch in range(EPOCHS):
    print("EPOCH:", epoch)
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

  0%|          | 0/469 [00:00<?, ?it/s]

EPOCH: 0


epoch=0 Loss=0.23534537851810455 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 42.59it/s]

Train set: Average loss: 0.0051, Accuracy: 53612/60000 (89.35%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.2363, Accuracy: 9521/10000 (95.21%)

EPOCH: 1


epoch=1 Loss=0.1264253705739975 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 42.38it/s]


Train set: Average loss: 0.0013, Accuracy: 58299/60000 (97.17%)



  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.1294, Accuracy: 9753/10000 (97.53%)

EPOCH: 2


epoch=2 Loss=0.10109394788742065 batch_id=00468: 100%|██████████| 469/469 [00:10<00:00, 42.76it/s]


Train set: Average loss: 0.0009, Accuracy: 58671/60000 (97.78%)



  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0941, Accuracy: 9828/10000 (98.28%)

EPOCH: 3


epoch=3 Loss=0.08147398382425308 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 42.47it/s]

Train set: Average loss: 0.0007, Accuracy: 58881/60000 (98.14%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0824, Accuracy: 9820/10000 (98.20%)

EPOCH: 4


epoch=4 Loss=0.0488554984331131 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 42.60it/s]

Train set: Average loss: 0.0006, Accuracy: 58988/60000 (98.31%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0904, Accuracy: 9782/10000 (97.82%)

EPOCH: 5


epoch=5 Loss=0.014701624400913715 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 42.42it/s]

Train set: Average loss: 0.0006, Accuracy: 59062/60000 (98.44%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0624, Accuracy: 9852/10000 (98.52%)

EPOCH: 6


epoch=6 Loss=0.10077115893363953 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 41.60it/s]


Train set: Average loss: 0.0005, Accuracy: 59147/60000 (98.58%)



  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0671, Accuracy: 9841/10000 (98.41%)

EPOCH: 7


epoch=7 Loss=0.08814585208892822 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 42.47it/s]

Train set: Average loss: 0.0005, Accuracy: 59177/60000 (98.63%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0509, Accuracy: 9875/10000 (98.75%)

EPOCH: 8


epoch=8 Loss=0.04032563045620918 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 42.01it/s]

Train set: Average loss: 0.0004, Accuracy: 59227/60000 (98.71%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0526, Accuracy: 9867/10000 (98.67%)

EPOCH: 9


epoch=9 Loss=0.059296976774930954 batch_id=00468: 100%|██████████| 469/469 [00:10<00:00, 42.89it/s]

Train set: Average loss: 0.0004, Accuracy: 59243/60000 (98.74%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0529, Accuracy: 9867/10000 (98.67%)

EPOCH: 10


epoch=10 Loss=0.06356145441532135 batch_id=00468: 100%|██████████| 469/469 [00:10<00:00, 42.94it/s]

Train set: Average loss: 0.0004, Accuracy: 59341/60000 (98.90%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0523, Accuracy: 9864/10000 (98.64%)

EPOCH: 11


epoch=11 Loss=0.08982154726982117 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 42.10it/s]

Train set: Average loss: 0.0004, Accuracy: 59339/60000 (98.90%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0478, Accuracy: 9879/10000 (98.79%)

EPOCH: 12


epoch=12 Loss=0.02868678607046604 batch_id=00468: 100%|██████████| 469/469 [00:10<00:00, 42.96it/s]

Train set: Average loss: 0.0004, Accuracy: 59362/60000 (98.94%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0463, Accuracy: 9882/10000 (98.82%)

EPOCH: 13


epoch=13 Loss=0.08350250869989395 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 42.56it/s]

Train set: Average loss: 0.0004, Accuracy: 59376/60000 (98.96%)




  0%|          | 0/469 [00:00<?, ?it/s]

Test set: Average loss: 0.0410, Accuracy: 9894/10000 (98.94%)

EPOCH: 14


epoch=14 Loss=0.0625385269522667 batch_id=00468: 100%|██████████| 469/469 [00:11<00:00, 41.48it/s]

Train set: Average loss: 0.0003, Accuracy: 59420/60000 (99.03%)






Test set: Average loss: 0.0394, Accuracy: 9904/10000 (99.04%)



# Results in Graphical View
  Grpahs plotting using  Matplot library to view the results