In [33]:
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [34]:
train_transforms = transforms.Compose([transforms.RandomRotation((-5.0, 5.0), fill=(1,)),
                                       transforms.ToTensor(),
                                       transforms.Normalize((0.1307), (0.3081))
                                       ])
test_transforms = transforms.Compose([transforms.ToTensor(),
                                      transforms.Normalize((0.1307),(0.3081))
    
])

In [35]:
train= datasets.MNIST('./data', train=True, download=True, transform = train_transforms)
test = datasets.MNIST('./data', train=True, download=True, transform = test_transforms)

In [36]:
SEED =1
cuda = torch.cuda.is_available()
print("CUDA Available", cuda)
torch.manual_seed(SEED)

if cuda:
  torch.cuda.manual_seed(SEED)

dataloader_args = dict(shuffle=True, batch_size=128, num_workers=2, pin_memory=True)  if cuda else dict(shuffle=True, batch_size=64)

train_loader = torch.utils.data.DataLoader(train, **dataloader_args)
test_loader = torch.utils.data.DataLoader(test, **dataloader_args)


CUDA Available True


In [37]:
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # Inputblock
        self.convblock1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=(3,3), padding=1, bias=False),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.Dropout(0.05)
        )

        # Conv block 1
        self.convblock2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=16, kernel_size=(3,3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(0.05)
        )
        # Conv block 1
        self.convblock3 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(1,1), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(0.05)
        )
        
        self.pool1 = nn.MaxPool2d(2,2)


        # Conv block 2
        self.convblock5 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=16, kernel_size=(3,3), padding=0, bias=False),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.Dropout(0.05)
        )

        # Conv block 2
        self.convblock6 = nn.Sequential(
            nn.Conv2d(in_channels=16, out_channels=10, kernel_size=(3,3), padding=0, bias=False),
            nn.BatchNorm2d(10),
            nn.ReLU(),
            nn.Dropout(0.05)
        )
        # OUTPUT BLOCK
        self.convblock7 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(3, 3), padding=0, bias=False)
        ) # output_size = 7
        self.gap = nn.Sequential(
            nn.AvgPool2d(kernel_size=5)
        ) # output_size = 1
                # OUTPUT BLOCK
        self.convblock8 = nn.Sequential(
            nn.Conv2d(in_channels=10, out_channels=10, kernel_size=(1, 1), padding=0, bias=False)
        ) # output_size = 7

 
    def forward(self, x):
        x = self.convblock1(x)
        x = self.convblock2(x)
        x = self.convblock3(x)
        x = self.pool1(x)
        x = self.convblock5(x)
        x = self.convblock6(x)
        x = self.convblock7(x)
        x = self.gap(x)
        x = self.convblock8(x)
        x = x.view(-1, 10)
        return F.log_softmax(x, dim=-1)


In [38]:
! pip install torchsummary
from torchsummary import summary
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)
model = Net().to(device)
summary(model, input_size=(1,28,28),batch_size=32)

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [32, 32, 28, 28]             288
       BatchNorm2d-2           [32, 32, 28, 28]              64
              ReLU-3           [32, 32, 28, 28]               0
           Dropout-4           [32, 32, 28, 28]               0
            Conv2d-5           [32, 16, 26, 26]           4,608
       BatchNorm2d-6           [32, 16, 26, 26]              32
              ReLU-7           [32, 16, 26, 26]               0
           Dropout-8           [32, 16, 26, 26]               0
            Conv2d-9           [32, 16, 26, 26]             256
      BatchNorm2d-10           [32, 16, 26, 26]              32
             ReLU-11           [32, 16, 26, 26]               0
          Dropout-12           [32, 16, 26, 26]               0

In [39]:
from tqdm import tqdm
train_losses = []
test_losses = []
train_acc = []
test_acc = []

def train(model, device, train_loader, optimizer, epoch):
  model.train()
  pbar = tqdm(train_loader)
  correct = 0
  processed = 0
  for batch_idx, (data, target) in enumerate(pbar):
    data, target = data.to(device), target.to(device)
    optimizer.zero_grad()
    y_pred = model(data)
    loss = F.nll_loss(y_pred, target)
    train_losses.append(loss)
    loss.backward()
    optimizer.step()
    pred = y_pred.argmax(dim=1, keepdim = True)
    correct += pred.eq(target.view_as(pred)).sum().item()
    processed+=len(data)

    pbar.set_description(desc=f'Loss={loss.item()} Batch_id = {batch_idx} Accuracy = {100*correct/processed:0.2f}')
    train_acc.append(100*correct/processed)

def test(model, device , test_loader):
  model.eval()
  test_loss=0
  correct=0
  with torch.no_grad():
    for data, target in test_loader:
      data, target = data.to(device), target.to(device)
      output=model(data)
      test_loss += F.nll_loss(output, target, reduction='sum').item()
      pred = output.argmax(dim=1, keepdim=True)
      correct += pred.eq(target.view_as(pred)).sum().item()

  test_loss /= len(test_loader.dataset)
  test_losses.append(test_loss)

  print("\n Test set: Avergae loss: {:4f}, Accuracy = {}/{}({:.2f}%)\n".format(test_loss, correct, len(test_loader.dataset), 100. *correct/len(test_loader.dataset)))
  test_acc.append(100. * correct/len(test_loader.dataset))

In [40]:
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
EPOCHS=15
for epoch in range(EPOCHS):
  print("EPOCH:", epoch)
  train(model, device, train_loader, optimizer, epoch)
  test(model, device, test_loader)

EPOCH: 0


Loss=0.1698882132768631 Batch_id = 468 Accuracy = 82.35: 100%|██████████| 469/469 [00:21<00:00, 21.76it/s]



 Test set: Avergae loss: 0.146832, Accuracy = 57518/60000(95.86%)

EPOCH: 1


Loss=0.06604643166065216 Batch_id = 468 Accuracy = 96.76: 100%|██████████| 469/469 [00:17<00:00, 26.94it/s]



 Test set: Avergae loss: 0.077510, Accuracy = 58610/60000(97.68%)

EPOCH: 2


Loss=0.15583419799804688 Batch_id = 468 Accuracy = 97.71: 100%|██████████| 469/469 [00:17<00:00, 27.46it/s]



 Test set: Avergae loss: 0.090818, Accuracy = 58334/60000(97.22%)

EPOCH: 3


Loss=0.07469593733549118 Batch_id = 468 Accuracy = 98.02: 100%|██████████| 469/469 [00:17<00:00, 26.49it/s]



 Test set: Avergae loss: 0.044653, Accuracy = 59240/60000(98.73%)

EPOCH: 4


Loss=0.07657995820045471 Batch_id = 468 Accuracy = 98.10: 100%|██████████| 469/469 [00:17<00:00, 27.37it/s]



 Test set: Avergae loss: 0.051080, Accuracy = 59044/60000(98.41%)

EPOCH: 5


Loss=0.013612709939479828 Batch_id = 468 Accuracy = 98.41: 100%|██████████| 469/469 [00:17<00:00, 27.47it/s]



 Test set: Avergae loss: 0.041320, Accuracy = 59216/60000(98.69%)

EPOCH: 6


Loss=0.10125892609357834 Batch_id = 468 Accuracy = 98.48: 100%|██████████| 469/469 [00:16<00:00, 27.70it/s]



 Test set: Avergae loss: 0.034326, Accuracy = 59367/60000(98.94%)

EPOCH: 7


Loss=0.04876561835408211 Batch_id = 468 Accuracy = 98.55: 100%|██████████| 469/469 [00:17<00:00, 26.96it/s]



 Test set: Avergae loss: 0.040179, Accuracy = 59251/60000(98.75%)

EPOCH: 8


Loss=0.03257971629500389 Batch_id = 468 Accuracy = 98.62: 100%|██████████| 469/469 [00:17<00:00, 27.22it/s]



 Test set: Avergae loss: 0.029812, Accuracy = 59461/60000(99.10%)

EPOCH: 9


Loss=0.0138598857447505 Batch_id = 468 Accuracy = 98.71: 100%|██████████| 469/469 [00:17<00:00, 27.07it/s]



 Test set: Avergae loss: 0.026126, Accuracy = 59528/60000(99.21%)

EPOCH: 10


Loss=0.014238663949072361 Batch_id = 468 Accuracy = 98.72: 100%|██████████| 469/469 [00:17<00:00, 27.11it/s]



 Test set: Avergae loss: 0.028585, Accuracy = 59451/60000(99.08%)

EPOCH: 11


Loss=0.0320780873298645 Batch_id = 468 Accuracy = 98.75: 100%|██████████| 469/469 [00:17<00:00, 27.36it/s]



 Test set: Avergae loss: 0.030636, Accuracy = 59416/60000(99.03%)

EPOCH: 12


Loss=0.02422669716179371 Batch_id = 468 Accuracy = 98.93: 100%|██████████| 469/469 [00:17<00:00, 27.28it/s]



 Test set: Avergae loss: 0.025157, Accuracy = 59545/60000(99.24%)

EPOCH: 13


Loss=0.13206680119037628 Batch_id = 468 Accuracy = 98.91: 100%|██████████| 469/469 [00:16<00:00, 27.63it/s]



 Test set: Avergae loss: 0.031748, Accuracy = 59416/60000(99.03%)

EPOCH: 14


Loss=0.034962479025125504 Batch_id = 468 Accuracy = 98.87: 100%|██████████| 469/469 [00:16<00:00, 27.76it/s]



 Test set: Avergae loss: 0.023997, Accuracy = 59558/60000(99.26%)



**Target:**\
Use Max pooling at proper place\
Use Image transforms (rotation)

**Results:**\
Parameters: 10076\
Best Training Accuracy: 98.93\
Best Test Accuracy: 99.26

**Analysis:**\
Model is not performing as expected Accuracy is reaching till 99.26 percent only