### 5.3 Programming Task: Digit recognition using CNNs

In [18]:
import torch
import torch.utils.data as Data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from torchvision import datasets, transforms
from torchinfo import summary


%matplotlib inline

i. Complete the code for the ConvNet class given below, using the network description from the supplement PDF.

In [19]:
class ConvNet(nn.Module):
    """Small CNN: one convolution + max-pool stage followed by two linear layers.

    The first linear layer expects 20*12*12 flattened features, which is what
    the convolution (5x5 kernel, stride 1, 20 output channels) followed by 2x2
    max-pooling yields for a single-channel 28x28 input. The final layer
    emits 10 raw scores per sample; no softmax is applied inside the network.
    """

    def __init__(self):
        super().__init__()
        # Convolutional stage.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        self.relu = nn.ReLU()  # stateless, so the same module is reused after fully1
        self.max_pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.flatten = nn.Flatten()
        # Fully connected stage.
        self.fully1 = nn.Linear(in_features=20 * 12 * 12, out_features=100)
        self.fully2 = nn.Linear(in_features=100, out_features=10)

    def forward(self, x):
        """Return the 10 output scores for each sample in the batch ``x``."""
        pooled = self.max_pool(self.relu(self.conv1(x)))
        hidden = self.relu(self.fully1(self.flatten(pooled)))
        return self.fully2(hidden)


Show the net.

In [20]:
# Seed torch's global RNG before the network is built so that the initial
# weights (and any later draws from the same generator, such as shuffling)
# are repeatable across Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Instantiate the network and show its layer structure.
net = ConvNet()
print(net)

ConvNet(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (relu): ReLU()
  (max_pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fully1): Linear(in_features=2880, out_features=100, bias=True)
  (fully2): Linear(in_features=100, out_features=10, bias=True)
)


ii. Train the CNN and observe how its performance differs from that of the feed-forward
network from Task 5.2.

In [21]:
# Set hyper parameters.
# lr is the learning rate handed to optim.Adam when the optimizer is created.
lr = 0.01

In [22]:
# Load the MNIST data set.
from torch.utils.data import DataLoader

# Preprocessing applied to every image: convert to a tensor, then normalise
# the single channel with mean 0.5 and standard deviation 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Build the training split first, then the test split; both are stored
# under ./data and requested with download=True.
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=is_train, transform=transform, download=True)
    for is_train in (True, False)
)

# Mini-batches of 64 samples; only the training data is reshuffled.
loader_options = dict(batch_size=64)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
test_loader = DataLoader(test_dataset, shuffle=False, **loader_options)

In [25]:
# Optimisation setup: Adam updates all parameters of `net` using learning
# rate `lr`; cross-entropy is the loss evaluated on the network outputs.
optimizer = optim.Adam(params=net.parameters(), lr=lr)
loss_criterion = nn.CrossEntropyLoss()

In [26]:
# Run the main training loop
# Number of full passes over the training set.
NUM_EPOCHS = 10

for epoch in range(NUM_EPOCHS):
    net.train()
    running_loss = 0.0
    samples_seen = 0
    # `images`/`labels` instead of `input`/`label`: a loop variable named
    # `input` would rebind the builtin input() for every later cell.
    for images, labels in train_loader:
        optimizer.zero_grad()  # clear gradients left over from the previous step
        logits = net(images)
        loss = loss_criterion(logits, labels)
        loss.backward()
        optimizer.step()

        # The criterion returns the batch mean, so weight it by the batch size
        # to obtain a correct per-sample average over the whole epoch.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size

    mean_loss = running_loss / max(samples_seen, 1)
    print(f"epoch {epoch + 1}/{NUM_EPOCHS} - mean training loss: {mean_loss:.4f}")


In [27]:
# Run the testing loop
# Switch to inference mode explicitly; the preceding cell leaves the network
# in training mode, which matters as soon as mode-dependent layers are added.
net.eval()

total = 0
correct = 0
with torch.no_grad():  # no gradients are needed for evaluation
    # `images`/`labels` instead of `input`/`label` so the builtin input()
    # is not rebound at notebook scope.
    for images, labels in test_loader:
        logits = net(images)
        # Predicted class = index of the largest output score per sample.
        _, pred = torch.max(logits, 1)
        total += labels.size(0)
        correct += (pred == labels).sum().item()

# Fraction of test samples classified correctly.
accuracy = correct / total
print(accuracy)

0.9766


iii. Calculate the number of learnable parameters and the output shape of each layer. Verify your
answers with the model summary. (Refer to the last cell of the tutorial notebook.)

In [32]:
# Import under a distinct name so this does not silently rebind `summary`,
# which the first cell already imported from torchinfo.
from torchsummary import summary as torchsummary_summary

# torchsummary defaults to device="cuda"; on a machine with a GPU that would
# feed a CUDA dummy input to a model whose weights live elsewhere. Pass the
# device the model's parameters are actually on instead.
model_device = next(net.parameters()).device.type
torchsummary_summary(net, input_size=(1, 28, 28), device=model_device)

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 20, 24, 24]             520
              ReLU-2           [-1, 20, 24, 24]               0
         MaxPool2d-3           [-1, 20, 12, 12]               0
           Flatten-4                 [-1, 2880]               0
            Linear-5                  [-1, 100]         288,100
              ReLU-6                  [-1, 100]               0
            Linear-7                   [-1, 10]           1,010
Total params: 289,630
Trainable params: 289,630
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.22
Params size (MB): 1.10
Estimated Total Size (MB): 1.33
----------------------------------------------------------------
