### 5.3 Programming Task: Digit recognition using CNNs

In [1]:
import torch
import torch.utils.data as Data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from torchvision import datasets, transforms
from torchinfo import summary


%matplotlib inline

i. Complete the code for the ConvNet class given below using the network description from supplement pdf.

In [2]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, stride=1)
        self.maxpool1 = nn.MaxPool2d(2, 2)
        self.flatten = nn.Flatten(1)
        self.fc1 = nn.Linear(20 * 12*12,100)
        self.predict = nn.Linear(100, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = self.maxpool1(x)
        x = self.flatten(x)
        x = F.relu( self.fc1(x) )
        return F.log_softmax(x,dim=1) 
    

Show the net.

In [3]:
net = ConvNet()
print(net)

ConvNet(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (maxpool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (fc1): Linear(in_features=2880, out_features=100, bias=True)
  (predict): Linear(in_features=100, out_features=10, bias=True)
)


ii. Train the CNN and observe the difference in performance in comparison to the feed-forward
network from the task 5.2.

In [4]:
# Set hyper parameters.
batch_size=200
learning_rate=0.01
epochs=10

In [5]:
# Load the MNIST data set.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                   transforms.ToTensor(),
                   transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True)


test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=batch_size, shuffle=True)

In [6]:
# Set the loss function and the optimization criteria

# create a stochastic gradient descent optimizer
optimizer = optim.SGD(net.parameters(), lr=learning_rate, momentum=0.9)

# create a loss function
criterion = nn.NLLLoss()

In [7]:
# Run the main training loop
# run the main training loop
for epoch in range(epochs):
    total_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        
        # reshape data from (batch_size, 1, 28, 28) to (batch_size, 28*28)
        # data = data.view(-1, 1, 28, 28)
        
        optimizer.zero_grad()
        
        net_out = net(data)
        
        loss = criterion(net_out, target)
        total_loss+=loss.data
        
        loss.backward()
        optimizer.step()

    print('Train Epoch: {} Avg. Training Loss: {:.6f}\n'.format(
                epoch+1,
                total_loss/len(train_loader.dataset)))

Train Epoch: 1 Avg. Training Loss: 0.001708

Train Epoch: 2 Avg. Training Loss: 0.000454

Train Epoch: 3 Avg. Training Loss: 0.000338

Train Epoch: 4 Avg. Training Loss: 0.000286

Train Epoch: 5 Avg. Training Loss: 0.000245

Train Epoch: 6 Avg. Training Loss: 0.000223

Train Epoch: 7 Avg. Training Loss: 0.000204

Train Epoch: 8 Avg. Training Loss: 0.000183

Train Epoch: 9 Avg. Training Loss: 0.000172

Train Epoch: 10 Avg. Training Loss: 0.000159



In [8]:
# Run the testing loop

test_loss = 0.0
correct = 0.0

for data, target in test_loader:
    
    
    
    net_out = net(data)
    
    # sum up batch loss
    test_loss += criterion(net_out, target).data
    
    pred = net_out.data.max(dim=1)[1]  # get the index of the max log probability. Here [1] returns the argmax. 
    correct += pred.eq(target.data).sum()

test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {:.0f}/{} ({:.2%})\n'.format(
    test_loss, correct, len(test_loader.dataset),correct / len(test_loader.dataset)))


Test set: Average loss: 0.0002, Accuracy: 9852/10000 (98.52%)



iii. Calculate the number of learnable parameters and the output shape in each layer. Verify your
answers with model summary. (Refer last cell of the tutorial notebook)

In [9]:
summary(net, input_size=(1, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
ConvNet                                  [1, 100]                  1,010
├─Conv2d: 1-1                            [1, 20, 24, 24]           520
├─MaxPool2d: 1-2                         [1, 20, 12, 12]           --
├─Flatten: 1-3                           [1, 2880]                 --
├─Linear: 1-4                            [1, 100]                  288,100
Total params: 289,630
Trainable params: 289,630
Non-trainable params: 0
Total mult-adds (M): 0.59
Input size (MB): 0.00
Forward/backward pass size (MB): 0.09
Params size (MB): 1.15
Estimated Total Size (MB): 1.25