### 5.3 Programming Task: Digit recognition using CNNs

In [1]:
import torch
import torch.utils.data as Data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from torchvision import datasets, transforms
from torchinfo import summary


%matplotlib inline

i. Complete the code for the ConvNet class given below using the network description from supplement pdf.

In [2]:

        
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        self.fc1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        self.fc2 = nn.MaxPool2d(kernel_size=2)
        self.fc3 = nn.Linear(in_features=20*12*12, out_features=100)
        # [(28 + 2*0 - 5)/1] + 1 = 24
        # Since we applied maxpooling with kernel_size=2 and stride=2
        # (24 -2 / 2) + 1 = 12
        # output_channel=20 of conv layer
        self.fc4 = nn.Linear(in_features=100, out_features=10)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = self.fc2(x) 
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc3(x))
        x = self.fc4(x)
        return F.log_softmax(x,dim=1) 
    

    

Show the net.

In [3]:
net = ConvNet()
print(net)

ConvNet(
  (fc1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (fc2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc3): Linear(in_features=2880, out_features=100, bias=True)
  (fc4): Linear(in_features=100, out_features=10, bias=True)
)


ii. Train the CNN and observe the difference in performance in comparison to the feed-forward
network from the task 5.2.

In [4]:
# Set hyper parameters.
batch_size=200
learning_rate=0.01
epochs=10



In [5]:
# Load the MNIST data set.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=True, download=True,
                   transform=transforms.Compose([
                   transforms.ToTensor(),
                   transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=batch_size, shuffle=True)


test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('../data', train=False, transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
        ])),
        batch_size=batch_size, shuffle=True)

In [6]:
# Set the loss function and the optimization criteria
optimiser = optim.SGD(net.parameters(), lr=0.01)
criterion = nn.NLLLoss()

In [7]:
# Run the main training loop

for epoch in range(epochs):
    net.train()
    total_loss = 0.0
    for batch_idx, (data, target) in enumerate(train_loader):
        
        optimiser.zero_grad()
        
        net_out = net(data)
        
        loss = criterion(net_out, target)
        total_loss+=loss.data
        
        loss.backward()
        optimiser.step()

    print('Train Epoch: {} Avg. Training Loss: {:.6f}\n'.format(
                epoch+1,
                total_loss/len(train_loader.dataset)))


Train Epoch: 1 Avg. Training Loss: 0.003934

Train Epoch: 2 Avg. Training Loss: 0.001483

Train Epoch: 3 Avg. Training Loss: 0.001185

Train Epoch: 4 Avg. Training Loss: 0.000989

Train Epoch: 5 Avg. Training Loss: 0.000846

Train Epoch: 6 Avg. Training Loss: 0.000733

Train Epoch: 7 Avg. Training Loss: 0.000647

Train Epoch: 8 Avg. Training Loss: 0.000577

Train Epoch: 9 Avg. Training Loss: 0.000523

Train Epoch: 10 Avg. Training Loss: 0.000476



In [8]:
test_loss = 0.0
correct = 0.0

for data, target in test_loader:    
    net_out = net(data)
    
    # sum up batch loss
    test_loss += criterion(net_out, target).data
    
    pred = net_out.data.max(dim=1)[1]   
    correct += pred.eq(target.data).sum()

test_loss /= len(test_loader.dataset)
print('\nTest set: Average loss: {:.4f}, Accuracy: {:.0f}/{} ({:.2%})\n'.format(
    test_loss, correct, len(test_loader.dataset),correct / len(test_loader.dataset)))
print("Comparable accuracy compared to feed-forward network")


Test set: Average loss: 0.0004, Accuracy: 9741/10000 (97.41%)

Comparable accuracy compared to feed-forward network


iii. Calculate the number of learnable parameters and the output shape in each layer. Verify your
answers with model summary. (Refer last cell of the tutorial notebook)

In [9]:
summary(net, input_size=(200, 1, 28, 28))

Layer (type:depth-idx)                   Output Shape              Param #
ConvNet                                  [200, 10]                 --
├─Conv2d: 1-1                            [200, 20, 24, 24]         520
├─MaxPool2d: 1-2                         [200, 20, 12, 12]         --
├─Linear: 1-3                            [200, 100]                288,100
├─Linear: 1-4                            [200, 10]                 1,010
Total params: 289,630
Trainable params: 289,630
Non-trainable params: 0
Total mult-adds (M): 117.73
Input size (MB): 0.63
Forward/backward pass size (MB): 18.61
Params size (MB): 1.16
Estimated Total Size (MB): 20.39

In [11]:
#Layer 1 -- Convolutional Layer
N=28 
F=5 
S=1
output_channnels = 20
Out_shape_1 = [batch_size, output_channnels, (int)(((N-F)/S) + 1), (int)(((N-F)/S) + 1)]
print("Layer 1 -- Convolutional Layer")
print("Output Shape")
print(Out_shape_1)
print("Parameters")
Parameters_1 = ((F*F)+1)* output_channnels
print(Parameters_1)

#Layer 2 -- MaxPool Layer
N=24 
F=2 
S=2
output_channnels = 20
Out_shape_2 = [batch_size, 20, (int)(((N-F)/S) + 1), (int)(((N-F)/S) + 1)]
print("\nLayer 2 -- MaxPool Layer")
print("Output Shape")
print(Out_shape_2)
print("No Parameters due to no Back propagation learning")

#Layer 3 -- Fully Connected Layer(Linear)
# 100 Neurons
Out_shape_3 = [batch_size, 100]
print("\nLayer 3 -- Fully Connected Layer(Linear)")
print("Output Shape")
print(Out_shape_3)
print("Parameters")
input_3 = 20 * 12 * 12
output_3 = 100
Parameters_3 = ( input_3 * output_3 ) + 100
print(Parameters_3)

#Layer 4 -- Fully Connected Layer(Linear)
# 10 Classes
Out_shape_4 = [batch_size, 10]
print("\nLayer 4 -- Fully Connected Layer(Linear) ")
print("Output Shape")
print(Out_shape_4)
print("Parameters")
input_4 = 100
output_4 = 10
Parameters_4 = ( input_4 * output_4 ) + 10
print(Parameters_4)


Layer 1 -- Convolutional Layer
Output Shape
[200, 20, 24, 24]
Parameters
520

Layer 2 -- MaxPool Layer
Output Shape
[200, 20, 12, 12]
No Parameters due to no Back propagation learning

Layer 3 -- Fully Connected Layer(Linear)
Output Shape
[200, 100]
Parameters
288100

Layer 4 -- Fully Connected Layer(Linear) 
Output Shape
[200, 10]
Parameters
1010
