### 5.3 Programming Task: Digit recognition using CNNs

In [1]:
import torch
import torch.utils.data as Data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from torchvision import datasets, transforms
from torchinfo import summary


%matplotlib inline

i. Complete the code for the ConvNet class given below using the network description from supplement pdf.

In [2]:
class ConvNet(nn.Module):
    def __init__(self):
        super(ConvNet, self).__init__()
        # First Convolutional Layer
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=20, kernel_size=5, stride=1)
        # Max Pooling Layer
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Fully Connected Layers
        self.fc1 = nn.Linear(in_features=20 * 12 * 12, out_features=100)
        self.fc2 = nn.Linear(in_features=100, out_features=10)

    def forward(self, x):
        # Convolution Layer followed by ReLU and Max Pooling
        x = self.pool(F.relu(self.conv1(x)))
        # Flattening the output for the fully connected layer
        x = x.view(-1, 20 * 12 * 12)
        # First Fully Connected Layer followed by ReLU
        x = F.relu(self.fc1(x))
        # Second Fully Connected Layer for output
        x = self.fc2(x)
        return x


Show the net.

In [3]:
net = ConvNet()
print(net)

ConvNet(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=2880, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)


ii. Train the CNN and observe the difference in performance in comparison to the feed-forward
network from the task 5.2.

In [4]:
# Set hyper parameters.
learning_rate = 0.001
batch_size = 64
epochs = 10

In [11]:
# Load the MNIST data set.
transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])
train_dataset = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False, download=True, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [12]:
# Set the loss function and the optimization criteria
model = ConvNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [13]:
# Run the main training loop
for epoch in range(epochs):
    for data, target in train_loader:
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{epochs}, Loss: {loss.item()}')

Epoch 1/10, Loss: 0.02759472280740738
Epoch 2/10, Loss: 0.01233962643891573
Epoch 3/10, Loss: 0.0009598737233318388
Epoch 4/10, Loss: 0.0056108091957867146
Epoch 5/10, Loss: 0.02495700493454933
Epoch 6/10, Loss: 0.002915570978075266
Epoch 7/10, Loss: 0.02783704549074173
Epoch 8/10, Loss: 0.0027116776909679174
Epoch 9/10, Loss: 0.00019108146079815924
Epoch 10/10, Loss: 0.0015016812831163406


In [14]:
# Run the testing loop
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for data, target in test_loader:
        output = model(data)
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()

accuracy = 100 * correct / total
print(f'Accuracy: {accuracy}%')

Accuracy: 98.83%


iii. Calculate the number of learnable parameters and the output shape in each layer. Verify your
answers with model summary. (Refer last cell of the tutorial notebook)

In [15]:
from torchinfo import summary

model = ConvNet()  # Replace ConvNet with your model class
summary(model, input_size=(batch_size, 1, 28, 28))


Layer (type:depth-idx)                   Output Shape              Param #
ConvNet                                  [64, 10]                  --
├─Conv2d: 1-1                            [64, 20, 24, 24]          520
├─MaxPool2d: 1-2                         [64, 20, 12, 12]          --
├─Linear: 1-3                            [64, 100]                 288,100
├─Linear: 1-4                            [64, 10]                  1,010
Total params: 289,630
Trainable params: 289,630
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 37.67
Input size (MB): 0.20
Forward/backward pass size (MB): 5.95
Params size (MB): 1.16
Estimated Total Size (MB): 7.31