### 5.3 Programming Task: Digit recognition using CNNs

In [26]:
import torch
import torch.utils.data as Data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from torchvision import datasets, transforms
from torchinfo import summary
from tqdm import tqdm


%matplotlib inline

i. Complete the code for the ConvNet class given below using the network description from supplement pdf.

In [27]:
class ConvNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 20, 5, 1),
            nn.ReLU(),
        )
        self.pool = nn.MaxPool2d(2, 2)
        self.fc = nn.Sequential(
            nn.Linear(20 * 12 * 12, 100),
            nn.ReLU(),
        )
        self.pred = nn.Linear(100, 10)


    def forward(self, x):
        x = self.conv1(x)
        x = self.pool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        x = self.pred(x)
        return x
    

Show the net.

In [28]:
net = ConvNet()
print(net)

ConvNet(
  (conv1): Sequential(
    (0): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
  )
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc): Sequential(
    (0): Linear(in_features=2880, out_features=100, bias=True)
    (1): ReLU()
  )
  (pred): Linear(in_features=100, out_features=10, bias=True)
)


ii. Train the CNN and observe the difference in performance in comparison to the feed-forward
network from the task 5.2.

In [29]:
# Set hyper parameters.
lr = 0.001
num_epochs = 5
batch_size = 100
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

net = net.to(device)

In [30]:
# Load the MNIST data set.
train_dataset = datasets.MNIST(root='./data', train=True, transform=transforms.ToTensor(), download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transforms.ToTensor(), download=True)

train_loader = Data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
test_loader = Data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [31]:
# Set the loss function and the optimization criteria
loss_fn = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(net.parameters(), lr=lr)

In [32]:
# Run the main training loop
def train(model, dataloader, loss_fn, optimizer, num_epochs=10, device='cpu'):
    model.train()
    for i in range(num_epochs):
        loop = tqdm(enumerate(dataloader), total=len(dataloader))
        for batch_idx, (data, target) in loop:
            data, target = data.to(device), target.to(device)
            output = model(data)
            loss = loss_fn(output, target)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loop.set_description(f'Epoch [{i+1}/{num_epochs}]')
            loop.set_postfix(loss=loss.item())
            
            
train(net, train_loader, loss_fn, optimizer, num_epochs, device)

Epoch [1/5]: 100%|██████████| 600/600 [00:04<00:00, 125.56it/s, loss=0.0752]
Epoch [2/5]: 100%|██████████| 600/600 [00:04<00:00, 130.90it/s, loss=0.039]  
Epoch [3/5]: 100%|██████████| 600/600 [00:04<00:00, 135.23it/s, loss=0.0331] 
Epoch [4/5]: 100%|██████████| 600/600 [00:04<00:00, 130.70it/s, loss=0.0344] 
Epoch [5/5]: 100%|██████████| 600/600 [00:04<00:00, 131.83it/s, loss=0.0592] 


In [42]:
# Run the testing loop
def test(model, dataloader, device='cpu'):
    model.eval()
    num_correct = 0
    num_samples = 0
    loop = tqdm(enumerate(dataloader), total=len(dataloader))
    with torch.no_grad():
        for batch_idx, (data, target) in loop:
            data, target = data.to(device), target.to(device)
            output = model(data)
            _, predictions = output.max(1)
            num_correct += (predictions == target).sum()
            num_samples += predictions.size(0)
            loop.set_description(f'Testing')
            loop.set_postfix(accuracy=(float(num_correct)/float(num_samples))*100)
    print(f'Got {num_correct} / {num_samples} with accuracy {float(num_correct)/float(num_samples)*100:.2f}')
    

test(net, test_loader, device)

Testing: 100%|██████████| 100/100 [00:00<00:00, 145.18it/s, accuracy=98.8]

Got 9880 / 10000 with accuracy 98.80





iii. Calculate the number of learnable parameters and the output shape in each layer. Verify your
answers with model summary. (Refer last cell of the tutorial notebook)

In [45]:
input_shape = (1, 28, 28)

# Convolutional layer weights: kernel_size * kernel_size * in_channels * out_channels + biases (out_channels)
conv_params = 5 * 5 * 1 * 20 + 20
# Convolutional layer output shape: (in_channels, out_channels, (input_height + 2 * padding - kernel_size) / stride + 1, (input_width + 2 * padding - kernel_size) / stride + 1)
conv_shape = (20, (28 + 0 - 5) / 1 + 1, (28 + 0 - 5) / 1 + 1)

# no params for ReLu, MaxPool2d
# MaxPool2d output shape: (in_channels, out_channels, (input_height + 2 * padding - kernel_size) / stride + 1, (input_width + 2 * padding - kernel_size) / stride + 1)
pool_shape = (20, (conv_shape[1] + 0 - 2) / 2 + 1, (conv_shape[2] + 0 - 2) / 2 + 1)

# Fully connected layer weights: in_features * out_features + biases (out_features)
fc_params = 20 * 12 * 12 * 100 + 100
# Fully connected layer output shape: (out_features)
fc_shape = (100)

# final layer params: in_features * out_features + biases (out_features)
pred_params = 100 * 10 + 10
# final layer output shape: (out_features)
pred_shape = (10)

total_params = conv_params + fc_params + pred_params

print(f'Convolutional layer parameters: {conv_params}')
print(f'Covolutional layer output shape: {conv_shape}')
print(f'MaxPool2d output shape: {pool_shape}')
print(f'Fully connected layer parameters: {fc_params}')
print(f'Fully connected layer output shape: {fc_shape}')
print(f'Final layer parameters: {pred_params}')
print(f'Final layer output shape: {pred_shape}')
print(f'Total number of learnable parameters: {total_params}')

summary(net, input_size=(1, 1, 28, 28))

Convolutional layer parameters: 520
Covolutional layer output shape: (20, 24.0, 24.0)
MaxPool2d output shape: (20, 12.0, 12.0)
Fully connected layer parameters: 288100
Fully connected layer output shape: 100
Final layer parameters: 1010
Final layer output shape: 10
Total number of learnable parameters: 289630


Layer (type:depth-idx)                   Output Shape              Param #
ConvNet                                  [1, 10]                   --
├─Sequential: 1-1                        [1, 20, 24, 24]           --
│    └─Conv2d: 2-1                       [1, 20, 24, 24]           520
│    └─ReLU: 2-2                         [1, 20, 24, 24]           --
├─MaxPool2d: 1-2                         [1, 20, 12, 12]           --
├─Sequential: 1-3                        [1, 100]                  --
│    └─Linear: 2-3                       [1, 100]                  288,100
│    └─ReLU: 2-4                         [1, 100]                  --
├─Linear: 1-4                            [1, 10]                   1,010
Total params: 289,630
Trainable params: 289,630
Non-trainable params: 0
Total mult-adds (M): 0.59
Input size (MB): 0.00
Forward/backward pass size (MB): 0.09
Params size (MB): 1.16
Estimated Total Size (MB): 1.25