### 5.3 Programming Task: Digit recognition using CNNs

In [6]:
import torch
import torch.utils.data as Data
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
from torchvision import datasets, transforms
from torchinfo import summary
from torch.utils.data import DataLoader


%matplotlib inline

i. Complete the code for the ConvNet class given below using the network description from the supplement PDF.

In [2]:
import torch
import torch.nn as nn

class ConvNet(nn.Module):
    """Single-convolution CNN for digit classification.

    Layer stack: 5x5 convolution (20 feature maps) -> ReLU -> 2x2 max-pool
    -> fully connected (100 units) -> ReLU -> fully connected (10 outputs).

    The first linear layer expects 20 * 14 * 14 features, i.e. a 28x28
    single-channel input: padding=2 with a 5x5 kernel at stride 1 keeps the
    28x28 resolution, and the stride-2 pooling halves it to 14x14.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor: 5x5 kernel, stride 1, 20 feature maps, followed by
        # ReLU and a 2x2 / stride-2 max-pool.
        self.conv1 = nn.Conv2d(1, 20, kernel_size=5, stride=1, padding=2)
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        # Classifier head: 100 hidden units, then one output per digit class.
        self.fc1 = nn.Linear(20 * 14 * 14, 100)
        self.relu2 = nn.ReLU()
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        """Map a batch of images to a (batch, 10) tensor of raw class scores.

        No softmax is applied here; the outputs are unnormalised scores.
        """
        feature_maps = self.pool1(self.relu1(self.conv1(x)))

        # Collapse (channels, height, width) into one feature vector per sample.
        flat = feature_maps.view(feature_maps.size(0), -1)

        hidden = self.relu2(self.fc1(flat))
        return self.fc2(hidden)


Show the net.

In [3]:
# Seed PyTorch's global RNG before any weights are created so that parameter
# initialisation (and any later draws from the same generator, such as the
# DataLoader shuffle order) repeat on a fresh Restart & Run All.
torch.manual_seed(0)

net = ConvNet()
print(net)

ConvNet(
  (conv1): Conv2d(1, 20, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (relu1): ReLU()
  (pool1): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3920, out_features=100, bias=True)
  (relu2): ReLU()
  (fc2): Linear(in_features=100, out_features=10, bias=True)
)


ii. Train the CNN and observe the difference in performance in comparison to the feed-forward
network from task 5.2.

In [4]:
# Set hyper parameters.
batch_size = 64  # number of samples per mini-batch
learning_rate = 0.01  # step size handed to the optimizer
epochs = 10  # number of full passes over the training set

In [7]:
# Load the MNIST data set.
# ToTensor converts each image to a float tensor; Normalize then applies
# (x - 0.5) / 0.5 to the single grayscale channel.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load MNIST training and testing datasets (downloaded to ./data on first use)
train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Create data loaders for batching. batch_size comes from the hyper-parameter
# cell; it is deliberately not re-assigned here so that cell stays the single
# place to tune it. Only the training data is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Print sample size for verification
print(f"Training dataset size: {len(train_dataset)}")
print(f"Testing dataset size: {len(test_dataset)}")

# Example: Display a batch shape from the train loader. Dedicated names keep
# this peek from clobbering the loop variables used during training.
sample_images, sample_labels = next(iter(train_loader))
print(f"Batch shape: {sample_images.shape}, Labels shape: {sample_labels.shape}")

Training dataset size: 60000
Testing dataset size: 10000
Batch shape: torch.Size([64, 1, 28, 28]), Labels shape: torch.Size([64])


In [8]:
# Set the loss function and the optimization criteria

# Cross-entropy loss for multi-class classification; it is applied directly to
# the raw scores returned by the network.
criterion = nn.CrossEntropyLoss()

# Stochastic Gradient Descent over all network parameters. learning_rate comes
# from the hyper-parameter cell and is not re-assigned here, so that cell
# remains the single source of truth. optim.Adam accepts the same two
# arguments if an adaptive optimizer is preferred.
optimizer = optim.SGD(net.parameters(), lr=learning_rate)

In [9]:
# Run the main training loop
def train_one_epoch(model, loader, criterion, optimizer):
    """Run one optimisation pass over ``loader`` and return the mean loss per sample.

    Args:
        model: network to train; it is switched to training mode.
        loader: iterable yielding ``(images, labels)`` batches.
        criterion: loss callable. It is assumed to return the mean loss of the
            batch (the default reduction of ``nn.CrossEntropyLoss``).
        optimizer: optimizer bound to ``model``'s parameters.

    Returns:
        The loss averaged over every sample seen in the epoch.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    samples_seen = 0

    for images, labels in loader:
        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass and loss
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        # Weight each batch mean by its size: the final batch can be smaller
        # than the rest, and averaging batch means equally would over-weight it.
        batch_samples = labels.size(0)
        loss_sum += loss.item() * batch_samples
        samples_seen += batch_samples

    if samples_seen == 0:
        raise ValueError("loader produced no samples to train on")
    return loss_sum / samples_seen


for epoch in range(epochs):
    epoch_loss = train_one_epoch(net, train_loader, criterion, optimizer)
    # Print the average loss for this epoch
    print(f"Epoch [{epoch+1}/{epochs}], Loss: {epoch_loss:.4f}")

# Save the trained model (optional)
torch.save(net.state_dict(), "cnn_mnist.pth")
print("Training complete. Model saved as 'cnn_mnist.pth'.")

Epoch [1/10], Loss: 0.5968
Epoch [2/10], Loss: 0.2027
Epoch [3/10], Loss: 0.1450
Epoch [4/10], Loss: 0.1149
Epoch [5/10], Loss: 0.0951
Epoch [6/10], Loss: 0.0832
Epoch [7/10], Loss: 0.0737
Epoch [8/10], Loss: 0.0668
Epoch [9/10], Loss: 0.0608
Epoch [10/10], Loss: 0.0559
Training complete. Model saved as 'cnn_mnist.pth'.


In [10]:
# Run the testing loop
# Function to evaluate the model
def evaluate_model(model, loader):
    """Return the fraction of samples in ``loader`` that ``model`` labels correctly.

    The model is put into evaluation mode and left there. Gradients are not
    tracked while the batches are scored.

    Args:
        model: module mapping a batch of inputs to per-class scores.
        loader: iterable yielding ``(inputs, labels)`` batches.

    Returns:
        Accuracy as a float between 0 and 1.
    """
    model.eval()
    n_correct, n_seen = 0, 0
    with torch.no_grad():
        for batch_inputs, batch_labels in loader:
            # The predicted class is the index of the highest score in each row.
            predictions = model(batch_inputs).argmax(dim=1)
            n_correct += (predictions == batch_labels).sum().item()
            n_seen += batch_labels.size(0)
    return n_correct / n_seen

# Evaluate the model on the test dataset. evaluate_model returns a fraction
# (correct / total), so it is scaled by 100 to print a percentage.
test_accuracy = evaluate_model(net, test_loader)
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Test Accuracy: 98.38%


iii. Calculate the number of learnable parameters and the output shape of each layer. Verify your
answers with the model summary. (Refer to the last cell of the tutorial notebook.)

In [11]:
# Generate a model summary for the network built above. The input size mirrors
# one training batch: batch_size samples (from the hyper-parameter cell), a
# single grayscale channel, 28x28 pixels.
model_summary = summary(net, input_size=(batch_size, 1, 28, 28))
print(model_summary)


Layer (type:depth-idx)                   Output Shape              Param #
ConvNet                                  [64, 10]                  --
├─Conv2d: 1-1                            [64, 20, 28, 28]          520
├─ReLU: 1-2                              [64, 20, 28, 28]          --
├─MaxPool2d: 1-3                         [64, 20, 14, 14]          --
├─Linear: 1-4                            [64, 100]                 392,100
├─ReLU: 1-5                              [64, 100]                 --
├─Linear: 1-6                            [64, 10]                  1,010
Total params: 393,630
Trainable params: 393,630
Non-trainable params: 0
Total mult-adds (M): 51.25
Input size (MB): 0.20
Forward/backward pass size (MB): 8.08
Params size (MB): 1.57
Estimated Total Size (MB): 9.86
