In [None]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [4]:
# Convert each MNIST image to a PyTorch tensor. A single sample comes out 3-D,
# (channels, height, width) = (1, 28, 28); the leading "number of images"
# dimension only appears once samples are batched.
transform = transforms.ToTensor()

In [5]:
# MNIST training split (60,000 images) and test split (10,000 images).
# Each split is stored under its own root directory and shares the same transform.
train_dataset = datasets.MNIST(
    root='MNIST_train_data',
    train=True,
    transform=transform,
    download=True,
)
test_dataset = datasets.MNIST(
    root='MNIST_test_data',
    train=False,
    transform=transform,
    download=True,
)

100.0%
100.0%
100.0%
100.0%
100.0%
100.0%
100.0%
100.0%


In [8]:
# Show the training split summary (number of datapoints, root, transform)
train_dataset

Dataset MNIST
    Number of datapoints: 60000
    Root location: MNIST_train_data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [9]:
# Show the test split summary (number of datapoints, root, transform)
test_dataset

Dataset MNIST
    Number of datapoints: 10000
    Root location: MNIST_test_data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [11]:
# Serve the data in mini-batches of 10 images.
# Training batches are shuffled; test batches keep the dataset's stored order.
batch_size = 10
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size, shuffle=False)

In [12]:
# Stand-alone copies of the two convolutional layers, used in the next cells
# to trace how a single image's shape changes at each step.
# Both layers use a 3x3 kernel with stride 1.
conv1 = nn.Conv2d(1, 6, kernel_size=3, stride=1)   # 1 input channel (grayscale) -> 6 output channels
conv2 = nn.Conv2d(6, 16, kernel_size=3, stride=1)  # 6 input channels -> 16 output channels

In [13]:
# Grab the first MNIST record (image tensor, label) by indexing the dataset
# directly instead of starting a loop and breaking out of it immediately.
i = 0  # index of the record; still bound to 0, exactly as the old enumerate loop left it
X_Train, y_Train = train_dataset[i]

In [16]:
X_Train.shape  # One sample is (channels, height, width) = (1, 28, 28): 1 grayscale channel, 28x28 pixels

torch.Size([1, 28, 28])

In [17]:
x = X_Train.view(1, 1, 28, 28)  # Add a leading batch dimension: (1, 28, 28) -> (batch_size, channels, height, width)

In [19]:
# First convolution, followed by the ReLU activation
x = conv1(x)
x = F.relu(x)

In [21]:
x.shape  # Shape after first convolution (1, 6, 26, 26) - 6 channels, 26x26 pixels (28 - 3 + 1 = 26 for a 3x3 kernel)

torch.Size([1, 6, 26, 26])

In [22]:
# Down-sample with 2x2 max pooling (kernel size 2, stride 2)
x = F.max_pool2d(x, 2, 2)

In [23]:
x.shape  # Shape after first pooling (1, 6, 13, 13) - 6 channels, 13x13 pixels (26 / 2 = 13)

torch.Size([1, 6, 13, 13])

In [24]:
# Second convolution, followed by the ReLU activation
x = conv2(x)
x = F.relu(x)

In [25]:
x.shape  # Shape after second convolution (1, 16, 11, 11) - 16 channels, 11x11 pixels (13 - 3 + 1 = 11)

torch.Size([1, 16, 11, 11])

In [26]:
# Down-sample the second convolution's output with 2x2 max pooling (kernel size 2, stride 2)
x = F.max_pool2d(x, 2, 2)

In [27]:
x.shape  # Shape after second pooling (1, 16, 5, 5) - 16 channels, 5x5 pixels (11 // 2 = 5, the odd last row/column is dropped)

torch.Size([1, 16, 5, 5])

In [28]:
# Model Class
class CNNModel(nn.Module):
    """Small convolutional classifier for 1x28x28 (MNIST-sized) images.

    Two 3x3 convolutions, each followed by ReLU and 2x2 max pooling, feed
    three fully connected layers that emit one score per digit class.
    """

    # Features per sample after the second pooling step for a 28x28 input:
    # 16 channels of 5x5 feature maps.
    _FLAT_FEATURES = 16 * 5 * 5

    def __init__(self):
        """Create the two convolutional and three fully connected layers."""
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)

        # Fully connected layers
        self.fc1 = nn.Linear(self._FLAT_FEATURES, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)  # 10 output classes

    def forward(self, x):
        """Run a batch of images through the network.

        Args:
            x: Tensor of shape (batch_size, 1, 28, 28).

        Returns:
            Tensor of shape (batch_size, 10) holding log-probabilities
            (log_softmax over the class dimension). Because the output is
            already log-probabilities, nn.NLLLoss is the matching criterion;
            nn.CrossEntropyLoss re-applies log_softmax internally.

        Raises:
            RuntimeError: if the input's spatial size does not reduce to
                16 * 5 * 5 features per sample (raised by the first
                fully connected layer).
        """
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, kernel_size=2, stride=2)

        # Flatten everything except the batch dimension. Unlike
        # view(-1, 400), this can never fold extra features into the batch
        # dimension: a wrongly sized input fails loudly at fc1 instead of
        # silently producing more rows than there were images.
        x = torch.flatten(x, start_dim=1)

        # Fully connected layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)

        return F.log_softmax(x, dim=1)  # Log softmax for multi-class classification

In [30]:
# Create an instance of the model
torch.manual_seed(42)  # For reproducibility: seed the RNG before the layers are constructed
model = CNNModel()
model  # Display the layer summary

CNNModel(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [32]:
# Loss function and optimizer
# CNNModel.forward() already returns log-probabilities (log_softmax), so the
# matching criterion is the negative log-likelihood loss. CrossEntropyLoss
# would apply log_softmax a second time on top of the model's own output.
criterion = nn.NLLLoss()  # Negative log-likelihood loss for multi-class classification
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)  # Adam optimizer
# Note: a smaller learning rate means smaller update steps, so the model learns more slowly