## Classification of Fashion MNIST with a Vanilla CNN Achitecture

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

## Set up the class for our Vanilla CNN

In [16]:
# Let us set up Vanilla CNN class.
class VanilllaCNN(nn.Module):
    def __init__(self, num_channels, num_classes):
        super(VanilllaCNN, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=num_channels, out_channels=32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        # we don't define 2 max pool layes, since we could just reuse this (it just halves in size any arbitrary feature map)
        self.max_pool = nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2))
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        self.fc = nn.Linear(in_features=64*7*7, out_features=num_classes)
    
    def forward(self, x):
        # we could've made the following code more compact, but optimization is not our concern right now.
        x = F.relu(self.conv1(x))
        x = self.max_pool(x)
        x = F.relu(self.conv2(x))
        x = self.max_pool(x)
        x = x.flatten(start_dim=1)
        x = self.fc(x)
        return x

#### *Sanity check for the forward pass on the network*

In [17]:
# we should set up a sanity check for our network before testing it out an actual data.
sample_tensor = torch.randn(512, 1, 28, 28)
sample_net = VanilllaCNN(1, 10)
sample_net(sample_tensor).shape

# Looks Good! For our batch_size of 512, we get a prediction (a vector of length 10) for each of the 512 images.

torch.Size([512, 10])

## Let's set up the training data loader that'll feed in batches of data

In [11]:
# let us set up a data loader for our fashion mnist dataset.
train_data = datasets.FashionMNIST(root='./data',
                                   download=True,
                                   train=True, 
                                   transform=transforms.Compose([
                                       transforms.ToTensor()
                                   ]))

train_data_loader = DataLoader(dataset=train_data, 
                               shuffle=True,
                               batch_size=512)

## Device Assignment and Training Loop (Learning rates could fuck u up)

In [12]:
# let us find and set up the device which we'll later assign tensor computations to
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [18]:
# let us set up some hyperparameters and training loop
INPUT_CHANNELS = 1
NUM_CLASSES = 10

vanilla_cnn = VanilllaCNN(INPUT_CHANNELS, NUM_CLASSES).to(device=device)
loss_criterion = nn.CrossEntropyLoss()
# REALLY different results for lr=0.1, 0.01 and 0.001 (For both MNIST *and* FashionMNIST. 
# Ma boi andrew ng wasn't foolin' around!
optimizer = torch.optim.Adam(vanilla_cnn.parameters(), lr=0.001)

for epoch in range(10):
    for idx, (data, targets) in enumerate(train_data_loader):
        data = data.to(device=device)
        targets = targets.to(device=device)
        predictions = vanilla_cnn(data)
        loss = loss_criterion(predictions, targets)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    print("Epoch: {}, loss: {}".format(epoch + 1, loss))
print("Done Learning!")

Epoch: 1, loss: 0.4126792848110199
Epoch: 2, loss: 0.2662435472011566
Epoch: 3, loss: 0.3614051640033722
Epoch: 4, loss: 0.31086310744285583
Epoch: 5, loss: 0.17994926869869232
Epoch: 6, loss: 0.23888492584228516
Epoch: 7, loss: 0.15774111449718475
Epoch: 8, loss: 0.1002868115901947
Epoch: 9, loss: 0.2108091562986374
Epoch: 10, loss: 0.2685917615890503
Done Learning!


## Testing data loader set up followed by "Accuracy Test"

In [14]:
# set up test data and test data loader
test_data = datasets.FashionMNIST(root='./data', 
                                  train=False,
                                  download=True,
                                  transform=transforms.Compose([
                                      transforms.ToTensor()
                                  ]))

test_data_loader = DataLoader(dataset=test_data,
                              shuffle=True,
                              batch_size=512)

In [19]:
num_correct = 0
num_samples = 0

vanilla_cnn.eval()
# disable gradient computation for test set
with torch.no_grad():
    for idx, (data, targets) in enumerate(test_data_loader):
        data = data.to(device=device)
        targets = targets.to(device=device)
        outputs = vanilla_cnn(data)
        values, idx_of_max_value = outputs.max(1)
        num_correct += (idx_of_max_value == targets).sum()
        num_samples += targets.shape[0]
    print(num_correct, num_samples)
    print("Accuracy of model on the test set: {}".format((num_correct.item() / num_samples) * 100))

tensor(9056, device='cuda:0') 10000
Accuracy of model on the test set: 90.56


## A slight increase in accuracy compared to the Vanilla FFN architecture ({87.18% v/s 90.56%} ~3.5% improvement)
    [This makes sense, the convolution operation makes use of spatial information and therefore has
     access to extra information that the feedforward network simply did not have].