## Imports

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

## Load MNIST dataset

In [2]:
# Convert each image to a torch tensor when it is loaded; this object is
# passed as `transform=` to both MNIST datasets created below.
transform = transforms.ToTensor()

In [3]:
# Training split of MNIST (train=True), stored under ../Data and fetched
# with download=True; every image goes through `transform`.
train_data = datasets.MNIST(
    root='../Data',
    train=True,
    download=True,
    transform=transform,
)

  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [4]:
# Held-out test split of MNIST (train=False), same root folder and same
# `transform` as the training split.
test_data = datasets.MNIST(
    root='../Data',
    train=False,
    download=True,
    transform=transform,
)

In [5]:
# Display the training set summary (number of datapoints, root, split, transform).
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: ../Data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [6]:
# Display the test set summary (number of datapoints, root, split, transform).
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: ../Data
    Split: Test
    StandardTransform
Transform: ToTensor()

## Create loaders

In [7]:
# Both loaders serve mini-batches of 10 images.
# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(dataset=train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(dataset=test_data, batch_size=10, shuffle=False)

In [8]:
# First convolution: 1 input channel (greyscale) -> 6 feature maps,
# 3x3 kernel, stride 1.
conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3, stride=1)

# Second convolution: the 6 feature maps from conv1 -> 16 feature maps,
# 3x3 kernel, stride 1.
conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3, stride=1)

## View a sample of the train data
Note that iterating over `train_data` yields one single image of shape [1,28,28], while `train_loader` yields a 4D batch of 10 images of shape [10,1,28,28].
The 3D image tensor can be viewed as a 4D tensor (a batch of one image), as shown below.

In [9]:
# Take the first (image, label) pair straight from the dataset: a single
# 3D image tensor. The label is not needed here.
X_train, _ = train_data[0]
print(X_train.shape)

# Take the first mini-batch from the loader: a 4D tensor of images.
# y_train ends up holding the labels of this batch.
X_train_b, y_train = next(iter(train_loader))
print(X_train_b.shape)

# Add a leading batch dimension so the single image has the same 4D
# layout as a loader batch.
X_train = X_train.view(1, 1, 28, 28)
print(X_train.shape)

torch.Size([1, 28, 28])
torch.Size([10, 1, 28, 28])
torch.Size([1, 1, 28, 28])


In [12]:
# X_train was reshaped with .view(1,1,28,28) above, so it is now 4D.
X_train.shape # Batch of 1 image

torch.Size([1, 1, 28, 28])

## Transform to 4D (batch of 1 image, 1 greyscale channel, 28x28 pixels)

In [30]:
# X_train is already 4D at this point, so this view leaves the shape
# unchanged; it only illustrates the target layout.
X_train.view(1,1,28,28).shape

torch.Size([1, 1, 28, 28])

In [31]:
# Working tensor for the layer-by-layer walkthrough below: the single
# image as a batch of one.
x = X_train.view(1,1,28,28)

## Pass x into conv layer
Note that we lose information at the borders (3x3 kernel, no padding), so the output is 26x26 instead of 28x28.

In [28]:
# Rebuild the 4D single-image input from X_train on every run, so that
# re-executing this cell never feeds conv1 its own 6-channel output.
x = X_train.view(1, 1, 28, 28)
# First convolution followed by ReLU.
x = F.relu(conv1(x))

In [29]:
# conv1 produces 6 channels; the 3x3 kernel shrinks 28x28 to 26x26.
x.shape

torch.Size([1, 6, 26, 26])

## Pass through pooling layer
Note that 13x13 comes from pooling with a 2x2 window and stride 2, which halves each spatial dimension: (28 - 2) / 2 = 26 / 2 = 13.

In [18]:
# 2x2 max pooling with stride 2.
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [19]:
# Expected on a clean top-to-bottom run: torch.Size([1, 6, 13, 13]) (26 / 2 = 13).
# NOTE(review): the recorded output below does not match this, and the
# execution counts are out of order - presumably stale kernel state.
# Re-run with Restart & Run All to confirm.
x.shape

torch.Size([1, 1, 1, 54])

In [20]:
# Second convolution followed by ReLU. conv2 was built with 6 input
# channels, so x must be the pooled conv1 output; the 3x3 kernel then
# shrinks 13x13 to 11x11, giving an expected shape of [1, 16, 11, 11].
# NOTE(review): the RuntimeError recorded below reports a 1-channel input,
# presumably because cells were executed out of order - re-run from the top.
x = F.relu(conv2(x))
x.shape

RuntimeError: Given groups=1, weight of size [16, 6, 3, 3], expected input[1, 1, 1, 54] to have 6 channels, but got 1 channels instead

## Perform 1 additional pooling
The 11x11 output of conv2 is pooled with a 2x2 window: 11/2 = 5.5, which is rounded down to 5.

In [20]:
# Second 2x2 max pooling with stride 2.
x = F.max_pool2d(x, kernel_size=2, stride=2)

In [21]:
# 16 channels from conv2, each pooled down to 5x5.
x.shape

torch.Size([1, 16, 5, 5])

## Flatten

In [22]:
# Flatten the 16 feature maps of 5x5 into one vector of 16*5*5 = 400
# values per image; -1 leaves the batch dimension to be inferred.
x = x.view(-1, 16 * 5 * 5)
x.shape

torch.Size([1, 400])

## Transform to output of 10

In [23]:
# Fully connected layer: 5*5*16 = 400 flattened features in, 10 values out.
fc1 = nn.Linear(in_features=5 * 5 * 16, out_features=10)

In [24]:
# Apply the linear layer to the flattened features.
x = fc1(x)

In [25]:
# One row of 10 values for the single input image.
x.shape

torch.Size([1, 10])

In [26]:
# Raw outputs of fc1 for the single image; no activation is applied after fc1 here.
# NOTE(review): no random seed is set in the cells visible here, so these
# values presumably differ from run to run.
x

tensor([[ 0.0580, -0.0702,  0.0228, -0.0667,  0.0696,  0.0151, -0.0570,  0.0420,
          0.0793, -0.0163]], grad_fn=<AddmmBackward0>)