# MNIST with CNN

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

## Load Dataset

In [2]:
# Only conversion applied to each image: PIL image -> torch tensor
# (no normalisation or augmentation is configured).
transform = transforms.ToTensor()

In [3]:
# Training split of MNIST; the raw files are downloaded under the root directory if missing.
# NOTE(review): "/Dataset" is an absolute path at the filesystem root - a relative,
# configurable data directory would be more portable.
train_data = datasets.MNIST(root="/Dataset", train=True, download=True, transform=transform)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /Dataset/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 91931018.63it/s]


Extracting /Dataset/MNIST/raw/train-images-idx3-ubyte.gz to /Dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /Dataset/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 31785802.63it/s]


Extracting /Dataset/MNIST/raw/train-labels-idx1-ubyte.gz to /Dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /Dataset/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 24930935.60it/s]


Extracting /Dataset/MNIST/raw/t10k-images-idx3-ubyte.gz to /Dataset/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /Dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 3882316.85it/s]

Extracting /Dataset/MNIST/raw/t10k-labels-idx1-ubyte.gz to /Dataset/MNIST/raw






In [4]:
# Held-out test split (train=False), read from the same root with the same transform.
test_data = datasets.MNIST(root="/Dataset", train=False, download=True, transform=transform)

In [5]:
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: /Dataset
    Split: Train
    StandardTransform
Transform: ToTensor()

In [6]:
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: /Dataset
    Split: Test
    StandardTransform
Transform: ToTensor()

In [7]:
# Training batches of 10 images; shuffle=True reorders the samples every epoch.
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)

In [8]:
# Evaluation batches of 10 images, served in fixed dataset order (shuffle=False).
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)

## Define Model

In [9]:
# Stand-alone layers used below to trace tensor shapes step by step.
# 1 input channel (grayscale), 6 output channels (filters), 3x3 kernel, stride 1
conv1 = nn.Conv2d(1,6,3,1)   # output goes through pooling, then into conv2

# 6 input channels (conv1's output), 16 output channels, 3x3 kernel, stride 1
conv2 = nn.Conv2d(6,16,3,1)

In [10]:
# First batch
for b,(X_train, y_train) in enumerate(train_data):
  break

In [11]:
X_train.shape

torch.Size([1, 28, 28])

In [12]:
y_train

5

In [13]:
# Add a leading batch dimension: (1, 28, 28) -> (1, 1, 28, 28), i.e. a 4-D batch holding one image
x = X_train.view(1,1,28,28)

In [14]:
# conv1 (3x3 kernel, no padding) followed by ReLU: 1 -> 6 channels, 28x28 -> 26x26.
# x is overwritten, so this cell cannot simply be re-run: conv1 expects a 1-channel input.
x = F.relu(conv1(x))

In [15]:
x.shape

torch.Size([1, 6, 26, 26])

In [16]:
# 2x2 max-pool with stride 2 halves height and width: 26x26 -> 13x13
x = F.max_pool2d(x,2,2)

In [17]:
x.shape

torch.Size([1, 6, 13, 13])

In [18]:
# conv2 (3x3 kernel, no padding) followed by ReLU: 6 -> 16 channels, 13x13 -> 11x11
x = F.relu(conv2(x))

In [19]:
x.shape

torch.Size([1, 16, 11, 11])

In [20]:
# Second 2x2 max-pool with stride 2: 11x11 -> 5x5 (the odd size is rounded down)
x = F.max_pool2d(x,2,2)

In [21]:
x.shape

torch.Size([1, 16, 5, 5])

In [22]:
x.view(-1,16*5*5).shape

torch.Size([1, 400])

In [23]:
x.shape

torch.Size([1, 16, 5, 5])

## Convolutional Neural Network Class

In [24]:
class ConvolutionalNeuralNetwork(nn.Module):
  """Small LeNet-style CNN for single-channel 28x28 images and 10 classes.

  Pipeline: conv(1->6, 3x3) -> ReLU -> 2x2 max-pool -> conv(6->16, 3x3) -> ReLU
  -> 2x2 max-pool -> flatten (16*5*5 = 400) -> fc(400->120) -> ReLU
  -> fc(120->84) -> ReLU -> fc(84->10) -> log-softmax.
  """

  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv2d(1, 6, 3, 1)
    self.conv2 = nn.Conv2d(6, 16, 3, 1)
    self.fc1 = nn.Linear(5*5*16, 120)
    self.fc2 = nn.Linear(120, 84)
    self.fc3 = nn.Linear(84, 10)

  def forward(self, X):
    """Return per-class log-probabilities for a batch of images.

    Args:
      X: image tensor of shape (N, 1, 28, 28); a single unbatched image of
        shape (1, 28, 28) is also accepted and treated as a batch of one.

    Returns:
      Tensor of shape (N, 10) holding log-probabilities (each row
      exponentiates to a distribution summing to 1).

    Raises:
      RuntimeError: if the spatial size does not reduce to 16 x 5 x 5
        features per image (raised by the first linear layer).
    """
    if X.dim() == 3:
      # Promote an unbatched (C, H, W) image to a batch of one.
      X = X.unsqueeze(0)

    X = F.max_pool2d(F.relu(self.conv1(X)), 2, 2)
    X = F.max_pool2d(F.relu(self.conv2(X)), 2, 2)

    # Flatten per sample, keeping the batch dimension fixed. Reshaping with
    # view(-1, 400) would silently regroup a wrongly sized input into a
    # different number of rows instead of failing.
    X = X.flatten(start_dim=1)

    X = F.relu(self.fc1(X))
    X = F.relu(self.fc2(X))
    X = self.fc3(X)
    return F.log_softmax(X, dim=1)



## Create Model

In [25]:
# Seed PyTorch's global RNG before building the network so the random weight
# initialisation (and any later draws from the global RNG) is repeatable
# after Restart Kernel -> Run All.
SEED = 42
torch.manual_seed(SEED)

model = ConvolutionalNeuralNetwork()

In [26]:
model

ConvolutionalNeuralNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [27]:
# Print the number of elements in each learnable tensor, one per line,
# in the order the model yields its parameters.
for n_elements in map(torch.numel, model.parameters()):
  print(n_elements)

54
6
864
16
48000
120
10080
84
840
10


## Loss Function and Optimizer

In [28]:
# The model's forward pass already ends in log_softmax, so the matching loss is
# NLLLoss (negative log-likelihood on log-probabilities, integer class targets).
# CrossEntropyLoss would apply log_softmax a second time: same value, redundant work.
criterion = nn.NLLLoss()

# Adam over all model parameters with a learning rate of 0.001.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

## Train Model

In [34]:
import time

start_time = time.time()

# NOTE: this cell trains the existing global `model` with the existing
# `optimizer`, so running it again continues from the current weights.
# Re-create both first to train from scratch.

# Trackers: one entry per epoch
epochs = 5
train_losses = []    # mean training loss over the epoch (0-dim tensor, no autograd graph)
test_losses = []     # mean loss over the whole test set (0-dim tensor, no autograd graph)
train_correct = []   # number of correctly classified training images
test_correct = []    # number of correctly classified test images

for epoch in range(epochs):

  trn_corr = 0
  tst_corr = 0
  trn_loss_sum = 0.0
  tst_loss_sum = 0.0
  trn_seen = 0
  tst_seen = 0

  # Train
  model.train()
  for btch,(X_train, y_train) in enumerate(train_loader):

    y_pred = model(X_train)            # image batch is passed as-is; the CNN flattens internally
    loss = criterion(y_pred, y_train)  # targets are integer class labels, not one-hot vectors

    # backward propagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Bookkeeping. loss.item() is a plain float, so nothing stored here keeps
    # the autograd graph alive; the batch mean is weighted by batch size so
    # the epoch figure is a true per-sample mean.
    n_batch = y_train.size(0)
    trn_corr += (y_pred.argmax(dim=1) == y_train).sum()
    trn_loss_sum += loss.item() * n_batch
    trn_seen += n_batch

    if btch%600 == 0:
      print(f"EPOCH: {epoch+1} BATCH: {btch} LOSS: {loss.item()}")

  train_losses.append(torch.tensor(trn_loss_sum / max(trn_seen, 1)))
  train_correct.append(trn_corr)

  # Test: evaluation mode, no gradients, loss accumulated over every batch
  # (not just the last one).
  model.eval()
  with torch.no_grad():
    for btch,(X_test, y_test) in enumerate(test_loader):

      y_val = model(X_test)
      loss = criterion(y_val, y_test)

      n_batch = y_test.size(0)
      tst_corr += (y_val.argmax(dim=1) == y_test).sum()
      tst_loss_sum += loss.item() * n_batch
      tst_seen += n_batch

  test_losses.append(torch.tensor(tst_loss_sum / max(tst_seen, 1)))
  test_correct.append(tst_corr)


current_time = time.time()
total_time = current_time - start_time
print(f"Training took {total_time/60} minutes")

EPOCH: 1 BATCH: 0 LOSS: 0.0009495875565335155
EPOCH: 1 BATCH: 600 LOSS: 0.14716275036334991
EPOCH: 1 BATCH: 1200 LOSS: 0.01899706944823265
EPOCH: 1 BATCH: 1800 LOSS: 0.0037262968253344297
EPOCH: 1 BATCH: 2400 LOSS: 0.008511418476700783
EPOCH: 1 BATCH: 3000 LOSS: 0.0066335625015199184
EPOCH: 1 BATCH: 3600 LOSS: 0.0006997323944233358
EPOCH: 1 BATCH: 4200 LOSS: 0.09299765527248383
EPOCH: 1 BATCH: 4800 LOSS: 0.22573161125183105
EPOCH: 1 BATCH: 5400 LOSS: 0.0015759197995066643
EPOCH: 2 BATCH: 0 LOSS: 0.0004376330762170255
EPOCH: 2 BATCH: 600 LOSS: 0.09453371167182922
EPOCH: 2 BATCH: 1200 LOSS: 0.000893603079020977
EPOCH: 2 BATCH: 1800 LOSS: 0.0003536352014634758
EPOCH: 2 BATCH: 2400 LOSS: 0.033165983855724335
EPOCH: 2 BATCH: 3000 LOSS: 0.033429745584726334
EPOCH: 2 BATCH: 3600 LOSS: 0.0002842629619408399
EPOCH: 2 BATCH: 4200 LOSS: 0.10080528259277344
EPOCH: 2 BATCH: 4800 LOSS: 0.0013403043849393725
EPOCH: 2 BATCH: 5400 LOSS: 0.005271116737276316
EPOCH: 3 BATCH: 0 LOSS: 0.0002585815382190048