In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torchvision.utils import make_grid

In [2]:
import numpy as np
import pandas as pd
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Convert every image to a torch tensor when it is read from the dataset
# (the sample inspected further down has shape [1, 28, 28]).
transform = transforms.ToTensor()

In [4]:
# MNIST training split (60,000 images), stored under ../Data and downloaded if missing.
train_data = datasets.MNIST(root='../Data', train=True, download=True, transform=transform)

In [5]:
# MNIST test split (10,000 images), same root directory and transform as the training split.
test_data = datasets.MNIST(root='../Data', train=False, download=True, transform=transform)

In [6]:
train_data


Dataset MNIST
    Number of datapoints: 60000
    Root location: ../Data
    Split: Train
    StandardTransform
Transform: ToTensor()

In [7]:
test_data


Dataset MNIST
    Number of datapoints: 10000
    Root location: ../Data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [8]:
# Mini-batches of 10 images: 6,000 training batches and 1,000 test batches per pass.
# Only the training data is reshuffled each epoch; the test order stays fixed.
train_loader = DataLoader(train_data, batch_size=10, shuffle=True)
test_loader = DataLoader(test_data, batch_size=10, shuffle=False)

In [9]:
# First convolution (stand-alone layer used to explore shapes before building the model):
#   1 input channel (single colour channel)
#   6 filters (output channels)
#   3x3 kernel
#   stride = 1
conv1 = nn.Conv2d(1, 6, 3, 1) # 6 feature maps --> pooling --> conv2

# Second convolution:
#   6 input channels (the 6 feature maps produced by conv1)
#   16 filters (arbitrary choice)
#   3x3 kernel
#   stride = 1
conv2 = nn.Conv2d(6, 16, 3, 1)

In [10]:
# Grab the first (image, label) pair to experiment with layer shapes.
# Indexing the dataset directly replaces the enumerate-and-break loop and
# avoids leaving an unused loop counter behind.
X_train, y_train = train_data[0]

In [11]:
X_train.shape

torch.Size([1, 28, 28])

In [12]:
# Add a leading batch dimension: (1, 28, 28) -> (1, 1, 28, 28), i.e. a batch of one image.
# NOTE: as a bare expression this prints the whole tensor; the result is not stored here.
X_train.view(1, 1, 28, 28) # ------> 4D batch (batch of 1 image)

tensor([[[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 

In [13]:
# Keep the 4D version as x; the following cells push it through the layers one step at a time.
x = X_train.view(1, 1, 28, 28) # ------> 4D batch (batch of 1 image)

In [14]:
# conv1 + ReLU: (1, 1, 28, 28) -> (1, 6, 26, 26); a 3x3 kernel without padding trims one pixel per side.
# NOTE: x is overwritten, so re-running only this cell would feed 6 channels into conv1, which expects 1.
x = F.relu(conv1(x))

In [15]:
x.shape

torch.Size([1, 6, 26, 26])

In [16]:
# 2x2 max pooling with stride 2 halves height and width: 26 -> 13.
x = F.max_pool2d(x, 2, 2)

In [17]:
x.shape

torch.Size([1, 6, 13, 13])

In [18]:
# conv2 + ReLU: (1, 6, 13, 13) -> (1, 16, 11, 11).
x = F.relu(conv2(x))

In [19]:
x.shape

torch.Size([1, 16, 11, 11])

In [20]:
# Second 2x2 max pooling: 11 -> 5 (the odd last row/column is dropped).
x = F.max_pool2d(x, 2, 2)

In [21]:
x.shape

torch.Size([1, 16, 5, 5])

In [22]:
# 11 / 2 is 5.5, but the pooled tensor is 5x5: max_pool2d rounds the output size down.
11 / 2

5.5

In [23]:
# Size arithmetic for the whole stack: 28 -conv-> 26 -pool-> 13 -conv-> 11 -pool-> 5.5,
# which the pooling layer rounds down to 5.
(((28-2) / 2) - 2) / 2

5.5

In [24]:
x.shape # 16 feature maps of 5x5 -> 16*5*5 = 400 values per image

torch.Size([1, 16, 5, 5])

In [25]:
# Flatten to (batch, features) for the fully connected layers; -1 lets view infer the batch size.
x.view(-1, 16*5*5).shape

torch.Size([1, 400])

In [26]:
class ConvolutionalNetwork(nn.Module):
    """Small CNN for 1x28x28 images that returns 10 log-probabilities per image.

    Shapes for a batch of N images:
        input             (N, 1, 28, 28)
        conv1 + ReLU      (N, 6, 26, 26)
        max-pool 2x2      (N, 6, 13, 13)
        conv2 + ReLU      (N, 16, 11, 11)
        max-pool 2x2      (N, 16, 5, 5)
        flatten           (N, 400)
        fc1 / fc2 / fc3   (N, 120) -> (N, 84) -> (N, 10)
    """

    def __init__(self):
        super().__init__()
        # Two 3x3, stride-1 convolutions: 1 -> 6 -> 16 channels.
        self.conv1 = nn.Conv2d(1, 6, 3, 1)
        self.conv2 = nn.Conv2d(6, 16, 3, 1)
        # Classifier head; 5*5*16 is the flattened size of the last feature maps.
        self.fc1 = nn.Linear(5*5*16, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, X):
        """Run the network on a batch of images.

        Args:
            X: image tensor whose feature maps come out as 16x5x5 after the
                two conv/pool stages (e.g. shape (N, 1, 28, 28)).

        Returns:
            Tensor of shape (N, 10) holding log-probabilities (log_softmax
            over dim 1).

        Raises:
            ValueError: if the per-image feature count after the conv/pool
                stages does not match what fc1 expects. Without this check
                ``view(-1, 400)`` could silently regroup values from
                different images into rows and return the wrong batch size.
        """
        X = F.relu(self.conv1(X))
        X = F.max_pool2d(X, 2, 2)
        X = F.relu(self.conv2(X))
        X = F.max_pool2d(X, 2, 2)

        expected = self.fc1.in_features
        per_image = X.size(-3) * X.size(-2) * X.size(-1)
        if per_image != expected:
            raise ValueError(
                f"expected {expected} features per image after the conv/pool "
                f"stages, got {per_image} (feature maps of shape "
                f"{tuple(X.shape[-3:])}); check the input height/width"
            )

        X = X.view(-1, expected)
        X = F.relu(self.fc1(X))
        X = F.relu(self.fc2(X))
        X = self.fc3(X)
        return F.log_softmax(X, dim=1)


In [27]:
# Seed the RNG before building the model so the initial weights are reproducible.
torch.manual_seed(42)
model = ConvolutionalNetwork()
model  # display the layer summary


ConvolutionalNetwork(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)

In [28]:
# Print the element count of each learnable tensor, in registration order
# (weight then bias for conv1, conv2, fc1, fc2, fc3).
for tensor in model.parameters():
    n_elements = tensor.numel()
    print(n_elements)


54
6
864
16
48000
120
10080
84
840
10


In [29]:
# The model's forward() already ends in log_softmax, so the matching loss is
# NLLLoss. CrossEntropyLoss would apply log_softmax a second time; that yields
# the same value (log_softmax of log-probabilities is a no-op) but is redundant.
criterion = nn.NLLLoss()
# Adam over all model parameters with a learning rate of 1e-3.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [30]:
import time
start_time = time.time()

# Trackers: one entry per epoch.
epochs = 5
train_losses = []   # loss of the LAST training batch of each epoch (float)
test_losses = []    # loss of the LAST test batch of each epoch (0-d tensor)
train_correct = []  # number of correct training predictions (int)
test_correct = []   # number of correct test predictions (0-d tensor)
# NOTE(review): the test trackers hold tensors while the train trackers hold
# plain numbers. Left as-is because cells after this one may rely on the
# current types; consider appending .item() once downstream use is confirmed.

for i in range(epochs):
    trn_corr = 0
    tst_corr = 0

    # Train
    model.train()  # explicit mode switch; matters once dropout/batch-norm is added
    for b, (X_train, y_train) in enumerate(train_loader, start=1):
        y_pred = model(X_train)  # images stay 4D; the model flattens internally
        loss = criterion(y_pred, y_train)

        # Predicted class = index of the largest log-probability.
        predicted = y_pred.argmax(dim=1)
        batch_corr = (predicted == y_train).sum()  # count of True entries
        trn_corr += batch_corr

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # With 6,000 batches per epoch this reports ten times per epoch.
        if b%600 == 0:
            print(f"Epoch: {i} - Batch: {b} - Loss: {loss.item()}")

    train_losses.append(loss.item())
    train_correct.append(trn_corr.item())

    # Test
    model.eval()
    with torch.no_grad():  # no gradients needed for evaluation
        for X_test, y_test in test_loader:
            y_val = model(X_test)

            predicted = y_val.argmax(dim=1)
            tst_corr += (predicted == y_test).sum()

        # Loss of the final test batch only (same quantity the trackers held before).
        loss = criterion(y_val, y_test)
    test_losses.append(loss)
    test_correct.append(tst_corr)

current_time = time.time()
total = current_time - start_time
print(f'Training took: {total/60} minutes')


Epoch: 0 - Batch: 600 - Loss: 0.040556274354457855
Epoch: 0 - Batch: 1200 - Loss: 0.08253476768732071
Epoch: 0 - Batch: 1800 - Loss: 0.36018961668014526
Epoch: 0 - Batch: 2400 - Loss: 0.0181870236992836
Epoch: 0 - Batch: 3000 - Loss: 0.00846516527235508
Epoch: 0 - Batch: 3600 - Loss: 0.0011429868172854185
Epoch: 0 - Batch: 4200 - Loss: 0.6116259098052979
Epoch: 0 - Batch: 4800 - Loss: 0.029025286436080933
Epoch: 0 - Batch: 5400 - Loss: 0.007433040998876095
Epoch: 0 - Batch: 6000 - Loss: 0.051957011222839355
Epoch: 1 - Batch: 600 - Loss: 0.004215870518237352
Epoch: 1 - Batch: 1200 - Loss: 0.04310125857591629
Epoch: 1 - Batch: 1800 - Loss: 0.0012016806285828352
Epoch: 1 - Batch: 2400 - Loss: 0.03806653246283531
Epoch: 1 - Batch: 3000 - Loss: 0.34830039739608765
Epoch: 1 - Batch: 3600 - Loss: 0.0027559201698750257
Epoch: 1 - Batch: 4200 - Loss: 0.0010476860916242003
Epoch: 1 - Batch: 4800 - Loss: 0.0009008402703329921
Epoch: 1 - Batch: 5400 - Loss: 0.0009703498217277229
Epoch: 1 - Batch: 