In [1]:
import torch 
import torch.nn as nn # Layers
import torch.nn.functional as F # activation functions
import torch.optim as optim # optimizer (SGD)

from torch.utils.data import DataLoader # Dataloader (for automatic batches)
from torchvision import datasets, transforms # datasets (MNIST) & transformations for pre-processing data (convert to Tensor, Normalization)


### Hyperparameter

In [2]:
num_classes = 10 # Digits [0,9]

# Hyperparameter
num_epochs = 3
batch_size = 64 * 2
learning_rate = 0.1

### Load Data

In [3]:
preprocessing_transformations = transforms.Compose([
    transforms.ToTensor(), 
    transforms.Normalize(0.1307, 0.3081)
    ])

train_data = datasets.MNIST(train=True, root='data-mnist',  download=True, transform=preprocessing_transformations)
test_data = datasets.MNIST(train=False, root='data-mnist', download=True, transform=preprocessing_transformations)

train_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(train_data, batch_size=batch_size, shuffle=True)

### Create Convolutional Neural Network

In [4]:
# get random images
dataiter = iter(train_dataloader)
images, _ = next(dataiter)

# COMPUTE INPUT SIZE of first fully-connected layer (therefore define all layers)
conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3) # because MNIST is black/white there is only 1 input_channel (RGB images have 3)
pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
conv2 = nn.Conv2d(32, 64, 3)
pool2 = nn.MaxPool2d(2,2)

img_shape = pool2(conv2(pool1(conv1(images)))).size()
fc1_input_size = img_shape[1]*img_shape[2]*img_shape[3]
print(f'''Shape is {img_shape}, 
so Fully-Connected Layer input layer has size {img_shape[1]}*{img_shape[2]}*{img_shape[3]}={img_shape[1]*img_shape[2]*img_shape[3]}''')


# Define CNN Architecture
class CNN(nn.Module):
    def __init__(self):
        '''Define Layers'''
        super(CNN, self).__init__()
        self.conv1 = conv1
        self.pool1 = pool1

        self.conv2 = conv2 
        self.pool2 = pool2 
        
        self.dropout = nn.Dropout2d(p=0.5)

        self.fc1 = nn.Linear(in_features=fc1_input_size, out_features=128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        '''Define order of computation & activation functions'''

        #  conv. & pool. layers
        x = self.pool1(F.relu(self.conv1(x)))
        x = self.pool2(F.relu(self.conv2(x)))
        x = self.dropout(x)

        # flatten data for fully-connected layers
        x = torch.flatten(x,1)

        # fully connected layers
        x = F.relu(self.fc1(x))
        output = self.fc2(x)
        return output

        

Shape is torch.Size([128, 64, 5, 5]), 
so Fully-Connected Layer input layer has size 64*5*5=1600


### Functions - Train & Test

In [5]:
def train(epochs:int, model:nn.Module, optimizer:optim.Optimizer, dataloader:DataLoader, loss_fn):
    # tell model to activate training mode (different behaviour of Dropout-Layer compared to when testing)
    model.train()
    n = len(dataloader)

    for i_epoch in range(epochs):
        for i_batch, (X,y) in enumerate(dataloader):
            # prediction
            pred = model(X)
            loss = loss_fn(pred, y) # pred is 1. arguement, y is 2. !!!

            # backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # print
            if i_batch % 10 == 0:
                print(f'Epoch\t{i_epoch+1}/{num_epochs}\t\tBatch {i_batch}\{n}\t({(100. * i_batch / n):.1f}%)\t\tLoss {loss:.4f}')


def test(model:nn.Module, dataloader:DataLoader):
    model.eval()

    num_samples = 0
    num_correct = 0

    with torch.no_grad():
        for _, (X,y) in enumerate(dataloader):
            # predict & calculate loss
            pred = model(X)
            _, pred = torch.max(pred, 1)
            
            # count
            num_samples += y.size(0)
            num_correct += (pred==y).sum().item()

        acc = 100 * num_correct / num_samples
        print(f'Accuracy is {acc:.2f}%') 



### Main - Train & Test/Evaluate the Model

In [6]:
model = CNN()
optim = optim.SGD(model.parameters(),lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [7]:
train(
    epochs=num_epochs,
    model=model,
    optimizer=optim, 
    loss_fn=criterion, 
    dataloader=train_dataloader
)

Epoch	1/3		Batch 0\469	(0.0%)		Loss 2.2915
Epoch	1/3		Batch 10\469	(2.1%)		Loss 1.8266
Epoch	1/3		Batch 20\469	(4.3%)		Loss 1.5113
Epoch	1/3		Batch 30\469	(6.4%)		Loss 0.8972
Epoch	1/3		Batch 40\469	(8.5%)		Loss 0.4946
Epoch	1/3		Batch 50\469	(10.7%)		Loss 0.5154
Epoch	1/3		Batch 60\469	(12.8%)		Loss 0.3556
Epoch	1/3		Batch 70\469	(14.9%)		Loss 0.3764
Epoch	1/3		Batch 80\469	(17.1%)		Loss 0.2105
Epoch	1/3		Batch 90\469	(19.2%)		Loss 0.3042
Epoch	1/3		Batch 100\469	(21.3%)		Loss 0.3798
Epoch	1/3		Batch 110\469	(23.5%)		Loss 0.2470
Epoch	1/3		Batch 120\469	(25.6%)		Loss 0.2667
Epoch	1/3		Batch 130\469	(27.7%)		Loss 0.1850
Epoch	1/3		Batch 140\469	(29.9%)		Loss 0.2269
Epoch	1/3		Batch 150\469	(32.0%)		Loss 0.2484
Epoch	1/3		Batch 160\469	(34.1%)		Loss 0.2348
Epoch	1/3		Batch 170\469	(36.2%)		Loss 0.1245
Epoch	1/3		Batch 180\469	(38.4%)		Loss 0.2526
Epoch	1/3		Batch 190\469	(40.5%)		Loss 0.1207
Epoch	1/3		Batch 200\469	(42.6%)		Loss 0.1685
Epoch	1/3		Batch 210\469	(44.8%)		Loss 0.1921
Epoc

In [8]:
test(model=model, dataloader=test_dataloader)
# 99.78

Accuracy is 98.84%
