# MNIST Example

In [3]:
# CNN Model Architecture
import torch
from torch import nn
from torch.nn import functional as F

class CNN(nn.Module):
    """Small convolutional classifier that outputs log-probabilities over 10 classes.

    Layout: two unpadded 3x3 convolutions (1 -> 32 -> 64 channels), a 2x2
    max-pool, dropout, then two fully connected layers (9216 -> 128 -> 10).

    The 9216 input features of ``fc1`` correspond to a 28x28 single-channel
    image: 28 -> 26 -> 24 after the two convolutions, 12 after pooling, and
    12 * 12 * 64 = 9216 once flattened.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor; inputs are single-channel (greyscale).
        self.conv1 = nn.Conv2d(1, 32, 3)
        self.conv2 = nn.Conv2d(32, 64, 3)
        # Regularisation: lighter dropout after pooling, heavier before the output layer.
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        # Classifier head.
        self.fc1 = nn.Linear(in_features=9216, out_features=128)  # 12 * 12 * 64
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x):
        """Return per-class log-probabilities (log-softmax over dim 1) for a batch ``x``."""
        # Convolution stage: conv -> ReLU, twice.
        features = F.relu(self.conv1(x))
        features = F.relu(self.conv2(features))
        # Halve the spatial resolution, then apply the first dropout.
        features = self.dropout1(F.max_pool2d(features, 2))
        # Collapse everything except the batch dimension.
        flat = torch.flatten(features, start_dim=1)
        # Hidden layer with ReLU and the second dropout.
        hidden = self.dropout2(F.relu(self.fc1(flat)))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [4]:
# Train Model
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim

# create model instance and load MNIST data

# Seed PyTorch's global RNG so weight initialisation, batch shuffling and
# dropout masks are repeatable on a fresh "Restart & Run All".
seed = 0
torch.manual_seed(seed)

device = torch.device("cpu") # train on cpu

# Move the model to the same device as the batches; the loops below call
# .to(device) on every batch, so the parameters must live there as well.
model = CNN().to(device)

transform = transforms.Compose([
    transforms.ToTensor(),
    # One (mean, std) entry per channel. Written as explicit 1-tuples:
    # "(0.5)" without a trailing comma is just the float 0.5.
    transforms.Normalize((0.5,), (0.75,))
])

data_root = "../../../data"
batch_size = 1000

train_data = datasets.MNIST(
    data_root,
    train=True,
    download=True,
    transform=transform
)
test_data = datasets.MNIST(
    data_root,
    train=False,
    download=True,
    transform=transform
)

train_data_loader = DataLoader(
    train_data,
    batch_size=batch_size,
    shuffle=True
)
# Evaluation only accumulates sums over the whole test set, so sample order
# is irrelevant and shuffling is unnecessary.
test_data_loader = DataLoader(
    test_data,
    batch_size=batch_size,
    shuffle=False
)

# model training
learning_rate = 0.001
epochs = 10

optimizer = optim.Adam(model.parameters(), lr = learning_rate) #different optimizers may have significantly different results

for epoch in range(1, epochs + 1):
    print(f"Train Epoch: {epoch}")

    # ---- training pass: dropout active ----
    model.train()
    for batch_id, (data, target) in enumerate(train_data_loader): #for each batch, also get the index of batch
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad() #clear gradients left over from the previous step
        output = model(data) #forward pass, returns log-probabilities
        loss = F.nll_loss(output, target) #negative log-likelihood loss on the log-probabilities
        loss.backward() #compute gradients
        optimizer.step() #update weights

        if batch_id % 10 == 0: #report progress every 10 batches
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch, batch_id * len(data), len(train_data_loader.dataset),
                100. * batch_id / len(train_data_loader), loss.item()))

    # ---- evaluation pass on the test set: dropout disabled ----
    model.eval()
    test_loss = 0 #running sum of per-sample losses
    correct = 0 #running count of correct predictions
    with torch.no_grad(): #disable gradient calculation
        for data, target in test_data_loader: #for each batch
            data, target = data.to(device), target.to(device)
            output = model(data) #forward pass
            # Sum (rather than average) within the batch so the division below
            # yields a true per-sample mean over the whole test set.
            test_loss += F.nll_loss(output, target, reduction="sum").item()
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_data_loader.dataset)

    print("\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
        test_loss, correct, len(test_data_loader.dataset),
        100. * correct / len(test_data_loader.dataset)))

Train Epoch: 1

Test set: Average loss: 0.1527, Accuracy: 9550/10000 (96%)

Train Epoch: 2

Test set: Average loss: 0.0743, Accuracy: 9762/10000 (98%)

Train Epoch: 3

Test set: Average loss: 0.0541, Accuracy: 9828/10000 (98%)

Train Epoch: 4

Test set: Average loss: 0.0472, Accuracy: 9836/10000 (98%)

Train Epoch: 5

Test set: Average loss: 0.0386, Accuracy: 9867/10000 (99%)

Train Epoch: 6

Test set: Average loss: 0.0359, Accuracy: 9875/10000 (99%)

Train Epoch: 7

Test set: Average loss: 0.0330, Accuracy: 9885/10000 (99%)

Train Epoch: 8

Test set: Average loss: 0.0334, Accuracy: 9872/10000 (99%)

Train Epoch: 9

Test set: Average loss: 0.0314, Accuracy: 9885/10000 (99%)

Train Epoch: 10

Test set: Average loss: 0.0305, Accuracy: 9899/10000 (99%)

