In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# the input and output dimensions are specific to the MNIST dataset
# hidden size arbitrary - can tune this
input_size = 28 * 28 # input size of image
hidden_size = 512
output_size = 10 # number of MNIST classes


class NeuralNetwork(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NeuralNetwork, self).__init__()
        self.flatten = nn.Flatten() # turn input into 2D tensor
        self.layer1 = nn.Linear(input_size, hidden_size)  # first linear layer
        self.relu = nn.ReLU() # ReLU activation function
        self.dropout = nn.Dropout(0.5)  # dropout 50% of network
        self.layer2 = nn.Linear(hidden_size, output_size)  # second linear layer

    def forward(self, x):
        x = self.flatten(x)
        x = self.layer1(x)
        x = self.relu(x)
        x = self.dropout(x)  # Apply dropout
        x = self.layer2(x)
        return x

In [3]:
model = NeuralNetwork(input_size, hidden_size, output_size)
# common loss function for classification
criterion = nn.CrossEntropyLoss() # essentially returns mean loss per sample in batch
# stochastic gradient descent
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9) # learning rate and momentum can be tuned

In [4]:
# convert images to tensors and normalize
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize to [-1, 1]
])

# training set
train = datasets.MNIST(root='./data', train = True, download = True, transform = transform)
# testing set
test = datasets.MNIST(root='./data', train = False, download = True, transform = transform)

# batch size is number of images to process at once
batch_size = 64
train_loader = DataLoader(train, batch_size = batch_size, shuffle=True)
test_loader = DataLoader(test, batch_size = batch_size, shuffle=False)


In [7]:
# training
for epoch in range(5): # num of iterations over training set
    model.train()
    cum_loss = 0
    num_samples = 0
    for input, target in train_loader:
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward pass
        output = model(input)
        loss = criterion(output, target)

        # backward pass + updates
        loss.backward()
        optimizer.step()

        # keep track of cumulative loss and num samples from batches
        cum_loss += loss.item()
        num_samples += input.size(0)

    avg_loss = cum_loss / num_samples
    print(f"Epoch: {epoch}, Avg Loss: {avg_loss}")

    # validation after each epoch
    model.eval()
    total, correct = 0, 0
    with torch.no_grad():
        for input, target in test_loader:
            output = model(input)
            _, predicted = torch.max(output.data, 1) # obtain max for class
            total += target.size(0) # add batch size
            correct += (predicted == target).sum().item() # add number of correct predictions

    accuracy = correct / total 
    print(f"Epoch: {epoch}, Accuracy: {accuracy}")

Epoch: 0, Avg Loss: 0.003855354121762017
Epoch: 0, Accuracy: 0.9518
Epoch: 1, Avg Loss: 0.0029968325522417823
Epoch: 1, Accuracy: 0.9579
Epoch: 2, Avg Loss: 0.0025862598524428906
Epoch: 2, Accuracy: 0.9657
Epoch: 3, Avg Loss: 0.002258502740599215
Epoch: 3, Accuracy: 0.9709
Epoch: 4, Avg Loss: 0.0020669390969599286
Epoch: 4, Accuracy: 0.9698
Epoch: 5, Avg Loss: 0.0019490503437196215
Epoch: 5, Accuracy: 0.9739
Epoch: 6, Avg Loss: 0.0017481244168166693
Epoch: 6, Accuracy: 0.9737
Epoch: 7, Avg Loss: 0.0016325693994139632
Epoch: 7, Accuracy: 0.977
Epoch: 8, Avg Loss: 0.0015399657093376543
Epoch: 8, Accuracy: 0.9764
Epoch: 9, Avg Loss: 0.0014834081544385601
Epoch: 9, Accuracy: 0.9786
