We'll start by selecting a device:

In [1]:
import torch
# Run on the first CUDA GPU when PyTorch can see one; otherwise use the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

Next, we'll load the MNIST dataset and flatten each image into a one-dimensional vector:

In [2]:
from torchvision import datasets
from torchvision.transforms import ToTensor, Lambda, Compose

# Shared preprocessing: convert each image to a tensor, then collapse it into a
# single 1-D vector so it can be fed straight into a fully connected layer.
to_flat_tensor = Compose([ToTensor(), Lambda(torch.flatten)])

# Training split; the files are downloaded into ./data when they are missing.
train_data = datasets.MNIST(
    root='data',
    train=True,
    download=True,
    transform=to_flat_tensor,
)

# Held-out split used for validation, read from the same root directory.
validation_data = datasets.MNIST(
    root='data',
    train=False,
    transform=to_flat_tensor,
)

Next, we'll create the data loaders:

In [3]:
from torch.utils.data import DataLoader

# Reshuffle the training set at every epoch so the mini-batches differ from one
# epoch to the next.
train_loader = DataLoader(
    train_data,
    batch_size=100,
    shuffle=True)

# Evaluation only accumulates per-sample statistics, so the sample order does
# not matter; keep the validation set in its fixed order for repeatable runs.
validation_loader = DataLoader(
    validation_data,
    batch_size=100,
    shuffle=False)

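Then, we'll define the neural network (NN): one hidden layer with batch normalization and ReLU activation, followed by a 10-class output layer: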

In [4]:
import torch

# Seed the global RNG before the layers are created so their initial weights are
# the same on every run.
torch.manual_seed(1234)

hidden_units = 100  # width of the single hidden layer
classes = 10  # number of output scores, one per class

# 28 * 28 flattened inputs -> hidden layer (batch norm + ReLU) -> class scores.
# The layers are created on the CPU and only then moved to the GPU when one is
# available. This is the same device selection the training and evaluation
# loops use for their batches; leaving the model on the CPU while the batches go
# to the GPU fails with a device-mismatch error.
net = torch.nn.Sequential(
    torch.nn.Linear(28 * 28, hidden_units),
    torch.nn.BatchNorm1d(hidden_units),
    torch.nn.ReLU(),
    torch.nn.Linear(hidden_units, classes),
).to("cuda:0" if torch.cuda.is_available() else "cpu")

Next, we'll implement the `train_model` function:

In [5]:
def train_model(model, cost_function, optimizer, data_loader):
    """Train ``model`` for one pass over ``data_loader`` and print the metrics.

    Args:
        model: ``torch.nn.Module`` to optimize; it is switched to training mode.
        cost_function: callable ``(outputs, labels) -> loss`` returning a scalar
            tensor. Each batch loss is weighted by the batch size, so a
            mean-reduced loss is assumed.
        optimizer: optimizer that updates the parameters of ``model``.
        data_loader: iterable yielding ``(inputs, labels)`` batches.

    Returns:
        A ``(loss, accuracy)`` tuple averaged over the samples actually seen:
        ``loss`` is a Python float, ``accuracy`` a 0-dim ``torch.float64``
        tensor.

    Raises:
        ValueError: if ``data_loader`` yields no samples.
    """
    # set model to training mode
    model.train()

    # Send each batch to wherever the model's parameters live rather than
    # assuming the model was moved to the module-level ``device``; a mismatch
    # between the two makes the forward pass fail. A model without parameters
    # falls back to the module-level ``device``.
    first_param = next(model.parameters(), None)
    target = device if first_param is None else first_param.device

    current_loss = 0.0
    current_acc = 0
    seen = 0

    # iterate over the training data
    for inputs, labels in data_loader:
        # move the batch to the model's device
        inputs = inputs.to(target)
        labels = labels.to(target)

        # zero the parameter gradients
        optimizer.zero_grad()

        # force gradient tracking even if the caller disabled it
        with torch.set_grad_enabled(True):
            # forward
            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)
            loss = cost_function(outputs, labels)

            # backward
            loss.backward()
            optimizer.step()

        # statistics
        batch_size = inputs.size(0)
        current_loss += loss.item() * batch_size
        current_acc += torch.sum(predictions == labels)
        seen += batch_size

    if seen == 0:
        raise ValueError('data_loader yielded no samples')

    # Average over the samples that were actually processed, which stays
    # correct when the loader drops the last batch or samples a subset.
    total_loss = current_loss / seen
    total_acc = current_acc.double() / seen

    print('Train Loss: {:.4f}; Accuracy: {:.4f}'.format(total_loss, total_acc))

    return total_loss, total_acc

Then, we'll implement the `test_model` function:

In [6]:
def test_model(model, cost_function, data_loader):
    """Evaluate ``model`` on ``data_loader`` and print the metrics.

    Args:
        model: ``torch.nn.Module`` to evaluate; it is switched to evaluation
            mode and left in that mode.
        cost_function: callable ``(outputs, labels) -> loss`` returning a scalar
            tensor. Each batch loss is weighted by the batch size, so a
            mean-reduced loss is assumed.
        data_loader: iterable yielding ``(inputs, labels)`` batches.

    Returns:
        A ``(loss, accuracy)`` tuple averaged over the samples actually seen:
        ``loss`` is a Python float, ``accuracy`` a 0-dim ``torch.float64``
        tensor.

    Raises:
        ValueError: if ``data_loader`` yields no samples.
    """
    # set model in evaluation mode
    model.eval()

    # Send each batch to wherever the model's parameters live rather than
    # assuming the model was moved to the module-level ``device``; a mismatch
    # between the two makes the forward pass fail. A model without parameters
    # falls back to the module-level ``device``.
    first_param = next(model.parameters(), None)
    target = device if first_param is None else first_param.device

    current_loss = 0.0
    current_acc = 0
    seen = 0

    # iterate over the validation data
    for inputs, labels in data_loader:
        # move the batch to the model's device
        inputs = inputs.to(target)
        labels = labels.to(target)

        # forward only; no gradients are needed for evaluation
        with torch.set_grad_enabled(False):
            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)
            loss = cost_function(outputs, labels)

        # statistics
        batch_size = inputs.size(0)
        current_loss += loss.item() * batch_size
        current_acc += torch.sum(predictions == labels)
        seen += batch_size

    if seen == 0:
        raise ValueError('data_loader yielded no samples')

    # Average over the samples that were actually processed, which stays
    # correct when the loader drops the last batch or samples a subset.
    total_loss = current_loss / seen
    total_acc = current_acc.double() / seen

    print('Test Loss: {:.4f}; Accuracy: {:.4f}'.format(total_loss, total_acc))

    return total_loss, total_acc

Next, let's define the cost function and the optimizer:

In [7]:
# Cross-entropy between the network's raw class scores and the integer labels.
cost_func = torch.nn.CrossEntropyLoss()
# Adam with its default hyperparameters, updating every parameter of the network.
optimizer = torch.optim.Adam(params=net.parameters())

We'll train for 20 epochs:

In [8]:
# Number of full passes over the training set.
epochs = 20
for epoch in range(epochs):
    # Show a 1-based progress header before each pass.
    print('Epoch {}/{}'.format(epoch + 1, epochs))
    train_model(
        model=net,
        cost_function=cost_func,
        optimizer=optimizer,
        data_loader=train_loader,
    )

Epoch 1/20
Train Loss: 0.3272; Accuracy: 0.9175
Epoch 2/20
Train Loss: 0.1421; Accuracy: 0.9604
Epoch 3/20
Train Loss: 0.0999; Accuracy: 0.9721
Epoch 4/20
Train Loss: 0.0760; Accuracy: 0.9790
Epoch 5/20
Train Loss: 0.0611; Accuracy: 0.9828
Epoch 6/20
Train Loss: 0.0495; Accuracy: 0.9863
Epoch 7/20
Train Loss: 0.0422; Accuracy: 0.9879
Epoch 8/20
Train Loss: 0.0358; Accuracy: 0.9898
Epoch 9/20
Train Loss: 0.0309; Accuracy: 0.9909
Epoch 10/20
Train Loss: 0.0262; Accuracy: 0.9929
Epoch 11/20
Train Loss: 0.0228; Accuracy: 0.9936
Epoch 12/20
Train Loss: 0.0201; Accuracy: 0.9948
Epoch 13/20
Train Loss: 0.0182; Accuracy: 0.9950
Epoch 14/20
Train Loss: 0.0174; Accuracy: 0.9952
Epoch 15/20
Train Loss: 0.0160; Accuracy: 0.9954
Epoch 16/20
Train Loss: 0.0131; Accuracy: 0.9967
Epoch 17/20
Train Loss: 0.0121; Accuracy: 0.9968
Epoch 18/20
Train Loss: 0.0113; Accuracy: 0.9968
Epoch 19/20
Train Loss: 0.0107; Accuracy: 0.9971
Epoch 20/20
Train Loss: 0.0101; Accuracy: 0.9974


Finally, we'll run the evaluation:

In [9]:
# Score the trained network on the held-out validation split.
test_model(model=net, cost_function=cost_func, data_loader=validation_loader)

Test Loss: 0.0886; Accuracy: 0.9772


(0.08855911538470536, tensor(0.9772, dtype=torch.float64))