In [1]:
import sys
import torch
from torch.utils.data import DataLoader
from torch import nn
import torch.nn.functional as F

import torchvision.datasets as datasets
from torchvision.transforms import ToTensor

# Fetch the MNIST train and test splits into ./data (download=True requests a
# download when needed); ToTensor() converts each image to a tensor.
mnist_train = datasets.MNIST(root='./data', download=True, train=True, transform=ToTensor())
mnist_test  = datasets.MNIST(root='./data', download=True, train=False, transform=ToTensor())

# Only the training batches are shuffled. Accuracy does not depend on the
# order in which test samples are visited, so the test loader keeps a fixed
# order, which makes evaluation runs reproducible.
train_dataloader = DataLoader(mnist_train, batch_size=32, shuffle=True)
test_dataloader  = DataLoader(mnist_test,  batch_size=32, shuffle=False)

# Two-layer MLP: 784 input features -> 100 hidden units (ReLU) -> 10 outputs,
# one per digit class. The outputs are raw logits (no softmax layer).
model = nn.Sequential(
    nn.Linear(784, 100),
    nn.ReLU(),
    nn.Linear(100, 10),
)

# Multi-class classification loss; CrossEntropyLoss expects raw logits and
# applies log-softmax internally.
loss_fn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Train for 10 epochs, printing the summed per-batch loss after each epoch.
model.train()  # explicit, so re-running this after an evaluation pass still trains correctly
for epoch in range(10):
    loss_sum = 0.0
    for X, y in train_dataloader:
        # Flatten every image in the batch to a 784-element feature vector.
        X = X.reshape((-1, 784))

        optimizer.zero_grad()
        outputs = model(X)
        # CrossEntropyLoss takes the integer class labels directly. Expanding
        # them to float one-hot vectors gives the same loss value but needs a
        # newer PyTorch and allocates an extra tensor per batch.
        loss = loss_fn(outputs, y)
        loss.backward()
        optimizer.step()

        # .item() extracts a Python float so no autograd graph is retained.
        loss_sum += loss.item()

    print(loss_sum)

# Measure classification accuracy on the test set.
model.eval()
with torch.no_grad():  # no gradients are needed for evaluation
    accurate = 0
    total = 0
    for X, y in test_dataloader:
        # Flatten every image in the batch to a 784-element feature vector.
        X = X.reshape((-1, 784))
        # Softmax is monotonic, so the arg-max of the raw logits is already
        # the predicted class; no need to normalise first.
        predictions = model(X).argmax(dim=1)
        total += y.size(0)
        accurate += (predictions == y).sum().item()
    # Fraction of test samples classified correctly.
    print(accurate / total)

551.970934279263
248.82539145089686
173.81057177577168
131.74095066962764
105.97182370134396
83.66998014069395
69.98657952764188
56.78170456463704
48.44633479294134
42.04583683619421
0.9751
