# In [None]:
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
from torchvision.datasets import MNIST
import torch
import torch.nn as nn

### device config

### hyper parameters
# Data pipeline
batch_size = 100  # samples per mini-batch handed out by the loaders
# Network dimensions
input_size = 28 * 28  # one flattened 28x28 image -> 784 features
hidden_size = 100  # width of the hidden layer
num_of_classes = 10  # one output score per digit, 0 through 9
# Optimisation
learning_rate = 1e-2
num_epochs = 2

### mnist
# Both splits are stored under ./data and converted to tensors on load; only
# the training split is created with download=True.
train_dataset = MNIST(
    "./data",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = MNIST("./data", train=False, transform=transforms.ToTensor())

# Training batches are reshuffled every epoch; test batches keep dataset order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

### model
class MnistNN(nn.Module):
    """Fully connected classifier: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features in each (flattened) input sample.
        hidden_size: number of units in the hidden layer.
        num_of_classes: number of output scores produced per sample.
    """

    def __init__(self, input_size, hidden_size, num_of_classes):
        super().__init__()
        self.layer1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.layer2 = nn.Linear(hidden_size, num_of_classes)

    def forward(self, sample):
        """Return the raw class scores (logits) for ``sample``."""
        # No softmax here: the scores are meant for a cross-entropy loss,
        # which applies the softmax itself.
        hidden = self.relu(self.layer1(sample))
        return self.layer2(hidden)

model = MnistNN(input_size, hidden_size, num_of_classes)

### loss and optimizer
# The loss is computed directly from the model's raw output scores; Adam
# updates every parameter of the model with the configured learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

### training loop
# Runs `num_epochs` passes over `train_loader`, updating `model` with one
# optimizer step per mini-batch and logging the loss every 500 steps.
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        if i == 0:
            print(f"Shape of the x_train dataset: {images.shape}")
        # A batch arrives as a 4d array, e.g. [100, 1, 28, 28] (one entry per
        # image). Flatten it to a 2d array so that each row holds all pixels
        # of one full image; `input_size` is the width the model was built
        # with (28*28).
        images = images.reshape(-1, input_size)
        labels_predicted = model(images)
        loss = criterion(labels_predicted, labels)
        loss.backward()
        optimizer.step()
        # Clear the gradients so they do not accumulate into the next batch.
        optimizer.zero_grad()

        if i % 500 == 0:
            # `{i}` interpolates the step index (the original printed the
            # literal text "(i)"); `.4f` gives four decimals, whereas `4f`
            # only set a minimum field width.
            print(f"epoch={epoch}, step={i}, loss={loss.item():.4f}")

### evaluate
# Measures classification accuracy of `model` over `test_loader`.
# Switch to inference mode first; gradients are not needed, so tracking is
# disabled for the whole pass.
model.eval()
with torch.no_grad():
    n_samples = 0
    n_correct_inferences = 0
    for batch_idx, (images, labels) in enumerate(test_loader):
        # Shape diagnostics are printed for the first batch only. Keying on
        # the batch index (rather than on the running correct-count being 0)
        # guarantees each message appears exactly once.
        is_first_batch = batch_idx == 0
        if is_first_batch:
            print(f"Shape of the x_test dataset: {images.shape}")
        # A batch arrives as a 4d array, e.g. [100, 1, 28, 28] (one entry per
        # image). Flatten it to a 2d array so that each row holds all pixels
        # of one full image.
        images = images.reshape(-1, input_size)
        labels_predicted = model(images)

        if is_first_batch:
            print(f"Shape of the y_test dataset is: {labels_predicted.shape}")
        # e.g. [100, 10]: each prediction is a row of scores, one per class.
        # torch.max over dim=1 returns (values, indices); the index of the
        # largest score is the predicted class.
        _, predictions = torch.max(input=labels_predicted, dim=1)

        n_correct_inferences += (predictions == labels).sum().item()
        n_samples += labels.shape[0]

    accuracy = 100 * n_correct_inferences / n_samples

    # `.4f` prints four decimals; `4f` only set a minimum field width.
    print(f"Final accuracy in evaluation is {accuracy:.4f}%")

        