In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [4]:
class MLP(nn.Module):
    def __init__(self, input_size: int, hidden_layer: int, output_size: int):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_layer)
        self.fc2 = nn.Linear(hidden_layer, hidden_layer)
        self.fc3 = nn.Linear(hidden_layer, output_size)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [5]:
NUM_EPOCHS = 5
BS = 128

if __name__ == "__main__": 
    transform = transforms.ToTensor()
    dataset = datasets.MNIST("data/", train=True, download=True, transform=transform)
    
    train_loader = DataLoader(
        dataset=dataset,
        batch_size=BS,
        shuffle=False,
    )
    test_loader = DataLoader(
        datasets.MNIST("data/", train=False, transform=transform),
        batch_size=64,
        shuffle=True,
    )

    model = MLP(input_size=28*28, hidden_layer=64, output_size=10)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    for epoch in range(NUM_EPOCHS):
        for i, (images, labels) in enumerate(train_loader):
            images = images.view(-1, 28 * 28)
            outputs = model(images)
            loss = criterion(outputs, labels)
            if(i % 99 == 0): 
                print(f'loss at step {i}: { loss}')
            # rescale the loss to be a mean over the global batch size instead of
            optimizer.zero_grad()
            # compute the gradients locally
            loss.backward()
            optimizer.step()
        
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.view(-1, 28 * 28)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        print(
            "Accuracy of the model on the {} test images: {} %".format(
                total, 100 * correct / total
            ),
        )


loss at step 0: 2.2983036041259766
loss at step 99: 0.5501077771186829
loss at step 198: 0.1603282392024994
loss at step 297: 0.2581077814102173
loss at step 396: 0.32321903109550476
loss at step 0: 0.1945410519838333
loss at step 99: 0.33502018451690674
loss at step 198: 0.06694156676530838
loss at step 297: 0.1342238485813141
loss at step 396: 0.20925651490688324
loss at step 0: 0.13462984561920166
loss at step 99: 0.2540038824081421
loss at step 198: 0.0426778569817543
loss at step 297: 0.08440106362104416
loss at step 396: 0.14905934035778046
loss at step 0: 0.10024844855070114
loss at step 99: 0.19364289939403534
loss at step 198: 0.03823189064860344
loss at step 297: 0.06804730743169785
loss at step 396: 0.11012671887874603
loss at step 0: 0.07603234797716141
loss at step 99: 0.14328692853450775
loss at step 198: 0.03689737617969513
loss at step 297: 0.052505794912576675
loss at step 396: 0.09012936800718307
Accuracy of the model on the 10000 test images: 96.47 %


In [None]:
%matplotlib inline



In [None]:
#Relu: 95.97% GeLu: 96.08%  Tanh + gelu 96.55%  tanh 96.67%