# MNIST Efficient Implementation

In [36]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.datasets import MNIST
from torchvision import transforms

In [9]:
train_dataset = MNIST(root="../datasets", train=True, download=True, transform=transforms.ToTensor())
test_dataset = MNIST(root="../datasets", train=False, download=False, transform=transforms.ToTensor())

In [22]:
# parameters
DEVICE = ("cuda:0" if torch.cuda.is_available() else "cpu")
NUM_EPOCHS=10
BATCH_SIZE=32


NUM_LABELS = 10
NUM_FEATURES = 28*28
HIDDEN_SIZE_1 = 100
HIDDEN_SIZE_2 = 50
ALPHA = 0.01

In [11]:
print(DEVICE)

cuda:0


In [12]:
train_dataloader = DataLoader(dataset=train_dataset, 
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              drop_last=True,
                              num_workers=4)

test_dataloader = DataLoader(dataset=test_dataset, 
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              drop_last=True,
                              num_workers=4)

In [30]:
class Model(nn.Module):
    def __init__(self):
        super().__init__()
        self.layers = nn.Sequential(
                nn.Linear(NUM_FEATURES, HIDDEN_SIZE_1),
                nn.Sigmoid(),
                nn.Linear(HIDDEN_SIZE_1, HIDDEN_SIZE_2),
                nn.Sigmoid(),
                nn.Linear(HIDDEN_SIZE_2, NUM_LABELS),
                nn.LogSoftmax()
            )
    
    def forward(self, X):
        return self.layers(X)

In [41]:
model = Model().to(DEVICE)
criterion = nn.NLLLoss()
optimizer = optim.SGD(model.parameters(), lr=ALPHA)

In [47]:
for epoch in range(NUM_EPOCHS):
    loss_sum = 0
    batch_nums = 0
    for batch_idx, (features, labels) in enumerate(train_dataloader):
        # reshape features and move to gpu
        features = features.view(-1, NUM_FEATURES).to(DEVICE)
        # move label to GPU
        labels = labels.to(DEVICE)
                
        # ------ FORWARD PASS --------
        # first linear transformation
        probs = model(features)

        # ------CALCULATE LOSS --------
        #cross-entropy loss
        loss = criterion(probs, labels)

        # ------BACKPROPAGATION --------
        loss.backward()

        # ------GRADIENT DESCENT --------
        optimizer.step()

        # ------CLEAR GRADIENTS --------
        optimizer.zero_grad()
        
        # ------TRACK LOSS --------
        batch_nums += 1
        loss_sum += loss.detach().cpu()
    
    print(f'Epoch: {epoch+1} Loss: {loss_sum / batch_nums}')

Epoch: 1 Loss: 2.297351121902466
Epoch: 2 Loss: 2.2781248092651367
Epoch: 3 Loss: 2.213277578353882
Epoch: 4 Loss: 1.9112988710403442
Epoch: 5 Loss: 1.434522032737732
Epoch: 6 Loss: 1.104828119277954
Epoch: 7 Loss: 0.8919216990470886
Epoch: 8 Loss: 0.7575297951698303
Epoch: 9 Loss: 0.6585543155670166
Epoch: 10 Loss: 0.584967851638794


In [48]:
# test acccuracy
num_samples = 0
num_correct = 0
for batch_idx, (features, labels) in enumerate(test_dataloader):
    with torch.inference_mode():
        features = features.view(-1, NUM_FEATURES).to(DEVICE)
        labels = labels.to(DEVICE) 
        # ------ FORWARD PASS --------
        # first linear transformation
        probs = model(features)
        
        predictions = probs.argmax(dim=1)
        num_samples+=len(features)
        num_correct+=(labels == predictions).sum().detach().cpu().item()
        
accuracy = num_correct / num_samples
print(accuracy)

0.8500600961538461
