## Training MNIST on a vanilla Feedforward NN using PyTorch

In [1]:
import torch
import torchvision
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms
import torch.optim as optim

#### Building Network Architecture

In [2]:
class NN(nn.Module):
    
    def __init__(self):
        super(NN, self).__init__()
        self.l1 = nn.Linear(28*28, 256)
        self.l2 = nn.Linear(256, 64)
        self.lo = nn.Linear(64,10)
        
    def forward(self, x):
        x = F.relu(self.l1(x))
        x = F.relu(self.l2(x))
        x = F.softmax(self.lo(x), dim=1)
        return x

In [3]:
model = NN()

In [4]:
model

NN(
  (l1): Linear(in_features=784, out_features=256, bias=True)
  (l2): Linear(in_features=256, out_features=64, bias=True)
  (lo): Linear(in_features=64, out_features=10, bias=True)
)

### Dataset prep

#### Defining Transformation function

In [5]:
class flatten:
    def __init__(self, size):
        self.size = size
        
    def __call__(self, x):
        flattened = x.view(self.size)
        return flattened

In [6]:
trans_func = transforms.Compose([transforms.ToTensor(), flatten(784)])

In [7]:
dataset = torchvision.datasets.mnist.MNIST

In [8]:
traindata = dataset(root='./data', train=True, download=True, transform=trans_func)
testdata = dataset(root='./data', train=False, download=True, transform=trans_func)

In [9]:
trainloader = torch.utils.data.DataLoader(traindata, shuffle=True, batch_size=128)
testloader = torch.utils.data.DataLoader(testdata, shuffle=True, batch_size=128)

#### Defining Loss function

In [10]:
lr_ = 0.1
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr_, momentum=0.7)

#### Training the model

In [11]:
for epoch in range(1, 15):
    for idx, data in enumerate(trainloader):
        optimizer.zero_grad()
        inputs, labels = data
        outputs = model(inputs)
        loss = criterion(outputs, labels) #Computing loss
        loss.backward() #Backpropagation
        optimizer.step() #Weights update step
        
        if idx % 100 == 0:
            print(f'Epoch:{epoch}. idx:{idx}. Loss: {loss.item()}')
    if epoch %3 == 0:
        lr_ /= 5
        optimizer = optim.SGD(model.parameters(), lr=lr_, momentum=0.9)

Epoch:1. idx:0. Loss: 2.3025214672088623
Epoch:1. idx:100. Loss: 2.2672176361083984
Epoch:1. idx:200. Loss: 1.8878673315048218
Epoch:1. idx:300. Loss: 1.6922639608383179
Epoch:1. idx:400. Loss: 1.6322304010391235
Epoch:2. idx:0. Loss: 1.6018824577331543
Epoch:2. idx:100. Loss: 1.6078972816467285
Epoch:2. idx:200. Loss: 1.6434779167175293
Epoch:2. idx:300. Loss: 1.6004382371902466
Epoch:2. idx:400. Loss: 1.6082031726837158
Epoch:3. idx:0. Loss: 1.5440254211425781
Epoch:3. idx:100. Loss: 1.5227000713348389
Epoch:3. idx:200. Loss: 1.57520592212677
Epoch:3. idx:300. Loss: 1.5588634014129639
Epoch:3. idx:400. Loss: 1.5768158435821533
Epoch:4. idx:0. Loss: 1.5310235023498535
Epoch:4. idx:100. Loss: 1.5197468996047974
Epoch:4. idx:200. Loss: 1.5268651247024536
Epoch:4. idx:300. Loss: 1.521525263786316
Epoch:4. idx:400. Loss: 1.4969745874404907
Epoch:5. idx:0. Loss: 1.5312050580978394
Epoch:5. idx:100. Loss: 1.5276157855987549
Epoch:5. idx:200. Loss: 1.5090924501419067
Epoch:5. idx:300. Loss: 

In [12]:
correct = 0
total = 0
for idx, data in enumerate(testloader):
    inputs, labels = data
    predicted = model(inputs)
    predicted_prob, predicted_label = torch.max(predicted, 1)
    correct += (predicted_label == labels).sum().item()
    total += labels.size(0)

print(f'Test Accuracy of the model: {100*correct/total}%')

Test Accuracy of the model: 94.63%
