In [1]:
import torch
import numpy as np
from tqdm.notebook import tqdm
import torchvision

torch.manual_seed(42)

<torch._C.Generator at 0x1a47be50670>

In [2]:
scaled_mean= 0.13062754273414612 
scaled_std= 0.30810779333114624

In [3]:
train_dataset =  torchvision.datasets.MNIST('./data/files/', train=True, transform=torchvision.transforms.Compose([
                                                                                       torchvision.transforms.ToTensor(),
                                                                                       torchvision.transforms.Normalize(
                                                                                         (scaled_mean,), (scaled_std,))
                                                                                     ]))
test_dataset =   torchvision.datasets.MNIST('./data/files/', train=False, transform=torchvision.transforms.Compose([
                                                                                       torchvision.transforms.ToTensor(),
                                                                                       torchvision.transforms.Normalize(
                                                                                         (scaled_mean,), (scaled_std,))
                                                                                     ]))



In [4]:
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)

test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=64, shuffle=True)

In [5]:
print(f"Size of the train set: {len(train_loader.dataset)}, size of the test set: {len(test_loader.dataset)}")

Size of the train set: 60000, size of the test set: 10000


In [6]:
import matplotlib.pyplot as plt
plt.imshow(train_dataset[0][0].numpy().squeeze(), cmap='gray_r');

In [7]:
train_dataset[0][0].shape

torch.Size([1, 28, 28])

In [8]:
train_dataset[0][1]

5

In [9]:
import torch.nn as nn
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.linear1 = nn.Linear(28*28, 256) 
        self.linear2 = nn.Linear(256, 100) 
        self.final = nn.Linear(100, 10)
        self.relu = nn.ReLU()

    def forward(self, img): #convert + flatten
        x = img.view(-1, 28*28)
        x = self.relu(self.linear1(x))
        x = self.relu(self.linear2(x))
        x = self.final(x)
        return x


In [10]:
def train(model, dataloader, criterion, optimizer):
    for x_batch, y_batch in dataloader:
        y_pred = model(x_batch)
        
        loss = criterion(y_pred, y_batch)
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

In [11]:
import torch.nn.functional as F

def test(model, dataloader):
    correct = 0.0
    total = 0.0

    for x_batch, y_batch in dataloader:
        with torch.no_grad():
            preds = model(x_batch)           # output of linear
            probs = F.softmax(preds, dim=1) # probability distribution
            preds = probs.argmax(dim=1)      # most probable class (for each sample in the batch)

            correct += (preds == y_batch).sum()
            total += len(preds)
            
    return correct / total # accuracy

## Constant Learning Rate

In [12]:
model = Net()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
for epoch in range(20):
    train(model, train_loader, torch.nn.CrossEntropyLoss(), optimizer)
    acc = test(model, test_loader)
    
    print(f'Epoch {epoch}: test accuracy {acc:.4}')

Epoch 0: test accuracy 0.9017
Epoch 1: test accuracy 0.9236
Epoch 2: test accuracy 0.9345
Epoch 3: test accuracy 0.9414
Epoch 4: test accuracy 0.9506
Epoch 5: test accuracy 0.9534
Epoch 6: test accuracy 0.9598
Epoch 7: test accuracy 0.9626
Epoch 8: test accuracy 0.9643
Epoch 9: test accuracy 0.9679
Epoch 10: test accuracy 0.969
Epoch 11: test accuracy 0.9693
Epoch 12: test accuracy 0.969
Epoch 13: test accuracy 0.9723
Epoch 14: test accuracy 0.9729
Epoch 15: test accuracy 0.9735
Epoch 16: test accuracy 0.9741
Epoch 17: test accuracy 0.9755
Epoch 18: test accuracy 0.9754
Epoch 19: test accuracy 0.9749


## Increasing Learning Rate

In [16]:
from torch.optim.lr_scheduler import StepLR

model = Net()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = StepLR(optimizer, step_size=5, gamma=2)

for epoch in range(20):
    train(model, train_loader, torch.nn.CrossEntropyLoss(), optimizer)
    acc = test(model, test_loader)
    
    scheduler.step()
    
    print(f'Epoch {epoch}: test accuracy {acc:.4}')

Epoch 0: test accuracy 0.9045
Epoch 1: test accuracy 0.9246
Epoch 2: test accuracy 0.9354
Epoch 3: test accuracy 0.9435
Epoch 4: test accuracy 0.9485
Epoch 5: test accuracy 0.9563
Epoch 6: test accuracy 0.9638
Epoch 7: test accuracy 0.9668
Epoch 8: test accuracy 0.9706
Epoch 9: test accuracy 0.9709
Epoch 10: test accuracy 0.9732
Epoch 11: test accuracy 0.97
Epoch 12: test accuracy 0.9708
Epoch 13: test accuracy 0.977
Epoch 14: test accuracy 0.978
Epoch 15: test accuracy 0.9722
Epoch 16: test accuracy 0.9702
Epoch 17: test accuracy 0.9746
Epoch 18: test accuracy 0.9804
Epoch 19: test accuracy 0.9809


## Decaying Learning Rate

In [20]:
from torch.optim.lr_scheduler import StepLR

model = Net()
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
scheduler = StepLR(optimizer, step_size=5, gamma=1/2)

for epoch in range(20):
    train(model, train_loader, torch.nn.CrossEntropyLoss(), optimizer)
    acc = test(model, test_loader)
    
    scheduler.step()
    
    print(f'Epoch {epoch}: test accuracy {acc:.4}')

Epoch 0: test accuracy 0.9628
Epoch 1: test accuracy 0.9706
Epoch 2: test accuracy 0.9671
Epoch 3: test accuracy 0.9765
Epoch 4: test accuracy 0.9726
Epoch 5: test accuracy 0.9827
Epoch 6: test accuracy 0.9824
Epoch 7: test accuracy 0.983
Epoch 8: test accuracy 0.9819
Epoch 9: test accuracy 0.9834
Epoch 10: test accuracy 0.9826
Epoch 11: test accuracy 0.9827
Epoch 12: test accuracy 0.9826
Epoch 13: test accuracy 0.9822
Epoch 14: test accuracy 0.9827
Epoch 15: test accuracy 0.9826
Epoch 16: test accuracy 0.9826
Epoch 17: test accuracy 0.9831
Epoch 18: test accuracy 0.9832
Epoch 19: test accuracy 0.9828


In [None]:
Epoch 10: test accuracy 0.9817

In [17]:
# Just to check how the scheduler will work
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
scheduler = StepLR(optimizer, step_size=5, gamma=2)
for epoch in range(1, 21):
    scheduler.step()
    print('Epoch-{0} lr: {1}'.format(epoch, optimizer.param_groups[0]['lr']))


Epoch-1 lr: 0.01
Epoch-2 lr: 0.01
Epoch-3 lr: 0.01
Epoch-4 lr: 0.01
Epoch-5 lr: 0.02
Epoch-6 lr: 0.02
Epoch-7 lr: 0.02
Epoch-8 lr: 0.02
Epoch-9 lr: 0.02
Epoch-10 lr: 0.04
Epoch-11 lr: 0.04
Epoch-12 lr: 0.04
Epoch-13 lr: 0.04
Epoch-14 lr: 0.04
Epoch-15 lr: 0.08
Epoch-16 lr: 0.08
Epoch-17 lr: 0.08
Epoch-18 lr: 0.08
Epoch-19 lr: 0.08
Epoch-20 lr: 0.16
