In [1]:
import torch
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torchvision import transforms
from session_6 import Model4, train, create_mnist_data_loaders


In [2]:
# Seed PyTorch's random number generator so repeated runs start from the same state.
# The trailing semicolon hides the returned Generator's repr in the cell output
# without rebinding `_`, which IPython reserves for the previous cell's result.
SEED = 1
torch.manual_seed(SEED);

In [3]:
# Build the network under test; Model4 is imported from the project-local session_6 package.
model = Model4()
# Print the per-layer table of output shapes and parameter counts.
model.summarize()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
              ReLU-2            [-1, 8, 28, 28]               0
       BatchNorm2d-3            [-1, 8, 28, 28]              16
            Conv2d-4            [-1, 8, 28, 28]             584
              ReLU-5            [-1, 8, 28, 28]               0
       BatchNorm2d-6            [-1, 8, 28, 28]              16
         MaxPool2d-7            [-1, 8, 14, 14]               0
           Dropout-8            [-1, 8, 14, 14]               0
            Conv2d-9           [-1, 12, 14, 14]             876
             ReLU-10           [-1, 12, 14, 14]               0
      BatchNorm2d-11           [-1, 12, 14, 14]              24
           Conv2d-12           [-1, 12, 14, 14]           1,308
             ReLU-13           [-1, 12, 14, 14]               0
      BatchNorm2d-14           [-1, 12,

In [4]:
# --- Training hyperparameters -------------------------------------------------
epochs = 15
batch_size = 128
loss_fn = F.nll_loss  # NOTE(review): NLL loss expects log-probabilities from the model — confirm Model4's output
optimizer = optim.Adam(model.parameters(), lr=0.01)
# Multiply the learning rate by gamma (0.1) after every `step_size` scheduler steps;
# presumably train() steps the scheduler once per epoch — confirm in session_6.
scheduler = lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.1)
data_path = "../data"

# --- Input pipelines ----------------------------------------------------------
# Normalization statistics shared by the train and test pipelines. They are kept
# in one place so the two splits can never be normalized differently by accident.
MNIST_MEAN = (0.1307,)
MNIST_STD = (0.3081,)

# Training pipeline: tensor conversion, random affine augmentation, normalization.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.RandomAffine(
        degrees=(-10.0, 10.0),   # rotation angle range, in degrees
        translate=(0.1, 0.1),    # maximum horizontal / vertical shift as a fraction of image size
        fill=0,                  # applied before Normalize, so exposed pixels get raw value 0
    ),
    transforms.Normalize(MNIST_MEAN, MNIST_STD),
])
# Test pipeline: no augmentation, only tensor conversion and the same normalization.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(MNIST_MEAN, MNIST_STD),
])

In [5]:
# Build the train/test data loaders; each split receives its own transform pipeline.
train_loader, test_loader = create_mnist_data_loaders(
    batch_size,
    data_path,
    train_transform=train_transform,
    test_transform=test_transform,
)

In [6]:
# Run the full training schedule; the LR scheduler is passed in so train() can apply it.
train(
    model,
    epochs,
    train_loader,
    test_loader,
    loss_fn,
    optimizer,
    scheduler=scheduler,
)

epoch=01 loss=0.2531 batch_id=0468 accuracy=87.85%: 100%|██████████| 469/469 [00:13<00:00, 34.40it/s]


Test set: Average loss: 0.0880, Accuracy: 9738/10000 (97.38%)



epoch=02 loss=0.1234 batch_id=0468 accuracy=92.99%: 100%|██████████| 469/469 [00:09<00:00, 50.27it/s]


Test set: Average loss: 0.0452, Accuracy: 9863/10000 (98.63%)



epoch=03 loss=0.1608 batch_id=0468 accuracy=93.58%: 100%|██████████| 469/469 [00:09<00:00, 49.99it/s]


Test set: Average loss: 0.0587, Accuracy: 9821/10000 (98.21%)



epoch=04 loss=0.1358 batch_id=0468 accuracy=93.99%: 100%|██████████| 469/469 [00:09<00:00, 50.56it/s]


Test set: Average loss: 0.0341, Accuracy: 9891/10000 (98.91%)



epoch=05 loss=0.2339 batch_id=0468 accuracy=94.04%: 100%|██████████| 469/469 [00:09<00:00, 51.72it/s]


Test set: Average loss: 0.0298, Accuracy: 9905/10000 (99.05%)



epoch=06 loss=0.1302 batch_id=0468 accuracy=94.38%: 100%|██████████| 469/469 [00:09<00:00, 51.75it/s]


Test set: Average loss: 0.0277, Accuracy: 9924/10000 (99.24%)



epoch=07 loss=0.2253 batch_id=0468 accuracy=94.20%: 100%|██████████| 469/469 [00:09<00:00, 50.10it/s]


Test set: Average loss: 0.0259, Accuracy: 9920/10000 (99.20%)



epoch=08 loss=0.1130 batch_id=0468 accuracy=94.44%: 100%|██████████| 469/469 [00:09<00:00, 50.62it/s]


Test set: Average loss: 0.0397, Accuracy: 9877/10000 (98.77%)



epoch=09 loss=0.1194 batch_id=0468 accuracy=94.41%: 100%|██████████| 469/469 [00:09<00:00, 51.47it/s]


Test set: Average loss: 0.0263, Accuracy: 9923/10000 (99.23%)



epoch=10 loss=0.1101 batch_id=0468 accuracy=94.56%: 100%|██████████| 469/469 [00:09<00:00, 51.59it/s]


Test set: Average loss: 0.0292, Accuracy: 9906/10000 (99.06%)



epoch=11 loss=0.1033 batch_id=0468 accuracy=94.85%: 100%|██████████| 469/469 [00:09<00:00, 52.04it/s]


Test set: Average loss: 0.0194, Accuracy: 9942/10000 (99.42%)



epoch=12 loss=0.1362 batch_id=0468 accuracy=95.03%: 100%|██████████| 469/469 [00:09<00:00, 51.87it/s]


Test set: Average loss: 0.0186, Accuracy: 9948/10000 (99.48%)



epoch=13 loss=0.1360 batch_id=0468 accuracy=95.09%: 100%|██████████| 469/469 [00:09<00:00, 52.00it/s]


Test set: Average loss: 0.0182, Accuracy: 9947/10000 (99.47%)



epoch=14 loss=0.0578 batch_id=0468 accuracy=94.87%: 100%|██████████| 469/469 [00:08<00:00, 52.12it/s]


Test set: Average loss: 0.0182, Accuracy: 9944/10000 (99.44%)



epoch=15 loss=0.1090 batch_id=0468 accuracy=95.01%: 100%|██████████| 469/469 [00:09<00:00, 52.04it/s]


Test set: Average loss: 0.0185, Accuracy: 9947/10000 (99.47%)



**Target**

The target was to take the previous model and improve its test accuracy to stay above 99.4% in the last few epochs.

**Result**

The base model was taken from [`Model3`](../src/session_6/model_3.py). No other changes were made to the model. During training, StepLR was applied with a step size of 10 and a gamma of 0.1, the same as with model 3. A random rotation between -10 and 10 degrees and a random translation of up to 10% of the image size were applied to the training data.

Parameter Count: 6174

Train Accuracy: 95.01%

Test Accuracy: 99.47%

**Analysis**

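The augmentations helped the model reach a consistent 99.4%+ test accuracy in the last 5 epochs (epochs 11–15). Train accuracy is lower than test accuracy, which is likely because the augmentations are applied only to the training data, making the training samples harder than the test samples. The gap between train and test accuracy increased slightly, but it remains within an acceptable range.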