In [1]:
import torch
import torch.nn.functional as F
import torch.optim as optim
from torchvision import transforms
from session_6 import Model2, train, create_mnist_data_loaders


In [2]:
# Seed PyTorch's global random number generator so reruns start from the same
# state. The returned Generator is bound to `_` so it is not echoed as cell output.
_ = torch.manual_seed(seed=1)

In [3]:
# Instantiate the network under evaluation (Model2 is imported from session_6).
model = Model2()
# Show the per-layer summary (output shapes and parameter counts) printed below.
model.summarize()

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1            [-1, 8, 28, 28]              80
              ReLU-2            [-1, 8, 28, 28]               0
       BatchNorm2d-3            [-1, 8, 28, 28]              16
            Conv2d-4            [-1, 8, 28, 28]             584
              ReLU-5            [-1, 8, 28, 28]               0
       BatchNorm2d-6            [-1, 8, 28, 28]              16
         MaxPool2d-7            [-1, 8, 14, 14]               0
           Dropout-8            [-1, 8, 14, 14]               0
            Conv2d-9           [-1, 12, 14, 14]             876
             ReLU-10           [-1, 12, 14, 14]               0
      BatchNorm2d-11           [-1, 12, 14, 14]              24
           Conv2d-12           [-1, 12, 14, 14]           1,308
             ReLU-13           [-1, 12, 14, 14]               0
      BatchNorm2d-14           [-1, 12,

In [4]:
# --- Training schedule ---
epochs = 15
batch_size = 128

# --- Objective and optimiser ---
# NOTE(review): F.nll_loss expects log-probabilities as input; presumably Model2
# ends in log_softmax - confirm against the model definition.
loss_fn = F.nll_loss
optimizer = optim.Adam(params=model.parameters(), lr=0.01)

# --- Data location and preprocessing ---
data_path = "../data"
to_tensor = transforms.ToTensor()
# 0.1307 / 0.3081 look like the commonly quoted MNIST mean / std - TODO confirm.
normalize = transforms.Normalize(mean=(0.1307,), std=(0.3081,))
transform = transforms.Compose([to_tensor, normalize])

In [5]:
# Build the train and test loaders; the same transform pipeline is applied to
# both splits.
train_loader, test_loader = create_mnist_data_loaders(
    batch_size,
    data_path,
    train_transform=transform,
    test_transform=transform,
)

In [6]:
# Run the training loop; the captured output below shows a progress bar per
# epoch followed by a test-set evaluation line.
train(
    model,
    epochs,
    train_loader,
    test_loader,
    loss_fn,
    optimizer,
)

epoch=01 loss=0.2709 batch_id=0468 accuracy=90.57%: 100%|██████████| 469/469 [00:05<00:00, 91.21it/s] 


Test set: Average loss: 0.0682, Accuracy: 9814/10000 (98.14%)



epoch=02 loss=0.1380 batch_id=0468 accuracy=94.18%: 100%|██████████| 469/469 [00:05<00:00, 91.66it/s]


Test set: Average loss: 0.0492, Accuracy: 9853/10000 (98.53%)



epoch=03 loss=0.1142 batch_id=0468 accuracy=94.63%: 100%|██████████| 469/469 [00:04<00:00, 95.07it/s]


Test set: Average loss: 0.0402, Accuracy: 9882/10000 (98.82%)



epoch=04 loss=0.1273 batch_id=0468 accuracy=94.82%: 100%|██████████| 469/469 [00:05<00:00, 92.77it/s]


Test set: Average loss: 0.0358, Accuracy: 9899/10000 (98.99%)



epoch=05 loss=0.1636 batch_id=0468 accuracy=94.83%: 100%|██████████| 469/469 [00:05<00:00, 92.83it/s]


Test set: Average loss: 0.0285, Accuracy: 9913/10000 (99.13%)



epoch=06 loss=0.0826 batch_id=0468 accuracy=95.24%: 100%|██████████| 469/469 [00:04<00:00, 94.12it/s]


Test set: Average loss: 0.0299, Accuracy: 9894/10000 (98.94%)



epoch=07 loss=0.0976 batch_id=0468 accuracy=95.25%: 100%|██████████| 469/469 [00:04<00:00, 95.14it/s]


Test set: Average loss: 0.0242, Accuracy: 9922/10000 (99.22%)



epoch=08 loss=0.0523 batch_id=0468 accuracy=95.25%: 100%|██████████| 469/469 [00:05<00:00, 92.66it/s]


Test set: Average loss: 0.0319, Accuracy: 9905/10000 (99.05%)



epoch=09 loss=0.0249 batch_id=0468 accuracy=95.16%: 100%|██████████| 469/469 [00:05<00:00, 92.52it/s]


Test set: Average loss: 0.0217, Accuracy: 9928/10000 (99.28%)



epoch=10 loss=0.2502 batch_id=0468 accuracy=95.31%: 100%|██████████| 469/469 [00:05<00:00, 91.88it/s]


Test set: Average loss: 0.0245, Accuracy: 9924/10000 (99.24%)



epoch=11 loss=0.0742 batch_id=0468 accuracy=95.43%: 100%|██████████| 469/469 [00:04<00:00, 95.46it/s]


Test set: Average loss: 0.0247, Accuracy: 9927/10000 (99.27%)



epoch=12 loss=0.1375 batch_id=0468 accuracy=95.45%: 100%|██████████| 469/469 [00:04<00:00, 93.85it/s]


Test set: Average loss: 0.0289, Accuracy: 9911/10000 (99.11%)



epoch=13 loss=0.2064 batch_id=0468 accuracy=95.45%: 100%|██████████| 469/469 [00:04<00:00, 94.78it/s]


Test set: Average loss: 0.0193, Accuracy: 9934/10000 (99.34%)



epoch=14 loss=0.1354 batch_id=0468 accuracy=95.50%: 100%|██████████| 469/469 [00:05<00:00, 92.56it/s]


Test set: Average loss: 0.0232, Accuracy: 9931/10000 (99.31%)



epoch=15 loss=0.0939 batch_id=0468 accuracy=95.47%: 100%|██████████| 469/469 [00:05<00:00, 92.76it/s]


Test set: Average loss: 0.0249, Accuracy: 9917/10000 (99.17%)



**Target**

The target was to take the previous model and minimize the gap between train accuracy and test accuracy.

**Result**

The base model was taken from [`Model1`](../src/session_6/model_1.py). The dropout probability in each block is set to 10% (compared to 20% in [`Model1`](../src/session_6/model_1.py)).

Parameter Count: 6174

Train Accuracy: 95.47%

Test Accuracy: 99.17%

**Analysis**

The gap between train and test accuracy has reduced a lot, which is good. Overall accuracy has still not reached 99.4%, and the accuracies are not very stable in the last few epochs. A next step is to try LR schedulers to reduce the learning rate in the last few epochs, which should stabilize the model.