# MNIST Experiments

In [3]:
# Load IPython's autoreload extension so edits to the imported project modules
# are picked up without restarting the kernel.
%load_ext autoreload
# Mode 2: reload all modules before executing each cell.
%autoreload 2

In [2]:
import torch
import torchvision as tv
import sys
import torch.nn as nn
import torch.nn.functional as F

sys.path.append('../../')
sys.path.append('../../src/')

import src.general as general
import src.interfaces.dataset_models as data
import src.metrics as metrics
import src.evaluation as eval
import src.plot as plot
import src.compression.distillation as distill
import src.compression.pruning as prune
import src.compression.quantization as quant


Files already downloaded and verified
Files already downloaded and verified


Found cached dataset imagenet-1k (/workspace/volume/cache/imagenet-1k/default-212aff79ee65f848/1.0.0/a1e9bfc56c3a7350165007d1176b15e9128fcaf9ab972147840529aed3ae52bc)


  0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
# Look up the "MNIST" entry in the project's `supported_datasets` mapping.
# This same object is passed to every `general.finetune(...)` call below and
# has `set_transforms(...)` called on it in the VGG section.
dataset = data.supported_datasets["MNIST"]

## ResNet

In [4]:
# Pretrained ResNet-50 backbone; its stem and classification head are swapped
# out for MNIST in the following cells.
# NOTE(review): `pretrained=` is deprecated in torchvision >= 0.13 in favour of
# `weights=` — confirm the installed version before migrating.
resnet = tv.models.resnet50(pretrained=True)



In [5]:
# Replace the classification head with a 10-way linear layer (one output per
# MNIST class). The input width is read from the existing head rather than
# hard-coding 2048, so the cell does not depend on a magic number and gives the
# same result when re-run.
resnet.fc = nn.Linear(resnet.fc.in_features, 10)

In [34]:
# Swap the stem convolution for one that takes a single input channel.
# The replacement layer is newly constructed, so it does not reuse the weights
# of the layer it replaces.
resnet.conv1 = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=7,
    stride=2,
    padding=3,
    bias=False,
)

In [46]:
# Fine-tune the adapted ResNet-50 on the MNIST dataset object.
# The positional 99 is presumably the target score: the recorded log stops with
# "reaching the target score" once the test score exceeds 99 — TODO confirm
# against general.finetune's signature.
# NOTE(review): the VGG run saves under .../models/mnist/ while this one saves
# directly under .../models/ — confirm which layout is intended.
general.finetune(
    resnet,
    dataset,
    99,
    save_path="/workspace/volume/models/resnet50_mnist.pt",
    patience=1,
)

Validate: 100%|██████████| 94/94 [00:01<00:00, 54.95it/s]


Test loss: 0.0654
Test score: 97.9222


Train: 100%|██████████| 1688/1688 [00:51<00:00, 32.79it/s]


Train loss: 0.0632
Train score: 98.1135


Validate: 100%|██████████| 94/94 [00:01<00:00, 58.29it/s]


Test loss: 0.0478
Test score: 98.6370


Train: 100%|██████████| 1688/1688 [00:51<00:00, 32.52it/s]


Train loss: 0.0469
Train score: 98.6263


Validate: 100%|██████████| 94/94 [00:01<00:00, 58.16it/s]


Test loss: 0.0446
Test score: 98.7367


Train: 100%|██████████| 1688/1688 [00:52<00:00, 32.25it/s]


Train loss: 0.0339
Train score: 99.0096


Validate: 100%|██████████| 94/94 [00:01<00:00, 57.98it/s]


Test loss: 0.0391
Test score: 98.9029


Train: 100%|██████████| 1688/1688 [00:52<00:00, 32.23it/s]


Train loss: 0.0258
Train score: 99.2391


Validate: 100%|██████████| 94/94 [00:01<00:00, 57.80it/s]


Test loss: 0.0374
Test score: 99.0525
Finetuning stopped due to reaching the target score
Finetuning finished after 4 iterations
Best score: 99.0525


ResNet(
  (conv1): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

## VGG

In [5]:
# Load the pre-trained VGG16 model.
# NOTE(review): `pretrained=` is deprecated in torchvision >= 0.13 in favour of
# `weights=` — confirm the installed version before migrating.
vgg = tv.models.vgg16(pretrained=True)

# Replace the first convolutional layer so the network accepts single-channel
# input. The new layer is freshly constructed (no weights are copied over).
vgg.features[0] = nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

# Set the final classifier layer output to 10 classes for the MNIST dataset.
# The input width is taken from the layer being replaced instead of a
# hard-coded 4096, so the head always matches the preceding layer.
vgg.classifier[6] = nn.Linear(
    in_features=vgg.classifier[6].in_features,
    out_features=10,
    bias=True,
)




In [6]:
# Input pipeline used for the VGG run: upscale every image to 224x224, convert
# it to a tensor, then normalise the single channel.
# (0.1307, 0.3081) are presumably the usual MNIST mean/std — TODO confirm.
data_transforms = tv.transforms.Compose(
    [
        tv.transforms.Resize((224, 224)),
        tv.transforms.ToTensor(),
        tv.transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]
)

# NOTE(review): this is applied to the shared `dataset` object, so cells
# executed after this one presumably also receive the resized inputs — the
# result of later runs depends on whether this cell was executed first.
dataset.set_transforms(data_transforms)

In [7]:
# Fine-tune the adapted VGG16 on the MNIST dataset object (with the 224x224
# transforms set in the previous cell).
# The positional 99 is presumably the target score: the recorded log stops with
# "reaching the target score" once the test score exceeds 99 — TODO confirm.
# NOTE(review): this checkpoint goes under .../models/mnist/, whereas the ResNet
# and EfficientNet runs save directly under .../models/ — confirm the layout.
general.finetune(
    vgg,
    dataset,
    99,
    save_path="/workspace/volume/models/mnist/vgg16_mnist.pt",
    patience=1,
)

Validate: 100%|██████████| 94/94 [00:12<00:00,  7.24it/s]


Test loss: 2.2927
Test score: 9.5246


Train: 100%|██████████| 844/844 [04:51<00:00,  2.90it/s]


Train loss: 0.2909
Train score: 90.6083


Validate: 100%|██████████| 94/94 [00:11<00:00,  7.92it/s]


Test loss: 0.0446
Test score: 98.5926


Train: 100%|██████████| 844/844 [04:52<00:00,  2.89it/s]


Train loss: 0.0568
Train score: 98.2098


Validate: 100%|██████████| 94/94 [00:12<00:00,  7.81it/s]


Test loss: 0.0296
Test score: 99.0304
Finetuning stopped due to reaching the target score
Finetuning finished after 2 iterations
Best score: 99.0304


VGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

## EfficientNet

In [4]:
# Pretrained EfficientNet-B7 backbone; its stem convolution and classifier are
# replaced for MNIST in the following cells.
# NOTE(review): `pretrained=` is deprecated in torchvision >= 0.13 in favour of
# `weights=` — confirm the installed version before migrating.
efficientnet = tv.models.efficientnet_b7(pretrained=True)



In [5]:
# `features[0]` is the stem block and `[0]` inside it is its Conv2d (see the
# model printout below). Replace that conv with a single-input-channel version;
# the new layer is freshly constructed, so no weights are copied over.
efficientnet.features[0][0] = nn.Conv2d(
    in_channels=1,
    out_channels=64,
    kernel_size=3,
    stride=2,
    padding=1,
    bias=False,
)

In [6]:
# Replace the final linear layer of the classifier with a 10-way head (one
# output per MNIST class). The input width is read from the layer being
# replaced rather than hard-coding 2560, so the cell carries no magic number
# and gives the same result when re-run.
efficientnet.classifier[1] = nn.Linear(efficientnet.classifier[1].in_features, 10)

In [19]:
# Fine-tune the adapted EfficientNet-B7 on the MNIST dataset object.
# The positional 99 is presumably the target score — TODO confirm. In the
# recorded output this run ended via early stopping (patience = 1) with a best
# score of 97.9721, i.e. below that value.
# NOTE(review): which transforms are active on `dataset` here depends on
# whether the VGG transforms cell was executed beforehand.
general.finetune(
    efficientnet,
    dataset,
    99,
    save_path="/workspace/volume/models/efficientnet_mnist.pt",
    patience=1,
)

Validate: 100%|██████████| 94/94 [00:02<00:00, 31.57it/s]


Test loss: 0.0946
Test score: 97.1520


Train: 100%|██████████| 422/422 [00:50<00:00,  8.38it/s]


Train loss: 0.1277
Train score: 96.3305


Validate: 100%|██████████| 94/94 [00:02<00:00, 32.85it/s]


Test loss: 0.0862
Test score: 97.4180


Train: 100%|██████████| 422/422 [00:49<00:00,  8.51it/s]


Train loss: 0.1180
Train score: 96.6060


Validate: 100%|██████████| 94/94 [00:02<00:00, 32.92it/s]


Test loss: 0.0810
Test score: 97.4679


Train: 100%|██████████| 422/422 [00:49<00:00,  8.49it/s]


Train loss: 0.1113
Train score: 96.8147


Validate: 100%|██████████| 94/94 [00:02<00:00, 32.87it/s]


Test loss: 0.0771
Test score: 97.7504


Train: 100%|██████████| 422/422 [00:49<00:00,  8.49it/s]


Train loss: 0.1033
Train score: 96.9879


Validate: 100%|██████████| 94/94 [00:02<00:00, 32.86it/s]


Test loss: 0.0736
Test score: 97.8834


Train: 100%|██████████| 422/422 [00:49<00:00,  8.51it/s]


Train loss: 0.1002
Train score: 97.0850


Validate: 100%|██████████| 94/94 [00:02<00:00, 32.82it/s]


Test loss: 0.0700
Test score: 97.9721


Train: 100%|██████████| 422/422 [00:49<00:00,  8.48it/s]


Train loss: 0.0926
Train score: 97.2714


Validate: 100%|██████████| 94/94 [00:02<00:00, 32.85it/s]

Test loss: 0.0677
Test score: 97.9721
Finetuning stopped due to early stopping with patience = 1
Finetuning finished after 6 iterations
Best score: 97.9721





EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=64, bias=False)
            (1): BatchNorm2d(64, eps=0.001, momentum=0.01, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(64, 16, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(16, 64, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormAct