# VGG ImageNet
This notebook shows some examples of compressions of models trained on the ImageNet dataset.

# Setup

### Imports

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
import torch.optim as optim
import torch.nn as nn
import torch.nn.functional as F
from tqdm import tqdm
import sys

sys.path.append('../../')
sys.path.append('../../src/')

import src.general as general
import src.dataset_models as data
import src.metrics as metrics
import src.evaluation as eval
import src.plot as plot
import src.compression.distillation as distill
import src.compression.pruning as prune


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw

Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified


### Dataset & Model

In [3]:
dataset = data.supported_datasets["CIFAR-10"]
dataset.cap = 100

In [21]:
dataset.criterion = F.cross_entropy
print(dataset.criterion.__name__)

cross_entropy


In [46]:
import models.resnet as rs

device = general.get_device()

resnet = rs.resnet18(device=device)
state_dict = torch.load('../models/resnet18.pt', map_location=device)
resnet.load_state_dict(state_dict)

<All keys matched successfully>

In [7]:
prune.get_layers_not_to_prune(resnet)

[Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
 Linear(in_features=512, out_features=10, bias=True),
 Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False),
 BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)]

In [55]:
general.test(model, dataset)

Test: 100it [00:01, 96.77it/s]

Test loss: 0.4217
Test score: 90.1562





(0.42165916830301287,
 90.15625,
 1034.7247123718262,
 10.347247123718262,
 0.16167573630809784)

In [56]:
iterations = 10
target = 98
score = 0

while score < target:
    metrics = general.train(resnet, dataset)
    score = metrics[1]

Train: 100%|██████████| 782/782 [00:17<00:00, 43.88it/s]


Train loss: 0.1335
Train score: 95.4923


Train: 100%|██████████| 782/782 [00:17<00:00, 44.12it/s]


Train loss: 0.1227
Train score: 95.8460


Train: 100%|██████████| 782/782 [00:17<00:00, 44.45it/s]


Train loss: 0.1191
Train score: 95.9499


Train: 100%|██████████| 782/782 [00:18<00:00, 43.02it/s]


Train loss: 0.1139
Train score: 96.0997


Train: 100%|██████████| 782/782 [00:18<00:00, 43.35it/s]


Train loss: 0.1045
Train score: 96.3835


Train: 100%|██████████| 782/782 [00:17<00:00, 44.18it/s]


Train loss: 0.1019
Train score: 96.4994


Train: 100%|██████████| 782/782 [00:18<00:00, 42.75it/s]


Train loss: 0.0985
Train score: 96.5953


Train: 100%|██████████| 782/782 [00:18<00:00, 43.27it/s]


Train loss: 0.0929
Train score: 96.8390


Train: 100%|██████████| 782/782 [00:17<00:00, 43.69it/s]


Train loss: 0.0859
Train score: 97.0748


Train: 100%|██████████| 782/782 [00:18<00:00, 43.04it/s]


Train loss: 0.0829
Train score: 97.2047


Train: 100%|██████████| 782/782 [00:18<00:00, 43.03it/s]


Train loss: 0.0834
Train score: 97.1367


Train: 100%|██████████| 782/782 [00:17<00:00, 43.74it/s]


Train loss: 0.0774
Train score: 97.3306


Train: 100%|██████████| 782/782 [00:17<00:00, 43.80it/s]


Train loss: 0.0746
Train score: 97.4045


Train: 100%|██████████| 782/782 [00:18<00:00, 43.11it/s]


Train loss: 0.0721
Train score: 97.5683


Train: 100%|██████████| 782/782 [00:18<00:00, 42.44it/s]


Train loss: 0.0716
Train score: 97.5364


Train: 100%|██████████| 782/782 [00:18<00:00, 42.78it/s]


Train loss: 0.0638
Train score: 97.8181


Train: 100%|██████████| 782/782 [00:18<00:00, 42.68it/s]


Train loss: 0.0650
Train score: 97.7681


Train: 100%|██████████| 782/782 [00:18<00:00, 42.36it/s]


Train loss: 0.0591
Train score: 97.9160


Train: 100%|██████████| 782/782 [00:18<00:00, 42.45it/s]


Train loss: 0.0583
Train score: 97.9640


Train: 100%|██████████| 782/782 [00:18<00:00, 42.20it/s]

Train loss: 0.0551
Train score: 98.1558





### Evaluation
Evaluate the model before any compression

In [30]:
device

device(type='cuda')

In [57]:
before_results = eval.get_results(resnet, dataset)
plot.print_results(**before_results)

Test: 100it [00:01, 97.01it/s]

Test loss: 0.3449
Test score: 91.3125
Could not calculate FLOPS
Loss: 0.344873
Score: 91.312500
Time per data point: 0.1613 ms
Model Size: 42.69 MB
Number of parameters: 11173962
Number of MACs: 140848128





# Compression

## Pruning

### Structured pruning example.

In [41]:
model = prune.magnitude_pruning_structured(resnet, dataset, sparsity=0.1, fineTune=False)

In [71]:
magnitude_pruned_results = eval.get_results(model, dataset)
plot.print_before_after_results(before_results, magnitude_pruned_results)

Test: 100it [00:01, 97.46it/s]

Test loss: 0.4038
Test score: 90.1562
Could not calculate FLOPS
Loss: 0.344873 -> 0.403823 (17.09%)
Score: 91.312500 -> 90.156250 (-1.27%)
Time per data point: 0.1613 ms -> 0.1606 ms (-0.41%)
Model Size: 42.69 MB -> 22.71 MB (-46.80%)
Number of parameters: 11173962 -> 5937110 (-46.87%)
Number of MACs: 140848128 -> 82681912 (-41.30%)





In [62]:
general.finetune(model, dataset, target=95, max_it=5)

Train: 100%|██████████| 782/782 [00:17<00:00, 44.20it/s]

Train loss: 0.0488
Train score: 98.2757





In [63]:
model = prune.magnitude_pruning_structured(resnet,dataset, sparsity=0.8, fineTune=True)

Train: 100%|██████████| 782/782 [00:17<00:00, 44.01it/s]


Train loss: 0.1939
Train score: 93.3344


Train: 100%|██████████| 782/782 [00:18<00:00, 43.05it/s]


Train loss: 0.4113
Train score: 86.3851


Train: 100%|██████████| 782/782 [00:17<00:00, 44.08it/s]

Train loss: 0.8620
Train score: 71.1737





ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(12, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [70]:
general.finetune(resnet, dataset, target=95)

Train: 100%|██████████| 782/782 [00:18<00:00, 42.61it/s]


Train loss: 0.2737
Train score: 90.4232


Train: 100%|██████████| 782/782 [00:18<00:00, 41.62it/s]


Train loss: 0.2744
Train score: 90.4352


Train: 100%|██████████| 782/782 [00:18<00:00, 43.03it/s]


Train loss: 0.2704
Train score: 90.6450


Train: 100%|██████████| 782/782 [00:18<00:00, 42.43it/s]


Train loss: 0.2680
Train score: 90.7169


Train: 100%|██████████| 782/782 [00:17<00:00, 43.45it/s]


Train loss: 0.2646
Train score: 90.7549


Train: 100%|██████████| 782/782 [00:17<00:00, 44.12it/s]


Train loss: 0.2654
Train score: 90.8028


Train: 100%|██████████| 782/782 [00:17<00:00, 43.69it/s]


Train loss: 0.2600
Train score: 91.0806


Train: 100%|██████████| 782/782 [00:18<00:00, 43.08it/s]

Train loss: 0.2606
Train score: 90.8748





## Distillation

In [18]:
settings = {
    # "performance_target": 99,
    "fineTune": False,
    "epochs": 3,
}

distilled_model = distill.perform_distillation(resnet, dataset, settings)

Settings: {'fineTune': False, 'epochs': 3}
Fine-tuning: False


Epoch: 0


Distillation Training:   1%|          | 5/782 [00:01<04:59,  2.59it/s]


KeyboardInterrupt: 

In [33]:
resnet

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (shortcut): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (1): Bottleneck(
      (