# 1. Load pretrained model

In [1]:
import warnings
warnings.filterwarnings('ignore')

import torch
import torch.nn as nn
from torchvision.models.vgg import vgg11, vgg13, vgg16, vgg19 
from torch.ao.quantization import QuantStub, DeQuantStub
from utils.utils import calculate_op
from utils.train_utils import create_dataset,compute_accuracy
from utils.pca import extract_primary_layers
from utils.quantization import quantize_model, print_size_of_model

# Seed torch's global RNG so the random probe input below is identical on every
# Restart & Run All. Without this, anything computed from `x` (including the
# primary-layer analysis in the next section) changes from run to run.
SEED = 0
torch.manual_seed(SEED)

# Random probe input; passed to calculate_op here and reused by later cells.
x = torch.randn(1, 3, 224, 224)

# Only the train/test loaders are used; the first two return values are discarded.
_, _, trainloader, testloader = create_dataset(
    dataset="cifar10",
    data_root="./data",
    batch_size=2048,
    num_workers=2,
)

weight_path = "best_model_vgg11.pt"

model = vgg11()
# NOTE(review): torch.load unpickles the checkpoint, so only load files from a
# trusted source. If the installed torch version supports it, consider passing
# weights_only=True here.
model.load_state_dict(torch.load(weight_path, map_location="cpu"))
model.eval()

# Baseline measurements of the unmodified model, kept under `ori_*` names so
# the evaluation section can compare against them after quantization.
ori_model_size = print_size_of_model(model)
ori_test_acc = compute_accuracy(testloader, model, "cpu")
ori_macs, ori_params = calculate_op(x, model)

# Module names inside `model.features` to analyse, mapped to short aliases.
# These indices are specific to torchvision's vgg11 (the activation after each
# convolution); adjust them when switching to vgg13/vgg16/vgg19.
return_nodes = {
    "features.1": "layer1",
    "features.4": "layer2",
    "features.7": "layer3",
    "features.9": "layer4",
    "features.12": "layer5",
    "features.14": "layer6",
    "features.17": "layer7",
    "features.19": "layer8",
}

model

Files already downloaded and verified
Files already downloaded and verified


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
 

# 2. Select primary layers

In [2]:
# How many layers to keep from the analysis below.
num_primary_layers = 2

# Analyse every node listed in `return_nodes` using the probe input `x`.
# With verbose=True the helper prints, per layer, how many components are
# needed to exceed `threshold`.
primary_layers = extract_primary_layers(
    x,
    model,
    return_nodes,
    threshold=0.99,
    num_layers=num_primary_layers,
    verbose=True,
)

# Display the selection: a list of (layer name, fraction) pairs.
primary_layers

features.1 analysis result
need at least 37 filter(s) out of 64 components to exceed threshold. So 57.81% of filters needed minimum to exceed threshold
features.4 analysis result
need at least 49 filter(s) out of 128 components to exceed threshold. So 38.28% of filters needed minimum to exceed threshold
features.7 analysis result
need at least 87 filter(s) out of 256 components to exceed threshold. So 33.98% of filters needed minimum to exceed threshold
features.9 analysis result
need at least 83 filter(s) out of 256 components to exceed threshold. So 32.42% of filters needed minimum to exceed threshold
features.12 analysis result
need at least 111 filter(s) out of 512 components to exceed threshold. So 21.68% of filters needed minimum to exceed threshold
features.14 analysis result
need at least 51 filter(s) out of 512 components to exceed threshold. So 9.96% of filters needed minimum to exceed threshold
features.17 analysis result
need at least 68 filter(s) out of 512 components to e

[('features.1', 0.578125), ('features.4', 0.3828125)]

# 3. Quantize

In [3]:
# Quantize the layers selected in `primary_layers`. The return value is not
# used; the `model` displayed afterwards is the same object, so quantize_model
# appears to modify it in place.
# NOTE(review): because of that, re-running this cell would hand an already
# quantized model back to quantize_model -- reload the model first; confirm.
# NOTE(review): the meaning of the trailing positional `1` is not visible here;
# presumably a calibration setting used with `trainloader` -- confirm against
# utils.quantization.
quantize_model(model, primary_layers, trainloader, 1)
model

VGG(
  (features): Sequential(
    (0): Sequential(
      (0): Quantize(scale=tensor([0.0157]), zero_point=tensor([64]), dtype=torch.quint8)
      (1): QuantizedConvReLU2d(3, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.04875442758202553, zero_point=0, padding=(1, 1))
      (2): DeQuantize()
    )
    (1): Identity()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Sequential(
      (0): Quantize(scale=tensor([0.0536]), zero_point=tensor([0]), dtype=torch.quint8)
      (1): QuantizedConvReLU2d(64, 128, kernel_size=(3, 3), stride=(1, 1), scale=0.06528432667255402, zero_point=0, padding=(1, 1))
      (2): DeQuantize()
    )
    (4): Identity()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): 

# 4. Performance Evaluation

In [None]:
# Re-measure the (now quantized) model with the same helpers, in the same
# order, that produced the `ori_*` baselines.
test_acc = compute_accuracy(testloader, model, "cpu")
q_macs, q_params = calculate_op(x, model)
quant_model_size = print_size_of_model(model)

# One row per metric: (message template, value before, value after).
# Parameter counts are reported in millions, sizes in MB (1e6), MACs in G (1e9).
report_rows = (
    ("test accuracy is changed from {:.04f} to {:.04f}", ori_test_acc, test_acc),
    ("Number of parameters changed from {:.04f}M to {:.04f}M", ori_params/1e6, q_params/1e6),
    ("Size of model changed from {:.04f}MB to {:.04f}MB", ori_model_size/1e6, quant_model_size/1e6),
    ("MACs changed from {:.02f}G to {:.02f}G", ori_macs/1e9, q_macs/1e9),
)

for template, before, after in report_rows:
    print(template.format(before, after))