# Import libs

In [1]:
import os
import numpy as np
from tqdm import tqdm
from datetime import datetime
import copy
# torch libs
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import pickle
import utils
from quant_utils import *
# Everything in this notebook runs on the CPU. The quantization cells further down use
# the "fbgemm" qconfig (NOTE(review): presumably a CPU-only backend - confirm before
# pointing this at another device).
device = torch.device('cpu')

In [2]:
# Layer layouts keyed by variant letter, consumed by make_layers:
#   - an integer is the output-channel count of one 3x3 convolution;
#   - 'M' is a 2x2, stride-2 max-pooling layer.
# Only layout 'D' is used in this notebook (through vgg16_bn).
cfg = {
    'A' : [64,     'M', 128,      'M', 256, 256,           'M', 512, 512,           'M', 512, 512,           'M'],
    'B' : [64, 64, 'M', 128, 128, 'M', 256, 256,           'M', 512, 512,           'M', 512, 512,           'M'],
    'D' : [64, 64, 'M', 128, 128, 'M', 256, 256, 256,      'M', 512, 512, 512,      'M', 512, 512, 512,      'M'],
    'E' : [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M']
}
class VGG(nn.Module):
    """Convolutional feature extractor followed by a three-layer fully connected head.

    Args:
        features: Module applied to the input first. Its output, flattened per
            sample, must have exactly 512 values because the head starts with
            ``nn.Linear(512, 4096)``.
        num_class: Number of output logits produced by the last linear layer.
    """

    def __init__(self, features, num_class=100):
        super().__init__()
        self.features = features

        # Linear -> ReLU -> Dropout twice, then the output projection. The order
        # (and therefore the Sequential indices) is what the checkpoint keys and
        # the fusion index lists rely on.
        head_layers = [
            nn.Linear(512, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, num_class),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run the feature extractor, flatten each sample, and apply the head."""
        feature_maps = self.features(x)
        batch_size = feature_maps.size()[0]
        flattened = feature_maps.view(batch_size, -1)
        return self.classifier(flattened)

def make_layers(cfg, batch_norm=False, in_channels=3):
    """Build a convolutional feature extractor from a layer layout.

    Args:
        cfg: Iterable describing the layers in order. The string ``'M'`` adds a
            2x2, stride-2 max-pooling layer; any other entry is used as the
            output-channel count of a 3x3 convolution with padding 1.
        batch_norm: When true, a ``BatchNorm2d`` is inserted between every
            convolution and its ReLU.
        in_channels: Channel count of the network input. Defaults to 3, which
            is the value that used to be hard-coded.

    Returns:
        An ``nn.Sequential`` holding the layers in layout order.
    """
    layers = []
    channels = in_channels

    for spec in cfg:
        if spec == 'M':
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue

        layers.append(nn.Conv2d(channels, spec, kernel_size=3, padding=1))
        if batch_norm:
            layers.append(nn.BatchNorm2d(spec))
        layers.append(nn.ReLU(inplace=True))

        # The next convolution consumes what this one produced.
        channels = spec

    return nn.Sequential(*layers)
def vgg16_bn(num_classes):
    """Return a VGG built from layout 'D' with batch normalisation and `num_classes` outputs."""
    feature_extractor = make_layers(cfg['D'], batch_norm=True)
    return VGG(feature_extractor, num_class=num_classes)


In [3]:
# Settings shared by the training and test loaders.
common_loader_args = dict(batch_size=128, num_workers=8)

# The training loader also returns the normalisation statistics, which are reused
# for the test split below.
train_loader, mean, std = utils.get_subtraining_dataloader_cifar100_intersect(
    propor=1.0, shuffle=True, sub_idx=1, **common_loader_args
)
test_loader = utils.get_test_dataloader_cifar100(
    mean, std, shuffle=False, pin_memory=False, **common_loader_args
)

Files already downloaded and verified


In [4]:
# Rebuild the FP32 architecture and restore its trained weights.
model = vgg16_bn(num_classes=100)

checkpoint_path = '/data1/checkpoint/hash/cifar100/vgg16_0.pth'
state_dict = torch.load(checkpoint_path, map_location=device)
model.load_state_dict(state_dict)

# Inference mode, on the notebook's device.
model.eval()
model.to(device)
print("Loaded model.")

Loaded model.


# Quantization

In [5]:
# Put the FP32 model on the CPU and in evaluation mode *before* copying it, so the
# copy that gets fused and quantized is guaranteed to be in the same state. Taking
# the copy first would leave it on whatever device `model` was on at that moment.
model.to('cpu')
model.eval()
fused_model = copy.deepcopy(model)
# The model has to be switched to evaluation mode before any layer fusion.
# Otherwise the quantization will not work correctly.
fused_model.eval()


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (2): ReLU(inplace=True)
    (3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (5): ReLU(inplace=True)
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (9): ReLU(inplace=True)
    (10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (12): ReLU(inplace=True)
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): Conv2d(128, 256

In [6]:
# Index of the first module of every three-module group to fuse in `features`
# (for vgg16_bn: Conv2d, BatchNorm2d, ReLU); the skipped indices are the pooling layers.
conv_block_starts = [0, 3, 7, 10, 14, 17, 20, 24, 27, 30, 34, 37, 40]
feature_fuse_list = [[start, start + 1, start + 2] for start in conv_block_starts]
# Two-module groups to fuse in `classifier` (for VGG: Linear, ReLU).
classifier_fuse_list = [[0, 1], [3, 4]]

# fuse_modules addresses submodules by dotted name, so turn the indices into names.
feature_fuse = [list(map('features.{}'.format, group)) for group in feature_fuse_list]
classifier_fuse = [list(map('classifier.{}'.format, group)) for group in classifier_fuse_list]

In [7]:
# Fuse every listed group in place, covering both sub-networks in one call. Each
# group collapses into its first slot and the remaining slots become Identity.
torch.quantization.fuse_modules(fused_model, feature_fuse + classifier_fuse, inplace=True)

VGG(
  (features): Sequential(
    (0): ConvReLU2d(
      (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
    )
    (1): Identity()
    (2): Identity()
    (3): ConvReLU2d(
      (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
    )
    (4): Identity()
    (5): Identity()
    (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (7): ConvReLU2d(
      (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
    )
    (8): Identity()
    (9): Identity()
    (10): ConvReLU2d(
      (0): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (1): ReLU(inplace=True)
    )
    (11): Identity()
    (12): Identity()
    (13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (14): ConvReLU2d(
      (0): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)

In [8]:
# Wrap the fused model in QuantizedNetwork (from quant_utils). As the repr printed
# below shows, the wrapper holds a QuantStub, a DeQuantStub and the model itself.
quantized_model = QuantizedNetwork(fused_model)
quantized_model.eval()
# Default qconfig for the "fbgemm" backend; it is printed so the observer types
# used for activations and weights are recorded in the notebook output.
quantization_config = torch.quantization.get_default_qconfig("fbgemm")
quantized_model.qconfig = quantization_config
print(quantized_model.qconfig)
# Attach observers in place (visible as `activation_post_process` in the repr).
# The model is not quantized yet at this point.
torch.quantization.prepare(quantized_model, inplace=True)

QConfig(activation=functools.partial(<class 'torch.quantization.observer.HistogramObserver'>, reduce_range=True), weight=functools.partial(<class 'torch.quantization.observer.PerChannelMinMaxObserver'>, dtype=torch.qint8, qscheme=torch.per_channel_symmetric))




QuantizedNetwork(
  (quant): QuantStub(
    (activation_post_process): HistogramObserver()
  )
  (dequant): DeQuantStub()
  (model): VGG(
    (features): Sequential(
      (0): ConvReLU2d(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU(inplace=True)
        (activation_post_process): HistogramObserver()
      )
      (1): Identity()
      (2): Identity()
      (3): ConvReLU2d(
        (0): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU(inplace=True)
        (activation_post_process): HistogramObserver()
      )
      (4): Identity()
      (5): Identity()
      (6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (7): ConvReLU2d(
        (0): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): ReLU(inplace=True)
        (activation_post_process): HistogramObserver()
      )
      (8): Identity()
      (9): Identity()
      (10): ConvReLU2d(
 

In [9]:
%%time 
# Feed the training loader through the observed model (calibrate_model comes from
# quant_utils - NOTE(review): presumably forward passes only, no weight updates; confirm).
calibrate_model(model=quantized_model, loader=train_loader, device='cpu')
# Replace the observed modules with their quantized counterparts, in place.
quantized_model = torch.quantization.convert(quantized_model, inplace=True)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor'). (Triggered internally at  /opt/conda/conda-bld/pytorch_1623448255797/work/aten/src/ATen/native/BinaryOps.cpp:467.)
  return torch.floor_divide(self, other)


CPU times: user 16min 9s, sys: 7.49 s, total: 16min 17s
Wall time: 53.5 s


In [10]:
quantized_model.eval()
# Uncomment to inspect the converted module tree:
# print(quantized_model)
# Write the quantized model into the same directory as the FP32 checkpoint, using the
# quant_utils helper (NOTE(review): the helper name suggests a TorchScript export even
# though the file name ends in .pth - confirm which loader is needed to read it back).
save_torchscript_model(model=quantized_model, model_dir='/data1/checkpoint/hash/cifar100/', model_filename="vgg16_0_quant.pth")

In [11]:
# Report the size of the original FP32 model, as the baseline for the quantized one.
# The quant_utils helper prints the size and also returns a number, which the
# notebook echoes as the cell value.
print_size_of_model(model)

model   	 Size (KB): 136124.866


136124866

In [12]:
# Report the size of the quantized model, for comparison with the FP32 figure.
print_size_of_model(quantized_model)

model   	 Size (KB): 34274.687


34274687

In [13]:
# Accuracy of the quantized model on the test split. The first element returned by
# evaluate_model is discarded.
int8_eval_result = evaluate_model(
    model=quantized_model,
    test_loader=test_loader,
    device=device,
    criterion=None,
)
_, int8_eval_accuracy = int8_eval_result
print("INT8 evaluation accuracy: {:.3f}".format(int8_eval_accuracy))


100%|██████████| 79/79 [00:03<00:00, 22.13it/s]

INT8 evaluation accuracy: 0.639





In [14]:
# Accuracy of the original FP32 model on the same test split, as the reference for
# the INT8 figure. The first element returned by evaluate_model is discarded.
fp32_eval_result = evaluate_model(
    model=model,
    test_loader=test_loader,
    device=device,
    criterion=None,
)
_, fp32_eval_accuracy = fp32_eval_result
print("FP32 evaluation accuracy: {:.3f}".format(fp32_eval_accuracy))


100%|██████████| 79/79 [00:10<00:00,  7.48it/s]

FP32 evaluation accuracy: 0.640



