In [1]:
from PIL import Image
import cv2
import matplotlib.pyplot as plt
%matplotlib inline
import torch
import numpy as np
import pandas as pd
import pickle as pkl
from sklearn.metrics import jaccard_score as IOU
from torchvision import models, transforms, io
from torch.utils.data import Dataset, DataLoader
import torch.nn.utils.prune as prune
import utils
import os
import time
import copy

# Data Loading

In [2]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
DATASET_PATH = 'ADE20K_2021_17_01/'
index_file = 'index_ade20k.pkl'
with open('{}/{}'.format(DATASET_PATH, index_file), 'rb') as f:
    index_ade20k = pkl.load(f)

objects_mat = index_ade20k['objectPresence']

# Find 150 most common object IDs and non-common object IDs
total_object_counts = np.sum(objects_mat, axis=1)
object_count_ids = np.argsort(total_object_counts)[::-1]
most_common_obj_ids = object_count_ids[:150]
irrelevant_obj_ids = object_count_ids[150:]
# Find image IDs where no irrelevant objects appear
irrelevant_obj_counts = np.sum(objects_mat[irrelevant_obj_ids], axis=0)
good_image_ids = np.argwhere(irrelevant_obj_counts == 0).flatten()
# Only common objects included
common_objects_mat = objects_mat[np.ix_(most_common_obj_ids, good_image_ids)]

# Maps {obj_ids: 0-149}
obj_id_map = {sorted(most_common_obj_ids)[idx]: idx + 1 for idx in range(150)}
obj_id_map[-1] = 0

# Pick out images to train/evaluate on
train_image_ids = []
test_image_ids = []
for i in good_image_ids:
    if 'training' in index_ade20k['folder'][i]:
        train_image_ids.append(i)
    elif 'validation' in index_ade20k['folder'][i]:
        test_image_ids.append(i)
    else:
        raise Exception('Invalid folder name.')

In [3]:
class SegmentationDataset(Dataset):
    def __init__(self, image_ids, root_dir, index_mat, transform=None, target_transform=None):
        """
        Args:
            image_ids (list): list of image IDs from ADE20K
            root_dir (string): Directory with all the images.
            index_mat (array): object array from index_ade20k.pkl
            transform (callable, optional): Optional transform to be applied
                on a sample.
            target_transform (callable, optional): Optional transform to be applied
                on a sample segmentation label.
        """
        self.image_ids = image_ids
        self.root_dir = root_dir
        self.index_ade20k = index_mat
        self.transform = transform
        self.target_transform = target_transform

    def __len__(self):
        return len(self.image_ids)

    def __getitem__(self, idx):
        image_id = self.image_ids[idx]
        img_name = os.path.join(self.root_dir, self.index_ade20k['folder'][image_id], 
                                self.index_ade20k['filename'][image_id])
        img_info = utils.loadAde20K(img_name)
        
        image = io.read_image(img_info['img_name']).float()
        class_mask = Image.fromarray(img_info['class_mask'], mode='I')
        
        if self.transform:
            image = self.transform(image)
        if self.target_transform:
            label = self.target_transform(class_mask)
            
        sample = (image, label)

        return sample

In [4]:
input_size = 224
transform = transforms.Compose([
                transforms.Resize(input_size),
                transforms.CenterCrop(input_size),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])

target_transform = transforms.Compose([
                transforms.Resize(input_size, interpolation=0),
                transforms.CenterCrop(input_size),
                transforms.ToTensor()
            ])

  "Argument interpolation should be of type InterpolationMode instead of int. "


In [5]:
num_samples = 4
batch_size = 2
training_data = SegmentationDataset(train_image_ids[:num_samples], './', index_ade20k, transform=transform, target_transform=target_transform)
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=False)
testing_data = SegmentationDataset(test_image_ids[:num_samples], './', index_ade20k, transform=transform, target_transform=target_transform)
test_dataloader = DataLoader(testing_data, batch_size=batch_size, shuffle=False)

# Load Pre-trained Model

In [12]:
def get_parameter_size(model):
    """
    Return model size in terms of parameters
    Each parameter is a float32 - 4 bytes
    """
    num_params = 0
    for p in model.parameters():
        num_params += torch.count_nonzero(p.flatten())
        
    total_bytes = num_params.item() / 4
    kb = total_bytes / 1000
    
    return {"# Params": num_params.item(),
            "Size in KB": kb}


def encode_label(label_arr, obj_id_map):
    """
    Encode labels for evaluating loss
    label_arr (tensor): B x 1 x H x W
    """
    convert_label_ids = lambda i: obj_id_map[i-1]
    vect_convert_label_ids = np.vectorize(convert_label_ids)
    
    encoded_label = vect_convert_label_ids(label_arr.squeeze().numpy())
    
    return torch.tensor(encoded_label, dtype=torch.long)

In [255]:
model = models.segmentation.fcn_resnet50(pretrained=False, num_classes=151).to(device=device)
model.load_state_dict(torch.load('../scene_seg_models/fcn_resnet_50/epochs_20_weights.pkl', map_location=torch.device('cpu')))

<All keys matched successfully>

In [14]:
print(get_parameter_size(model))

{'# Params': 33023703, 'Size in KB': 8255.92575}


### Quantize weights

In [83]:
from typing import Type, Any, Callable, Union, List, Optional
from torch import Tensor
import torch.nn as nn

class BasicBlock(nn.Module):
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(BasicBlock, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
        # Both self.conv1 and self.downsample layers downsample the input when stride != 1
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = norm_layer(planes)
        # Rename relu to relu1
        self.relu1 = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = norm_layer(planes)
        self.downsample = downsample
        self.stride = stride
        self.skip_add = nn.quantized.FloatFunctional()
        # Remember to use two independent ReLU for layer fusion.
        self.relu2 = nn.ReLU(inplace=True)

    def forward(self, x: Tensor) -> Tensor:
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu1(out)

        out = self.conv2(out)
        out = self.bn2(out)

        if self.downsample is not None:
            identity = self.downsample(x)
        
        # Use FloatFunctional for addition for quantization compatibility
        # out += identity
        out = self.skip_add.add(identity, out)
        out = self.relu2(out)

        return out

In [67]:
class QuantizedFCNResnet50(torch.nn.Module):
    def __init__(self, model_fp32):
        super(QuantizedFCNResnet50, self).__init__()
        # QuantStub converts tensors from floating point to quantized.
        # This will only be used for inputs.
        self.quant = torch.quantization.QuantStub()
        # DeQuantStub converts tensors from quantized to floating point.
        # This will only be used for outputs.
        self.dequant = torch.quantization.DeQuantStub()
        # FP32 model
        self.model_fp32 = model_fp32

    def forward(self, x):
        # manually specify where tensors will be converted from floating
        # point to quantized in the quantized model
        x = self.quant(x)
        x = self.model_fp32(x)
        # manually specify where tensors will be converted from quantized
        # to floating point in the quantized model
        x = self.dequant(x)
        return x

In [68]:
def calibrate_model(model, loader, device):

    model.to(device)
    model.eval()

    for inputs, labels in loader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        _ = model(inputs)

In [69]:
def model_equivalence(model_1, model_2, device, rtol=1e-05, atol=1e-08, num_tests=100, input_size=(1,3,224,224)):

    model_1.to(device)
    model_2.to(device)

    for _ in range(num_tests):
        x = torch.rand(size=input_size).to(device)
        y1 = model_1(x)['out'].detach().cpu().numpy()
        y2 = model_2(x)['out'].detach().cpu().numpy()
        if np.allclose(a=y1, b=y2, rtol=rtol, atol=atol, equal_nan=False) == False:
            print("Model equivalence test sample failed: ")
            print(y1)
            print(y2)
            return False

    return True

In [84]:
fused_model = copy.deepcopy(model)
model.eval()
fused_model.eval()

# Fuse the model in place rather manually.
fused_model.backbone = torch.quantization.fuse_modules(fused_model.backbone, [["conv1", "bn1", "relu"]], inplace=True)
for module_name, module in fused_model.backbone.named_children():
    if "layer" in module_name:
        for basic_block_name, basic_block in module.named_children():
            torch.quantization.fuse_modules(basic_block, [["conv1", "bn1", "relu"], ["conv2", "bn2"]], inplace=True)
            for sub_block_name, sub_block in basic_block.named_children():
                if sub_block_name == "downsample":
                    torch.quantization.fuse_modules(sub_block, [["0", "1"]], inplace=True)

In [85]:
model_equivalence(model, fused_model, device=device)

Model equivalence test sample failed: 
[[[[ 7.8572035   7.8572035   7.8572035  ...  7.9984016   7.9984016
     7.9984016 ]
   [ 7.8572035   7.8572035   7.8572035  ...  7.9984016   7.9984016
     7.9984016 ]
   [ 7.8572035   7.8572035   7.8572035  ...  7.9984016   7.9984016
     7.9984016 ]
   ...
   [ 5.8368645   5.8368645   5.8368645  ...  6.641162    6.6411614
     6.6411614 ]
   [ 5.8368645   5.8368645   5.8368645  ...  6.641162    6.6411614
     6.641162  ]
   [ 5.8368645   5.8368645   5.8368645  ...  6.641161    6.641162
     6.6411614 ]]

  [[-0.99252623 -0.99252623 -0.99252623 ... -1.1583534  -1.1583534
    -1.1583534 ]
   [-0.99252623 -0.99252623 -0.99252623 ... -1.1583534  -1.1583534
    -1.1583534 ]
   [-0.99252623 -0.99252623 -0.99252623 ... -1.1583534  -1.1583534
    -1.1583534 ]
   ...
   [-1.2109911  -1.2109911  -1.2109911  ... -1.2216486  -1.2216486
    -1.2216486 ]
   [-1.2109911  -1.2109911  -1.2109911  ... -1.2216486  -1.2216485
    -1.2216486 ]
   [-1.2109911  -1.210

False

In [126]:
quantized_model = QuantizedFCNResnet50(model_fp32=fused_model)
quantized_model.qconfig = torch.quantization.get_default_qconfig("fbgemm")
torch.quantization.prepare(quantized_model, inplace=True)
calibrate_model(model=quantized_model, loader=train_dataloader, device=device)
quantized_model = torch.quantization.convert(quantized_model, inplace=True)

RuntimeError: Could not run 'quantized::conv2d_relu.new' with arguments from the 'CPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'quantized::conv2d_relu.new' is only available for these backends: [QuantizedCPU, BackendSelect, Named, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, Tracer, Autocast, Batched, VmapMode].

QuantizedCPU: registered at ../aten/src/ATen/native/quantized/cpu/qconv.cpp:873 [kernel]
BackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Named: registered at ../aten/src/ATen/core/NamedRegistrations.cpp:7 [backend fallback]
AutogradOther: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:35 [backend fallback]
AutogradCPU: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:39 [backend fallback]
AutogradCUDA: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:43 [backend fallback]
AutogradXLA: fallthrough registered at ../aten/src/ATen/core/VariableFallbackKernel.cpp:47 [backend fallback]
Tracer: fallthrough registered at ../torch/csrc/jit/frontend/tracer.cpp:999 [backend fallback]
Autocast: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:250 [backend fallback]
Batched: registered at ../aten/src/ATen/BatchingRegistrations.cpp:1016 [backend fallback]
VmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]


In [87]:
quantized_model.eval()
print(quantized_model)


QuantizedFCNResnet50(
  (quant): Quantize(scale=tensor([8.9381]), zero_point=tensor([0]), dtype=torch.quint8)
  (dequant): DeQuantize()
  (model_fp32): FCN(
    (backbone): IntermediateLayerGetter(
      (conv1): QuantizedConvReLU2d(3, 64, kernel_size=(7, 7), stride=(2, 2), scale=0.02538936585187912, zero_point=0, padding=(3, 3))
      (bn1): Identity()
      (relu): Identity()
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): QuantizedConvReLU2d(64, 64, kernel_size=(1, 1), stride=(1, 1), scale=0.010219362564384937, zero_point=0)
          (bn1): Identity()
          (conv2): QuantizedConv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), scale=0.028914401307702065, zero_point=59, padding=(1, 1))
          (bn2): Identity()
          (conv3): QuantizedConv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), scale=0.02067003957927227, zero_point=56, bias=False)
          (bn3): Quantiz

In [88]:
print(get_parameter_size(model))
print(get_parameter_size(quantized_model))

{'# Params': 33023703, 'Size in KB': 8255.92575}
{'# Params': 31232, 'Size in KB': 7.808}


### Compression Sizes
Original: {'# Params': 33023703, 'Size in KB': 8255.92575}

Threshold: 0.1 ---
{'# Params': 76445, 'Size in KB': 19.11125} --- 99.8% compression

Threshold: 0.025 ---
{'# Params': 2211294, 'Size in KB': 552.8235} --- 93.3% compression

Threshold: 0.01 ---
{'# Params': 10571276, 'Size in KB': 2642.819} --- 68.0% compression

Threshold: 0.0025 --- 
{'# Params': 26041274, 'Size in KB': 6510.3185} --- 21.1% compression

Threshold: 0.001 ---
{'# Params': 30165420, 'Size in KB': 7541.355} --- 8.66% compression

In [75]:
torch.save(quantized_model, '../test_quantized_model.pkl')

In [76]:
test_load = torch.load('../test_pruned_model.pkl')

In [77]:
get_parameter_size(test_load)

{'# Params': 76445, 'Size in KB': 19.11125}

In [78]:
matching_flag = True
for p1, p2 in zip(quantized_model.parameters(), test_load.parameters()):
    if p1.data.ne(p2.data).sum() > 0:
        matching_flag = False
print("Copied weights" if matching_flag else "Not matching weights")

RuntimeError: The size of tensor a (256) must match the size of tensor b (7) at non-singleton dimension 3

In [285]:
def validate(model, test_dataloader):
    # testing pass
    test_start = time.time()
    running_accuracy = 0
    running_iou = 0
    with torch.no_grad():
        for images, labels in test_dataloader:
            images = images.to(device)
            output = model(images)['out']
            labels = encode_label(labels, obj_id_map).to(device)
            probs = torch.nn.functional.softmax(output, dim=1)
            preds = torch.argmax(probs, dim=1, keepdim=True).squeeze()
            num_correct = torch.sum((preds == labels).to(int)).item()
            iou = IOU(labels.detach().cpu().numpy().reshape(-1), preds.detach().cpu().numpy().reshape(-1), average='weighted')
            print('Testing accuracy: {}'.format(num_correct/(224*224*len(images))))
            print('Testing IOU score: {}'.format(iou))
            running_accuracy += num_correct/(224*224*len(images))
            running_iou += iou

    print("Testing time: {} seconds".format(time.time() - test_start))

    return {"Testing pixel accuracy": running_accuracy / len(test_dataloader),
            "Testing IOU accuracy": running_iou / len(test_dataloader)}

In [90]:
validate(quantized_model, test_dataloader)

RuntimeError: Could not run 'aten::add_.Tensor' with arguments from the 'QuantizedCPU' backend. This could be because the operator doesn't exist for this backend, or was omitted during the selective/custom build process (if using custom build). If you are a Facebook employee using PyTorch on mobile, please visit https://fburl.com/ptmfixes for possible resolutions. 'aten::add_.Tensor' is only available for these backends: [CPU, MkldnnCPU, SparseCPU, Meta, BackendSelect, Named, AutogradOther, AutogradCPU, AutogradCUDA, AutogradXLA, AutogradNestedTensor, UNKNOWN_TENSOR_TYPE_ID, AutogradPrivateUse1, AutogradPrivateUse2, AutogradPrivateUse3, Tracer, Autocast, Batched, VmapMode].

CPU: registered at aten/src/ATen/RegisterCPU.cpp:5925 [kernel]
MkldnnCPU: registered at aten/src/ATen/RegisterMkldnnCPU.cpp:284 [kernel]
SparseCPU: registered at aten/src/ATen/RegisterSparseCPU.cpp:557 [kernel]
Meta: registered at aten/src/ATen/RegisterMeta.cpp:414 [kernel]
BackendSelect: fallthrough registered at ../aten/src/ATen/core/BackendSelectFallbackKernel.cpp:3 [backend fallback]
Named: fallthrough registered at ../aten/src/ATen/core/NamedRegistrations.cpp:11 [kernel]
AutogradOther: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:9122 [autograd kernel]
AutogradCPU: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:9122 [autograd kernel]
AutogradCUDA: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:9122 [autograd kernel]
AutogradXLA: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:9122 [autograd kernel]
AutogradNestedTensor: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:9122 [autograd kernel]
UNKNOWN_TENSOR_TYPE_ID: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:9122 [autograd kernel]
AutogradPrivateUse1: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:9122 [autograd kernel]
AutogradPrivateUse2: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:9122 [autograd kernel]
AutogradPrivateUse3: registered at ../torch/csrc/autograd/generated/VariableType_2.cpp:9122 [autograd kernel]
Tracer: registered at ../torch/csrc/autograd/generated/TraceType_2.cpp:10525 [kernel]
Autocast: fallthrough registered at ../aten/src/ATen/autocast_mode.cpp:250 [backend fallback]
Batched: registered at ../aten/src/ATen/BatchingRegistrations.cpp:1016 [backend fallback]
VmapMode: fallthrough registered at ../aten/src/ATen/VmapModeRegistrations.cpp:33 [backend fallback]


In [273]:
def quantize_weights(model, num_bits):
    curr_max = -float('inf')
    curr_min = float('inf')
    for params in list(model.parameters()):
        curr_max = max(torch.max(params.data), curr_max)
        curr_min = min(torch.min(params.data), curr_min)
    
    qmax = 2 ** num_bits - 1
    scale = (curr_max - curr_min) / qmax
    
    curr_max = torch.tensor(-255).type(torch.int8)
    curr_min = torch.tensor(255).type(torch.int8)
    offset = 2**(num_bits-1) - 1
    for params in list(model.parameters()):
        params.data = (params.data/scale).type(torch.int8)
        curr_max = max(torch.max(params.data), curr_max)
        curr_min = min(torch.min(params.data), curr_min)
        params.data = torch.add(params.data, offset)
    
    return model


def quantize_weights(model, num_bits):
    for params in list(model.parameters()):
        params.data = (params.data).type(torch.float16)
    
    return model

def dequantize_weights(model):    
    for params in list(model.parameters()):
        params.data = (params.data).type(torch.float32)
    
    return model

In [290]:
quantized_model = copy.deepcopy(model)
quantize_weights(quantized_model, 8)
dequantize_weights(quantized_model)

FCN(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequenti

In [289]:
matching_flag = True
for p1, p2 in zip(quantized_model.parameters(), model.parameters()):
    if p1.data.ne(p2.data).sum() > 0:
        matching_flag = False
print("Copied weights" if matching_flag else "Not matching weights")

Not matching weights


In [275]:
print(model)

FCN(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequenti

In [161]:
def print_size_of_model(model, label=""):
    torch.save(model.state_dict(), "../temp.p")
    size=os.path.getsize("../temp.p")
    print("model: ",label,' \t','Size (KB):', size/1e3)
    os.remove('../temp.p')
    return size

In [234]:
quantized_model = copy.deepcopy(model)
# for module_name, module in quantized_model.named_modules():
#     print(module_name)
quantized_model = torch.quantization.quantize_dynamic(
    model, 
    {torch.nn.Conv2d, torch.nn.BatchNorm2d},
#     {'backbone.conv1', 'backbone.layer1.0.conv1', 'backbone.layer1.0.conv2', 'backbone.layer1.0.conv3'},
    dtype=torch.qint8
)

In [235]:
print(get_parameter_size(quantized_model))

{'# Params': 33023703, 'Size in KB': 8255.92575}


In [266]:
f=print_size_of_model(model,"fp32")
q=print_size_of_model(quantized_model,"int8")
print("{0:.2f} times smaller".format(f/q))

model:  fp32  	 Size (KB): 132425.835
model:  int8  	 Size (KB): 33354.731
3.97 times smaller


In [237]:
for params in list(quantized_model.parameters()):
    print(params)

Parameter containing:
tensor([[[[ 1.3501e-02,  9.0639e-03, -2.0342e-02,  ..., -4.0378e-02,
           -4.3022e-02, -7.1094e-02],
          [-5.6608e-05,  3.4251e-03,  1.1069e-02,  ...,  3.8351e-03,
           -1.8958e-02, -4.0563e-02],
          [ 1.8695e-02,  1.9203e-02,  1.3678e-02,  ...,  1.0370e-01,
            6.2208e-02,  5.1936e-02],
          ...,
          [-1.6547e-03,  2.4803e-02, -1.3716e-02,  ..., -1.3217e-01,
           -8.2564e-02,  4.6945e-03],
          [ 1.7271e-03,  4.5326e-02,  6.0534e-02,  ...,  2.0959e-02,
           -3.5751e-02, -1.7664e-02],
          [-8.6065e-02, -4.1319e-02, -2.6315e-02,  ...,  3.0945e-02,
            2.3391e-02,  2.1817e-03]],

         [[-1.5520e-02,  7.6647e-03,  2.1023e-02,  ...,  5.5840e-02,
            4.5364e-02, -9.1547e-03],
          [-8.8469e-03,  1.7861e-02,  6.6089e-02,  ...,  1.6298e-01,
            1.5021e-01,  1.1929e-01],
          [-4.6652e-02, -7.9130e-02, -9.0239e-02,  ...,  1.2334e-01,
            1.6772e-01,  1.7610e-01]

Parameter containing:
tensor([0.0535, 0.1320, 0.1067,  ..., 0.1052, 0.0533, 0.0307],
       requires_grad=True)
Parameter containing:
tensor([ 0.0092, -0.0488, -0.0792,  ..., -0.0863, -0.0272, -0.0090],
       requires_grad=True)
Parameter containing:
tensor([[[[-0.0364]],

         [[ 0.0312]],

         [[-0.0206]],

         ...,

         [[ 0.0069]],

         [[ 0.0138]],

         [[-0.0073]]],


        [[[ 0.0030]],

         [[-0.0065]],

         [[-0.0079]],

         ...,

         [[-0.0036]],

         [[ 0.0098]],

         [[-0.0020]]],


        [[[-0.0217]],

         [[ 0.0115]],

         [[-0.0008]],

         ...,

         [[ 0.0125]],

         [[ 0.0101]],

         [[ 0.0041]]],


        ...,


        [[[-0.0107]],

         [[-0.0043]],

         [[ 0.0317]],

         ...,

         [[ 0.0170]],

         [[ 0.0095]],

         [[ 0.0104]]],


        [[[ 0.0037]],

         [[-0.0035]],

         [[-0.0043]],

         ...,

         [[ 0.0330]],

      

In [190]:
print(quantized_model)

FCN(
  (backbone): IntermediateLayerGetter(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequenti

In [286]:
validate(quantized_model, test_dataloader)

RuntimeError: Input type (torch.FloatTensor) and weight type (torch.HalfTensor) should be the same

In [283]:
validate(model, test_dataloader)

Testing accuracy: 0.32262436224489793
Testing IOU score: 0.2996266757369685
Testing accuracy: 0.46958705357142855
Testing IOU score: 0.3986047238729782
Testing time: 2.3113620281219482 seconds


{'Testing pixel accuracy': 0.39610570790816324,
 'Testing IOU accuracy': 0.34911569980497337}