In [1]:
import torch
import torchvision
from PIL import Image
import numpy as np
import random
from tqdm import tqdm
from datasets import load_dataset
import torch.multiprocessing
import os
from torch import nn

# Report the intra-op / inter-op thread counts torch starts with.
print('DEFAULT:', torch.get_num_threads(), torch.get_num_interop_threads())
# Optional single-thread configuration, currently disabled:
# os.environ['OMP_NUM_THREADS'] = '1'
# os.environ['MKL_NUM_THREADS'] = '1'
# torch.set_num_threads(1), torch.set_num_interop_threads(1)
# Report the thread counts again (unchanged while the lines above stay commented out),
# followed by torch's full parallel-backend summary.
print(torch.get_num_threads(), torch.get_num_interop_threads())
print(torch.__config__.parallel_info())

# Use the 'file_system' strategy for sharing tensors between processes.
torch.multiprocessing.set_sharing_strategy('file_system')

DEFAULT: 4 4
4 4
ATen/Parallel:
	at::get_num_threads() : 4
	at::get_num_interop_threads() : 4
OpenMP 201511 (a.k.a. OpenMP 4.5)
	omp_get_max_threads() : 4
Intel(R) oneAPI Math Kernel Library Version 2022.2-Product Build 20220804 for Intel(R) 64 architecture applications
	mkl_get_max_threads() : 4
Intel(R) MKL-DNN v3.1.1 (Git Hash 64f6bcbcbab628e96f33a62c3e975f8535a7bde4)
std::thread::hardware_concurrency() : 8
Environment variables:
	OMP_NUM_THREADS : [not set]
	MKL_NUM_THREADS : [not set]
ATen parallel backend: OpenMP



In [2]:
# Pretrained ViT-B/16 (ImageNet-1k V1 weights), switched to eval mode.
vit_weights = torchvision.models.ViT_B_16_Weights.IMAGENET1K_V1
model = torchvision.models.vit_b_16(weights=vit_weights)
model.eval()
# Factory for the preprocessing pipeline that belongs to these weights; it is called later.
transforms = vit_weights.transforms

In [3]:
# Load the raw class listing, one entry per line (line endings kept).
with open('../imagenet1000.txt', 'r') as fopen:
    lines = list(fopen)

def process_classes(line: str):
    """Parse one ``index: 'name'`` entry of the class listing.

    The listing is a dict literal spread over several lines, so an entry may
    carry a leading ``{``, a trailing ``,`` and/or a trailing ``}``; all of
    them are removed. One pair of matching surrounding quotes (single or
    double) is removed from the name.

    Args:
        line: A single line of the listing.

    Returns:
        A ``(class_index, class_name)`` tuple.

    Raises:
        ValueError: If the part before the first ``:`` is not an integer.
    """
    entry = line.strip().removeprefix('{').removesuffix('}').rstrip().removesuffix(',')
    # Split on the first colon only, so a colon inside the name is preserved.
    index, _, name = entry.partition(':')
    name = name.strip()
    if len(name) >= 2 and name[0] == name[-1] and name[0] in '\'"':
        name = name[1:-1]
    return (int(index), name)

# Class index -> class description, parsed from the class listing.
orig_classes = dict(map(process_classes, lines))

# Imagenette class names; the position in this list is the key later looked up
# with the dataset's integer label.
imagenette_class_names = ['tench', 'English springer', 'cassette player', 'chain saw', 'church', 'French horn', 'garbage truck', 'gas pump', 'golf ball', 'parachute']

# Imagenette label -> index of the listing entry whose description contains the name.
imagenette_classes = {}
for label, name in enumerate(imagenette_class_names):
    matches = [idx for idx, description in orig_classes.items() if name in description]
    if not matches:
        # Fail loudly instead of leaving a class name where an index is expected.
        raise KeyError(f'Imagenette class {name!r} was not found in the class listing')
    # When several descriptions contain the name, the last one wins.
    imagenette_classes[label] = matches[-1]

In [4]:
class Dataset(torch.utils.data.Dataset):
    """Map-style dataset that pairs transformed images with remapped labels.

    Each record of ``datasource`` must provide an ``'image'`` entry (an object
    with a ``mode`` attribute and a ``convert`` method, e.g. a PIL image) and a
    ``'label'`` entry that is a key of the module-level ``imagenette_classes``.
    """

    def __init__(self, datasource, transforms: callable):
        """Store the indexable record source and the per-image transform."""
        super().__init__()
        self.transforms = transforms
        self.datasource = datasource

    def __len__(self) -> int:
        """Return the number of records in the datasource."""
        return len(self.datasource)

    def __getitem__(self, index: int) -> tuple:
        """Return ``(transformed_image, mapped_label)`` for the record at ``index``."""
        data = self.datasource[index]
        image, label = data['image'], data['label']
        if image.mode != 'RGB':
            # Let the image library do the conversion so that every source
            # mode (grayscale, palette, RGBA, CMYK, ...) ends up 3-channel RGB.
            image = image.convert('RGB')
        return self.transforms(image), imagenette_classes[label]

In [5]:
# Load the train and validation splits of the 320px Imagenette configuration.
imagenette_train, imagenette_valid = (
    load_dataset('frgfm/imagenette', '320px', split=split_name)
    for split_name in ('train', 'validation')
)

In [6]:
# DataLoader settings shared by every loader built in this notebook.
batch_size = 1   # samples per batch
num_workers = 4  # worker processes per loader

In [7]:
# Build the preprocessing pipeline once and share it between both splits.
tf = transforms()
trainset, validset = (
    Dataset(datasource=split, transforms=tf)
    for split in (imagenette_train, imagenette_valid)
)
# Validation batches are served in dataset order.
valid_dataloader = torch.utils.data.DataLoader(
    validset,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=False,
)

In [8]:
def nbytes(model: torch.nn.Module):
    n = 0
    for p in model.parameters():
        n += p.nbytes
    return n / 1024 ** 2

In [9]:
from torch.profiler import profile, record_function, ProfilerActivity
from itertools import product
from torch.quantization.observer import MinMaxObserver, MovingAverageMinMaxObserver, HistogramObserver
from torch.ao.quantization import get_default_qconfig_mapping, get_default_qconfig
from torch.quantization.quantize_fx import prepare_fx, convert_fx
from torch.ao.quantization import QConfigMapping
import gc
from contextlib import nullcontext
from timeit import timeit
import time
from sklearn.metrics import accuracy_score, top_k_accuracy_score
import datetime
# import torch.quantization._numeric_suite as ns
import torch.quantization._numeric_suite_fx as ns

def fix_seed(worker_id=0, seed=0xBADCAFE):
    """Seed the Python, NumPy and torch RNGs and pin the cuDNN flags.

    Args:
        worker_id: Accepted but not used by the body.
        seed: Value passed to every seeding call.
    """
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)

# Seed the global Python / NumPy / torch RNGs with the default seed.
fix_seed()

# Separate torch generator with its own seed.
# NOTE(review): this seed (0xBADCAFFE) differs from the fix_seed default (0xBADCAFE) — confirm this is intended.
torch_generator = torch.Generator()
torch_generator.manual_seed(0xBADCAFFE)

<torch._C.Generator at 0x7f48e433ccd0>

In [10]:
from tqdm.notebook import tqdm
from copy import deepcopy

In [11]:
# Independent copy of the full model.
copy_model = deepcopy(model)
# Part A: a second copy whose classification head is replaced by Identity,
# so it returns whatever the head would have received as input.
module_a = deepcopy(model)
module_a.heads.head = nn.Identity()
# Part B: the classification head itself. This is the same object as
# `model.heads.head`, not a copy.
module_b = model.heads.head
# model = LoggerModule(module_a, module_b)

In [12]:
import nncf
import torch


def transform_fn(data_item):
    """Return the first element of a two-element ``(inputs, target)`` item."""
    (model_input, _target) = data_item
    return model_input

# Quantize part A (model without its head) with NNCF. `transform_fn` extracts
# the image tensor from each (image, label) batch of the calibration loader.
# NOTE(review): calibration reads `valid_dataloader`, the same loader used for
# the accuracy measurement further down; `trainset` is built but not used in
# the visible code — consider calibrating on training data instead.
with torch.inference_mode():
    calibration_dataset_a = nncf.Dataset(valid_dataloader, transform_fn)
    quantized_module_a = nncf.quantize(module_a, calibration_dataset_a, model_type=nncf.ModelType.TRANSFORMER,  subset_size=1024)

INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino
INFO:nncf:No match has been found among the model operations for the following ignored/target scope definitions:
 - ignored_scope: ['{re}.*Embeddings.*', '{re}.*__truediv__*', '{re}.*matmul_1']
Refer to the original_graph.dot to discover the operations in the model currently visible to NNCF and specify the ignored/target scopes in terms of the names there.
INFO:nncf:Not adding activation input quantizer for operation: 5 VisionTransformer/Encoder[encoder]/__add___0
INFO:nncf:Not adding activation input quantizer for operation: 7 VisionTransformer/Encoder[encoder]/Sequential[layers]/EncoderBlock[encoder_layer_0]/NNCFLayerNorm[ln_1]/layer_norm_0
INFO:nncf:Not adding activation input quantizer for operation: 9 VisionTransformer/Encoder[encoder]/Sequential[layers]/EncoderBlock[encoder_layer_0]/__add___0
INFO:nncf:Not adding activation input quantizer for operation: 10 VisionTransformer/Encoder[enco

In [13]:
from typing import Iterable, Optional, Union, List, Tuple, Any

# Run every validation batch through quantized part A and keep each output;
# labels are discarded. The outputs serve as calibration data for part B.
# (The name `embeddigs` is referenced by later cells, so the spelling is kept.)
with torch.inference_mode():
    embeddigs = [quantized_module_a(x) for x, _ in tqdm(valid_dataloader)]


class IterDataset(torch.utils.data.Dataset):
    """Map-style dataset over an in-memory sequence.

    Items are ``(source[index], label)`` pairs. When no labels are given, the
    index itself is used as the label.
    """

    def __init__(self, source: Union[List, Tuple], labels: Optional[Union[List, Tuple]] = None):
        self.source = source
        self.labels = labels

    def __len__(self) -> int:
        return len(self.source)

    def __getitem__(self, index: int) -> Tuple[Any, Any]:
        item = self.source[index]
        if self.labels is None:
            return item, index
        return item, self.labels[index]

# Wrap the stored part-A outputs (no labels, so indices act as labels) and
# serve them in order with the shared loader settings.
embeddigs_dataset = IterDataset(embeddigs)
embeddigs_loader = torch.utils.data.DataLoader(
    embeddigs_dataset,
    batch_size=batch_size,
    num_workers=num_workers,
    shuffle=False,
)

  0%|          | 0/3925 [00:00<?, ?it/s]

In [14]:
# Quantize part B (the classification head) with NNCF, calibrated on the
# stored part-A outputs; `transform_fn` drops the index each item carries.
# NOTE(review): each stored output already comes from one batch, and the
# loader batches again, so calibration inputs presumably have one more leading
# dimension than the `embeddigs[0]`-shaped tensor used for export — confirm.
with torch.inference_mode():
    calibration_dataset_b = nncf.Dataset(embeddigs_loader, transform_fn)
    quantized_module_b = nncf.quantize(module_b, calibration_dataset_b, subset_size=1024)

INFO:nncf:Collecting tensor statistics |█               | 113 / 1024
INFO:nncf:Collecting tensor statistics |███             | 226 / 1024
INFO:nncf:Collecting tensor statistics |█████           | 339 / 1024
INFO:nncf:Collecting tensor statistics |███████         | 452 / 1024
INFO:nncf:Collecting tensor statistics |████████        | 565 / 1024
INFO:nncf:Collecting tensor statistics |██████████      | 678 / 1024
INFO:nncf:Collecting tensor statistics |████████████    | 791 / 1024
INFO:nncf:Collecting tensor statistics |██████████████  | 904 / 1024
INFO:nncf:Collecting tensor statistics |███████████████ | 1017 / 1024
INFO:nncf:Collecting tensor statistics |████████████████| 1024 / 1024
INFO:nncf:BatchNorm statistics adaptation |█               | 113 / 1024
INFO:nncf:BatchNorm statistics adaptation |███             | 226 / 1024
INFO:nncf:BatchNorm statistics adaptation |█████           | 339 / 1024
INFO:nncf:BatchNorm statistics adaptation |███████         | 452 / 1024
INFO:nncf:BatchNorm 

In [17]:
import openvino as ov
# from openvino.tools.mo import convert_model


In [28]:
# Output locations for the two int8 model halves.
vit_b_int8_path = '../onnx/vit_b_int8.onnx'
vit_a_int8_path = '../onnx/vit_a_int8.onnx'

In [30]:
# ov.save_model(quantized_module_a, vit_a_int8_path, compress_to_fp16=False)
# ov.save_model(quantized_module_b, vit_b_int8_path, compress_to_fp16=False)

# ONNX export targets for the two quantized halves.
q_vit_a_onnx_path = '../onnx/q_vitb16_a.onnx'
q_vit_b_onnx_path = '../onnx/q_vitb16_b.onnx'
# Make sure the output directory exists before exporting.
os.makedirs(os.path.dirname(q_vit_a_onnx_path), exist_ok=True)

# Example input for part A: the first validation image with a batch dimension added.
input_f32 = validset[0][0][None]
with torch.inference_mode():
    torch.onnx.export(quantized_module_a, input_f32, q_vit_a_onnx_path)
    # Example input for part B: random values shaped like one stored part-A output.
    input_f32 = torch.rand_like(embeddigs[0])
    torch.onnx.export(quantized_module_b, input_f32, q_vit_b_onnx_path)

# `ov.compile_model` takes an OpenVINO model or a path to a model file, not a
# torch module, so compile the ONNX files that were just written.
int8_a = ov.compile_model(q_vit_a_onnx_path)
int8_b = ov.compile_model(q_vit_b_onnx_path)

In [33]:
from sklearn.metrics import accuracy_score
# Evaluate the two quantized halves chained together on the validation loader.
gt = []    # target labels, one tensor per batch
pred = []  # predicted class indices, one tensor per batch
# embeddings = []
Y = []     # raw outputs of part B, one tensor per batch
with torch.inference_mode():
    for x, y in tqdm(valid_dataloader):
        emb = quantized_module_a(x)
        # embeddings.append(emb)
        y_hat = quantized_module_b(emb)
        Y.append(y_hat)
        gt.append(y)
        # Predicted class = position of the largest value along the last dimension.
        pred.append(y_hat.argmax(-1))
    # Concatenate the per-batch tensors and flatten them into 1-D NumPy arrays.
    gt = torch.cat(gt).ravel().numpy()
    pred = torch.cat(pred).ravel().numpy()

  0%|          | 0/3925 [00:00<?, ?it/s]

In [None]:
# Parameter size in MiB of the original model vs. quantized part A.
# NOTE(review): `nbytes` counts `parameters()` only, and the two recorded values
# are nearly equal — presumably the quantized module still stores
# floating-point parameters, so this is not the deployed int8 size; confirm.
nbytes(model), nbytes(quantized_module_a)

(330.2294006347656, 327.47565841674805)

In [None]:
# Top-1 accuracy of the chained quantized halves on the validation loader.
accuracy_score(gt, pred)

0.8698089171974522

In [None]:
# Force a garbage-collection pass; the displayed value is its return value.
gc.collect()

20203