# Introduction

This tutorial demonstrates how to perform post-training quantization (PTQ) on a [ResNet50](https://github.com/onnx/models/raw/main/vision/classification/resnet/) model.

## Prerequisite

### 1. Install packages

In [None]:
!pip install neural-compressor onnx onnxruntime

### 2. Prepare Model

In [None]:
!wget https://github.com/onnx/models/raw/main/vision/classification/resnet/model/resnet50-v1-12.onnx

### 3. Prepare Dataset

Request access to, and then download, the ILSVRC2012 dataset from https://image-net.org/signup.php

Download the validation label file (`val.txt`)

In [None]:
!wget http://dl.caffe.berkeleyvision.org/caffe_ilsvrc12.tar.gz
!tar -xvzf caffe_ilsvrc12.tar.gz val.txt

# Run

In [17]:
# dataloader

import re
import os
import cv2
import collections
from PIL import Image
import numpy as np

def collate(batch):
    """Merge a list of samples into a single batched structure.

    The layout of the first sample decides how the batch is merged:

    * mapping  -> dict with the same keys, each value collated recursively
    * str/bytes -> returned unchanged (treated as atomic values)
    * sequence -> list with one collated entry per field position
    * ndarray  -> ``np.stack`` along a new leading axis; if the arrays cannot
      be stacked (e.g. differing shapes) the batch is returned unchanged
    * anything else -> returned unchanged

    Args:
        batch: non-empty sequence of samples sharing the same layout.

    Returns:
        The batched structure described above.
    """
    elem = batch[0]
    if isinstance(elem, collections.abc.Mapping):
        return {key: collate([d[key] for d in batch]) for key in elem}
    if isinstance(elem, (str, bytes)):
        # Strings are Sequences whose items are strings again; descending into
        # them would recurse without end, so keep them as opaque values.
        return batch
    if isinstance(elem, collections.abc.Sequence):
        return [collate(samples) for samples in zip(*batch)]
    if isinstance(elem, np.ndarray):
        try:
            return np.stack(batch)
        except ValueError:
            # Arrays that cannot be stacked are handed back un-batched.
            return batch
    return batch

class Dataloader:
    """Iterable that yields preprocessed image batches with their labels.

    The list file is read line by line; each non-blank line must hold an image
    name and an integer label separated by whitespace. Entries whose image file
    does not exist under ``dataset_location`` are skipped.

    Iterating yields ``(data, label)`` tuples, where ``data`` is the result of
    ``collate`` over the preprocessed images of the batch and ``label`` is the
    list of their integer labels. The final batch may be smaller than
    ``batch_size``.

    Args:
        dataset_location: directory containing the image files.
        image_list: path of the ``<image_name> <label>`` list file.
        batch_size: number of samples per yielded batch.
    """

    # Preprocessing constants: resize edge, centre-crop edge, and the
    # per-channel mean / std applied after scaling pixels to [0, 1].
    _RESIZE = 256
    _CROP = 224
    _MEAN = (0.485, 0.456, 0.406)
    _STD = (0.229, 0.224, 0.225)

    def __init__(self, dataset_location, image_list, batch_size):
        self.batch_size = batch_size
        self.image_list = []
        self.label_list = []
        with open(image_list, 'r') as f:
            for line in f:
                line = line.strip()
                if not line:
                    # A blank line holds no entry; unpacking it would raise.
                    continue
                image_name, label = re.split(r"\s+", line)
                src = os.path.join(dataset_location, image_name)
                if not os.path.exists(src):
                    continue

                self.image_list.append(src)
                self.label_list.append(int(label))

    def __iter__(self):
        return self._generate_dataloader()

    def _generate_dataloader(self):
        """Yield ``(data, label)`` batches in list-file order."""
        batch = []
        for idx in range(len(self.image_list)):
            batch.append(idx)
            if len(batch) == self.batch_size:
                yield self._fetch(batch)
                batch = []
        if batch:
            # Trailing partial batch.
            yield self._fetch(batch)

    def _fetch(self, ids):
        """Load, preprocess and collate the samples at the given indices."""
        data = [self._preprocess(self.image_list[idx]) for idx in ids]
        label = [self.label_list[idx] for idx in ids]
        return collate(data), label

    def _preprocess(self, src):
        """Return the image at ``src`` as a float32 array of shape (3, 224, 224).

        Steps: load as RGB, scale to [0, 1], resize to 256x256, centre-crop to
        224x224, normalise per channel, and move channels first.
        """
        with Image.open(src) as image:
            # convert('RGB') always produces three channels, so no separate
            # grayscale handling is needed afterwards.
            pixels = np.array(image.convert('RGB'), dtype='float32')
        pixels /= 255.

        # resize
        pixels = cv2.resize(pixels, (self._RESIZE, self._RESIZE))

        # center crop (the resized image is always larger than the crop)
        offset = (self._RESIZE - self._CROP) // 2
        pixels = pixels[offset:offset + self._CROP, offset:offset + self._CROP, :]

        # normalize
        pixels = (pixels - self._MEAN) / self._STD

        # transpose HWC -> CHW
        pixels = pixels.transpose((2, 0, 1))
        return pixels.astype('float32')

    # Backward-compatible alias for the original, misspelled method name.
    _preprpcess = _preprocess

In [11]:
# metric

from sklearn.metrics import accuracy_score

def _topk_shape_validate(preds, labels):
    """Bring predictions and labels into matching 2-D shapes for top-k scoring.

    Args:
        preds: class scores, either ``(N, class_num)`` or a single 1-D score
            vector of length ``class_num`` (taken as one sample when exactly
            one label is given).
        labels: ``N`` class indices (scalar, 1-D or ``(N, 1)``), or one-hot
            rows of shape ``(N, class_num)``.

    Returns:
        ``(preds, labels)`` as arrays of shape ``(N, class_num)`` and
        ``(N, 1)``; one-hot labels are reduced to the index of their largest
        entry.

    Raises:
        AssertionError: if the number of labels differs from the number of
            prediction rows.
    """
    preds = np.atleast_1d(np.array(preds))
    # atleast_1d lets a bare scalar label through as a batch of one.
    labels = np.atleast_1d(np.array(labels))

    if preds.ndim == 1 and labels.shape[0] == 1:
        # A lone score vector with one label is one sample, not class_num samples.
        preds = preds[np.newaxis, :]

    N = preds.shape[0]
    preds = preds.reshape([N, -1])

    label_N = labels.shape[0]
    assert label_N == N, 'labels batch size should same with preds'
    # Reshape to (N, -1) rather than (N, 1) so one-hot rows survive until the
    # check below instead of failing the reshape.
    labels = labels.reshape([N, -1])
    # one-hot labels will have 2 dimension not equal 1
    if labels.shape[1] != 1:
        labels = labels.argsort()[..., -1:]
    return preds, labels

class TopK:
    """Running top-k accuracy accumulated over successive ``update`` calls.

    Attributes are plain counters: ``num_correct`` is the number of samples
    whose label was among the k highest-scoring classes, ``num_sample`` the
    number of samples seen since the last ``reset``.
    """

    def __init__(self, k=1):
        self.k = k
        self.num_correct = 0
        self.num_sample = 0

    def update(self, preds, labels):
        """Add one batch of predictions and labels to the running counters."""
        preds, labels = _topk_shape_validate(preds, labels)
        # Indices of the k highest scores per row (ascending score order).
        top_indices = preds.argsort()[..., -self.k:]
        if self.k != 1:
            hits = sum(
                1
                for candidates, target in zip(top_indices, labels)
                if target.astype('int32') in candidates
            )
            self.num_correct += hits
        else:
            self.num_correct += accuracy_score(top_indices, labels, normalize=False)
        self.num_sample += len(labels)

    def reset(self):
        """Clear both counters."""
        self.num_correct = 0
        self.num_sample = 0

    def result(self):
        """Return ``num_correct / num_sample``, or 0 when nothing was seen."""
        if self.num_sample != 0:
            return self.num_correct / self.num_sample
        print("Sample num during evaluation is 0.")
        return 0

In [18]:
# evaluation function

import onnxruntime as ort

def eval_func(model, dataloader, metric):
    """Run the model over the dataloader with ONNX Runtime and score it.

    Args:
        model: model object exposing ``SerializeToString()``.
        dataloader: iterable yielding ``(input_data, label)`` batches.
        metric: object providing ``reset()``, ``update(preds, labels)`` and
            ``result()``.

    Returns:
        The value of ``metric.result()`` after every batch has been scored.
    """
    metric.reset()
    sess = ort.InferenceSession(model.SerializeToString(), providers=ort.get_available_providers())
    input_names = [i.name for i in sess.get_inputs()]
    for input_data, label in dataloader:
        outputs = sess.run(None, dict(zip(input_names, [input_data])))
        # sess.run returns a list of output arrays. Passing the whole list
        # would add a leading axis and hide the batch dimension from the
        # metric, so only the first output is scored.
        metric.update(outputs[0], label)
    return metric.result()

In [22]:
# launcher code

import onnx
from neural_compressor import quantization, PostTrainingQuantConfig

# Placeholder locations: point these at the validation images and the label
# list before running.
data_path = '/path/to/ILSVRC2012_img_val'
label_path = '/path/to/val.txt'
batch_size = 1

model = onnx.load('./resnet50-v1-12.onnx')
dataloader = Dataloader(data_path, label_path, batch_size)
top1 = TopK()


# NOTE: this name shadows the built-in `eval` for the rest of the session.
def eval(onnx_model):
    """Return the TopK (k=1) accuracy of `onnx_model` over `dataloader`."""
    return eval_func(onnx_model, dataloader, top1)


config = PostTrainingQuantConfig()
# The same dataloader serves both calibration and accuracy evaluation.
q_model = quantization.fit(
    model,
    config,
    calib_dataloader=dataloader,
    eval_func=eval,
)
q_model.save('./resnet50-int8.onnx')

2023-02-14 20:12:47 [INFO] Get FP32 model baseline.
2023-02-14 20:12:48 [INFO] Save tuning history to /home/mengniwa/notebook/nc_workspace/2023-02-14_19-55-18/./history.snapshot.
2023-02-14 20:12:48 [INFO] FP32 baseline is: [Accuracy: 0.7250, Duration (seconds): 1.0148]
2023-02-14 20:13:05 [INFO] |******Mixed Precision Statistics******|
2023-02-14 20:13:05 [INFO] +---------------------+-------+--------+
2023-02-14 20:13:05 [INFO] |       Op Type       | Total |  INT8  |
2023-02-14 20:13:05 [INFO] +---------------------+-------+--------+
2023-02-14 20:13:05 [INFO] |         Conv        |   53  |   53   |
2023-02-14 20:13:05 [INFO] |        MatMul       |   1   |   1    |
2023-02-14 20:13:05 [INFO] |       MaxPool       |   1   |   1    |
2023-02-14 20:13:05 [INFO] |  GlobalAveragePool  |   1   |   1    |
2023-02-14 20:13:05 [INFO] |         Add         |   17  |   17   |
2023-02-14 20:13:05 [INFO] |    QuantizeLinear   |   2   |   2    |
2023-02-14 20:13:05 [INFO] |   DequantizeLinear  