In [1]:
!pwd
!ls

/workspace
arch.json  docker_run.sh  dpu.xclbin		PROMPT.txt   Quantize.ipynb
build	   dpu.bit	  local_utils.py	__pycache__  weights.pth
data	   dpu.hwh	  notebook_train.ipynb	quant_dir


In [2]:
import torch
import torch.nn as nn
import local_utils
device = torch.device('cpu')

6. Instantiate MiniResNet.

Switch the model to evaluation mode (method `.eval()`) so that the parameters of the batch normalization layers do not change.

Load state dict with mapping location to cpu.

In [3]:
# Create your model: instantiate MiniResNet.
# Apply the eval() method so the batch normalization parameters stay fixed,
# then load the trained state dict with map_location=device (CPU).
net = ...


<All keys matched successfully>

7. Instantiate train and test loaders with batch size = 1.

Extract 5% of the training data (iterate over the train loader in a `for` loop to get random samples).

Collect data and labels as lists.

Concatenate both lists (separately).

Initialize LoaderWrapper with results of concatenation and batch size = 1.

In [4]:
# concatenation: torch.cat()


class LoaderWrapper:
    """Minimal batch loader over pre-collected, index-aligned data and labels.

    Batch ``i`` is the pair
    ``(data[i*batch_size:(i+1)*batch_size], labels[i*batch_size:(i+1)*batch_size])``.
    Trailing samples that do not fill a whole batch are dropped, so the
    wrapper exposes ``len(data) // batch_size`` batches.
    """

    def __init__(self, data, labels, batch_size=1):
        """
        :param data: sliceable container of samples that supports ``len()``
        :param labels: sliceable container of labels, aligned with ``data``
        :param batch_size: number of samples per batch, must be >= 1
        :raises ValueError: if ``batch_size`` is smaller than 1
        """
        if batch_size < 1:
            # Fail here instead of with a ZeroDivisionError in __len__ later on.
            raise ValueError(
                "batch_size must be a positive integer, got {!r}".format(batch_size))
        self.batch_size = batch_size
        self.data = data
        self.labels = labels

    def __getitem__(self, index):
        """Return batch ``index`` as a ``(data, labels)`` pair.

        Negative indices count from the last full batch, as for sequences.

        :raises IndexError: if ``index`` is outside ``[-len(self), len(self))``
        """
        n_batches = len(self)
        if index < 0:
            # Without this, a negative index produced empty slices.
            index += n_batches
        if not 0 <= index < n_batches:
            # IndexError is what the sequence protocol expects; it still ends
            # a plain ``for`` loop driven by __getitem__.
            raise IndexError("batch index out of range")

        beg = index * self.batch_size
        end = beg + self.batch_size
        return self.data[beg:end], self.labels[beg:end]

    def __iter__(self):
        """Yield every full batch in order."""
        for index in range(len(self)):
            yield self[index]

    def __len__(self):
        """Number of full batches."""
        return len(self.data) // self.batch_size


# Train / test loaders, both with batch size = 1.
train_loader = ...          
test_loader = ...

# Samples and labels collected for the quantization subset
# (5% of the training data, gathered in a for loop).
quantizaton_data = []      
quantizaton_labels = []      

# Fill both lists here, then concatenate each of them with torch.cat().
...
# LoaderWrapper over the concatenated data and labels, batch size = 1.
quantization_loader = ...

# train_loader is not referenced again in the cells shown below.
del train_loader


8. Instantiate accuracy metric.

In [1]:
metric = ...

In [5]:
def evaluate(model,
             dataloader,
             evaluator
             ):
    """
    Run ``model`` over ``dataloader`` and report the sample-weighted mean score.

    Progress and the final score are printed; the score is also returned so
    that callers can compare results (e.g. float vs. quantized model).

    :param model: torch.nn.Module (any callable mapping a batch to predictions)
    :param dataloader: data generator / loader; must support ``len()`` and
        yield indexable batches ``(X, Y, ...)`` where ``X.shape[0]`` is the
        number of samples in the batch
    :param evaluator: fcn/obj like: fcn(y_pred, y_ref) -> float
    :return: mean score over all processed samples, in whatever numeric type
        ``evaluator`` produces (0.0 if no samples were processed)
    """
    n_batches = len(dataloader)
    tm = local_utils.TimeMeasurement("Evaluation", n_batches)
    with torch.no_grad(), tm:
        # Keep a running weighted sum and divide once per step; re-scaling the
        # previous mean on every step accumulates rounding error.
        total = 0.0
        seen = 0
        score = 0.0
        for i, XY in enumerate(dataloader):
            X = XY[0]
            Y = XY[1:]
            y_pred = model(X)
            batch = X.shape[0]
            total = total + batch * evaluator(y_pred, *Y)
            seen += batch
            if seen:
                # Guard against an empty first batch (division by zero).
                score = total / seen
            print("\rEvaluation {}/{}. Score = {}".format(i + 1, n_batches, score), end='')

        print("\rEvaluation {}/{}. Score = {}".format(n_batches, n_batches, score), end='\n')
    # Printed after the ``with`` block so the measurement has been closed.
    print(tm)
    return score


def quantize(float_model: torch.nn.Module,
             input_shape: tuple,
             quant_dir: str,
             quant_mode: str,
             device: torch.device,
             dataloader,
             evaluator):
    """
    Run one Vitis AI post-training-quantization pass over ``float_model``.

    In 'calib' mode the quantized model is evaluated on ``dataloader`` and the
    quantization config is exported; in 'test' mode it is evaluated and the
    xmodel is exported to ``quant_dir``.

    :param float_model: float model with loaded weights
    :param input_shape: shape of the random dummy input handed to the
        quantizer (the notebook passes it with the batch dimension,
        e.g. [1, 1, 28, 28])
    :param quant_dir: path to directory with quantized model components
    :param quant_mode: quant_mode in ['calib', 'test']
    :param device: torch device the model is moved to and the quantizer runs on
    :param dataloader: data loader supporting ``len()``; per the original note
        it must use batch_size == 1 for 'calib' (TODO confirm whether the
        'test' export has the same requirement)
    :param evaluator: fcn/obj like: fcn(y_pred, y_ref) -> float
    :raises ValueError: if ``quant_mode`` is neither 'calib' nor 'test'
    :raises Exception: whatever ``torch_quantizer`` raises is reported and
        re-raised, so a failed pass is not mistaken for a successful one
    """
    if quant_mode not in ('calib', 'test'):
        # Any other mode would run the evaluation and then export nothing.
        raise ValueError(
            "quant_mode must be 'calib' or 'test', got {!r}".format(quant_mode))

    tm = local_utils.TimeMeasurement("Quantization", len(dataloader))
    with tm:
        # Imported lazily on purpose: the package is only
        # available in docker or after packaging
        # vitis-AI-tools/..../pytorch../pytorch_nndct
        # and installing the package
        from pytorch_nndct.apis import torch_quantizer
        # model to device
        model = float_model.to(device)

        rand_in = torch.randn(input_shape)
        print("get qunatizer start")
        try:
            quantizer = torch_quantizer(
                quant_mode, model, rand_in, output_dir=quant_dir, device=device)
        except Exception as e:
            print("exception:")
            print(e)
            # Propagate: returning here would let the notebook carry on to the
            # export / compile steps without a quantized model.
            raise
        print("get qunatizer end")

        print("get quantized model start")
        quantized_model = quantizer.quant_model
        print("get quantized model end")

        # evaluate
        print("testing st")
        evaluate(quantized_model, dataloader, evaluator)
        print("testing end")

        if quant_mode == 'calib':
            # export config
            print("export config")
            quantizer.export_quant_config()
            print("export config end")
        else:
            # export model ('test' mode)
            print("export xmodel")
            quantizer.export_xmodel(deploy_check=False, output_dir=quant_dir)
            print("export xmodel end")
    print(tm)

''

9. Evaluate the floating-point network on the test loader and on the quantization loader, using the accuracy metric as the evaluator.

In [6]:
# You can evaluate your floating point model first 
evaluate(...)
evaluate(...)

Evaluation 10000/10000. Score = 0.983299970626831
Execution time: 0:0:28:0, processed 10000 frames, throughput: 357.14285714285717 fps.
Evaluation 3001/3001. Score = 0.9866710901260376
Execution time: 0:0:7:0, processed 3001 frames, throughput: 428.7142857142857 fps.


The Vitis AI Quantizer performs Post Training Quantization in two stages.

The first is `calib` mode (calibration) - the VAI Quantizer parses the model and adjusts the quantization parameters.

The second is evaluation / test mode - after this step 

(in principle, after checking that there is not too much accuracy loss) 

the model is exported in xmodel format.


10. Run quantization for network in 'calib' mode with input shape = [1, 1, 28, 28].

Use the quantization loader and the accuracy metric as the evaluator.

In [7]:
# Quantize model - calib - is slow
quantize(
         ...,
         quant_dir='quant_dir', # directory for quantizer results
         )

No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda'

[0;32m[VAIQ_NOTE]: Loading NNDCT kernels...[0m
get qunatizer start

[0;32m[VAIQ_NOTE]: Quantization calibration process start up...[0m

[0;32m[VAIQ_NOTE]: =>Quant Module is in 'cpu'.[0m

[0;32m[VAIQ_NOTE]: =>Parsing MiniResNet...[0m

[0;32m[VAIQ_NOTE]: =>Doing weights equalization...[0m

[0;32m[VAIQ_NOTE]: =>Quantizable module is generated.(quant_dir/MiniResNet.py)[0m
get qunatizer end
get quantized model start

[0;32m[VAIQ_NOTE]: =>Get module with quantization.[0m
get quantized model end
testing st
Evaluation 3001/3001. Score = 0.9876707792282104
Execution time: 1:0:27:0, processed 3001 frames, throughput: 34.49425287356322 fps.
testing end
export config

[0;32m[VAIQ_NOTE]: =>Exporting quant config.(quant_dir/quant_info.json)[0m
export config end
Execution time: 1:0:28:0, processed 3001 frames, throughput: 34.10227272727273 fps.


11. Run quantization in `test` mode.

In [8]:
# Quantize model - test - is faster

quantize(
         ...,
         quant_dir='quant_dir',  # directory for quantizer results
         )


get qunatizer start

[0;32m[VAIQ_NOTE]: Quantization test process start up...[0m

[0;32m[VAIQ_NOTE]: =>Quant Module is in 'cpu'.[0m

[0;32m[VAIQ_NOTE]: =>Parsing MiniResNet...[0m

[0;32m[VAIQ_NOTE]: =>Doing weights equalization...[0m

[0;32m[VAIQ_NOTE]: =>Quantizable module is generated.(quant_dir/MiniResNet.py)[0m
get qunatizer end
get quantized model start

[0;32m[VAIQ_NOTE]: =>Get module with quantization.[0m
get quantized model end
testing st
Evaluation 3001/3001. Score = 0.9880039691925049
Execution time: 0:0:17:0, processed 3001 frames, throughput: 176.52941176470588 fps.
testing end
export xmodel

[0;32m[VAIQ_NOTE]: =>Converting to xmodel ...[0m

[0;32m[VAIQ_NOTE]: =>Successfully convert 'MiniResNet' to xmodel.(quant_dir/MiniResNet_int.xmodel)[0m
export xmodel end
Execution time: 0:0:17:0, processed 3001 frames, throughput: 176.52941176470588 fps.


12. Compile the quantized model 

In [9]:
# compile model
# --xmodel quant_dir+'/'+{python class model name}+'_int.xmodel' - the result of quantization
# --arch file dpu fingerprint (denotes DPU architecture and supported operations) - *.json file 
# --net_name name of network  - any name
# --output_dir directory where results will be stored
!vai_c_xir --xmodel ... --arch arch.json --net_name ... --output_dir  build

**************************************************
* VITIS_AI Compilation - Xilinx Inc.
**************************************************
[UNILOG][INFO] Compile mode: dpu
[UNILOG][INFO] Debug mode: function
[UNILOG][INFO] Target architecture: DPUCZDX8G_ISA0_B4096_MAX_BG2
[UNILOG][INFO] Graph name: MiniResNet, with op num: 130
[UNILOG][INFO] Begin to compile...
[UNILOG][INFO] Total device subgraph number 3, DPU subgraph number 1
[UNILOG][INFO] Compile done.
[UNILOG][INFO] The meta json is saved to "/workspace/build/meta.json"
[UNILOG][INFO] The compiled xmodel is saved to "/workspace/build/MiniResnet_VAI.xmodel"
[UNILOG][INFO] The compiled xmodel's md5sum is 600f23936dab1908a0a723a09efdafc7, and has been saved to "/workspace/build/md5sum.txt"


13. Save this file. Close Jupyter server. Exit from Vitis AI docker environment (`exit` command).