In [2]:
import os
import openvino as ov
import torchvision.transforms as T
import numpy as np
import cv2 


###  <span style="color: orange;">**PyTorch**</span> -> <span style="color: #555555;">**ONNX**</span>


In [5]:
!python deploy/export_onnx.py -c rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_120e_coco.yml -r deploy/models/torchmodels/rtdetrv2_r18vd_120e_coco_rerun_48.1.pth --output_file deploy/models/onnxmodels/model.onnx --check

# !python rtdetrv2_pytorch/tools/export_onnx.py -c path/to/rtdetrv2_xxx_xxx_coco.yml -r path/to/last.pth --output_file deploy/models/onnxmodels/model.onnx --check

Loading.... rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_120e_coco.yml
Loading.... rtdetrv2_pytorch/configs/rtdetrv2/../dataset/coco_detection_custom.yml
Loading.... rtdetrv2_pytorch/configs/rtdetrv2/../runtime.yml
Loading.... rtdetrv2_pytorch/configs/rtdetrv2/./include/dataloader.yml
Loading.... rtdetrv2_pytorch/configs/rtdetrv2/./include/optimizer.yml
Loading.... rtdetrv2_pytorch/configs/rtdetrv2/./include/rtdetrv2_r50vd.yml
  checkpoint = torch.load(args.resume, map_location='cpu')
Load PResNet18 state_dict
  if reference_points.shape[-1] == 2:
  elif reference_points.shape[-1] == 4:
Check export onnx model done...


### <span style="color: #555555;">**ONNX**</span> --> <span style="color:darkblue">**OpenVINO**</span>   (FP32)


In [6]:

!mo --input_model deploy/models/onnxmodels/model.onnx --output_dir deploy/models/openvinomodels


Check for a new version of Intel(R) Distribution of OpenVINO(TM) toolkit here https://software.intel.com/content/www/us/en/develop/tools/openvino-toolkit/download.html?cid=other&source=prod&campid=ww_2023_bu_IOTG_OpenVINO-2022-3&content=upg_all&medium=organic or on https://github.com/openvinotoolkit/openvino
[ INFO ] The model was converted to IR v11, the latest model format that corresponds to the source DL framework input/output format. While IR v11 is backwards compatible with OpenVINO Inference Engine API v1.0, please use API v2.0 (as of 2022.1) to take advantage of the latest improvements in IR v11.
Find more information about API v2.0 and IR v11 at https://docs.openvino.ai/latest/openvino_2_0_transition_guide.html
[ SUCCESS ] Generated IR version 11 model.
[ SUCCESS ] XML file: /home/dhavalsinh/Desktop/Object_Det_n_Seg/RT-DETRv2/deploy/models/openvinomodels/model.xml
[ SUCCESS ] BIN file: /home/dhavalsinh/Desktop/Object_Det_n_Seg/RT-DETRv2/deploy/models/openvinomodels/model.bin


### INT8 <span style="color:darkblue">**OpenVINO**</span> Quantization

In [None]:
#installation 
# !pip install git+https://github.com/openvinotoolkit/nncf.git#egg=nncf

In [2]:
import nncf 
from rtdetrv2_pytorch.src.core import YAMLConfig
from rtdetrv2_pytorch.src.misc import dist_utils


config_path = "rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_120e_coco.yml"
openvino_input_model = "deploy/models/openvinomodels/model.xml"
openvino_output_model = "deploy/models/openvinomodels/model_int8.xml"
update_dict = {
    'seed': 0, 'use_amp': True, 'test_only': False, 'print_method': 'builtin', 'print_rank': 0
}


INFO:nncf:NNCF initialized successfully. Supported frameworks detected: torch, onnx, openvino


In [3]:
cfg = YAMLConfig(config_path, **update_dict)
val_dataloader = dist_utils.warp_loader(cfg.val_dataloader, \
            shuffle=cfg.val_dataloader.shuffle)

model = ov.Core().read_model(openvino_input_model)
compiled_model = ov.Core().compile_model(model, 'CPU')
input_ir = model.input(0)
N, C, H, W = input_ir.partial_shape
W = W.get_length()
H = H.get_length()

transforms = T.Compose([
            T.ToPILImage(),
            T.Resize((W, H)),
            T.ToTensor(),
        ])
def prepare_input_tensor(image: np.ndarray):

    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = transforms(image)[None]
    return image

def transform_fn(data_item):
    """
    Quantization transform function. Extracts and preprocess input data from dataloader item for quantization.
    Parameters:
       data_item: Tuple with data item produced by DataLoader during iteration
    Returns:
        input_tensor: Input data for quantization
    """
    img = np.asarray(data_item[0]).astype(np.uint8)
    input_tensor = prepare_input_tensor(img)
    return input_tensor


quantization_dataset = nncf.Dataset(val_dataloader, transform_fn)

from openvino.runtime import serialize
quantized_model = nncf.quantize(model, quantization_dataset, preset=nncf.QuantizationPreset.MIXED)
serialize(quantized_model, openvino_output_model)

Loading.... rtdetrv2_pytorch/configs/rtdetrv2/rtdetrv2_r18vd_120e_coco.yml
Loading.... rtdetrv2_pytorch/configs/rtdetrv2/../dataset/coco_detection_custom.yml
Loading.... rtdetrv2_pytorch/configs/rtdetrv2/../runtime.yml
Loading.... rtdetrv2_pytorch/configs/rtdetrv2/./include/dataloader.yml
Loading.... rtdetrv2_pytorch/configs/rtdetrv2/./include/optimizer.yml
Loading.... rtdetrv2_pytorch/configs/rtdetrv2/./include/rtdetrv2_r50vd.yml
building val_dataloader with batch_size=32...
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


Output()

Output()

### <span style="color: #555555;">**ONNX**</span> -> <span style="color: #009B77;">**TRT**</span> (FP16)

In [None]:
#sudo apt install nvidia-cudnn
#install tensortrt 8.6.1

In [9]:
import tensorrt as trt

onnx_model = "deploy/models/onnxmodels/model.onnx"
trt_output_model = "deploy/models/tensorrtmodels/model.trt"

In [10]:
folder_path = os.path.dirname(trt_output_model)
# Create the directory if it doesn't exist
if not os.path.exists(folder_path):
    os.makedirs(folder_path)

verbose=False
t_dtype = trt.DataType.HALF
network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()

with trt.Builder(TRT_LOGGER) as builder, builder.create_network(flags=network_flags) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
    with open(onnx_model, 'rb') as model:
        if not parser.parse(model.read()):
            print('ERROR: ONNX Parse Failed')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
    print('Building an engine.  This would take a while...')
    print('(Use "--verbose" or "-v" to enable verbose logging.)')
    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 2 << 30)  # 1GB
    profile = builder.create_optimization_profile()
    input_name = network.get_input(0).name
    profile.set_shape(input_name, (1, 3, 640, 640), (1, 3, 640, 640), (16, 3, 640, 640))
    config.add_optimization_profile(profile)

    # config.max_workspace_size = 2 << 30
    if t_dtype == trt.DataType.HALF:
        config.flags |= 1 << int(trt.BuilderFlag.FP16)
    
    engine = builder.build_engine(network, config)
    

    with open(trt_output_model, 'wb') as f:
        f.write(engine.serialize())

[09/06/2024-15:02:49] [TRT] [W] onnx2trt_utils.cpp:374: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
[09/06/2024-15:02:49] [TRT] [W] onnx2trt_utils.cpp:400: One or more weights outside the range of INT32 was clamped
Building an engine.  This would take a while...
(Use "--verbose" or "-v" to enable verbose logging.)
[09/06/2024-15:02:49] [TRT] [W] Detected layernorm nodes in FP16: /model/encoder/encoder.0/layers.0/norm1/Sub, /model/encoder/encoder.0/layers.0/norm1/Pow, /model/encoder/encoder.0/layers.0/norm1/ReduceMean_1, /model/encoder/encoder.0/layers.0/norm1/Add, /model/encoder/encoder.0/layers.0/norm1/Sqrt, /model/encoder/encoder.0/layers.0/norm1/Div, /model/encoder/encoder.0/layers.0/norm1/Mul, /model/encoder/encoder.0/layers.0/norm1/Add_1, /model/encoder/encoder.0/layers.0/norm2/Sub, /model/encoder/encoder.0/layers.0/norm2/Pow, /model/encoder/encoder.0/layers.0/norm2/ReduceMean_1, /model/en

  engine = builder.build_engine(network, config)


[09/06/2024-15:05:31] [TRT] [W] TensorRT encountered issues when converting weights between types and that could affect accuracy.
[09/06/2024-15:05:31] [TRT] [W] If this is not the desired behavior, please modify the weights or retrain with regularization to adjust the magnitude of the weights.
[09/06/2024-15:05:31] [TRT] [W] Check verbose logs for the list of affected weights.
[09/06/2024-15:05:31] [TRT] [W] - 1 weights are affected by this issue: Detected FP32 infinity values and converted them to corresponding FP16 infinity.
[09/06/2024-15:05:31] [TRT] [W] - 140 weights are affected by this issue: Detected subnormal FP16 values.
[09/06/2024-15:05:31] [TRT] [W] - 17 weights are affected by this issue: Detected values less than smallest positive FP16 subnormal value and converted them to the FP16 minimum subnormalized value.
[09/06/2024-15:05:31] [TRT] [W] - 4 weights are affected by this issue: Detected finite FP32 values which would overflow in FP16 and converted them to the closest

### <span style="color: #555555;">**ONNX**</span> -> <span style="color: #009B77;">**TRT**</span> (INT8)

In [5]:
import os
import tensorrt as trt
from deploy.calibrator import DataLoader

onnx_model = "deploy/models/onnxmodels/model.onnx"
trt_int8_output_model = "deploy/models/tensorrtmodels/model_int8.trt"
train_path = "path/to/train/images/folder"
model_input_resolution = 640

In [6]:

folder_path = os.path.dirname(trt_int8_output_model)
# Create the directory if it doesn't exist
if not os.path.exists(folder_path):
    os.makedirs(folder_path)


verbose=False
calib_loader = DataLoader(32, 6, train_path,
                                  model_input_resolution, model_input_resolution)
int8_calib = True
calib_cache = None
t_dtype = trt.DataType.INT8
network_flags = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
if t_dtype == trt.DataType.INT8:
    network_flags = network_flags | (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION))
TRT_LOGGER = trt.Logger(trt.Logger.VERBOSE) if verbose else trt.Logger()

with trt.Builder(TRT_LOGGER) as builder, builder.create_network(flags=network_flags) as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
    with open(onnx_model, 'rb') as model:
        if not parser.parse(model.read()):
            print('ERROR: ONNX Parse Failed')
            for error in range(parser.num_errors):
                print(parser.get_error(error))
    print('Building an engine.  This would take a while...')
    print('(Use "--verbose" or "-v" to enable verbose logging.)')
    config = builder.create_builder_config()
    config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 2 << 30)  # 1GB
    profile = builder.create_optimization_profile()
    input_name = network.get_input(0).name
    profile.set_shape(input_name, (1, 3, 640, 640), (1, 3, 640, 640), (16, 3, 640, 640))
    config.add_optimization_profile(profile)

    # config.max_workspace_size = 2 << 30
    if t_dtype == trt.DataType.HALF:
        config.flags |= 1 << int(trt.BuilderFlag.FP16)
    if t_dtype == trt.DataType.INT8:
        print('trt.DataType.INT8')
        config.flags |= 1 << int(trt.BuilderFlag.INT8)
        config.flags |= 1 << int(trt.BuilderFlag.FP16)

        if int8_calib:
            from deploy.calibrator import Calibrator
            config.int8_calibrator = Calibrator(calib_loader, calib_cache)
            print('Int8 calibation is enabled.')
    
    engine = builder.build_engine(network, config)
    # print(engine)

    with open(trt_int8_output_model, 'wb') as f:
        f.write(engine.serialize())

found all 811 images to calib.
[09/06/2024-15:14:05] [TRT] [W] The NetworkDefinitionCreationFlag::kEXPLICIT_PRECISION flag has been deprecated and has no effect. Please do not use this flag when creating the network.
[09/06/2024-15:14:05] [TRT] [W] onnx2trt_utils.cpp:374: Your ONNX model has been generated with INT64 weights, while TensorRT does not natively support INT64. Attempting to cast down to INT32.
[09/06/2024-15:14:05] [TRT] [W] onnx2trt_utils.cpp:400: One or more weights outside the range of INT32 was clamped
Building an engine.  This would take a while...
(Use "--verbose" or "-v" to enable verbose logging.)
trt.DataType.INT8
Int8 calibation is enabled.
[09/06/2024-15:14:06] [TRT] [W] Calibration Profile is not defined. Calibrating with Profile 0


  engine = builder.build_engine(network, config)
[ERROR] Exception caught in read_calibration_cache(): TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType

At:
  <frozen genericpath>(20): exists
  /home/dhavalsinh/Desktop/Object_Det_n_Seg/RT-DETRv2/deploy/calibrator.py(52): read_calibration_cache
  /tmp/ipykernel_12289/174181261.py(46): <module>
  /home/dhavalsinh/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py(3505): run_code
  /home/dhavalsinh/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py(3445): run_ast_nodes
  /home/dhavalsinh/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py(3266): run_cell_async
  /home/dhavalsinh/anaconda3/lib/python3.11/site-packages/IPython/core/async_helpers.py(129): _pseudo_sync_runner
  /home/dhavalsinh/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py(3061): _run_cell
  /home/dhavalsinh/anaconda3/lib/python3.11/site-packages/IPython/

######################
['images']
######################
######################
['images']
######################
######################
['images']
######################
######################
['images']
######################
######################
['images']
######################
######################
['images']
######################
######################
['images']
######################
[09/06/2024-15:14:16] [TRT] [W] Missing scale and zero-point for tensor (Unnamed Layer* 138) [Constant]_output, expect fall back to non-int8 implementation for any layer consuming or producing given tensor
[09/06/2024-15:14:16] [TRT] [W] Missing scale and zero-point for tensor /model/encoder/encoder.0/layers.0/self_attn/Softmax_output_0, expect fall back to non-int8 implementation for any layer consuming or producing given tensor
[09/06/2024-15:14:16] [TRT] [W] Missing scale and zero-point for tensor (Unnamed Layer* 170) [Constant]_output, expect fall back to non-int8 implementation for any lay

[ERROR] Exception caught in read_calibration_cache(): TypeError: stat: path should be string, bytes, os.PathLike or integer, not NoneType

At:
  <frozen genericpath>(20): exists
  /home/dhavalsinh/Desktop/Object_Det_n_Seg/RT-DETRv2/deploy/calibrator.py(52): read_calibration_cache
  /tmp/ipykernel_12289/174181261.py(46): <module>
  /home/dhavalsinh/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py(3505): run_code
  /home/dhavalsinh/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py(3445): run_ast_nodes
  /home/dhavalsinh/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py(3266): run_cell_async
  /home/dhavalsinh/anaconda3/lib/python3.11/site-packages/IPython/core/async_helpers.py(129): _pseudo_sync_runner
  /home/dhavalsinh/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py(3061): _run_cell
  /home/dhavalsinh/anaconda3/lib/python3.11/site-packages/IPython/core/interactiveshell.py(3006): run_cell
  /home/

[09/06/2024-15:23:00] [TRT] [W] TensorRT encountered issues when converting weights between types and that could affect accuracy.
[09/06/2024-15:23:00] [TRT] [W] If this is not the desired behavior, please modify the weights or retrain with regularization to adjust the magnitude of the weights.
[09/06/2024-15:23:00] [TRT] [W] Check verbose logs for the list of affected weights.
[09/06/2024-15:23:00] [TRT] [W] - 1 weights are affected by this issue: Detected FP32 infinity values and converted them to corresponding FP16 infinity.
[09/06/2024-15:23:00] [TRT] [W] - 140 weights are affected by this issue: Detected subnormal FP16 values.
[09/06/2024-15:23:00] [TRT] [W] - 17 weights are affected by this issue: Detected values less than smallest positive FP16 subnormal value and converted them to the FP16 minimum subnormalized value.
[09/06/2024-15:23:00] [TRT] [W] - 4 weights are affected by this issue: Detected finite FP32 values which would overflow in FP16 and converted them to the closest