# TensorRT Conversion

### Check the TensorRT version: it must be 8.0 or higher

In [1]:
import tensorrt as trt
# Display the installed TensorRT version string as the cell output.
trt.__version__

'8.0.1.6'

### Fold constants with Polygraphy (`surgeon sanitize`, built on ONNX GraphSurgeon)

In [None]:
# Run Polygraphy's constant folding on the exported ONNX model and write the
# result to a separate "_folded" file, which is the file the conversion cell reads.
!polygraphy surgeon sanitize --fold-constants "weights/yolopv2.onnx" -o "weights/yolopv2_folded.onnx"

### Load Libraries

In [2]:
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
# NOTE(review): `pycuda.autoinit` is never referenced by name in this notebook;
# presumably it is imported for its import-time side effect of setting up a
# CUDA context -- confirm before removing.
import pycuda.autoinit
from PIL import ImageDraw

import sys, os
# Extend the module search path so that the `samples.python.common` helper
# module imported on the next line can be resolved.
sys.path.append("../patrasche/tensorrt/")
import samples.python.common as common

# Debug aid: show the module search path after the append above.
print(sys.path)

['/home/ubuntu/Desktop/workspace/patrasche_tensorrt', '/usr/bin', '/usr/lib/python36.zip', '/usr/lib/python3.6', '/usr/lib/python3.6/lib-dynload', '', '/home/ubuntu/.local/lib/python3.6/site-packages', '/usr/local/lib/python3.6/dist-packages', '/usr/local/lib/python3.6/dist-packages/pycuda-2022.1-py3.6-linux-aarch64.egg', '/usr/local/lib/python3.6/dist-packages/appdirs-1.4.4-py3.6.egg', '/usr/local/lib/python3.6/dist-packages/pytools-2022.1.12-py3.6.egg', '/usr/local/lib/python3.6/dist-packages/typing_extensions-4.2.0-py3.6.egg', '/usr/local/lib/python3.6/dist-packages/platformdirs-2.5.2-py3.6.egg', '/home/ubuntu/Desktop/workspace/patrasche/deep-person-reid', '/usr/local/lib/python3.6/dist-packages/imageio-2.19.3-py3.6.egg', '/usr/local/lib/python3.6/dist-packages/isort-4.3.21-py3.6.egg', '/usr/local/lib/python3.6/dist-packages/yapf-0.32.0-py3.6.egg', '/usr/local/lib/python3.6/dist-packages/flake8-4.0.1-py3.6.egg', '/usr/local/lib/python3.6/dist-packages/yacs-0.1.8-py3.6.egg', '/usr/lo

### Load Engine

In [3]:
# Single logger instance shared by the TensorRT builder, ONNX parser, plugin
# registry initialisation and runtime objects created in `get_engine`.
TRT_LOGGER = trt.Logger()

def get_engine(onnx_file_path, engine_file_path=""):
    """Return a TensorRT engine, reusing a serialized one when it exists.

    If ``engine_file_path`` points to an existing file, the engine is
    deserialized from it. Otherwise the ONNX model at ``onnx_file_path`` is
    parsed, an FP16 engine is built, and (when ``engine_file_path`` is
    non-empty) the serialized plan is written to that path.

    Args:
        onnx_file_path: Path of the ONNX model to convert.
        engine_file_path: Path of the serialized engine to load or create.
            With the default ``""`` the engine is built but not saved.

    Returns:
        The deserialized engine, or ``None`` if the ONNX file could not be
        parsed or the engine could not be built (errors are printed).

    Raises:
        FileNotFoundError: If no serialized engine exists and the ONNX file
            is missing.
    """
    def build_engine():
        """Parse the ONNX file, build an FP16 engine and optionally save it."""
        # Fail fast, before any TensorRT object is created. Raising keeps the
        # notebook kernel alive and reports a real error, unlike `exit(0)`.
        if not os.path.exists(onnx_file_path):
            raise FileNotFoundError(
                'ONNX file {} not found; export the model to ONNX first.'.format(onnx_file_path))

        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(common.EXPLICIT_BATCH) as network, builder.create_builder_config() as config, trt.OnnxParser(network, TRT_LOGGER) as parser, trt.Runtime(TRT_LOGGER) as runtime:
            config.max_workspace_size = 1 << 28 # 256MiB
            config.set_flag(trt.BuilderFlag.FP16)
            builder.max_batch_size = 1

            # Parse model file
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    print('ERROR: Failed to parse the ONNX file.')
                    for error in range(parser.num_errors):
                        print(parser.get_error(error))
                    return None

            # Pin the network input to a single 3-channel 384x640 image.
            network.get_input(0).shape = [1, 3, 384, 640]
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))

            plan = builder.build_serialized_network(network, config)
            # Guard against a failed build instead of handing an empty plan
            # to the runtime.
            if plan is None:
                print('ERROR: Failed to build the TensorRT engine.')
                return None

            engine = runtime.deserialize_cuda_engine(plan)
            print("Completed creating Engine")

            # Only cache the plan when a destination was given; the default
            # "" would otherwise make open() fail after the long build.
            if engine_file_path:
                with open(engine_file_path, "wb") as f:
                    f.write(plan)
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        trt.init_libnvinfer_plugins(TRT_LOGGER, "")
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()

### Convert Onnx to TensorRT if not converted yet

In [4]:
# Constant-folded ONNX model produced by the Polygraphy cell above.
onnx_file_path = 'weights/yolopv2_folded.onnx'
# Location of the serialized engine: `get_engine` reads it when the file
# exists, otherwise it builds the engine from the ONNX file and writes it here.
engine_file_path = 'weights/yolopv2.trt'

engine = get_engine(onnx_file_path, engine_file_path) 

Loading ONNX file from path weights/yolop_folded.onnx...
Beginning ONNX file parsing
Completed parsing of ONNX file
Building an engine from file weights/yolop_folded.onnx; this may take a while...
Completed creating Engine


### Run Inference

In [5]:
# Create dummy input
# `get_engine` fixes the network input to [1, 3, 384, 640], so the dummy
# tensor is created with exactly that shape (the batch axis is already there;
# adding another one would make the array 5-D) and directly as float32.
dummy_input = np.zeros((1, 3, 384, 640), dtype=np.float32)

# Load TensorRT Model
# `get_engine` deserializes the engine file if it exists, otherwise builds it.
engine = get_engine(onnx_file_path, engine_file_path)

# Prepare Context
context = engine.create_execution_context()
trt_outputs = []
# Buffers, binding list and stream used by the inference call in the next cell.
inputs, outputs, bindings, stream = common.allocate_buffers(engine)

In [6]:
# Define Input
inputs[0].host = dummy_input

# Run Inference
trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
# NOTE(review): the recorded run of the following cell prints six flattened
# 1-D outputs of sizes 1163520, 290880, 72720, 1527120, 491520 and 491520.
# Only the last two sizes equal 2 * 384 * 640, so they are presumably the two
# segmentation outputs and the first four the detection outputs -- confirm the
# output order against the ONNX graph.


In [7]:
# Print the shape of every output array returned by the inference call.
for trt_output in trt_outputs:
    print(trt_output.shape)

(1163520,)
(290880,)
(72720,)
(1527120,)
(491520,)
(491520,)


### Get Segmentation Result:

In [None]:
# Spatial size of the network input set in `get_engine` ([1, 3, 384, 640]).
SEG_HEIGHT, SEG_WIDTH = 384, 640

# In the recorded run, trt_outputs[0] has 1163520 elements, which cannot be
# reshaped into 384x640 planes. Only outputs 4 and 5 (491520 = 2 * 384 * 640
# elements each) can.
# NOTE(review): index 4 is assumed to be the first segmentation output --
# confirm the output order (and which segmentation head it is) against the
# ONNX graph.
SEG_OUTPUT_INDEX = 4

seg_raw = trt_outputs[SEG_OUTPUT_INDEX]
# Fail with a clear message if the selected output is not a stack of
# SEG_HEIGHT x SEG_WIDTH planes.
assert seg_raw.size % (SEG_HEIGHT * SEG_WIDTH) == 0, (
    "output {} has {} elements, not a multiple of {}x{}".format(
        SEG_OUTPUT_INDEX, seg_raw.size, SEG_HEIGHT, SEG_WIDTH))

# -1 lets NumPy infer the channel count (2 for the recorded output sizes).
seg_result = np.reshape(seg_raw, (-1, SEG_HEIGHT, SEG_WIDTH))
seg_result