### Load torchreid in PyTorch

In [None]:
import numpy as np
import torch
import torchvision.transforms as T
from PIL import Image

from torchreid.utils import (
    check_isfile, load_pretrained_weights, compute_model_complexity
)
from torchreid.models import build_model

# Architecture name understood by torchreid's build_model().
model_name = "osnet_x0_25"
# Local checkpoint to load; when it is missing, pretrained=True is passed to build_model() instead.
model_path = "/home/ubuntu/workspace/ywshin/construct/Yolov5_StrongSORT_OSNet/weights/osnet_x0_25_msmt17.pt"
device = "cuda:0"

# Decide once whether the local checkpoint is usable, so that building the
# model and loading the weights cannot disagree and the file is only
# checked a single time.
has_local_weights = bool(model_path) and check_isfile(model_path)

model = build_model(
    model_name,
    num_classes=1,  # NOTE(review): presumably irrelevant for feature extraction - confirm
    pretrained=not has_local_weights,
    use_gpu=device.startswith('cuda'),
)

if has_local_weights:
    load_pretrained_weights(model, model_path)

# Switch to inference mode before checking / exporting the model.
model.eval()
model.to(device)

### Check Torch Model

In [None]:
import numpy as np
# Build the example input in this cell: the export cell further down is the
# first place that otherwise defines `dummy_input`, so running the notebook
# top to bottom would fail here with a NameError.
dummy_input = torch.ones(1, 3, 256, 128, device=device)

# Reference output of the PyTorch model; gradients are not needed.
with torch.no_grad():
    rslt1 = model(dummy_input)

### Convert to ONNX Model

In [None]:
# Example input used to run the model once during export.
dummy_input = torch.ones(1, 3, 256, 128).cuda()

torch.onnx.export(
    model,                      # model being run
    dummy_input,                # model input (or a tuple for multiple inputs)
    "torchreid.onnx",           # where to save the model
    export_params=True,         # store the trained parameter weights inside the model file
    opset_version=11,           # the ONNX version to export the model to
    do_constant_folding=False,  # whether to execute constant folding for optimization
    input_names=["input"],      # the model's input names
    output_names=["features"],  # the model's output names
    dynamic_axes={
        # variable length axes of the image input
        'input': {0: 'batch_size', 2: "height", 3: "width"},
        # Only the batch axis is declared for the output.
        # NOTE(review): the re-id embedding is expected to be 2-D
        # (batch, feature_dim), so it has no axes 2/3 to name - confirm.
        'features': {0: 'batch_size'},
    },
)

### Check ONNX Model

In [None]:
import onnxruntime as ort
import numpy as np

# The installed onnxruntime-gpu build has to be compatible with the local
# CUDA version, so print the version for reference.
print(ort.__version__)

# Report the device onnxruntime targets, to check whether the GPU is used.
print(ort.get_device())

# Open the exported model with the CUDA execution provider.
ort_sess = ort.InferenceSession(
    'torchreid.onnx',
    providers=['CUDAExecutionProvider'],
)

# Run the same dummy input through the ONNX model; the tensor is moved to
# host memory and converted to a NumPy array first.
rslt2 = ort_sess.run(
    None,
    {'input': dummy_input.detach().cpu().numpy()},
)


### TensorRT Conversion

In [None]:
import numpy as np
import tensorrt as trt
import pycuda.driver as cuda
import pycuda.autoinit
from PIL import ImageDraw

# from data_processing import PreprocessYOLO, PostprocessYOLO, ALL_CATEGORIES

import sys, os
sys.path.append("tensorrt/")
import samples.python.common as common

### Set Engine

In [None]:
# Shared TensorRT logger handed to the builder, ONNX parser and runtime objects.
TRT_LOGGER = trt.Logger()

def get_engine(onnx_file_path, engine_file_path=""):
    """Return a TensorRT engine, loading a cached one or building it from ONNX.

    If ``engine_file_path`` names an existing file, its content is
    deserialized and returned. Otherwise the ONNX model at ``onnx_file_path``
    is parsed and built into an engine with the input pinned to
    ``[1, 3, 256, 128]``; when ``engine_file_path`` is non-empty the
    serialized plan is also written there so later calls can reuse it.

    Args:
        onnx_file_path: Path of the ONNX model. Only read when no serialized
            engine exists at ``engine_file_path``.
        engine_file_path: Path of the serialized engine to load or create.
            The default empty string disables caching.

    Returns:
        The deserialized engine, or ``None`` when the ONNX file cannot be
        parsed or the engine cannot be built (details are printed).

    Raises:
        FileNotFoundError: If an engine has to be built and ``onnx_file_path``
            does not exist.
    """
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with"""
        # Fail before any TensorRT object is created. Exiting with status 0
        # would report success although nothing was built.
        if not os.path.exists(onnx_file_path):
            raise FileNotFoundError(
                'ONNX file {} not found, please export the model to ONNX first.'.format(onnx_file_path)
            )

        with trt.Builder(TRT_LOGGER) as builder, builder.create_network(common.EXPLICIT_BATCH) as network, builder.create_builder_config() as config, trt.OnnxParser(network, TRT_LOGGER) as parser, trt.Runtime(TRT_LOGGER) as runtime:
            config.max_workspace_size = 1 << 28 # 256MiB
            # config.set_flag(trt.BuilderFlag.FP16)
            builder.max_batch_size = 1

            # Parse model file
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                if not parser.parse(model.read()):
                    print ('ERROR: Failed to parse the ONNX file.')
                    for error_index in range(parser.num_errors):
                        print (parser.get_error(error_index))
                    return None

            # Pin the network input to one fixed shape, so the engine is built
            # for exactly this size even if the ONNX input declares dynamic axes.
            network.get_input(0).shape = [1, 3, 256, 128]
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))

            plan = builder.build_serialized_network(network, config)
            if plan is None:
                # Mirror the parse-failure path instead of passing None on to
                # the runtime.
                print('ERROR: Failed to build the TensorRT engine.')
                return None

            engine = runtime.deserialize_cuda_engine(plan)
            print("Completed creating Engine")

            # Cache the plan only when a target path was given; the default
            # empty path cannot be opened for writing.
            if engine_file_path:
                with open(engine_file_path, "wb") as f:
                    f.write(plan)
            return engine

    if os.path.exists(engine_file_path):
        # If a serialized engine exists, use it instead of building an engine.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())

    return build_engine()

### Generate TensorRT engine file

In [None]:
# ONNX model written by the export cell.
onnx_file_path = 'torchreid.onnx'
# Destination of the serialized TensorRT engine.
# NOTE(review): the name says "bs_16", but get_engine() pins the input shape
# to [1, 3, 256, 128] - confirm the intended batch size.
engine_file_path = 'torchreid_256X128_bs_16.trt'

# Loads the engine file if it already exists, otherwise builds it from the ONNX model.
get_engine(onnx_file_path, engine_file_path) 

### Simple Demo

In [None]:
# Serialized engine produced by the "generate" step.
engine_file_path = 'torchreid_256X128_bs_16.trt'
with open(engine_file_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

# Deserialization yields None on failure; stop with a clear message instead
# of an AttributeError on the next line.
if engine is None:
    raise RuntimeError(
        "Failed to deserialize TensorRT engine from {}".format(engine_file_path)
    )

context = engine.create_execution_context()
# Buffers for every engine binding, allocated by the TensorRT samples helper.
inputs, outputs, bindings, stream = common.allocate_buffers(engine)

# Random float32 image batch in the shape the engine was built for.
inputs[0].host = torch.randn(1, 3, 256, 128).numpy().astype(np.float32)
# NOTE(review): presumably returns one host array per engine output - confirm
# against samples/python/common.py.
trt_outputs = common.do_inference_v2(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)