In [3]:
import os
import PIL.Image as Image
import numpy as np
import time
from onnx2trt import get_engine, allocate_buffers, do_inference

In [16]:
model_name = "resnet18"
onnx_path = os.path.join("/work", 
                          "garin0115", 
                          "models", 
                          model_name+"_256x832", 
                          "models", 
                          "weights_19", 
                          model_name+".onnx")

engine = get_engine(fp16_mode=True, onnx_file_path=onnx_path, engine_file_path=model_name+".plan", save_engine=True)

# Create the context for this engine
context = engine.create_execution_context()

# Allocate buffers for input and output
inputs, outputs, bindings, stream = allocate_buffers(engine) # input, output: host # bindings


# Load data to the buffer
image_path = "assets/test_image.jpg"
input_image = Image.open(image_path).convert('RGB').resize((832, 256), Image.LANCZOS)
input_image = np.array(input_image).transpose((2, 0, 1)).astype(np.float16) / 255.
input_image = np.expand_dims(input_image, axis=0)
inputs[0].host = input_image.reshape(-1)

# inputs[1].host = ... for multiple input
t1 = time.time()
for i in range(10):
    trt_outputs = do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream) # numpy data
t2 = time.time()


print("FPS: {}".format(10/(t2-t1)), 'TensorRT ok')

[info] Reading engine from file resnet18.plan
638976 <class 'numpy.float32'>
3328 <class 'numpy.float32'>
13312 <class 'numpy.float32'>
53248 <class 'numpy.float32'>
212992 <class 'numpy.float32'>
FPS: 366.7054853206036 TensorRT ok
