In [14]:
# import ctypes
import os
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
import threading
import numpy as np

class HostDeviceMem:
    def __init__(self, host_mem, device_mem):
        self.host = host_mem
        self.device = device_mem

    def __str__(self):
        return "Host:\n" + str(self.host) + "\nDevice:\n" + str(self.device)

    def __repr__(self):
        return self.__str__()


class InferenceBackend:
    # initialize TensorRT
    TRT_LOGGER = trt.Logger(trt.Logger.ERROR)
    trt.init_libnvinfer_plugins(TRT_LOGGER, '')

    def __init__(self, model_path, batch_size):
        self.cuda_ctx = cuda.Device(0).make_context()
        if self.cuda_ctx:
            self.cuda_ctx.push()
        # self.model = model
        self.batch_size = batch_size
        # # load plugin if the model requires one
        # if self.model.PLUGIN_PATH is not None:
        #     try:
        #         ctypes.cdll.LoadLibrary(self.model.PLUGIN_PATH)
        #     except OSError as err:
        #         raise RuntimeError('Plugin not found') from err

        # load trt engine or build one if not found
        if not os.path.exists(model_path):
            # self.engine = self.model.build_engine(InferenceBackend.TRT_LOGGER, self.batch_size)
            raise RuntimeError('Not exist model path')
        else:
            runtime = trt.Runtime(InferenceBackend.TRT_LOGGER)
            with open(model_path, 'rb') as engine_file:
                buf = engine_file.read()
                self.engine = runtime.deserialize_cuda_engine(buf)
        if self.engine is None:
            raise RuntimeError('Unable to load the engine file')
        if self.engine.has_implicit_batch_dimension:
            assert self.batch_size <= self.engine.max_batch_size

        if self.cuda_ctx:
        	self.cuda_ctx.pop()

    def create_new_context(self):
        if self.cuda_ctx:
            self.cuda_ctx.push()
        # allocate buffers
        self.bindings = []
        self.outputs = []
        for binding in self.engine:
            print(f"binding:{binding}")
            shape = self.engine.get_binding_shape(binding)
            print(f"shape:{shape}", f"shape_0:{shape[0]}")
            size = trt.volume(shape)
            if self.engine.has_implicit_batch_dimension:
                size *= self.batch_size
            dtype = trt.nptype(self.engine.get_binding_dtype(binding))
            # allocate host and device buffers
            host_mem = cuda.pagelocked_empty(size, dtype)
            device_mem = cuda.mem_alloc(host_mem.nbytes)
            # append the device buffer to device bindings
            self.bindings.append(int(device_mem))
            if self.engine.binding_is_input(binding):
                if not self.engine.has_implicit_batch_dimension:
                    assert self.batch_size == shape[0]
                self.input = HostDeviceMem(host_mem, device_mem)
                print(f"input:{self.input}", f"input_len:{len(self.input.host)}")
            else:
                self.outputs.append(HostDeviceMem(host_mem, device_mem))
        self.context = self.engine.create_execution_context()
        self.stream = cuda.Stream()
        if self.cuda_ctx:
        	self.cuda_ctx.pop()
        
    def infer(self):
        self.infer_async()
        return self.synchronize()

    def infer_async(self, images):
        self.input.host = [np.ravel(img) for img in images]
        if self.cuda_ctx:
            self.cuda_ctx.push()
            
        for inp in self.input.host:
            cuda.memcpy_htod_async(self.input.device, inp, self.stream)
            
        if self.engine.has_implicit_batch_dimension:
            self.context.execute_async(batch_size=self.batch_size, bindings=self.bindings,
                                       stream_handle=self.stream.handle)
        else:
            self.context.execute_async_v2(bindings=self.bindings, stream_handle=self.stream.handle)
        print(f"outputs: {self.outputs}")
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out.host, out.device, self.stream)
        if self.cuda_ctx:
            self.cuda_ctx.pop()

    def synchronize(self):
        if self.cuda_ctx:
        	self.cuda_ctx.push()
        self.stream.synchronize()
        if self.cuda_ctx:
        	self.cuda_ctx.pop()
        return [out.host for out in self.outputs]

In [None]:
def infer(self, img):
        print(self.inputs[0]['host'], len(self.inputs[0]['host']))
        self.inputs[0]['host'] = np.ravel(img)
        # print(self.inputs, self.inputs[0], self.inputs[0]['host'])
        # transfer data to the gpu (host to device)
        for inp in self.inputs:
            cuda.memcpy_htod_async(inp['device'], inp['host'], self.stream)
        # run inference-single
        self.context.execute_async_v2(
            bindings=self.bindings,
            stream_handle=self.stream.handle)
        # fetch outputs from gpu (device to host)
        for out in self.outputs:
            cuda.memcpy_dtoh_async(out['host'], out['device'], self.stream)
        # synchronize stream
        self.stream.synchronize()

        data = [out['host'] for out in self.outputs]
        return data

In [4]:
import cv2
horse = cv2.imread('inference/images/horse.jpg')
bus = cv2.imread('inference/images/bus.jpg')
batch_2 = [bus, horse]

In [15]:
detector_engine = InferenceBackend(model_path='./yolov7_2-py-fp16.trt', batch_size=2)
detector_engine.create_new_context()
detector_engine.infer_async(images = batch_2)
detector_engine.synchronize()

binding:images
shape:(2, 3, 640, 640) shape_0:2
input:Host:
[0. 0. 0. ... 0. 0. 0.]
Device:
<pycuda._driver.DeviceAllocation object at 0x7fd353c7dee0> input_len:2457600
binding:num_dets
shape:(2, 1) shape_0:2
binding:det_boxes
shape:(2, 100, 4) shape_0:2
binding:det_scores
shape:(2, 100) shape_0:2
binding:det_classes
shape:(2, 100) shape_0:2
outputs: [Host:
[0 0]
Device:
<pycuda._driver.DeviceAllocation object at 0x7fd353c78ca0>, Host:
[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 

[array([0, 0], dtype=int32),
 array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,