In [1]:
import os
import time
import numpy as np
import pycuda.autoinit
import pycuda.driver as cuda
import tensorrt as trt
import cv2

In [2]:
# Set the path to the directory containing the images
image_path = "./JPEGImages"
model_path = "./segformer_fp16_2.engine"
# model_path = "./segformer.trt"
# model_path = "./darknet_fp16.engine"

# Set the input size expected by your TensorRT model
input_height = 640
input_width = 640
input_channels = 3

In [3]:
# Load the TensorRT model
TRT_LOGGER = trt.Logger(trt.Logger.INFO)
with open(model_path, "rb") as f, trt.Runtime(TRT_LOGGER) as runtime:
    engine = runtime.deserialize_cuda_engine(f.read())

# Create execution context
context = engine.create_execution_context()

# Get input and output names
input_name = engine.get_binding_name(0)
output_name = engine.get_binding_name(1)

[05/03/2024-22:48:00] [TRT] [I] Loaded engine size: 25 MiB
[05/03/2024-22:48:00] [TRT] [W] Using an engine plan file across different models of devices is not recommended and is likely to affect performance or even cause errors.
[05/03/2024-22:48:03] [TRT] [I] [MemUsageChange] Init cuDNN: CPU +342, GPU +321, now: CPU 637, GPU 4730 (MiB)
[05/03/2024-22:48:03] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +24, now: CPU 0, GPU 24 (MiB)
[05/03/2024-22:48:03] [TRT] [I] [MemUsageChange] Init cuDNN: CPU +0, GPU +0, now: CPU 612, GPU 4708 (MiB)
[05/03/2024-22:48:03] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +122, now: CPU 0, GPU 146 (MiB)


  input_name = engine.get_binding_name(0)
  output_name = engine.get_binding_name(1)


In [4]:
# Allocate device memory for input and output
input_shape = (1, input_channels, input_height, input_width)
output_shape = engine.get_binding_shape(1)
input_size = trt.volume(input_shape) * np.dtype(np.float32).itemsize
output_size = trt.volume(output_shape) * np.dtype(np.float32).itemsize
d_input = cuda.mem_alloc(input_size)
d_output = cuda.mem_alloc(output_size)


  output_shape = engine.get_binding_shape(1)


In [5]:

# Create a stream to run inference
stream = cuda.Stream()

In [6]:
# Get the list of image files in the directory
image_files = [f for f in os.listdir(image_path) if f.endswith((".jpg", ".jpeg", ".png"))]
num_images = len(image_files)

def preprocess_image(image_path, input_height, input_width):
    # Read the image using OpenCV
    image = cv2.imread(image_path)
    
    # Resize the image to the desired input shape
    resized_image = cv2.resize(image, (input_width, input_height))
    
    # Convert the image from BGR to RGB color space
    rgb_image = cv2.cvtColor(resized_image, cv2.COLOR_BGR2RGB)
    
    # Normalize the pixel values to the range [0, 1]
    normalized_image = rgb_image.astype(np.float32) / 255.0
    
    # Transpose the image to match the input shape (channels first)
    transposed_image = normalized_image.transpose((2, 0, 1))
    
    # Add batch dimension to the image
    batch_image = np.expand_dims(transposed_image, axis=0)
    
    return batch_image

In [7]:
# Perform inference on each image

total_time = 0
for image_file in image_files:
    # Preprocess the image
    image = preprocess_image(os.path.join(image_path, image_file), input_height, input_width)

    start_time = time.time()
    
    # Transfer input data to device
    cuda.memcpy_htod_async(d_input, image.ravel(), stream)
    
    # Run inference
    context.execute_async_v2(bindings=[int(d_input), int(d_output)], stream_handle=stream.handle)
    
    # Transfer output data from device
    output = np.empty(output_shape, dtype=np.float32)
    cuda.memcpy_dtoh_async(output, d_output, stream)
    
    # Synchronize the stream to ensure the inference is complete
    stream.synchronize()

    end_time = time.time()
    total_time = total_time + (end_time - start_time)
    
    # Postprocess the output (if needed)
    # ...

# end_time = time.time()
# total_time = end_time - start_time
inference_speed = num_images / total_time

print(f"Processed {num_images} images in {total_time:.2f} seconds")
print(f"Inference speed: {inference_speed:.2f} images per second")

[05/03/2024-22:48:06] [TRT] [E] 3: [executionContext.cpp::enqueueInternal::622] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::enqueueInternal::622, condition: bindings[x] || nullBindingOK
)
[05/03/2024-22:48:06] [TRT] [E] 3: [executionContext.cpp::enqueueInternal::622] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::enqueueInternal::622, condition: bindings[x] || nullBindingOK
)
[05/03/2024-22:48:07] [TRT] [E] 3: [executionContext.cpp::enqueueInternal::622] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::enqueueInternal::622, condition: bindings[x] || nullBindingOK
)
[05/03/2024-22:48:07] [TRT] [E] 3: [executionContext.cpp::enqueueInternal::622] Error Code 3: API Usage Error (Parameter check failed at: runtime/api/executionContext.cpp::enqueueInternal::622, condition: bindings[x] || nullBindingOK
)
[05/03/2024-22:48:07] [TRT] [E] 3: [executionContext.cpp