In [13]:

import pycuda.driver as cuda
import pycuda.autoinit
import pycuda.tools
import tensorrt as trt
import numpy as np
import cv2
def infer(engine, image):
    # Allocate buffers
    bindings = []
    h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=np.float16)
    h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=np.float16)
    d_input = cuda.mem_alloc(h_input.nbytes)
    d_output = cuda.mem_alloc(h_output.nbytes)
    
    stream = cuda.Stream()
    
    # Preprocess the image
    image_resized = cv2.resize(image, (1024, 1024))  # Resize to model input size
    image_normalized = image_resized.astype(np.float16) / 255.0  # Normalize and convert to FP16
    np.copyto(h_input, image_normalized.ravel())
    
    # Create context and run inference
    context = engine.create_execution_context()
    bindings = [int(d_input), int(d_output)]
    context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
    
    # Copy output data to host
    cuda.memcpy_dtoh_async(h_output, d_output, stream)
    stream.synchronize()
    
    return np.array(h_output)

In [10]:
e={0:'defective',1:'good'}

In [3]:
import numpy as np
import tensorrt as trt
import cv2
ENGINE_PATH = 'ELClassifcationbased_fp16_dla0.engine'

# Load the TensorRT engine
def load_engine(trt_runtime, engine_path):
    with open(engine_path, 'rb') as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)
engine = load_engine(trt_runtime, ENGINE_PATH)


[09/02/2024-17:38:02] [TRT] [W] Using an engine plan file across different models of devices is not recommended and is likely to affect performance or even cause errors.


In [15]:

# Initialize video capture
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1024)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1024)

cur_frame = 0.0
prev_frame = 0.0

# Dictionary for class labels (replace with your actual labels)
e = {0: "Class0", 1: "Class1"}

while True:
    ret, frame = cap.read()
    if not ret:
        break

    cur_frame = cv2.getTickCount()
    fps = cv2.getTickFrequency() / (cur_frame - prev_frame)
    prev_frame = cur_frame

    # Run inference
    logits = infer(engine, frame)
    
    # Draw FPS
    cv2.putText(frame, f'FPS: {int(fps)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    
    # Draw rectangle and text (adjust positions as needed)
    cv2.rectangle(frame, (0, 0), (1024, 1024), (0, 255, 0), 2)
    cv2.putText(frame, e.get(np.argmax(logits), "Unknown"), (1024, 1014), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow('Camera', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()

  h_input = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(0)), dtype=np.float16)
  h_output = cuda.pagelocked_empty(trt.volume(engine.get_binding_shape(1)), dtype=np.float16)


ValueError: could not broadcast input array from shape (3145728,) into shape (1048576,)

In [37]:
import cv2
import numpy as np
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt

# Load TensorRT engine
def load_engine(trt_runtime, engine_path):
    with open(engine_path, 'rb') as f:
        engine_data = f.read()
    engine = trt_runtime.deserialize_cuda_engine(engine_data)
    return engine


def infer(engine, image):
    # Allocate buffers
    input_shape = engine.get_binding_shape(0)
    output_shape = engine.get_binding_shape(1)
    input_size = trt.volume(input_shape) * np.dtype(np.float16).itemsize
    output_size = trt.volume(output_shape) * np.dtype(np.float16).itemsize
    
    h_input = cuda.pagelocked_empty(input_size, dtype=np.float16)
    h_output = cuda.pagelocked_empty(output_size, dtype=np.float16)
    d_input = cuda.mem_alloc(input_size)
    d_output = cuda.mem_alloc(output_size)
    
    stream = cuda.Stream()

    # Preprocess image
    image_resized = cv2.resize(image, (input_shape[3], input_shape[2]))  # Resize to model input size
    image_gray = cv2.cvtColor(image_resized, cv2.COLOR_BGR2GRAY)  # Convert to grayscale
    image_normalized = image_gray.astype(np.float16) / 255.0  # Normalize and convert to FP16
    image_normalized = np.expand_dims(image_normalized, axis=(0, 1))  # Add batch and channel dimensions
    
    np.copyto(h_input, image_normalized.ravel())
    
    # Create context and run inference
    context = engine.create_execution_context()
    bindings = [int(d_input), int(d_output)]
    
    try:
        cuda.memcpy_htod_async(d_input, h_input, stream)
        context.execute_async_v2(bindings=bindings, stream_handle=stream.handle)
        cuda.memcpy_dtoh_async(h_output, d_output, stream)
        stream.synchronize()
        return np.array(h_output).reshape(output_shape)
    except Exception as e:
        print(f"Inference Error: {e}")
        return None
# Path to TensorRT engine

# Create TensorRT runtime and load the engine
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
trt_runtime = trt.Runtime(TRT_LOGGER)
engine = load_engine(trt_runtime, ENGINE_PATH)

# Initialize video capture
cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1024)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 1024)

cur_frame = 0.0
prev_frame = 0.0

# Dictionary for class labels (replace with your actual labels)
e = {0: "Class0", 1: "Class1"}

while True:
    ret, frame = cap.read()
    if not ret:
        break

    cur_frame = cv2.getTickCount()
    fps = cv2.getTickFrequency() / (cur_frame - prev_frame)
    prev_frame = cur_frame

    # Run inference
    logits = infer(engine, frame)
    
    # Draw FPS
    cv2.putText(frame, f'FPS: {int(fps)}', (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
    
    # Draw rectangle and text (adjust positions as needed)
    cv2.rectangle(frame, (0, 0), (1024, 1024), (0, 255, 0), 2)
    cv2.putText(frame, e.get(np.argmax(logits), "Unknown"), (10, 1014), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

    # Display the frame
    cv2.imshow('Camera', frame)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

cap.release()
cv2.destroyAllWindows()


[09/02/2024-18:15:20] [TRT] [W] CUDA initialization failure with error: 700


TypeError: pybind11::init(): factory function returned nullptr

In [27]:
import pycuda.driver as cuda
import pycuda.autoinit

def test_cuda():
    try:
        cuda.init()
        print("CUDA initialized successfully")
    except cuda.Error as e:
        print(f"CUDA initialization error: {e}")

test_cuda()


CUDA initialized successfully
