# Inference speed

In [4]:
# import os
# import glob
# import subprocess

# def find_libraries():
#     """Find CUDA and CUDNN libraries"""
#     print("=== System Libraries ===")
    
#     # Check CUDA version
#     try:
#         nvcc = subprocess.check_output(['nvcc', '--version']).decode()
#         print("NVCC version:", nvcc.split('release')[-1].strip())
#     except:
#         print("NVCC not found")
    
#     # Find CUDA libraries
#     cuda_paths = [
#         '/usr/local/cuda*/lib64',
#         '/usr/lib/x86_64-linux-gnu',
#         '/usr/local/nvidia/lib64',
#         '/usr/lib/cuda/lib64'
#     ]
    
#     print("\n=== CUDA Libraries ===")
#     for path in cuda_paths:
#         libs = glob.glob(f"{path}/libcudnn*.so*")
#         if libs:
#             print(f"\nIn {path}:")
#             for lib in libs:
#                 print(f"  {os.path.basename(lib)} -> {os.path.realpath(lib)}")
    
#     # Find ONNX Runtime libraries
#     print("\n=== ONNX Runtime Libraries ===")
#     import onnxruntime as ort
#     print(f"ONNX Runtime version: {ort.__version__}")
#     print(f"ONNX Runtime path: {os.path.dirname(ort.__file__)}")
    
#     # Check if CUDA is available for ONNX Runtime
#     providers = ort.get_available_providers()
#     print("\nAvailable ONNX Runtime providers:", providers)
    
#     # Create a dummy model to test CUDA provider
#     if 'CUDAExecutionProvider' in providers:
#         import numpy as np
#         dummy_input = np.random.randn(1, 3, 640, 640).astype(np.float32)
#         try:
#             sess = ort.InferenceSession('rt-detr/best.onnx', 
#                                       providers=['CUDAExecutionProvider'])
#             print("\nCUDA Provider successfully initialized!")
#         except Exception as e:
#             print("\nError initializing CUDA Provider:", e)

# find_libraries()

In [5]:
import os

# Add CUDA and CUDNN library paths
os.environ['LD_LIBRARY_PATH'] = '/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu:' + os.environ.get('LD_LIBRARY_PATH', '')

import time
import torch
import numpy as np
from pathlib import Path
from ultralytics import YOLO, RTDETR
import onnxruntime as ort

os.environ['CUDA_VISIBLE_DEVICES'] = '0'
assert torch.cuda.is_available(), "CUDA is not available!"
print(f"Using CUDA device: {torch.cuda.get_device_name()}")
print("ONNX Runtime providers:", ort.get_available_providers())

Using CUDA device: GRID A100-20C
ONNX Runtime providers: ['TensorrtExecutionProvider', 'CUDAExecutionProvider', 'AzureExecutionProvider', 'CPUExecutionProvider']


In [6]:
# def time_inference(model, img_path, num_warmup=10, num_iter=50):
#     """Time model inference for any format"""
#     # Create dummy input tensor
#     dummy_input = torch.ones(1, 3, 640, 640, device="cuda:0")
    
#     # Warmup
#     for _ in range(num_warmup):
#         with torch.no_grad():
#             _ = model(dummy_input)
#             torch.cuda.synchronize()
    
#     # Timing
#     times = []
#     for _ in range(num_iter):
#         start = time.perf_counter()
#         with torch.no_grad():
#             _ = model(dummy_input)
#             torch.cuda.synchronize()
#         times.append(time.perf_counter() - start)
    
#     return np.mean(times), np.std(times)

def time_inference(model, img_path, num_warmup=10, num_iter=50):
    """Time model inference for any format"""
    # Create dummy input tensor
    dummy_input = torch.randn(1, 3, 640, 640, device="cuda:0")
    dummy_input = torch.clamp(dummy_input, 0, 1)  # Ensure values are 0-1

    
    # For ONNX, create session with CUDA provider
    if isinstance(model, str) and model.endswith('.onnx'):
        providers = [
            ('CUDAExecutionProvider', {
                'device_id': 0,
                'cudnn_conv_algo_search': 'EXHAUSTIVE',
            }),
        ]
        session = ort.InferenceSession(model, providers=providers)
        input_name = session.get_inputs()[0].name
        dummy_input = dummy_input.cpu().numpy()  # ONNX expects numpy array
    
    # Warmup
    for _ in range(num_warmup):
        with torch.no_grad():
            if isinstance(model, str) and model.endswith('.onnx'):
                _ = session.run(None, {input_name: dummy_input})
            else:
                _ = model(dummy_input)
                torch.cuda.synchronize()
    
    # Timing
    times = []
    for _ in range(num_iter):
        start = time.perf_counter()
        with torch.no_grad():
            if isinstance(model, str) and model.endswith('.onnx'):
                _ = session.run(None, {input_name: dummy_input})
            else:
                _ = model(dummy_input)
                torch.cuda.synchronize()
        times.append(time.perf_counter() - start)
    
    return np.mean(times), np.std(times)

In [7]:
img_path = "testim.jpg"

# Models to test with their appropriate classes
models = {
    "YOLOv3": ("yolov3/best.pt", YOLO),
    "YOLOv11": ("yolov11/best.pt", YOLO),
    "RT-DETR": ("rt-detr/best.pt", RTDETR)
}

results = {}

for model_name, (model_path, model_class) in models.items():
    print(f"\nTesting {model_name}...")
    base_path = Path(model_path).with_suffix('')
    
    # Native PyTorch
    try:
        model = model_class(f"{base_path}.pt")
        mean_time, std_time = time_inference(model, img_path)
        results[f"{model_name}_pytorch"] = (1/mean_time, std_time/mean_time**2)  # Convert to FPS
    except Exception as e:
        print(f"Error with PyTorch model: {e}")
    
    # TorchScript
    if (Path(f"{base_path}.torchscript")).exists():
        try:
            model = model_class(f"{base_path}.torchscript")
            mean_time, std_time = time_inference(model, img_path)
            results[f"{model_name}_torchscript"] = (1/mean_time, std_time/mean_time**2)  # Convert to FPS
        except Exception as e:
            print(f"Error with TorchScript model: {e}")
    
    # ONNX
    if (Path(f"{base_path}.onnx")).exists():
        try:
            model = model_class(f"{base_path}.onnx")
            mean_time, std_time = time_inference(model, img_path)
            results[f"{model_name}_onnx"] = (1/mean_time, std_time/mean_time**2)  # Convert to FPS
        except Exception as e:
            print(f"Error with ONNX model: {e}")
    
    # TensorRT
    if (Path(f"{base_path}.engine")).exists():
        try:
            model = model_class(f"{base_path}.engine")
            mean_time, std_time = time_inference(model, img_path)
            results[f"{model_name}_tensorrt"] = (1/mean_time, std_time/mean_time**2)  # Convert to FPS
        except Exception as e:
            print(f"Error with TensorRT model: {e}")
        
print("\nInference Speed Results (FPS):")
print("-" * 60)
print(f"{'Model':30} {'Mean FPS':15} {'Std Dev':15}")
print("-" * 60)
for model_name, (mean_fps, std_fps) in results.items():
    print(f"{model_name:30} {mean_fps:15.1f} {std_fps:15.1f}")


Testing YOLOv3...

0: 640x640 (no detections), 14.3ms
Speed: 0.0ms preprocess, 14.3ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 14.3ms
Speed: 0.0ms preprocess, 14.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 14.3ms
Speed: 0.0ms preprocess, 14.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 14.3ms
Speed: 0.0ms preprocess, 14.3ms inference, 1.0ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 14.4ms
Speed: 0.0ms preprocess, 14.4ms inference, 0.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 14.3ms
Speed: 0.0ms preprocess, 14.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 14.3ms
Speed: 0.0ms preprocess, 14.3ms inference, 0.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 14.3ms
Speed: 0.0ms p