### HARDWARE OPTIMIZATIONS

1. TensorRT
2. ONNX
3. OpenVINO


(using the fine tuned YOLOv11_5_x_best.pt model (largest) all throught this file)

In [1]:
model_path = "yolov11/yolov11_5_x_best.pt"

### 1. TensorRT

##### export config

In [2]:
from ultralytics import YOLO

model = YOLO(model_path)

# TensorRT FP32
out = model.export(format="engine", imgsz=640, dynamic=True, verbose=False, batch=8, workspace=2)

# TensorRT FP16
out = model.export(format="engine", imgsz=640, dynamic=True, verbose=False, batch=8, workspace=2, half=True)

# TensorRT INT8 with calibration `data` (i.e. COCO, ImageNet, or DOTAv1 for appropriate model task)
out = model.export(
    format="engine", imgsz=640, dynamic=True, verbose=False, batch=8, workspace=2, int8=True, data="coco8.yaml"
)

Ultralytics 8.3.59 🚀 Python-3.11.11 torch-2.5.1 


ValueError: Invalid CUDA 'device=0' requested. Use 'device=cpu' or pass valid CUDA device(s) if available, i.e. 'device=0' or 'device=0,1,2,3' for Multi-GPU.

torch.cuda.is_available(): False
torch.cuda.device_count(): 0
os.environ['CUDA_VISIBLE_DEVICES']: None
See https://pytorch.org/get-started/locally/ for up-to-date torch install instructions if no CUDA devices are seen by torch.


##### inference

In [None]:
import cv2

from ultralytics import YOLO

model = YOLO("yolov8n.engine")
img = cv2.imread("path/to/image.jpg")

for _ in range(100):
    result = model.predict(
        [img] * 8,  # batch=8 of the same image
        verbose=False,
        device="cuda",
    )

##### validation

In [None]:
from ultralytics import YOLO

model = YOLO("yolov8n.engine")
results = model.val(
    data="data.yaml",  # COCO, ImageNet, or DOTAv1 for appropriate model task
    batch=1,
    imgsz=640,
    verbose=False,
    device="cuda",
)

### 2. ONNX Runtime

In [9]:
def check_onnx_model(file_path: str):
    
    import onnx

    onnx_model = onnx.load(onnx_model_path)
    onnx.checker.check_model(onnx_model)

In [11]:
from ultralytics import YOLO

model = YOLO(model_path)

model.export(format="onnx", dynamic=True)

onnx_model_path = "yolov11/yolov11_5_x_best.onnx"

check_onnx_model(onnx_model_path)

Ultralytics 8.3.59 🚀 Python-3.11.11 torch-2.5.1 CPU (Apple M3 Pro)
YOLO11x summary (fused): 464 layers, 56,839,729 parameters, 0 gradients, 194.5 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov11/yolov11_5_x_best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 15, 8400) (109.1 MB)

[34m[1mONNX:[0m starting export with onnx 1.17.0 opset 19...
[34m[1mONNX:[0m slimming with onnxslim 0.1.47...
[34m[1mONNX:[0m export success ✅ 8.0s, saved as 'yolov11/yolov11_5_x_best.onnx' (217.1 MB)

Export complete (9.1s)
Results saved to [1m/Users/ayush/Documents/projects/yolov11-inference-optimizations/yolov11[0m
Predict:         yolo predict task=detect model=yolov11/yolov11_5_x_best.onnx imgsz=640  
Validate:        yolo val task=detect model=yolov11/yolov11_5_x_best.onnx imgsz=640 data=./doclaynet.yaml  
Visualize:       https://netron.app
