In [None]:
from ultralytics import YOLO
from datetime import datetime

In [None]:
# Load the yolo11s.pt checkpoint, then move the resulting model onto the CUDA device
model = YOLO("yolo11s.pt")
model = model.to("cuda")

In [None]:
# Prepare training arguments
# A per-run timestamp keeps successive runs from overwriting each other on disk.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Ultralytics builds the output directory as <project>/<name>. "save_dir" is not
# a documented train setting, so the location is expressed through those two
# keys instead; `save_dir` below simply records where the run will be written.
project_dir = "runs/train"
run_name = f"train_{timestamp}"
save_dir = f"{project_dir}/{run_name}"

train_args = {
    "data": "../data/dataset/data.yaml",
    "epochs": 150,
    "imgsz": 1280,
    "batch": 8,
    "patience": 50,
    "save": True,
    "optimizer": "AdamW",
    # NOTE(review): 0.01 is the SGD-style initial learning rate; Adam-family
    # optimizers are usually run around 1e-3 -- confirm this value is intended.
    "lr0": 0.01,
    "close_mosaic": 20,
    "project": project_dir,
    "name": run_name,
}

In [None]:
# Train model on custom dataset.
# Every key of `train_args` is forwarded as a keyword argument to `model.train`;
# the return value is kept in `results` so it can be inspected afterwards.
results = model.train(**train_args)

In [None]:
# Run validation with no explicit arguments and print the returned metrics object.
# NOTE(review): presumably this validates on the dataset used for training -- confirm.
metrics = model.val()
print(metrics)

In [None]:
# Print training results (the object returned by `model.train`)
print(results)

## Export to TensorRT

In [None]:
# Optional: replace `model` with a previously trained checkpoint.
# Running this cell rebinds the `model` name used by the export cell.
model = YOLO("runs/detect/720_best/weights/best.pt")
model = model.to("cuda")

In [None]:
# Export the current `model` in "engine" format with half precision enabled,
# a 1280 image size, batch size 6 and dynamic shapes turned on.
model.export(format="engine", half=True, imgsz=1280, batch=6, dynamic=True)

## Model Performance Evaluation

In [None]:
import time
import numpy as np

# Checkpoint locations: the PyTorch weights and the engine file that sits next to them
# (presumably written by the export cell -- confirm the file exists before running).
pt_path = "runs/detect/720_best/weights/best.pt"
engine_path = "runs/detect/720_best/weights/best.engine"

# Only the PyTorch model is moved to CUDA explicitly; the engine is loaded as-is.
model_pt = YOLO(pt_path)
model_pt = model_pt.to("cuda")
model_engine = YOLO(engine_path)

In [None]:
def benchmark_model(model, test_images, num_runs=100, warmup_runs=10):
    """
    Benchmark per-call inference latency and PyTorch-side GPU memory.

    Args:
        model: Callable invoked as ``model(image)``; its return value is discarded.
        test_images: Non-empty sequence of inputs. Timed runs cycle through it
            in order; warmup always uses ``test_images[0]``.
        num_runs: Number of timed calls (must be >= 1).
        warmup_runs: Number of untimed calls made before measurement starts.

    Returns:
        dict with:
            'inference_times': list of per-run latencies in milliseconds
            'mean_time', 'std_time', 'min_time', 'max_time', 'median_time':
                statistics over 'inference_times' (ms)
            'fps': 1000 / mean_time
            'memory_before', 'memory_after': mean ``torch.cuda.memory_allocated()``
                in MB sampled right before / after each run (0.0 without CUDA)
            'memory_diff': memory_after - memory_before

    Raises:
        ValueError: if ``test_images`` is empty or ``num_runs`` is less than 1.
    """
    import gc

    import torch

    if len(test_images) == 0:
        raise ValueError("test_images must contain at least one image")
    if num_runs < 1:
        raise ValueError("num_runs must be at least 1")

    # All CUDA calls are guarded so the benchmark also runs on CPU-only machines.
    use_cuda = torch.cuda.is_available()

    def _allocated_mb():
        # NOTE(review): this only sees memory handed out by PyTorch's allocator;
        # memory a TensorRT engine allocates on its own is presumably not
        # included, so compare the memory figures across backends with care.
        return torch.cuda.memory_allocated() / 1024**2 if use_cuda else 0.0

    # Warmup runs
    for _ in range(warmup_runs):
        model(test_images[0])

    # Benchmark runs
    inference_times = []
    memory_usage_before = []
    memory_usage_after = []

    for i in range(num_runs):
        # Collect garbage first so freed tensors can actually be released
        gc.collect()
        if use_cuda:
            torch.cuda.empty_cache()

        # Record memory before inference
        memory_usage_before.append(_allocated_mb())

        image = test_images[i % len(test_images)]

        # Drain queued GPU work so it is not charged to this run
        if use_cuda:
            torch.cuda.synchronize()

        # perf_counter is monotonic and high-resolution, unlike time.time()
        start_time = time.perf_counter()
        with torch.no_grad():
            model(image)
        if use_cuda:
            torch.cuda.synchronize()  # Wait for GPU operations to complete
        end_time = time.perf_counter()

        inference_times.append((end_time - start_time) * 1000)  # Convert to ms

        # Record memory after inference
        memory_usage_after.append(_allocated_mb())

    mean_time = np.mean(inference_times)
    mean_before = np.mean(memory_usage_before)
    mean_after = np.mean(memory_usage_after)

    return {
        'inference_times': inference_times,
        'mean_time': mean_time,
        'std_time': np.std(inference_times),
        'min_time': np.min(inference_times),
        'max_time': np.max(inference_times),
        'median_time': np.median(inference_times),
        # Guard against a zero mean (timer resolution) instead of dividing by zero
        'fps': 1000 / mean_time if mean_time > 0 else float('inf'),
        'memory_before': mean_before,
        'memory_after': mean_after,
        'memory_diff': mean_after - mean_before
    }

In [None]:
# Prepare test images for benchmarking
import cv2
import torch
import glob
import os

# Get some test images from your dataset
# glob yields matches in filesystem-dependent order, so sort them to make the
# benchmark sample the same on every run and every machine.
test_image_paths = sorted(glob.glob("../data/dataset/valid/images/*.jpg"))[:50]  # Use first 50 validation images
if not test_image_paths:
    test_image_paths = sorted(glob.glob("../data/dataset/valid/images/*.png"))[:50]

print(f"Found {len(test_image_paths)} test images")

# Keep up to 20 paths whose files OpenCV can decode. The model is later fed the
# paths themselves; decoding here is only a readability check. Scanning all
# candidates (not just the first 20) keeps the sample full when some are unreadable.
test_images = []
for img_path in test_image_paths:
    if len(test_images) == 20:  # Use 20 images for testing
        break
    img = cv2.imread(img_path)
    if img is not None:
        test_images.append(img_path)

print(f"Loaded {len(test_images)} test images for benchmarking")
if not test_images:
    print("WARNING: no readable test images found; the benchmark cells below cannot run")

In [None]:
# Benchmark PyTorch model (.pt): banner, measurement, then a one-shot report
print("Benchmarking PyTorch model (.pt)...", "=" * 50, sep="\n")

pt_results = benchmark_model(model_pt, test_images, num_runs=100, warmup_runs=10)

# Each argument is one report line; the trailing empty string yields the blank line
print(
    "PyTorch Model Performance:",
    f"Mean inference time: {pt_results['mean_time']:.2f} ± {pt_results['std_time']:.2f} ms",
    f"Median inference time: {pt_results['median_time']:.2f} ms",
    f"Min inference time: {pt_results['min_time']:.2f} ms",
    f"Max inference time: {pt_results['max_time']:.2f} ms",
    f"Average FPS: {pt_results['fps']:.2f}",
    f"Memory usage: {pt_results['memory_before']:.2f} MB → {pt_results['memory_after']:.2f} MB",
    f"Memory difference per inference: {pt_results['memory_diff']:.2f} MB",
    "",
    sep="\n",
)

In [None]:
# Benchmark TensorRT engine model (.engine): banner, measurement, then a one-shot report
print("Benchmarking TensorRT engine model (.engine)...", "=" * 50, sep="\n")

engine_results = benchmark_model(model_engine, test_images, num_runs=100, warmup_runs=10)

# Each argument is one report line; the trailing empty string yields the blank line
print(
    "TensorRT Engine Model Performance:",
    f"Mean inference time: {engine_results['mean_time']:.2f} ± {engine_results['std_time']:.2f} ms",
    f"Median inference time: {engine_results['median_time']:.2f} ms",
    f"Min inference time: {engine_results['min_time']:.2f} ms",
    f"Max inference time: {engine_results['max_time']:.2f} ms",
    f"Average FPS: {engine_results['fps']:.2f}",
    f"Memory usage: {engine_results['memory_before']:.2f} MB → {engine_results['memory_after']:.2f} MB",
    f"Memory difference per inference: {engine_results['memory_diff']:.2f} MB",
    "",
    sep="\n",
)

In [None]:
# Detailed Performance Comparison
# Reads the two result dicts produced by the benchmark cells and prints a
# side-by-side report. Every relative figure picks its wording from the sign of
# the difference, so a slower engine is reported as "slower", not "-x% faster".
print("DETAILED PERFORMANCE COMPARISON")
print("=" * 60)

# Speed comparison (positive values mean TensorRT is better than PyTorch)
speed_improvement = ((pt_results['mean_time'] - engine_results['mean_time']) / pt_results['mean_time']) * 100
fps_improvement = ((engine_results['fps'] - pt_results['fps']) / pt_results['fps']) * 100

print(f"   INFERENCE SPEED COMPARISON:")
print(f"   PyTorch (.pt):     {pt_results['mean_time']:.2f} ms/image ({pt_results['fps']:.2f} FPS)")
print(f"   TensorRT (.engine): {engine_results['mean_time']:.2f} ms/image ({engine_results['fps']:.2f} FPS)")
print(f"   Speed improvement:  {abs(speed_improvement):.1f}% {'faster' if speed_improvement >= 0 else 'slower'}")
print(f"   FPS improvement:    {abs(fps_improvement):.1f}% {'higher' if fps_improvement >= 0 else 'lower'}")
print()

# Memory comparison (relative to the PyTorch delta; 0 when that delta is zero)
memory_improvement = ((pt_results['memory_diff'] - engine_results['memory_diff']) / abs(pt_results['memory_diff'])) * 100 if pt_results['memory_diff'] != 0 else 0
if memory_improvement > 0:
    memory_verdict = 'better'
elif memory_improvement < 0:
    memory_verdict = 'worse'
else:
    # An exact tie (including the zero-baseline fallback) is neither better nor worse
    memory_verdict = 'no change'

print(f"   MEMORY USAGE COMPARISON:")
print(f"   PyTorch (.pt):     {pt_results['memory_diff']:.2f} MB per inference")
print(f"   TensorRT (.engine): {engine_results['memory_diff']:.2f} MB per inference")
print(f"   Memory efficiency:  {memory_improvement:.1f}% {memory_verdict}")
print()

# Consistency comparison: coefficient of variation (std / mean) in percent
pt_cv = (pt_results['std_time'] / pt_results['mean_time']) * 100
engine_cv = (engine_results['std_time'] / engine_results['mean_time']) * 100

print(f"   CONSISTENCY COMPARISON:")
print(f"   PyTorch (.pt):     CV = {pt_cv:.2f}% (std: {pt_results['std_time']:.2f} ms)")
print(f"   TensorRT (.engine): CV = {engine_cv:.2f}% (std: {engine_results['std_time']:.2f} ms)")
print(f"   More consistent:    {'TensorRT' if engine_cv < pt_cv else 'PyTorch'}")
print()

# Throughput analysis
throughput_gain = engine_results['fps'] - pt_results['fps']

print(f"   THROUGHPUT ANALYSIS:")
print(f"   PyTorch theoretical max:  {pt_results['fps']:.0f} images/second")
print(f"   TensorRT theoretical max: {engine_results['fps']:.0f} images/second")
print(f"   Throughput gain:          {abs(throughput_gain):.0f} {'additional' if throughput_gain >= 0 else 'fewer'} images/second")
print()

# Model size comparison (if files exist)
import os
if os.path.exists(pt_path) and os.path.exists(engine_path):
    pt_size = os.path.getsize(pt_path) / (1024**2)  # MB
    engine_size = os.path.getsize(engine_path) / (1024**2)  # MB
    size_ratio = engine_size / pt_size
    
    print(f"   MODEL SIZE COMPARISON:")
    print(f"   PyTorch (.pt):     {pt_size:.1f} MB")
    print(f"   TensorRT (.engine): {engine_size:.1f} MB")
    print(f"   Size ratio:        {size_ratio:.2f}x {'larger' if size_ratio > 1 else 'smaller'}")
    print()

print("=" * 60)

In [None]:
# Visualize Performance Comparison
import matplotlib.pyplot as plt
import seaborn as sns

# Four-panel figure comparing the two benchmark result dicts:
# histogram, grouped bar chart, per-run time series, and box plot.
plt.style.use('default')
fig, axes = plt.subplots(2, 2, figsize=(15, 12))

pt_times = pt_results['inference_times']
engine_times = engine_results['inference_times']

# 1. Inference Time Distribution
axes[0, 0].hist(pt_times, bins=30, alpha=0.7, label='PyTorch (.pt)', color='blue')
axes[0, 0].hist(engine_times, bins=30, alpha=0.7, label='TensorRT (.engine)', color='red')
axes[0, 0].set_xlabel('Inference Time (ms)')
axes[0, 0].set_ylabel('Frequency')
axes[0, 0].set_title('Inference Time Distribution')
axes[0, 0].legend()
axes[0, 0].grid(True, alpha=0.3)

# 2. Performance Metrics Comparison
# Named `metric_labels` so the validation `metrics` object from the earlier
# cell is not overwritten when this cell runs.
metric_labels = ['Mean Time (ms)', 'FPS', 'Memory (MB)']
pt_values = [pt_results['mean_time'], pt_results['fps'], pt_results['memory_diff']]
engine_values = [engine_results['mean_time'], engine_results['fps'], engine_results['memory_diff']]

x = np.arange(len(metric_labels))
width = 0.35

bars1 = axes[0, 1].bar(x - width/2, pt_values, width, label='PyTorch (.pt)', color='blue', alpha=0.7)
bars2 = axes[0, 1].bar(x + width/2, engine_values, width, label='TensorRT (.engine)', color='red', alpha=0.7)

axes[0, 1].set_xlabel('Metrics')
axes[0, 1].set_ylabel('Values')
axes[0, 1].set_title('Performance Metrics Comparison')
axes[0, 1].set_xticks(x)
axes[0, 1].set_xticklabels(metric_labels, rotation=45)
axes[0, 1].legend()
axes[0, 1].grid(True, alpha=0.3)

# Add value labels on bars
for bars in [bars1, bars2]:
    for bar in bars:
        height = bar.get_height()
        axes[0, 1].annotate(f'{height:.1f}',
                           xy=(bar.get_x() + bar.get_width() / 2, height),
                           xytext=(0, 3),  # 3 points vertical offset
                           textcoords="offset points",
                           ha='center', va='bottom', fontsize=8)

# 3. Time Series Comparison (First 50 runs)
# Clamp to the shorter series so x and y always have matching lengths,
# even if the two benchmarks were run with different num_runs.
n_shown = min(50, len(pt_times), len(engine_times))
runs = np.arange(1, n_shown + 1)
axes[1, 0].plot(runs, pt_times[:n_shown], 'b-', alpha=0.7, label='PyTorch (.pt)')
axes[1, 0].plot(runs, engine_times[:n_shown], 'r-', alpha=0.7, label='TensorRT (.engine)')
axes[1, 0].set_xlabel('Run Number')
axes[1, 0].set_ylabel('Inference Time (ms)')
axes[1, 0].set_title('Inference Time Over Runs (First 50)')
axes[1, 0].legend()
axes[1, 0].grid(True, alpha=0.3)

# 4. Box Plot Comparison
# Tick labels are set after plotting rather than via boxplot's `labels`
# keyword, which newer Matplotlib releases deprecate; this form works on both.
data_to_plot = [pt_times, engine_times]
bp = axes[1, 1].boxplot(data_to_plot, patch_artist=True)
axes[1, 1].set_xticklabels(['PyTorch (.pt)', 'TensorRT (.engine)'])
bp['boxes'][0].set_facecolor('blue')
bp['boxes'][0].set_alpha(0.7)
bp['boxes'][1].set_facecolor('red')
bp['boxes'][1].set_alpha(0.7)
axes[1, 1].set_ylabel('Inference Time (ms)')
axes[1, 1].set_title('Inference Time Distribution (Box Plot)')
axes[1, 1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()