In [1]:
# STEP 1: Install Required Dependencies for RF-DETR
import sys
import subprocess

def install_package(package):
    """Install a package using pip with proper error handling"""
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "-q", package])
        print(f"✅ {package} installed successfully")
    except subprocess.CalledProcessError as e:
        print(f"❌ Error installing {package}: {e}")

# Install required packages for RF-DETR object detection
packages = [
    "torch torchvision",  # PyTorch with torchvision for CIFAR-10
    "ultralytics",        # Includes RT-DETR models
    "Pillow",             # Image processing
    "matplotlib",         # Plotting
    "numpy",              # Numerical operations
    "opencv-python",      # Computer vision
    "pycocotools",        # COCO format tools for object detection
    "supervision",        # Modern computer vision utilities
    "albumentations",     # Data augmentation
    "PyYAML",             # YAML file handling
    "tqdm"                # Progress bars
]

print("🔧 Installing dependencies for RF-DETR object detection...")
for package in packages:
    install_package(package)

print("\n🎉 All dependencies installed successfully!")
print("📦 CIFAR-10 dataset will be downloaded and converted to object detection format")
print("🤖 RT-DETR model will be loaded from Ultralytics")

🔧 Installing dependencies for RF-DETR object detection...
❌ Error installing torch torchvision: Command '['c:\\Users\\Eric Aquino\\AppData\\Local\\Programs\\Python\\Python313\\python.exe', '-m', 'pip', 'install', '-q', 'torch torchvision']' returned non-zero exit status 1.
✅ ultralytics installed successfully
✅ Pillow installed successfully
✅ matplotlib installed successfully
✅ numpy installed successfully
✅ opencv-python installed successfully
✅ pycocotools installed successfully
✅ supervision installed successfully
✅ albumentations installed successfully
✅ PyYAML installed successfully
✅ tqdm installed successfully

🎉 All dependencies installed successfully!
📦 CIFAR-10 dataset will be downloaded and converted to object detection format
🤖 RT-DETR model will be loaded from Ultralytics


In [2]:
# STEP 2: Download and Prepare CIFAR-10 for Object Detection
import torchvision
import torchvision.transforms as transforms
import torch
import os
import numpy as np
from PIL import Image, ImageDraw
import yaml
import json
import random

print("📦 Downloading and preparing CIFAR-10 for object detection...")

# CIFAR-10 class names
cifar10_classes = [
    'airplane', 'automobile', 'bird', 'cat', 'deer',
    'dog', 'frog', 'horse', 'ship', 'truck'
]

# Download CIFAR-10 dataset (no transforms for now)
print("⬇️ Downloading CIFAR-10 training set...")
trainset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=None  # We'll handle transforms manually
)

print("⬇️ Downloading CIFAR-10 test set...")
testset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=None
)

print(f"✅ CIFAR-10 dataset loaded successfully!")
print(f"📊 Training samples: {len(trainset)}")
print(f"📊 Test samples: {len(testset)}")
print(f"📊 Classes: {cifar10_classes}")

# Create directory structure for YOLO object detection
dataset_dir = "./cifar10_detection"
os.makedirs(f"{dataset_dir}/images/train", exist_ok=True)
os.makedirs(f"{dataset_dir}/images/val", exist_ok=True)
os.makedirs(f"{dataset_dir}/labels/train", exist_ok=True)
os.makedirs(f"{dataset_dir}/labels/val", exist_ok=True)

def create_detection_annotation(image_size=(64, 64), padding_ratio=0.1):
    """Create a bounding box annotation for the entire image with some padding"""
    width, height = image_size

    # Add some randomness to bounding box to make it more realistic
    pad_x = int(width * padding_ratio * random.uniform(0.5, 1.5))
    pad_y = int(height * padding_ratio * random.uniform(0.5, 1.5))

    # Ensure padding doesn't exceed image boundaries
    pad_x = min(pad_x, width // 4)
    pad_y = min(pad_y, height // 4)

    # Calculate bounding box coordinates (normalized to 0-1)
    x_center = 0.5
    y_center = 0.5
    bbox_width = (width - 2 * pad_x) / width
    bbox_height = (height - 2 * pad_y) / height

    return x_center, y_center, bbox_width, bbox_height

def prepare_detection_data():
    """Convert CIFAR-10 to YOLO object detection format"""
    print("🔄 Converting CIFAR-10 to object detection format...")

    target_size = (64, 64)  # Resize CIFAR-10 images to 64x64

    # Convert training data
    train_count = 0
    for idx, (image, label) in enumerate(trainset):
        if train_count >= 5000:  # Limit for faster processing
            break

        class_name = cifar10_classes[label]

        # Resize image
        image_resized = image.resize(target_size, Image.Resampling.LANCZOS)

        # Save image
        image_filename = f"train_img_{idx:05d}.jpg"
        image_path = f"{dataset_dir}/images/train/{image_filename}"
        image_resized.save(image_path)

        # Create YOLO format annotation
        x_center, y_center, bbox_width, bbox_height = create_detection_annotation(target_size)

        # Save label file (YOLO format: class_id x_center y_center width height)
        label_filename = f"train_img_{idx:05d}.txt"
        label_path = f"{dataset_dir}/labels/train/{label_filename}"

        with open(label_path, 'w') as f:
            f.write(f"{label} {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}\n")

        train_count += 1

        if train_count % 1000 == 0:
            print(f"  Processed {train_count} training images...")

    # Convert test data (use as validation)
    val_count = 0
    for idx, (image, label) in enumerate(testset):
        if val_count >= 1000:  # Limit for validation
            break

        class_name = cifar10_classes[label]

        # Resize image
        image_resized = image.resize(target_size, Image.Resampling.LANCZOS)

        # Save image
        image_filename = f"val_img_{idx:05d}.jpg"
        image_path = f"{dataset_dir}/images/val/{image_filename}"
        image_resized.save(image_path)

        # Create YOLO format annotation
        x_center, y_center, bbox_width, bbox_height = create_detection_annotation(target_size)

        # Save label file
        label_filename = f"val_img_{idx:05d}.txt"
        label_path = f"{dataset_dir}/labels/val/{label_filename}"

        with open(label_path, 'w') as f:
            f.write(f"{label} {x_center:.6f} {y_center:.6f} {bbox_width:.6f} {bbox_height:.6f}\n")

        val_count += 1

        if val_count % 200 == 0:
            print(f"  Processed {val_count} validation images...")

# Prepare the detection data
prepare_detection_data()

# Create data.yaml file for YOLO training
data_config = {
    'path': os.path.abspath(dataset_dir),
    'train': 'images/train',
    'val': 'images/val',
    'names': {i: name for i, name in enumerate(cifar10_classes)},
    'nc': len(cifar10_classes)  # number of classes
}

data_yaml_path = f"{dataset_dir}/data.yaml"
with open(data_yaml_path, 'w') as f:
    yaml.dump(data_config, f, default_flow_style=False)

print(f"✅ Object detection dataset prepared in: {dataset_dir}")
print("📁 Structure:")
print("  - images/train/ - training images")
print("  - images/val/ - validation images")
print("  - labels/train/ - training annotations (YOLO format)")
print("  - labels/val/ - validation annotations (YOLO format)")
print("  - data.yaml - dataset configuration")

# Store dataset info
class DatasetInfo:
    def __init__(self):
        self.location = dataset_dir
        self.data_yaml = data_yaml_path

dataset = DatasetInfo()

# Display sample annotation
print(f"\n📋 Sample annotation format:")
sample_label_file = f"{dataset_dir}/labels/train/train_img_00000.txt"
if os.path.exists(sample_label_file):
    with open(sample_label_file, 'r') as f:
        sample_content = f.read().strip()
        print(f"  {sample_content}")
        print("  Format: class_id x_center y_center width height (normalized 0-1)")
else:
    print("  Sample file not found")

📦 Downloading and preparing CIFAR-10 for object detection...
⬇️ Downloading CIFAR-10 training set...


100%|██████████| 170M/170M [00:13<00:00, 12.9MB/s] 


⬇️ Downloading CIFAR-10 test set...
✅ CIFAR-10 dataset loaded successfully!
📊 Training samples: 50000
📊 Test samples: 10000
📊 Classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
🔄 Converting CIFAR-10 to object detection format...
  Processed 1000 training images...
  Processed 2000 training images...
  Processed 3000 training images...
  Processed 4000 training images...
  Processed 5000 training images...
  Processed 200 validation images...
  Processed 400 validation images...
  Processed 600 validation images...
  Processed 800 validation images...
  Processed 1000 validation images...
✅ Object detection dataset prepared in: ./cifar10_detection
📁 Structure:
  - images/train/ - training images
  - images/val/ - validation images
  - labels/train/ - training annotations (YOLO format)
  - labels/val/ - validation annotations (YOLO format)
  - data.yaml - dataset configuration

📋 Sample annotation format:
  6 0.500000 0.500000 0.843750 0.8

In [3]:
# STEP 3: Load RT-DETR Model
import torch
from ultralytics import RTDETR

print("🤖 Loading RT-DETR object detection model...")

try:
    # Load RT-DETR model (Real-time DEtection TRansformer)
    model = RTDETR('rtdetr-l.pt')  # Large RT-DETR model
    print("✅ RT-DETR large model loaded successfully")
    model_type = "rtdetr_large"

except Exception as e:
    print(f"❌ Error loading RT-DETR large model: {e}")
    print("🔄 Trying RT-DETR medium model...")

    try:
        model = RTDETR('rtdetr-m.pt')  # Medium RT-DETR model
        print("✅ RT-DETR medium model loaded successfully")
        model_type = "rtdetr_medium"
    except Exception as e:
        print(f"❌ Error loading RT-DETR medium model: {e}")
        print("🔄 Trying RT-DETR small model...")

        try:
            model = RTDETR('rtdetr-s.pt')  # Small RT-DETR model
            print("✅ RT-DETR small model loaded successfully")
            model_type = "rtdetr_small"
        except Exception as e:
            print(f"❌ Error loading RT-DETR models: {e}")
            print("Please check your internet connection and try again.")
            raise

# Display model information
print(f"\n📋 Model Information:")
print(f"  - Model Type: {model_type}")
print(f"  - Architecture: RT-DETR (Real-time DEtection TRansformer)")
print(f"  - Task: Object Detection")
print(f"  - Target Classes: {len(cifar10_classes)} CIFAR-10 classes")
print(f"  - Input Size: 640x640 (will be resized from 64x64)")

# Print class information
print(f"\n📊 CIFAR-10 Classes for Detection:")
for i, class_name in enumerate(cifar10_classes):
    print(f"  {i}: {class_name}")

print(f"\n🔧 RT-DETR Features:")
print("  - Transformer-based architecture")
print("  - End-to-end object detection")
print("  - No NMS (Non-Maximum Suppression) required")
print("  - Real-time inference capability")
print("  - Better accuracy than traditional CNN detectors")

🤖 Loading RT-DETR object detection model...
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/rtdetr-l.pt to 'rtdetr-l.pt'...


100%|██████████| 63.4M/63.4M [00:02<00:00, 26.1MB/s]


✅ RT-DETR large model loaded successfully

📋 Model Information:
  - Model Type: rtdetr_large
  - Architecture: RT-DETR (Real-time DEtection TRansformer)
  - Task: Object Detection
  - Target Classes: 10 CIFAR-10 classes
  - Input Size: 640x640 (will be resized from 64x64)

📊 CIFAR-10 Classes for Detection:
  0: airplane
  1: automobile
  2: bird
  3: cat
  4: deer
  5: dog
  6: frog
  7: horse
  8: ship
  9: truck

🔧 RT-DETR Features:
  - Transformer-based architecture
  - End-to-end object detection
  - No NMS (Non-Maximum Suppression) required
  - Real-time inference capability
  - Better accuracy than traditional CNN detectors


In [4]:
# STEP 4: Train RT-DETR Model
import os

print("🏋️ Starting RT-DETR object detection training...")

try:
    # Train the RT-DETR model with optimized parameters
    results = model.train(
        data=dataset.data_yaml,     # Path to dataset YAML file
        epochs=36,                  # Number of training epochs
        imgsz=640,                  # Input image size (RT-DETR standard)
        batch=8,                    # Batch size (adjust based on GPU memory)
        lr0=0.0001,                 # Initial learning rate (lower for transformers)
        weight_decay=0.0001,        # Weight decay for regularization
        patience=10,                # Early stopping patience
        save=True,                  # Save checkpoints
        plots=True,                 # Generate training plots
        verbose=True,               # Verbose output
        workers=2,                  # Number of worker threads
        project='runs/detect',      # Project directory
        name='rtdetr_cifar10',      # Experiment name
        exist_ok=True,              # Overwrite existing experiment
        pretrained=True,            # Use pretrained weights
        optimizer='AdamW',          # Optimizer (AdamW works well with transformers)
        cos_lr=True,                # Cosine learning rate scheduler
        warmup_epochs=3,            # Warmup epochs
        warmup_momentum=0.8,        # Warmup momentum
        box=7.5,                    # Box loss gain
        cls=0.5,                    # Classification loss gain
        dfl=1.5,                    # Distribution focal loss gain
    )

    print("✅ RT-DETR training completed successfully!")
    print(f"📊 Best model saved at: {results.save_dir}")

    # Display training summary
    if hasattr(results, 'results_dict'):
        print("\n📈 Training Summary:")
        metrics = results.results_dict
        print(f"  - Best mAP50: {metrics.get('metrics/mAP50(B)', 'N/A')}")
        print(f"  - Best mAP50-95: {metrics.get('metrics/mAP50-95(B)', 'N/A')}")
        print(f"  - Final Box Loss: {metrics.get('train/box_loss', 'N/A')}")
        print(f"  - Final Class Loss: {metrics.get('train/cls_loss', 'N/A')}")
        print(f"  - Final DFL Loss: {metrics.get('train/dfl_loss', 'N/A')}")

except Exception as e:
    print(f"❌ Training failed: {e}")
    print("💡 Troubleshooting tips:")
    print("  - Try reducing batch size (batch=4 or batch=2)")
    print("  - Try reducing epochs (epochs=10)")
    print("  - Ensure CUDA is available for GPU training")
    print("  - Check if the dataset YAML file is correct")

    # Alternative: Try with reduced parameters
    print("\n🔄 Trying with reduced parameters...")
    try:
        results = model.train(
            data=dataset.data_yaml,
            epochs=36,              # Reduced epochs
            imgsz=320,              # Smaller image size
            batch=4,                # Smaller batch size
            lr0=0.0001,
            patience=5,
            project='runs/detect',
            name='rtdetr_cifar10_reduced',
            exist_ok=True
        )
        print("✅ Reduced training completed!")
    except Exception as e2:
        print(f"❌ Reduced training also failed: {e2}")
        print("💡 Consider using a pre-trained model for inference instead")

# Additional training information
print(f"\n🎯 RT-DETR Training Characteristics:")
print("  - Transformer architecture requires more epochs to converge")
print("  - Lower learning rates work better than CNN-based models")
print("  - AdamW optimizer is recommended for transformers")
print("  - Cosine learning rate scheduling helps with convergence")
print("  - Warmup epochs help stabilize early training")

🏋️ Starting RT-DETR object detection training...
New https://pypi.org/project/ultralytics/8.3.170 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.160  Python-3.13.3 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=False, auto_augment=randaugment, batch=8, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=True, cutmix=0.0, data=./cifar10_detection/data.yaml, degrees=0.0, deterministic=True, device=None, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=36, erasing=0.4, exist_ok=True, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=640, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.0001, lrf=0.01, mask_ratio=4, max_det=300, mixup=0.0, mode=train, model=rtdetr-l.pt, momentum=0.937, mo

100%|██████████| 5.35M/5.35M [00:00<00:00, 23.6MB/s]


[34m[1mAMP: [0mchecks passed 
[34m[1mtrain: [0mFast image access  (ping: 0.10.0 ms, read: 0.20.0 MB/s, size: 1.4 KB)


[34m[1mtrain: [0mScanning C:\Users\Eric Aquino\Documents\RFDETR\cifar10_detection\labels\train... 5000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:08<00:00, 585.00it/s]


[34m[1mtrain: [0mNew cache created: C:\Users\Eric Aquino\Documents\RFDETR\cifar10_detection\labels\train.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))
[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 0.20.1 MB/s, size: 1.4 KB)


[34m[1mval: [0mScanning C:\Users\Eric Aquino\Documents\RFDETR\cifar10_detection\labels\val... 1000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1000/1000 [00:01<00:00, 721.65it/s]


[34m[1mval: [0mNew cache created: C:\Users\Eric Aquino\Documents\RFDETR\cifar10_detection\labels\val.cache
Plotting labels to runs\detect\rtdetr_cifar10\labels.jpg... 
[34m[1moptimizer:[0m AdamW(lr=0.0001, momentum=0.937) with parameter groups 143 weight(decay=0.0), 206 weight(decay=0.0001), 226 bias(decay=0.0)
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns\detect\rtdetr_cifar10[0m
Starting training for 36 epochs...

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       1/36      6.64G     0.3111      2.979     0.3687         16        640: 100%|██████████| 625/625 [04:52<00:00,  2.14it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.44it/s]


                   all       1000       1000    3.4e-05        0.1   0.000286   0.000133

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       2/36      7.07G     0.1894      2.571     0.2261         20        640: 100%|██████████| 625/625 [04:34<00:00,  2.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.37it/s]


                   all       1000       1000    0.00295      0.279     0.0089    0.00365

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       3/36      6.99G     0.2092      2.138     0.2676         20        640: 100%|██████████| 625/625 [04:34<00:00,  2.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.46it/s]


                   all       1000       1000      0.226       0.29     0.0431      0.018

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       4/36      7.07G     0.2601      1.513     0.3706         24        640: 100%|██████████| 625/625 [09:38<00:00,  1.08it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.37it/s]


                   all       1000       1000      0.272      0.274     0.0751     0.0347

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       5/36      7.06G     0.2712      1.178     0.3959         22        640: 100%|██████████| 625/625 [04:34<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.45it/s]


                   all       1000       1000      0.295      0.288      0.105     0.0481

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       6/36      7.08G     0.2702      1.056     0.4056         18        640: 100%|██████████| 625/625 [04:34<00:00,  2.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.32it/s]


                   all       1000       1000      0.301      0.297     0.0896     0.0421

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       7/36      7.07G      0.271      1.003     0.4067         19        640: 100%|██████████| 625/625 [20:12<00:00,  1.94s/it]    
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.39it/s]


                   all       1000       1000      0.418      0.331      0.176     0.0742

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       8/36      7.06G     0.2678     0.9704     0.4015         21        640: 100%|██████████| 625/625 [04:33<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.43it/s]


                   all       1000       1000      0.348      0.332      0.178     0.0775

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
       9/36      7.07G     0.2697     0.9507     0.4068         23        640: 100%|██████████| 625/625 [04:34<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.41it/s]


                   all       1000       1000       0.44      0.344      0.188     0.0891

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      10/36      7.07G     0.2669     0.9351     0.4017         19        640: 100%|██████████| 625/625 [04:33<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.43it/s]


                   all       1000       1000      0.406      0.406      0.259      0.124

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      11/36      6.99G     0.2679     0.9115     0.4062         20        640: 100%|██████████| 625/625 [04:34<00:00,  2.27it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.43it/s]


                   all       1000       1000      0.429      0.462      0.323      0.171

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      12/36      6.98G     0.2631     0.8973      0.398         22        640: 100%|██████████| 625/625 [04:34<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.40it/s]


                   all       1000       1000      0.492       0.48      0.342      0.181

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      13/36      7.07G     0.2616     0.8765     0.3977         16        640: 100%|██████████| 625/625 [23:23<00:00,  2.25s/it]   
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.32it/s]


                   all       1000       1000      0.374      0.469      0.321      0.171

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      14/36      7.07G     0.2642     0.8528     0.4021         21        640: 100%|██████████| 625/625 [04:33<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.45it/s]


                   all       1000       1000      0.529      0.524       0.39      0.175

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      15/36      7.06G     0.2611     0.8424     0.3975         20        640: 100%|██████████| 625/625 [05:39<00:00,  1.84it/s]  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.42it/s]


                   all       1000       1000      0.422      0.515      0.402        0.2

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      16/36      6.97G     0.2618     0.8188     0.3975         23        640: 100%|██████████| 625/625 [04:34<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.41it/s]


                   all       1000       1000      0.479      0.555      0.441      0.239

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      17/36      6.97G     0.2633     0.7991     0.4019         22        640: 100%|██████████| 625/625 [04:36<00:00,  2.26it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.45it/s]


                   all       1000       1000      0.557        0.6      0.492      0.266

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      18/36      6.97G     0.2615     0.7906     0.3967         24        640: 100%|██████████| 625/625 [06:53<00:00,  1.51it/s]  
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.38it/s]


                   all       1000       1000      0.532      0.602      0.488      0.233

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      19/36      6.99G     0.2609     0.7715      0.398         23        640: 100%|██████████| 625/625 [04:45<00:00,  2.19it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:18<00:00,  3.35it/s]


                   all       1000       1000       0.52      0.605      0.482      0.268

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      20/36      6.98G     0.2608     0.7664     0.3985         18        640: 100%|██████████| 625/625 [06:21<00:00,  1.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:18<00:00,  3.37it/s]


                   all       1000       1000      0.611      0.639      0.537      0.275

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      21/36      7.07G      0.259     0.7495     0.3911         26        640: 100%|██████████| 625/625 [06:20<00:00,  1.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:18<00:00,  3.38it/s]


                   all       1000       1000      0.586      0.596        0.5      0.288

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      22/36      7.07G     0.2602      0.735     0.3936         15        640: 100%|██████████| 625/625 [06:20<00:00,  1.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:18<00:00,  3.38it/s]


                   all       1000       1000      0.599      0.639      0.538      0.271

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      23/36      7.07G     0.2591     0.7314     0.3957         25        640: 100%|██████████| 625/625 [06:18<00:00,  1.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:18<00:00,  3.32it/s]


                   all       1000       1000      0.622      0.656      0.561      0.313

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      24/36      6.98G     0.2584     0.7134     0.3952         25        640: 100%|██████████| 625/625 [06:19<00:00,  1.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:18<00:00,  3.38it/s]


                   all       1000       1000      0.621      0.662      0.569      0.335

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      25/36      7.06G     0.2597     0.7105     0.3937         17        640: 100%|██████████| 625/625 [06:20<00:00,  1.64it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:18<00:00,  3.34it/s]


                   all       1000       1000      0.671      0.664      0.597      0.328

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      26/36      7.07G     0.2588     0.6946     0.3931         24        640: 100%|██████████| 625/625 [06:19<00:00,  1.65it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:18<00:00,  3.35it/s]


                   all       1000       1000      0.655       0.66      0.594      0.333
Closing dataloader mosaic
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, method='weighted_average', num_output_channels=3), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      27/36         7G     0.2199     0.5939     0.5063          8        640: 100%|██████████| 625/625 [06:14<00:00,  1.67it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:19<00:00,  3.27it/s]


                   all       1000       1000      0.717      0.741      0.665      0.333

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      28/36      6.98G     0.2094     0.5428     0.4688          8        640: 100%|██████████| 625/625 [05:23<00:00,  1.93it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.40it/s]


                   all       1000       1000      0.736      0.751      0.691      0.385

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      29/36      6.98G     0.2092     0.5312     0.4693          8        640: 100%|██████████| 625/625 [04:34<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.43it/s]


                   all       1000       1000      0.742      0.748      0.688      0.376

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      30/36      7.07G     0.2068      0.518     0.4622          8        640: 100%|██████████| 625/625 [04:34<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.39it/s]


                   all       1000       1000      0.763       0.77      0.708      0.414

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      31/36      7.07G     0.2054     0.5039     0.4606          8        640: 100%|██████████| 625/625 [04:34<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.43it/s]


                   all       1000       1000      0.752      0.768      0.708      0.416

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      32/36      7.07G     0.2062     0.5017     0.4621          8        640: 100%|██████████| 625/625 [04:33<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.39it/s]


                   all       1000       1000      0.764      0.781      0.722       0.43

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      33/36      7.07G     0.2042     0.4988     0.4524          8        640: 100%|██████████| 625/625 [04:34<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.42it/s]


                   all       1000       1000      0.769      0.781      0.724      0.424

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      34/36      7.07G     0.2033     0.4951     0.4567          8        640: 100%|██████████| 625/625 [04:34<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.40it/s]


                   all       1000       1000      0.769      0.784      0.726      0.433

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      35/36      6.99G     0.2024       0.49     0.4559          8        640: 100%|██████████| 625/625 [04:34<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.39it/s]


                   all       1000       1000      0.766      0.771      0.717      0.426

      Epoch    GPU_mem  giou_loss   cls_loss    l1_loss  Instances       Size


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
      36/36      6.98G     0.2045     0.4935     0.4584          8        640: 100%|██████████| 625/625 [04:34<00:00,  2.28it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:14<00:00,  4.37it/s]


                   all       1000       1000      0.776      0.779       0.73      0.431

36 epochs completed in 3.900 hours.
Optimizer stripped from runs\detect\rtdetr_cifar10\weights\last.pt, 66.2MB
Optimizer stripped from runs\detect\rtdetr_cifar10\weights\best.pt, 66.2MB

Validating runs\detect\rtdetr_cifar10\weights\best.pt...
Ultralytics 8.3.160  Python-3.13.3 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
rt-detr-l summary: 302 layers, 32,004,290 parameters, 0 gradients, 103.5 GFLOPs


                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 63/63 [00:13<00:00,  4.63it/s]


                   all       1000       1000      0.766      0.786      0.725      0.434
              airplane        103        103      0.654      0.786      0.684      0.386
            automobile         89         89      0.863      0.933      0.915      0.549
                  bird        100        100       0.59       0.68      0.579      0.319
                   cat        103        103      0.672      0.477      0.393      0.244
                  deer         90         90      0.708      0.809      0.686      0.418
                   dog         86         86      0.781      0.674      0.619      0.386
                  frog        112        112      0.813      0.866       0.83      0.497
                 horse        102        102      0.847      0.824      0.801       0.49
                  ship        106        106      0.855      0.896      0.849      0.536
                 truck        109        109      0.872      0.917      0.892      0.511
Speed: 0.2ms preproce

In [5]:
# STEP 5: RT-DETR Model Evaluation
print("📊 Evaluating RT-DETR object detection performance...")

try:
    # Validate the model on the validation set
    validation_results = model.val(
        data=dataset.data_yaml,
        split='val',
        plots=True,
        save_json=True,
        conf=0.25,              # Confidence threshold
        iou=0.6,                # IoU threshold for NMS
        max_det=100,            # Maximum detections per image
        half=False,             # Use FP16 inference
        device='',              # Auto-select device
        verbose=True
    )

    # Display key metrics for object detection
    print("\n📈 Object Detection Results:")

    # Access box metrics
    if hasattr(validation_results, 'box'):
        box_metrics = validation_results.box
        print(f"  - mAP50: {box_metrics.map50:.3f}")
        print(f"  - mAP50-95: {box_metrics.map:.3f}")
        print(f"  - Precision: {box_metrics.mp:.3f}")
        print(f"  - Recall: {box_metrics.mr:.3f}")
        print(f"  - mAP50 (all classes): {box_metrics.maps}")

    # Alternative way to access metrics
    if hasattr(validation_results, 'results_dict'):
        metrics = validation_results.results_dict
        print(f"\n📊 Detailed Metrics:")
        print(f"  - Box Precision: {metrics.get('metrics/precision(B)', 'N/A')}")
        print(f"  - Box Recall: {metrics.get('metrics/recall(B)', 'N/A')}")
        print(f"  - mAP50: {metrics.get('metrics/mAP50(B)', 'N/A')}")
        print(f"  - mAP50-95: {metrics.get('metrics/mAP50-95(B)', 'N/A')}")

    # Plot training results if available
    import matplotlib.pyplot as plt
    import glob

    # Look for training plots
    plot_files = glob.glob(os.path.join('runs/detect/rtdetr_cifar10*', '*.png'))

    if plot_files:
        print(f"\n📊 Training plots saved in: runs/detect/rtdetr_cifar10/")

        # Display results plot
        results_plot = [f for f in plot_files if 'results.png' in f]
        if results_plot:
            img = plt.imread(results_plot[0])
            plt.figure(figsize=(15, 10))
            plt.imshow(img)
            plt.axis('off')
            plt.title('RT-DETR Training Results')
            plt.tight_layout()
            plt.show()

        # Display confusion matrix
        confusion_plot = [f for f in plot_files if 'confusion_matrix.png' in f]
        if confusion_plot:
            img = plt.imread(confusion_plot[0])
            plt.figure(figsize=(12, 10))
            plt.imshow(img)
            plt.axis('off')
            plt.title('RT-DETR Confusion Matrix')
            plt.tight_layout()
            plt.show()

        # Display PR curve
        pr_curve_plot = [f for f in plot_files if 'PR_curve.png' in f]
        if pr_curve_plot:
            img = plt.imread(pr_curve_plot[0])
            plt.figure(figsize=(12, 8))
            plt.imshow(img)
            plt.axis('off')
            plt.title('Precision-Recall Curve')
            plt.tight_layout()
            plt.show()

        # Display F1 curve
        f1_curve_plot = [f for f in plot_files if 'F1_curve.png' in f]
        if f1_curve_plot:
            img = plt.imread(f1_curve_plot[0])
            plt.figure(figsize=(12, 8))
            plt.imshow(img)
            plt.axis('off')
            plt.title('F1-Confidence Curve')
            plt.tight_layout()
            plt.show()

except Exception as e:
    print(f"⚠️ Evaluation error: {e}")
    print("Continuing with manual evaluation...")

    # Manual evaluation using validation images
    print("\n🔄 Performing manual evaluation...")
    try:
        val_images_dir = f"{dataset.location}/images/val"
        val_images = [f for f in os.listdir(val_images_dir) if f.endswith('.jpg')][:10]

        correct_detections = 0
        total_detections = 0

        for img_file in val_images:
            img_path = os.path.join(val_images_dir, img_file)

            # Run detection
            results = model(img_path, conf=0.25, verbose=False)

            if results and len(results) > 0:
                result = results[0]

                # Count detections
                if result.boxes is not None:
                    num_detections = len(result.boxes)
                    total_detections += num_detections

                    # For CIFAR-10, we expect exactly 1 detection per image
                    if num_detections == 1:
                        correct_detections += 1

        if total_detections > 0:
            detection_accuracy = correct_detections / len(val_images)
            avg_detections = total_detections / len(val_images)
            print(f"📊 Manual evaluation results:")
            print(f"  - Images with correct detection count: {correct_detections}/{len(val_images)} ({detection_accuracy:.3f})")
            print(f"  - Average detections per image: {avg_detections:.2f}")

    except Exception as eval_error:
        print(f"❌ Manual evaluation failed: {eval_error}")

print(f"\n🎯 RT-DETR Performance Notes:")
print("  - mAP50: Mean Average Precision at IoU threshold 0.5")
print("  - mAP50-95: Mean Average Precision averaged over IoU thresholds 0.5-0.95")
print("  - Precision: TP / (TP + FP) - How many detections were correct")
print("  - Recall: TP / (TP + FN) - How many objects were detected")
print("  - Good mAP50 for CIFAR-10: > 0.7")
print("  - Good mAP50-95 for CIFAR-10: > 0.4")

📊 Evaluating RT-DETR object detection performance...
Ultralytics 8.3.160  Python-3.13.3 torch-2.7.1+cu118 CUDA:0 (NVIDIA GeForce RTX 4060 Laptop GPU, 8188MiB)
rt-detr-l summary: 302 layers, 32,004,290 parameters, 0 gradients, 103.5 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.00.0 ms, read: 29.311.5 MB/s, size: 1.3 KB)


[34m[1mval: [0mScanning C:\Users\Eric Aquino\Documents\RFDETR\cifar10_detection\labels\val.cache... 1000 images, 0 backgrounds, 0 corrupt: 100%|██████████| 1000/1000 [00:00<?, ?it/s]
                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100%|██████████| 125/125 [00:22<00:00,  5.50it/s]


                   all       1000       1000       0.75      0.782       0.79      0.496
              airplane        103        103      0.594      0.796      0.737      0.432
            automobile         89         89      0.856      0.933      0.936      0.588
                  bird        100        100      0.607       0.68      0.676      0.391
                   cat        103        103      0.681      0.456      0.532      0.355
                  deer         90         90      0.657      0.789      0.729      0.469
                   dog         86         86      0.787      0.686      0.745       0.49
                  frog        112        112      0.817      0.875      0.888      0.551
                 horse        102        102      0.835      0.794      0.846      0.546
                  ship        106        106      0.841      0.896      0.887      0.584
                 truck        109        109      0.826      0.917       0.92      0.552
Speed: 0.2ms preproce

<Figure size 1500x1000 with 1 Axes>

<Figure size 1200x1000 with 1 Axes>

<Figure size 1200x800 with 1 Axes>

<Figure size 1200x800 with 1 Axes>


🎯 RT-DETR Performance Notes:
  - mAP50: Mean Average Precision at IoU threshold 0.5
  - mAP50-95: Mean Average Precision averaged over IoU thresholds 0.5-0.95
  - Precision: TP / (TP + FP) - How many detections were correct
  - Recall: TP / (TP + FN) - How many objects were detected
  - Good mAP50 for CIFAR-10: > 0.7
  - Good mAP50-95 for CIFAR-10: > 0.4


In [6]:
# STEP 6: Visualize RT-DETR Object Detection Predictions
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image, ImageDraw, ImageFont
import os
import random
import cv2

print("🖼️ Running RT-DETR object detection inference and visualization...")

def visualize_detection_predictions(image_path, model, conf_threshold=0.25, iou_threshold=0.6):
    """Run object detection inference and return results"""
    try:
        # Run prediction
        results = model(
            image_path,
            conf=conf_threshold,
            iou=iou_threshold,
            verbose=False
        )

        if results and len(results) > 0:
            result = results[0]

            # Get annotated image
            annotated_img = result.plot(
                line_width=2,
                font_size=12,
                font='Arial.ttf',
                pil=False,  # Return as numpy array
                img=None,
                labels=True,
                boxes=True,
                conf=True
            )

            # Convert BGR to RGB for matplotlib
            if annotated_img is not None:
                annotated_img_rgb = cv2.cvtColor(annotated_img, cv2.COLOR_BGR2RGB)
            else:
                annotated_img_rgb = None

            return annotated_img_rgb, result
        else:
            return None, None

    except Exception as e:
        print(f"❌ Detection error: {e}")
        return None, None

# Get validation images
val_images_dir = os.path.join(dataset.location, "images", "val")
val_labels_dir = os.path.join(dataset.location, "labels", "val")

if not os.path.exists(val_images_dir):
    print(f"❌ Validation directory not found: {val_images_dir}")
else:
    image_files = [f for f in os.listdir(val_images_dir) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]

    if not image_files:
        print("❌ No images found in validation directory")
    else:
        # Select 6 random images for visualization
        sample_images = random.sample(image_files, min(6, len(image_files)))

        plt.figure(figsize=(18, 12))

        detection_results = []

        for i, img_file in enumerate(sample_images):
            image_path = os.path.join(val_images_dir, img_file)

            # Load original image for display
            original_img = Image.open(image_path)

            # Get ground truth from label file
            label_file = img_file.replace('.jpg', '.txt').replace('.jpeg', '.txt').replace('.png', '.txt')
            label_path = os.path.join(val_labels_dir, label_file)

            true_class = "unknown"
            if os.path.exists(label_path):
                with open(label_path, 'r') as f:
                    label_line = f.readline().strip()
                    if label_line:
                        class_id = int(label_line.split()[0])
                        true_class = cifar10_classes[class_id]

            # Run detection
            annotated_img, result = visualize_detection_predictions(image_path, model, conf_threshold=0.25)

            # Plot original image
            plt.subplot(2, 6, i+1)
            plt.imshow(original_img)
            plt.axis('off')
            plt.title(f'Original\nTrue: {true_class}', fontsize=10)

            # Plot detection result
            plt.subplot(2, 6, i+7)
            if annotated_img is not None:
                plt.imshow(annotated_img)
                plt.axis('off')

                # Extract detection info
                detections_info = []
                if result.boxes is not None and len(result.boxes) > 0:
                    for j in range(len(result.boxes)):
                        conf = result.boxes.conf[j].item()
                        cls_id = int(result.boxes.cls[j].item())
                        pred_class = result.names[cls_id]
                        detections_info.append(f"{pred_class} ({conf:.2f})")

                    title = f"RT-DETR Detection\n" + "\n".join(detections_info[:2])  # Show max 2 detections

                    # Color code based on accuracy
                    color = 'green' if any(true_class in det for det in detections_info) else 'red'
                    plt.title(title, fontsize=9, color=color)
                else:
                    plt.title("RT-DETR Detection\nNo objects detected", fontsize=9, color='red')
            else:
                plt.imshow(original_img)
                plt.axis('off')
                plt.title("Detection Failed", fontsize=9, color='red')

            # Store results for analysis
            detection_results.append({
                'image': img_file,
                'true_class': true_class,
                'result': result
            })

        plt.tight_layout()
        plt.suptitle('RT-DETR Object Detection Results on CIFAR-10', fontsize=16, y=0.98)
        plt.show()

        # Analyze detection results
        print(f"\n🔍 Detection Analysis:")
        correct_class_detections = 0
        total_detections = 0
        no_detection_count = 0

        for res in detection_results:
            result = res['result']
            true_class = res['true_class']

            if result and result.boxes is not None and len(result.boxes) > 0:
                # Get the most confident detection
                max_conf_idx = result.boxes.conf.argmax()
                pred_class_id = int(result.boxes.cls[max_conf_idx].item())
                pred_class = result.names[pred_class_id]
                confidence = result.boxes.conf[max_conf_idx].item()

                status = "✅" if pred_class == true_class else "❌"
                print(f"  {status} {res['image']}: True={true_class}, Pred={pred_class} ({confidence:.3f})")

                if pred_class == true_class:
                    correct_class_detections += 1
                total_detections += 1
            else:
                print(f"  ⚪ {res['image']}: True={true_class}, No detection")
                no_detection_count += 1

        # Summary statistics
        total_images = len(detection_results)
        class_accuracy = correct_class_detections / total_images if total_images > 0 else 0
        detection_rate = total_detections / total_images if total_images > 0 else 0

        print(f"\n📊 Detection Summary:")
        print(f"  - Total images: {total_images}")
        print(f"  - Images with detections: {total_detections}")
        print(f"  - Correct class detections: {correct_class_detections}")
        print(f"  - No detections: {no_detection_count}")
        print(f"  - Detection rate: {detection_rate:.3f}")
        print(f"  - Classification accuracy: {class_accuracy:.3f}")

        # Class distribution analysis
        detected_classes = {}
        for res in detection_results:
            result = res['result']
            if result and result.boxes is not None and len(result.boxes) > 0:
                for j in range(len(result.boxes)):
                    cls_id = int(result.boxes.cls[j].item())
                    pred_class = result.names[cls_id]
                    detected_classes[pred_class] = detected_classes.get(pred_class, 0) + 1

        if detected_classes:
            print(f"\n📈 Detected Classes Distribution:")
            for class_name, count in detected_classes.items():
                print(f"  - {class_name}: {count} detections")

🖼️ Running RT-DETR object detection inference and visualization...


<Figure size 1800x1200 with 12 Axes>


🔍 Detection Analysis:
  ✅ val_img_00913.jpg: True=deer, Pred=deer (0.862)
  ✅ val_img_00929.jpg: True=airplane, Pred=airplane (0.844)
  ✅ val_img_00223.jpg: True=deer, Pred=deer (0.702)
  ✅ val_img_00516.jpg: True=ship, Pred=ship (0.881)
  ✅ val_img_00142.jpg: True=frog, Pred=frog (0.853)
  ✅ val_img_00288.jpg: True=horse, Pred=horse (0.572)

📊 Detection Summary:
  - Total images: 6
  - Images with detections: 6
  - Correct class detections: 6
  - No detections: 0
  - Detection rate: 1.000
  - Classification accuracy: 1.000

📈 Detected Classes Distribution:
  - deer: 2 detections
  - airplane: 1 detections
  - ship: 1 detections
  - frog: 1 detections
  - horse: 1 detections


In [7]:
# STEP 7: Batch RT-DETR Object Detection Inference
import time
from pathlib import Path
import json
import csv

def run_batch_detection(input_dir, output_dir, conf_threshold=0.25, iou_threshold=0.6):
    """Run RT-DETR object detection inference on all images in a directory"""
    print(f"🚀 Running batch RT-DETR object detection inference...")

    # Create output directory
    Path(output_dir).mkdir(parents=True, exist_ok=True)

    # Get all image files
    image_extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']
    image_files = []
    for ext in image_extensions:
        image_files.extend(Path(input_dir).glob(f'**/*{ext}'))  # Recursive search
        image_files.extend(Path(input_dir).glob(f'**/*{ext.upper()}'))

    if not image_files:
        print(f"❌ No images found in {input_dir}")
        return

    print(f"📊 Processing {len(image_files)} images...")

    start_time = time.time()
    results_data = []
    class_counts = {class_name: 0 for class_name in cifar10_classes}
    total_detections = 0

    # Process images
    for i, image_path in enumerate(image_files):
        try:
            # Run RT-DETR detection
            results = model(
                str(image_path),
                conf=conf_threshold,
                iou=iou_threshold,
                verbose=False
            )

            if results and len(results) > 0:
                result = results[0]

                # Process detections
                detections = []
                if result.boxes is not None and len(result.boxes) > 0:
                    for j in range(len(result.boxes)):
                        # Get detection info
                        box = result.boxes.xyxy[j].cpu().numpy()  # [x1, y1, x2, y2]
                        conf = result.boxes.conf[j].item()
                        cls_id = int(result.boxes.cls[j].item())
                        class_name = result.names[cls_id]

                        detection = {
                            'bbox': box.tolist(),
                            'confidence': conf,
                            'class_id': cls_id,
                            'class_name': class_name
                        }
                        detections.append(detection)

                        # Count classes
                        class_counts[class_name] += 1
                        total_detections += 1

                # Store image results
                result_entry = {
                    'image_path': str(image_path),
                    'image_name': image_path.name,
                    'num_detections': len(detections),
                    'detections': detections
                }

                # Try to extract true class from directory structure or filename
                parent_dir = image_path.parent.name
                if parent_dir in cifar10_classes:
                    result_entry['true_class'] = parent_dir

                    # Check if correct class was detected
                    detected_classes = [det['class_name'] for det in detections]
                    result_entry['correct_detection'] = parent_dir in detected_classes

                    # Find best detection for this true class
                    matching_detections = [det for det in detections if det['class_name'] == parent_dir]
                    if matching_detections:
                        best_detection = max(matching_detections, key=lambda x: x['confidence'])
                        result_entry['best_confidence'] = best_detection['confidence']

                results_data.append(result_entry)

            else:
                print(f"⚠️ Failed to process {image_path.name}")
                results_data.append({
                    'image_path': str(image_path),
                    'image_name': image_path.name,
                    'num_detections': 0,
                    'detections': [],
                    'error': 'Detection failed'
                })

        except Exception as e:
            print(f"❌ Error processing {image_path.name}: {e}")
            results_data.append({
                'image_path': str(image_path),
                'image_name': image_path.name,
                'num_detections': 0,
                'detections': [],
                'error': str(e)
            })
            continue

        # Progress update
        if (i + 1) % 50 == 0:
            print(f"  Processed {i + 1}/{len(image_files)} images...")

    processing_time = time.time() - start_time

    # Save results to files
    results_json_path = Path(output_dir) / 'detection_results.json'
    results_csv_path = Path(output_dir) / 'detection_summary.csv'

    # Save detailed JSON results
    with open(results_json_path, 'w') as f:
        json.dump(results_data, f, indent=2)

    # Create CSV summary
    csv_data = []
    for result in results_data:
        csv_row = {
            'image_name': result['image_name'],
            'num_detections': result['num_detections'],
            'true_class': result.get('true_class', 'unknown'),
            'correct_detection': result.get('correct_detection', False),
            'best_confidence': result.get('best_confidence', 0.0),
            'error': result.get('error', '')
        }

        # Add top detection info
        if result['detections']:
            top_detection = max(result['detections'], key=lambda x: x['confidence'])
            csv_row.update({
                'top_class': top_detection['class_name'],
                'top_confidence': top_detection['confidence']
            })
        else:
            csv_row.update({
                'top_class': '',
                'top_confidence': 0.0
            })

        csv_data.append(csv_row)

    # Save CSV
    if csv_data:
        with open(results_csv_path, 'w', newline='') as f:
            writer = csv.DictWriter(f, fieldnames=csv_data[0].keys())
            writer.writeheader()
            writer.writerows(csv_data)

    # Calculate metrics
    correct_detections = sum(1 for r in results_data if r.get('correct_detection', False))
    total_with_labels = sum(1 for r in results_data if 'true_class' in r)
    images_with_detections = sum(1 for r in results_data if r['num_detections'] > 0)

    print(f"\n✅ Batch RT-DETR detection completed!")
    print(f"⏱️ Processing time: {processing_time:.2f} seconds")
    print(f"📊 Total images processed: {len(results_data)}")
    print(f"📁 Results saved in: {output_dir}")
    print(f"⚡ Average: {len(image_files)/processing_time:.1f} images/second")
    print(f"🎯 Total detections: {total_detections}")
    print(f"🔍 Images with detections: {images_with_detections}/{len(results_data)} ({images_with_detections/len(results_data)*100:.1f}%)")

    if total_with_labels > 0:
        accuracy = correct_detections / total_with_labels
        print(f"📈 Classification accuracy: {accuracy:.3f} ({correct_detections}/{total_with_labels})")

    # Show class distribution
    print(f"\n📊 Detected Class Distribution (conf >= {conf_threshold}):")
    for class_name, count in class_counts.items():
        if count > 0:
            percentage = (count / total_detections) * 100 if total_detections > 0 else 0
            print(f"  - {class_name}: {count} ({percentage:.1f}%)")

    return results_data

# Example usage - detect objects in validation images
print("🎯 Example: Running batch detection on validation set...")

# Run on a subset of validation data
val_dir = os.path.join(dataset.location, "images", "val")
if os.path.exists(val_dir):
    # Create a smaller sample for demonstration
    sample_dir = "./temp_detection_sample"
    os.makedirs(sample_dir, exist_ok=True)

    # Copy a few images from validation set
    import shutil
    val_images = [f for f in os.listdir(val_dir) if f.endswith('.jpg')][:20]  # Sample 20 images

    sample_count = 0
    for img in val_images:
        try:
            shutil.copy2(
                os.path.join(val_dir, img),
                os.path.join(sample_dir, img)
            )
            sample_count += 1
        except Exception as e:
            print(f"Error copying {img}: {e}")

    if sample_count > 0:
        print(f"📦 Created sample with {sample_count} images")
        results = run_batch_detection(
            input_dir=sample_dir,
            output_dir="./detection_results",
            conf_threshold=0.25,
            iou_threshold=0.6
        )

        # Clean up sample directory
        shutil.rmtree(sample_dir)

        # Show some statistics
        if results:
            avg_detections = sum(r['num_detections'] for r in results) / len(results)
            max_detections = max(r['num_detections'] for r in results)
            print(f"\n📈 Additional Statistics:")
            print(f"  - Average detections per image: {avg_detections:.2f}")
            print(f"  - Maximum detections in single image: {max_detections}")
    else:
        print("❌ No sample images created for batch processing")
else:
    print("❌ Validation directory not found")

# Uncomment below to run on full validation set
# run_batch_detection(
#     input_dir=os.path.join(dataset.location, "images", "val"),
#     output_dir="./full_detection_results",
#     conf_threshold=0.25,
#     iou_threshold=0.6
# )

🎯 Example: Running batch detection on validation set...
📦 Created sample with 20 images
🚀 Running batch RT-DETR object detection inference...
📊 Processing 40 images...

✅ Batch RT-DETR detection completed!
⏱️ Processing time: 1.23 seconds
📊 Total images processed: 40
📁 Results saved in: ./detection_results
⚡ Average: 32.4 images/second
🎯 Total detections: 44
🔍 Images with detections: 40/40 (100.0%)

📊 Detected Class Distribution (conf >= 0.25):
  - airplane: 6 (13.6%)
  - automobile: 6 (13.6%)
  - cat: 4 (9.1%)
  - deer: 2 (4.5%)
  - dog: 2 (4.5%)
  - frog: 10 (22.7%)
  - horse: 4 (9.1%)
  - ship: 6 (13.6%)
  - truck: 4 (9.1%)

📈 Additional Statistics:
  - Average detections per image: 1.10
  - Maximum detections in single image: 2
