In [None]:
import fiftyone as fo
import supervision as sv
from ultralytics import YOLO
from PIL import Image
from transformers import AutoProcessor, AutoModelForObjectDetection
import torch

In [None]:
# Get image from the FiftyOne (Voxel51) dataset
dataset = fo.load_dataset("fisheye8k-100")
sample = dataset.first()
img_filepath = sample.filepath
# Force 3-channel RGB: Image.open keeps the file's native mode, and a
# grayscale / palette / RGBA image is presumably not what the detectors and
# annotators in the following cells expect.
image = Image.open(img_filepath).convert("RGB")

In [None]:
# Inference YOLO
# The YOLO network gets its own name: the DETR cell binds `model`, and
# `model.config.id2label` is read later when the transformers detections are
# built. Sharing the name would let a re-run of this cell silently replace
# the object that lookup relies on.
yolo_model = YOLO("yolo12n.pt")
# A single image was passed in, so keep only the first entry of the output.
result = yolo_model(image)[0]

In [None]:
# Inference DETR
# The processor and the detector are both loaded from the same checkpoint.
detr_checkpoint = "facebook/detr-resnet-50"
processor = AutoProcessor.from_pretrained(detr_checkpoint)
model = AutoModelForObjectDetection.from_pretrained(detr_checkpoint)

inputs = processor(images=image, return_tensors="pt")

# Forward pass only, so gradient tracking is switched off.
with torch.no_grad():
    outputs = model(**inputs)

# `image.size` unpacks as (width, height); the post-processing step is handed
# the sizes in (height, width) order, one row per image.
width, height = image.size
target_size = torch.tensor([[height, width]])
results = processor.post_process_object_detection(
    outputs=outputs,
    target_sizes=target_size,
)[0]

In [None]:
# Wrap each model's raw output in a supervision `Detections` object
detections_hf = sv.Detections.from_transformers(
    transformers_results=results,
    id2label=model.config.id2label,
)
detections_ultralytics = sv.Detections.from_ultralytics(result)

# Combine detections (YOLO first, then DETR)
detections = sv.Detections.merge([detections_ultralytics, detections_hf])


In [None]:
# Add detections to image
box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator()

# Draw on a copy: annotators may paint directly onto the scene they are given
# (depends on the supervision version -- TODO confirm). Leaving `image`
# untouched keeps this cell safe to re-run and the source image reusable.
annotated_image = box_annotator.annotate(
    scene=image.copy(), detections=detections)
# The intermediate result is already a separate image, so it is reused as-is.
annotated_image = label_annotator.annotate(
    scene=annotated_image, detections=detections)

# Visualize in Notebook
annotated_image