In [1]:
from transformers import DetrImageProcessor, DetrForObjectDetection
from PIL import Image, ImageDraw, ImageFont
import requests
import torch

In [2]:
# Load the test picture. The context manager closes the file handle as soon as
# the pixels are decoded, and convert("RGB") yields a fully loaded 3-channel
# copy so grayscale / CMYK / palette inputs are normalised before they reach
# the detector (assumption: the DETR processor expects RGB input).
with Image.open("test_imgs/Lionel-Messi-Kylian-Mbappe-Paris-gagnant.jpg") as source_image:
    image = source_image.convert("RGB")

In [3]:
# Minimum confidence a detection must reach to be kept.
SCORE_THRESHOLD = 0.95

# The "no_timm" revision tag avoids the timm dependency.
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")

inputs = processor(images=image, return_tensors="pt")

# Inference only: disable autograd so no computation graph is built or kept
# alive for the forward pass.
with torch.no_grad():
    outputs = model(**inputs)

# Convert raw outputs (bounding boxes and class logits) into per-image
# detections. PIL reports size as (width, height); reversing it gives the
# (height, width) order passed as target size.
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(
    outputs, target_sizes=target_sizes, threshold=SCORE_THRESHOLD
)[0]

# Report every retained detection: class name, confidence and box corners.
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    print(
        f"Detected {model.config.id2label[label.item()]} with confidence "
        f"{round(score.item(), 3)} at location {box}"
    )

Detected person with confidence 0.993 at location [658.68, 480.52, 877.11, 1129.2]
Detected person with confidence 0.995 at location [32.29, 471.81, 449.33, 1148.73]
Detected person with confidence 0.999 at location [766.33, 186.6, 1330.44, 1316.73]
Detected person with confidence 0.968 at location [1081.24, 119.68, 1284.37, 455.65]
Detected person with confidence 1.0 at location [1247.76, 102.37, 1765.05, 1292.22]


In [4]:
# Draw on a copy so `image` stays untouched: re-running this cell or the
# detection cell then never operates on an already-annotated picture.
annotated_image = image.copy()
draw = ImageDraw.Draw(annotated_image)

# Optional: use a TrueType font for the labels when it is available,
# otherwise fall back to Pillow's built-in default font.
try:
    font = ImageFont.truetype("arial.ttf", size=15)
except OSError:
    font = ImageFont.load_default()

# Walk through the detections and draw each box with its caption.
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    # Snap the float box corners to the nearest pixel.
    x_min, y_min, x_max, y_max = (round(v) for v in box.tolist())

    # Bounding box outline.
    draw.rectangle([x_min, y_min, x_max, y_max], outline="red", width=3)

    # Caption: class name and confidence.
    label_text = f"{model.config.id2label[label.item()]}: {round(score.item(), 2)}"

    # Measure the text at the origin; only the bottom extent is needed to
    # place the caption so that its lowest pixel row rests on the box top.
    _, _, _, text_bottom = draw.textbbox((0, 0), label_text, font=font)

    # Clamp to the canvas so captions of boxes touching the top or left edge
    # stay visible instead of being drawn at negative coordinates.
    text_origin = (max(x_min, 0), max(y_min - text_bottom, 0))

    # Take the background from the bbox at the real anchor: this includes the
    # font's top/left offsets, so the red patch covers exactly the glyphs.
    text_bg = draw.textbbox(text_origin, label_text, font=font)
    draw.rectangle(text_bg, fill="red")

    # Caption text on top of the background patch.
    draw.text(text_origin, label_text, fill="white", font=font)

# Show the annotated picture in the system image viewer.
annotated_image.show()
# To save the annotated picture instead:
# annotated_image.save("output_image.jpg")
