In [1]:
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image
import requests
import pickle

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
filename = "../test_image.jpg"
image = Image.open(filename)

In [4]:
# you can specify the revision tag if you don't want the timm dependency
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50", revision="no_timm")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50", revision="no_timm")

inputs = processor(images=image, return_tensors="pt")
outputs = model(**inputs)

In [5]:
# convert outputs (bounding boxes and class logits) to COCO API
# let's only keep detections with score > 0.9
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]

for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    print(
            f"Detected {model.config.id2label[label.item()]} with confidence "
            f"{round(score.item(), 3)} at location {box}"
    )

Detected bottle with confidence 0.969 at location [1007.24, 603.93, 1382.07, 1287.15]
Detected bottle with confidence 0.963 at location [715.08, 562.39, 1017.66, 1226.52]
Detected cup with confidence 0.975 at location [1392.92, 468.19, 1720.44, 1240.55]


In [10]:
# Goal: store objects along with maximum perceived confidences in a database

In [None]:
model.config.id2label[label.item()]

In [8]:
object_labels = [model.config.id2label[label.item()] for label in results['labels']]

In [15]:
found_objects = dict(zip(object_labels, results['scores'].tolist()))

In [16]:
found_objects

{'bottle': 0.9630879759788513, 'cup': 0.9751743674278259}

In [17]:
known_objects = {'bottle': .5, 'cup': .99}

In [18]:
for object in found_objects:
    if object in known_objects:
        if found_objects[object] > known_objects[object]:
            known_objects[object] = found_objects[object]
    else:
        known_objects[object] = found_objects[object]

In [19]:
found_objects

{'bottle': 0.9630879759788513, 'cup': 0.9751743674278259}

In [20]:
known_objects

{'bottle': 0.9630879759788513, 'cup': 0.99}

In [21]:
with open('object_database.pickle', 'wb') as f:
    pickle.dump(known_objects, f)

In [22]:
with open('object_database.pickle', 'rb') as f:
    loaded_objects = pickle.load(f)