In [None]:
# Load IPython's autoreload extension and switch it to mode 2.
%load_ext autoreload
%autoreload 2
# Select the inline matplotlib backend for this notebook.
%matplotlib inline

In [None]:
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
# Default size applied to figures created later in the notebook.
pylab.rcParams.update({'figure.figsize': (8.0, 10.0)})

In [None]:
import os

# Root of the local COCO 2017 copy, resolved relative to the user's home
# directory. os.path.expanduser("~") is used instead of os.environ["HOME"]
# because HOME is not guaranteed to be set (for example on Windows), in which
# case the direct environment lookup raises KeyError.
homeDir = os.path.expanduser("~")
dataDir = f'{homeDir}/Data/coco2017'
# Directory holding the training images and the matching instance annotations.
imageDir = f'{dataDir}/train2017'
annFile = f'{dataDir}/annotations/instances_train2017.json'

In [None]:
# Build the COCO API object from the instance-annotation JSON file.
coco = COCO(annFile)

In [None]:
# Load every category record and index the records by their category id.
cats = coco.loadCats(ids=coco.getCatIds())
id_to_cat = dict((record['id'], record) for record in cats)

In [None]:
# Look up the images annotated with every one of the requested categories
# (pycocotools intersects the per-category image sets) and pick one at random.
catIds = coco.getCatIds(catNms=['person','dog','skateboard'])
imgIds = coco.getImgIds(catIds=catIds)
# Fail with an explicit message rather than the opaque "low >= high" error
# that np.random.randint raises when asked to sample from an empty range.
if not imgIds:
    raise ValueError(f"no images found containing all category ids {catIds}")
# No seed is set, so a different image may be chosen on every run.
img = coco.loadImgs(imgIds[np.random.randint(0, len(imgIds))])[0]

In [None]:
# Load the selected image from disk and display it without axes.
from PIL import Image

imgFile = img['file_name']
# Open inside a context manager so the underlying file handle is released.
# convert('RGB') forces the pixel data to be read before the file is closed
# and guarantees three channels, so a single-channel image is not rendered by
# imshow through its default (false-colour) colormap.
with Image.open(f'{imageDir}/{imgFile}') as imgHandle:
    I = imgHandle.convert('RGB')
plt.axis('off')
plt.imshow(I)
plt.show()

In [None]:
import matplotlib as mpl
# Import the patches submodule explicitly instead of relying on pyplot having
# loaded it as a side effect (which is what makes `mpl.patches` resolvable).
import matplotlib.patches as mpatches

# Draw the ground-truth annotations of the selected image: one dotted green
# rectangle per annotation, labelled with the annotation's category name.
fig, ax = plt.subplots()
ax.imshow(I)
annIds = coco.getAnnIds(imgIds=img['id'])
anns = coco.loadAnns(annIds)
for item in anns:
    # The bbox entry is unpacked as (x, y, width, height).
    x0, y0, width, height = item['bbox']
    ax.add_patch(
        mpatches.Rectangle(
            (x0, y0),
            width,
            height,
            fill=False,
            edgecolor=(0, 1, 0),
            linewidth=1,
            linestyle=':',
        )
    )
    # Show the human-readable category name; fall back to the numeric id if
    # the id is missing from the category index.
    category = id_to_cat.get(item['category_id'])
    label = category['name'] if category else f"{item['category_id']}"
    ax.text(
        x0, y0, label,
        size=10, family="sans-serif",
        bbox={
            "facecolor": "gray", "alpha": 0.5,
            "pad": 0.7, "edgecolor": "none"
        },
        verticalalignment="top",
        color=(1, 1, 1),
        zorder=10,
    )
plt.show()

In [None]:
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image
import requests
# Imported here so this cell does not depend on `mpl` from an earlier cell.
import matplotlib.patches as mpatches

# Run a pretrained DETR detector on the selected image and draw every
# detection above the score threshold as a dotted red box with its label.

CHECKPOINT = "facebook/detr-resnet-101"
SCORE_THRESHOLD = 0.9  # only detections scoring above this are kept

# Pick the device: CUDA first, then Apple MPS, then CPU. The MPS backend is
# looked up with getattr because torch builds without it would otherwise raise
# AttributeError on `torch.backends.mps`.
mps_backend = getattr(torch.backends, "mps", None)
if torch.cuda.is_available():
    print('Using GPU')
    device = torch.device('cuda')
elif mps_backend is not None and mps_backend.is_available():
    print('Using MPS')
    device = torch.device('mps')
else:
    print('Using CPU')
    device = torch.device('cpu')

# Guarantee a three-channel image; a single-channel (grayscale) file would
# otherwise be handed to the processor as a 2-D array.
image = I.convert("RGB")

processor = DetrImageProcessor.from_pretrained(CHECKPOINT)
model = DetrForObjectDetection.from_pretrained(CHECKPOINT).to(device)

inputs = processor(images=image, return_tensors="pt").to(device)
for k, v in inputs.items():
    print(f"{k}: {v.shape}")

# Only the forward pass needs gradient tracking switched off.
with torch.no_grad():
    outputs = model(**inputs)

# convert outputs (bounding boxes and class logits) to COCO API
# PIL's size is (width, height); the post-processing step is given
# (height, width), hence the reversal.
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(
    outputs, target_sizes=target_sizes, threshold=SCORE_THRESHOLD
)[0]

# display the results
fig, ax = plt.subplots()
ax.imshow(image)
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    # Boxes are unpacked as corner coordinates; Rectangle wants an origin
    # plus width and height.
    x0, y0, x1, y1 = box
    width, height = x1 - x0, y1 - y0
    ax.add_patch(
        mpatches.Rectangle(
            (x0, y0),
            width,
            height,
            fill=False,
            edgecolor=(1, 0, 0),
            linewidth=3,
            linestyle=':',
        )
    )
    ax.text(
        x0, y0, f"{model.config.id2label[label.item()]} (score:{round(score.item(), 3)})",
        size=10, family="sans-serif",
        bbox={
            "facecolor": "gray", "alpha": 0.5,
            "pad": 0.7, "edgecolor": "none"
        },
        verticalalignment="top",
        color=(1, 1, 1),
        zorder=10)
plt.show()