In [None]:
# Enable the autoreload extension; mode 2 re-imports modules before each cell
# is executed, so edits to imported source files are picked up.
%load_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
from pycocotools.coco import COCO
import numpy as np
import skimage.io as io
import matplotlib.pyplot as plt
import pylab
# Default size (width, height in inches) for every figure drawn in this notebook.
pylab.rcParams.update({'figure.figsize': (8.0, 10.0)})

In [None]:
import os

# Locations of the COCO 2017 dataset on disk, rooted in the user's home directory.
# expanduser('~') is used instead of os.environ["HOME"] because the HOME variable
# is not guaranteed to exist (e.g. on Windows), which would raise a KeyError.
homeDir = os.path.expanduser('~')
dataDir = f'{homeDir}/Data/coco2017'
# Directory holding the training images.
imageDir = f'{dataDir}/train2017'
# Instance annotation file for the training split.
annFile = f'{dataDir}/annotations/instances_train2017.json'
print(annFile)

In [None]:
# Create the COCO API handle from the instance annotation file.
coco = COCO(annFile)

In [None]:
# Load every category record and print them as a lookup keyed by category id.
allCatIds = coco.getCatIds()
cats = coco.loadCats(allCatIds)
id_to_cat = dict((entry['id'], entry) for entry in cats)
print(id_to_cat)

In [None]:
# Look up the images annotated with the requested categories and pick one at random.
catIds = coco.getCatIds(catNms=['person', 'dog', 'skateboard'])
imgIds = coco.getImgIds(catIds=catIds)
# randint's upper bound is exclusive, so this is always a valid list index.
randomIndex = np.random.randint(0, len(imgIds))
img = coco.loadImgs(imgIds[randomIndex])[0]
print(img)

In [None]:
# Read the selected image from the image directory and show it without axes.
imgFile = img['file_name']
imgPath = f'{imageDir}/{imgFile}'
I = io.imread(imgPath)
plt.axis('off')
plt.imshow(I)
plt.show()

In [None]:
# Draw the image, then overlay its instance annotations for the chosen categories.
plt.imshow(I)
plt.axis('off')
annIds = coco.getAnnIds(imgIds=img['id'], catIds=catIds, iscrowd=None)
anns = coco.loadAnns(annIds)
for item in anns:
    # Print the field names of this annotation, followed by its box and category id.
    for k in item.keys():
        print(k)
    print(item['bbox'])
    print(item['category_id'])
coco.showAnns(anns)

In [None]:
# initialize COCO api for person keypoints annotations
# The displayed image is drawn from the train2017 split, so the keypoints must be
# read from the train2017 file as well; the val2017 file has no annotations for
# train image ids and the keypoint overlay would silently come out empty.
annFile = f'{dataDir}/annotations/person_keypoints_train2017.json'
coco_kps = COCO(annFile)

In [None]:
# Draw the image again and overlay the keypoint annotations found for it.
plt.imshow(I)
plt.axis('off')
ax = plt.gca()
annIds = coco_kps.getAnnIds(
    imgIds=img['id'],
    catIds=catIds,
    iscrowd=None,
)
anns = coco_kps.loadAnns(annIds)
coco_kps.showAnns(anns)

In [None]:
# Create a second COCO API handle, this one over the caption annotation file.
annFile = f'{dataDir}/annotations/captions_train2017.json'
coco_caps = COCO(annotation_file=annFile)

In [None]:
# Fetch the caption annotations of the selected image, show them, then show the image.
annIds = coco_caps.getAnnIds(imgIds=img['id'])
anns = coco_caps.loadAnns(annIds)
coco_caps.showAnns(anns)
plt.imshow(I)
plt.axis('off')
plt.show()

In [None]:
from transformers import DetrImageProcessor, DetrForObjectDetection
import torch
from PIL import Image
import requests

# Pick the compute device: CUDA if available, otherwise Apple MPS, otherwise CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
elif torch.backends.mps.is_available():
    device = torch.device('mps')
else:
    device = torch.device('cpu')

# `I` is the NumPy array read by skimage. Wrap it in a PIL image so that
# `image.size` is the (width, height) tuple used for `target_sizes` below
# (on an ndarray `.size` is a plain int and cannot be sliced), and convert to
# RGB so a single-channel image is expanded to three channels.
image = Image.fromarray(I).convert("RGB")

# you can specify the revision tag if you don't want the timm dependency
processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-101")
model = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-101").to(device)

# return_tensors="pt" makes the processor produce torch tensors, which is what
# the model's forward pass takes and what `.to(device)` can move.
inputs = processor(images=image, return_tensors="pt").to(device)
# Inference only: skip autograd bookkeeping to save memory.
with torch.no_grad():
    outputs = model(**inputs)

# convert outputs (bounding boxes and class logits) to COCO API
# let's only keep detections with score > 0.9
# PIL reports (width, height); reverse it to get (height, width).
target_sizes = torch.tensor([image.size[::-1]])
results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.9)[0]

# Report each kept detection: class name, confidence, and box rounded to 2 decimals.
for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    box = [round(i, 2) for i in box.tolist()]
    print(
            f"Detected {model.config.id2label[label.item()]} with confidence "
            f"{round(score.item(), 3)} at location {box}"
    )
