In [2]:
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog
import torch
import os, json, cv2, random


In [8]:
# Keypoint R-CNN configuration taken from the detectron2 model zoo.
# Reference config:
# https://github.com/facebookresearch/detectron2/blob/main/configs/COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml
cfg = get_cfg()
cfg.merge_from_file(
    model_zoo.get_config_file("COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml")
)
# Test-time score threshold for the ROI heads.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
# Pretrained checkpoint URL that matches the config merged above.
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(
    "COCO-Keypoints/keypoint_rcnn_R_50_FPN_3x.yaml"
)
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    cfg.MODEL.DEVICE = "cuda"
else:
    cfg.MODEL.DEVICE = "cpu"
# NOTE(review): `model` is bound to a DefaultPredictor here, but the following
# cell rebinds `model` to the result of build_model(cfg); confirm whether this
# predictor instance is still needed.
model = DefaultPredictor(cfg)

In [9]:
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.modeling import build_model

# Build the model described by `cfg`, then load the checkpoint referenced by
# cfg.MODEL.WEIGHTS into it.
model = build_model(cfg)
checkpointer = DetectionCheckpointer(model)
# Kept as the last expression so the value returned by load() is shown as the
# cell output.
checkpointer.load(cfg.MODEL.WEIGHTS)


{'__author__': 'Detectron2 Model Zoo'}

In [36]:
import detectron2.data.transforms as T

# --- Load the input image ----------------------------------------------------
input_path = "./input/input.jpg"
img = cv2.imread(input_path)
if img is None:
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an opaque TypeError on indexing.
    raise FileNotFoundError(
        f"cv2.imread could not load {input_path!r} (missing file or unsupported format)"
    )
original_image = img

# --- Pre-processing set-up ---------------------------------------------------
model.eval()
input_format = cfg.INPUT.FORMAT
# Resize transform configured from the test-time size settings in cfg.
aug = T.ResizeShortestEdge(
    [cfg.INPUT.MIN_SIZE_TEST, cfg.INPUT.MIN_SIZE_TEST], cfg.INPUT.MAX_SIZE_TEST
)

# --- Inference ---------------------------------------------------------------
with torch.no_grad():  # inference only: no autograd bookkeeping needed
    if input_format == "RGB":
        # The image was read by OpenCV; reverse the channel axis when the
        # config asks for RGB input.
        original_image = original_image[:, :, ::-1]
    # Size of the un-resized image, passed alongside the resized tensor.
    height, width = original_image.shape[:2]
    image = aug.get_transform(original_image).apply_image(original_image)
    # HWC array -> CHW float32 tensor.
    image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))
    # Tensor.to() returns a new tensor rather than moving in place, so the
    # result must be re-assigned for the transfer to take effect.
    image = image.to(cfg.MODEL.DEVICE)

    inputs = {"image": image, "height": height, "width": width}

    # The model takes a list of per-image dicts; take the result for our one image.
    predictions = model([inputs])[0]

# --- Visualise and save ------------------------------------------------------
# The visualizer is given the channel-reversed image, and its rendering is
# channel-reversed again before being handed back to OpenCV for writing.
v = Visualizer(img[:, :, ::-1], MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), scale=1.2)
out = v.draw_instance_predictions(predictions["instances"].to("cpu"))
output_path = "./output/output_img2.jpg"
print(predictions['instances'])

# cv2.imwrite returns False (without raising) when the target directory does
# not exist, so create it first and check the result explicitly.
output_dir = os.path.dirname(output_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
saved = cv2.imwrite(output_path, out.get_image()[:, :, ::-1])
if not saved:
    raise IOError(f"cv2.imwrite failed to write {output_path!r}")
saved

Instances(num_instances=12, image_height=480, image_width=640, fields=[pred_boxes: Boxes(tensor([[253.6294, 166.8539, 337.8562, 410.6996],
        [113.9885, 267.3944, 150.8945, 397.5751],
        [ 49.9475, 274.8294,  80.3438, 347.1398],
        [386.3582, 272.1289, 412.8902, 302.1226],
        [561.4410, 271.2314, 597.4069, 380.3328],
        [  1.7791, 281.9361,  76.7597, 478.0888],
        [522.6172, 280.9470, 562.6262, 383.9173],
        [404.0400, 273.3606, 461.3900, 347.6411],
        [353.9471, 269.3989, 385.4014, 298.6602],
        [341.6039, 268.6259, 363.2268, 299.0634],
        [593.4145, 268.4577, 614.2204, 314.5610],
        [509.0113, 286.8092, 534.0632, 338.4181]], device='cuda:0')), scores: tensor([0.9973, 0.9967, 0.9888, 0.9843, 0.9800, 0.9798, 0.9703, 0.9548, 0.9207,
        0.7551, 0.5568, 0.5180], device='cuda:0'), pred_classes: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='cuda:0'), pred_keypoints: tensor([[[2.9395e+02, 1.9352e+02, 1.3941e+00],
         [2.

True