In [2]:
import pycocotools.coco
import pycocotools.cocoeval
import os
import torch
import PIL.Image
import torchvision
import torchvision.transforms
import trt_pose.plugins
import trt_pose.models
import trt_pose.coco
import tqdm
import json
from trt_pose.parse_objects import ParseObjects

NOTE! Installing ujson may make loading annotations faster.


In [3]:
# Read the pose category description (keypoint names and skeleton links) and
# derive the part-linkage topology from it.
# NOTE: `topology` is recomputed later from the COCO annotation category.
with open('models/human_pose.json', 'r') as category_file:
    human_pose = json.load(category_file)

topology = trt_pose.coco.coco_category_to_topology(human_pose)

In [4]:
# Number of keypoints and of skeleton links listed in the category description;
# both are passed to the model constructor.
num_parts, num_links = len(human_pose['keypoints']), len(human_pose['skeleton'])

In [5]:
# Build the pose network with `num_parts` output channels for the first head
# and two channels per skeleton link for the second.
# NOTE(review): this constructs a resnet50 variant, while the checkpoint path
# loaded in the next cell is named "resnet18_..." -- confirm the two agree.
model = trt_pose.models.resnet50_baseline_att(num_parts, 2 * num_links)

In [6]:
# Restore the trained weights into the model.
# map_location='cpu' deserializes the checkpoint tensors into host memory no
# matter which device they were saved from; load_state_dict then copies the
# values into the model's own parameters, so the checkpoint never needs the
# original GPU to be present.
# NOTE: torch.load unpickles the file -- only load checkpoints you trust.
# NOTE(review): the path says "resnet18" but the model above is built with
# resnet50_baseline_att -- confirm the checkpoint matches the architecture.
model.load_state_dict(
    torch.load(
        'experiments/resnet18_baseline_att_368x368_A.json.checkpoints/epoch_249.pth',
        map_location='cpu',
    )
)

<All keys matched successfully>

In [7]:
# All-zero dummy batch (1 image, 3 channels, 368x368) created on the GPU.
data = torch.zeros((1, 3, 368, 368), device='cuda')

In [8]:
# Move the network to the GPU and put it in evaluation mode before inference.
model = model.cuda().eval()

In [9]:
# Smoke-test the network on an all-zero batch to inspect its two outputs.
# This is inference only, so autograd is disabled to avoid recording a graph.
with torch.no_grad():
    cmap, paf = model(torch.zeros((1, 3, 368, 368)).cuda())

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [10]:
# Display the shape of the first model output (recorded run: 1 x 18 x 64 x 64).
cmap.shape


torch.Size([1, 18, 64, 64])

In [11]:
# Display the shape of the second model output (recorded run: 1 x 42 x 64 x 64).
paf.shape

torch.Size([1, 42, 64, 64])

In [12]:
# Evaluation configuration.
# Size every image is resized to before being fed to the network.
IMAGE_SHAPE = (368, 368)
# Directory holding the validation images; joined with each image's file_name.
images_dir = '../../trt_pose/val2017'
# NOTE(review): not referenced by the later cells shown here, which load the
# annotations from explicit '../../trt_pose/annotations/...' paths -- confirm
# whether this is still needed.
annotation_file = 'annotations/person_keypoints_val2017_modified.json'

In [13]:
# Load the "modified" person-keypoint annotations. In the cells shown here this
# object is only used to read category 1 when rebuilding the topology.
cocoGtTmp = pycocotools.coco.COCO('../../trt_pose/annotations/person_keypoints_val2017_modified.json')

loading annotations into memory...
Done (t=0.28s)
creating index...
index created!


In [14]:
# Rebuild the topology from category id 1 of the modified annotation file.
# This replaces the value computed earlier from models/human_pose.json.
topology = trt_pose.coco.coco_category_to_topology(cocoGtTmp.cats[1])

In [15]:
# Load the val2017 person-keypoint annotations; this object serves as the
# ground truth for the image list, the results loading and the evaluation.
cocoGt = pycocotools.coco.COCO('../../trt_pose/annotations/person_keypoints_val2017.json')

loading annotations into memory...
Done (t=0.31s)
creating index...
index created!


In [16]:
# Look up the id(s) of the 'person' category, then select the images that
# are associated with that category.
catIds = cocoGt.getCatIds(catNms='person')
imgIds = cocoGt.getImgIds(catIds=catIds)

In [17]:
# Preprocessing applied to every PIL image: convert to a tensor, then
# normalize each channel with the given per-channel mean and std.
transform = torchvision.transforms.Compose([
    torchvision.transforms.ToTensor(),
    torchvision.transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [18]:
# Post-processing callable that turns the two network outputs into object
# counts, per-object part indices and peak coordinates.
parse_objects = ParseObjects(
    topology,
    cmap_threshold=0.05,
    link_threshold=0.1,
    cmap_window=11,
    line_integral_samples=7,
    max_num_parts=100,
    max_num_objects=100,
)

In [19]:
# Run the network over every selected validation image, convert each detected
# object into a COCO keypoint-result entry, and write all entries to
# results.json.

# Only the first 17 part indices of each detected object are exported; every
# result entry carries 17 (x, y, v) triplets.
NUM_COCO_KEYPOINTS = 17

results = []

# Inference only: disable autograd so no graph is recorded for each image.
with torch.no_grad():
    for n, imgId in enumerate(imgIds):

        # Image metadata (file name, original width/height) from the annotations.
        img = cocoGt.imgs[imgId]
        img_path = os.path.join(images_dir, img['file_name'])

        # The context manager closes the underlying file once the pixels have
        # been copied out by convert()/resize().
        with PIL.Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB').resize(IMAGE_SHAPE)

        # Add a leading batch dimension after moving the tensor to the GPU.
        data = transform(image).cuda()[None, ...]

        cmap, paf = model(data)
        cmap, paf = cmap.cpu(), paf.cpu()

        # Keep only the entries for the single image in the batch.
        object_counts, objects, peaks = parse_objects(cmap, paf)
        object_counts, objects, peaks = int(object_counts[0]), objects[0], peaks[0]

        for i in range(object_counts):
            person = objects[i]
            kps = [0] * (NUM_COCO_KEYPOINTS * 3)
            num_found = 0
            for j in range(NUM_COCO_KEYPOINTS):
                k = person[j]
                # A negative index means part j was not found for this object;
                # its triplet stays (0, 0, 0).
                if k >= 0:
                    peak = peaks[j][k]
                    # peak[1] scales with the image width and peak[0] with the
                    # height, mapping the peak back to original pixel coordinates.
                    kps[j * 3 + 0] = round(float(img['width'] * peak[1]))
                    kps[j * 3 + 1] = round(float(img['height'] * peak[0]))
                    kps[j * 3 + 2] = 2
                    num_found += 1

            results.append({
                'image_id': imgId,
                'category_id': 1,
                'keypoints': kps,
                # Score is the fraction of the 17 keypoints that were found.
                'score': num_found / 17.0
            })

        if n % 100 == 0:
            print('%d / %d' % (n, len(imgIds)))

with open('results.json', 'w') as f:
    json.dump(results, f)

0 / 2693
100 / 2693
200 / 2693
300 / 2693
400 / 2693
500 / 2693
600 / 2693
700 / 2693
800 / 2693
900 / 2693
1000 / 2693
1100 / 2693
1200 / 2693
1300 / 2693
1400 / 2693
1500 / 2693
1600 / 2693
1700 / 2693
1800 / 2693
1900 / 2693
2000 / 2693
2100 / 2693
2200 / 2693
2300 / 2693
2400 / 2693
2500 / 2693
2600 / 2693


In [36]:
# Write the collected detections to results.json.
# NOTE: this repeats the write performed at the end of the evaluation-loop cell.
with open('results.json', 'w') as results_file:
    json.dump(results, results_file)

In [20]:
# Load the detections written to results.json as a COCO results object tied
# to the ground-truth annotations in cocoGt.
cocoDt = cocoGt.loadRes('results.json')

Loading and preparing results...
DONE (t=0.39s)
creating index...
index created!


In [21]:
# Score the detections against the ground truth using the keypoint metric.
cocoEval = pycocotools.cocoeval.COCOeval(cocoGt, cocoDt, 'keypoints')
# Restrict the evaluation to the images that were run through the model.
cocoEval.params.imgIds = imgIds
# Category id 1 is hard-coded here, matching the 'category_id' written into
# every result entry.
cocoEval.params.catIds = [1]
# Per-image evaluation, then accumulation, then the printed AP/AR summary.
cocoEval.evaluate()
cocoEval.accumulate()
cocoEval.summarize()

Running per image evaluation...
Evaluate annotation type *keypoints*
DONE (t=4.52s).
Accumulating evaluation results...
DONE (t=0.09s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.24617
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.45277
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.23746
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.12129
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.42217
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 20 ] = 0.32032
 Average Recall     (AR) @[ IoU=0.50      | area=   all | maxDets= 20 ] = 0.49795
 Average Recall     (AR) @[ IoU=0.75      | area=   all | maxDets= 20 ] = 0.32950
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets= 20 ] = 0.13201
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets= 20 ] = 0.57800
