In [10]:
def coco_category_to_topology(coco_category):
    """Build the integer topology tensor for a COCO category.

    One row is produced per entry of ``coco_category['skeleton']``. For the
    entry at position ``k`` the row is
    ``[2 * k, 2 * k + 1, entry[0] - 1, entry[1] - 1]``.

    Returns:
        An int32 tensor of shape ``(len(skeleton), 4)``.
    """
    links = coco_category['skeleton']
    topology = torch.zeros((len(links), 4)).int()
    for row, link in enumerate(links):
        base = 2 * row
        topology[row, 0] = base
        topology[row, 1] = base + 1
        # Skeleton entries are stored shifted by one relative to tensor indices.
        topology[row, 2] = link[0] - 1
        topology[row, 3] = link[1] - 1
    return topology


def coco_category_to_parts(coco_category):
    """Return the part (keypoint) names of a COCO category.

    The value stored under ``'keypoints'`` is returned as-is, not copied.
    """
    part_names = coco_category['keypoints']
    return part_names

def coco_annotations_to_tensors(coco_annotations,
                                image_shape,
                                parts,
                                topology,
                                max_count=100):
    """Convert the annotations of one image into count, peak and connection tensors.

    Args:
        coco_annotations: sequence of annotation dicts. Each ``'keypoints'``
            value is read as a flat sequence of ``x, y, visible`` triples,
            one triple per part.
        image_shape: ``(height, width)`` used to normalize the coordinates.
        parts: sequence of part names; only its length is used.
        topology: tensor with one row per link; columns 2 and 3 hold the
            indices of the two linked parts.
        max_count: number of peak slots allocated per part.

    Returns:
        A ``(counts, peaks, connections)`` tuple where, with ``C = len(parts)``,
        ``K = topology.shape[0]`` and ``M = max_count``:

        * ``counts`` is an int tensor of shape ``(C,)`` with the number of
          visible keypoints recorded for each part;
        * ``peaks`` is a float tensor of shape ``(C, M, 2)`` holding
          ``((y + 0.5) / (height + 1), (x + 0.5) / (width + 1))`` per peak;
        * ``connections`` is an int tensor of shape ``(K, 2, M)`` filled with
          -1 except where two linked parts of the same annotation are both
          visible, in which case each side stores the peak index of the other.
    """
    num_parts = len(parts)
    num_links = topology.shape[0]
    img_h, img_w = image_shape[0], image_shape[1]

    counts = torch.zeros((num_parts)).int()
    peaks = torch.zeros((num_parts, max_count, 2)).float()
    visibles = torch.zeros((len(coco_annotations), num_parts)).int()
    connections = -torch.ones((num_links, 2, max_count)).int()

    for person, ann in enumerate(coco_annotations):
        keypoints = ann['keypoints']

        # Record every visible keypoint of this annotation as a new peak.
        for part in range(num_parts):
            offset = part * 3
            x = keypoints[offset]
            y = keypoints[offset + 1]
            if not keypoints[offset + 2]:
                continue

            row = (float(y) + 0.5) / (img_h + 1.0)
            col = (float(x) + 0.5) / (img_w + 1.0)
            slot = peaks[part][counts[part]]  # view into the next free peak slot
            slot[0] = row
            slot[1] = col
            counts[part] += 1
            visibles[person][part] = 1

        # Link the peaks just added for this annotation; the most recent peak
        # of each part sits at index ``counts[part] - 1``.
        for link in range(num_links):
            part_a = topology[link][2]
            part_b = topology[link][3]
            if not (visibles[person][part_a] and visibles[person][part_b]):
                continue
            last_a = counts[part_a] - 1
            last_b = counts[part_b] - 1
            connections[link][0][last_a] = last_b
            connections[link][1][last_b] = last_a

    return counts, peaks, connections

In [11]:
import json
import torch
import tqdm

# --- Configuration ---
# An annotation is kept only when area / (image height * image width) lies
# within [min_area, max_area].
min_area=0.0
max_area=1.0
# Number of peak slots allocated per part when the tensors are generated.
max_part_count = 100

# NOTE(review): absolute, machine-specific path; consider a configurable data
# directory so the notebook runs on other machines.
annotations_file = r"D:\person_keypoints_val2017_modified.json"
category_name = "person"
# When False, annotations flagged 'iscrowd' are skipped.
use_crowd=False


# JSON text is UTF-8 by specification. Open the file explicitly as UTF-8 so
# decoding does not depend on the platform's locale encoding.
with open(annotations_file, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Select the category record by name and fail with a clear message when absent.
matching_categories = [c for c in data['categories'] if c['name'] == category_name]
if not matching_categories:
    raise ValueError(
        "category %r not found in %s" % (category_name, annotations_file))
cat = matching_categories[0]
print(cat)
print("")
cat_id = cat['id']


# Map each image id to its image record, e.g.
# 397133: {'license': 4, 'file_name': '000000397133.jpg', 'height': 427, 'width': 640, ..., 'id': 397133}
img_map = {}
for img in data['images']:
    img_map[img['id']] = img
    

    


{'supercategory': 'person', 'id': 1, 'name': 'person', 'keypoints': ['nose', 'left_eye', 'right_eye', 'left_ear', 'right_ear', 'left_shoulder', 'right_shoulder', 'left_elbow', 'right_elbow', 'left_wrist', 'right_wrist', 'left_hip', 'right_hip', 'left_knee', 'right_knee', 'left_ankle', 'right_ankle', 'neck'], 'skeleton': [[16, 14], [14, 12], [17, 15], [15, 13], [12, 13], [6, 8], [7, 9], [8, 10], [9, 11], [2, 3], [1, 2], [1, 3], [2, 4], [3, 5], [4, 6], [5, 7], [18, 1], [18, 6], [18, 7], [18, 12], [18, 13]]}



In [12]:
# Group the kept annotations by image. Every image is registered twice: once
# under its own id and once, as a duplicate, under the negated id.
samples = {}
count = 0
c = 0  # total annotation entries stored across originals and duplicates
for ann in data['annotations']:

    # filter by category
    if ann['category_id'] != cat_id:
        continue

    # filter by crowd
    if not use_crowd and ann['iscrowd']:
        continue

    img_id = ann['image_id']
    img = img_map[img_id]
    height = img['height']
    width = img['width']
    area = ann['area']

    # duplicate the image: the copy reuses the same metadata under a negated id
    new_img_id = -img_id
    new_img = img
    new_height = height
    new_width = width
    new_area = area

    # filter by object area
    normalized_area = float(area) / float(height * width)
    if normalized_area < min_area or normalized_area > max_area:
        continue

    # add metadata
    if img_id not in samples:
        # Give the original and the duplicate their own dict and their own
        # 'anns' list. Binding both keys to one shared dict would make the
        # two appends below land in the same list, storing every later
        # annotation twice.
        samples[img_id] = {'img': img, 'anns': [ann]}
        samples[new_img_id] = {'img': new_img, 'anns': [ann]}

    # the same image carries two or more skeletons
    else:
        samples[img_id]['anns'].append(ann)
        samples[new_img_id]['anns'].append(ann)

    # one entry for the original sample plus one for its duplicate
    c = c + 2

    
       

In [13]:
# Derive the part list and link topology from the selected category.
parts = coco_category_to_parts(cat)
topology = coco_category_to_topology(cat)

# N samples, C parts, K links, M peak slots per part.
N, C, K, M = len(samples), len(parts), topology.shape[0], max_part_count

print("total 圖片數量:", N)
print("total 骨架標記數量:", c)

total 圖片數量: 5386
total 骨架標記數量: 21554


In [103]:
print('Generating intermediate tensors...')


# Per-sample tensors stacked along a leading sample axis of size N.
counts = torch.zeros((N, C), dtype=torch.int32)
peaks = torch.zeros((N, C, M, 2), dtype=torch.float32)
connections = torch.zeros((N, K, 2, M), dtype=torch.int32)
filenames = []
samples2 = []

# Wrap the values view (which has a length) rather than the enumerate object,
# so tqdm knows the total and can show real progress.
for i, sample in enumerate(tqdm.tqdm(samples.values())):
    filename = sample['img']['file_name']
    filenames.append(filename)
    image_shape = (sample['img']['height'], sample['img']['width'])
    # Pass max_count=M explicitly: the destination tensors above are sized
    # with M, so relying on the function's default would break as soon as
    # max_part_count differs from that default.
    counts_i, peaks_i, connections_i = coco_annotations_to_tensors(
        sample['anns'], image_shape, parts, topology, max_count=M)
    counts[i] = counts_i
    peaks[i] = peaks_i
    connections[i] = connections_i
    samples2.append(sample)

# Show a short preview instead of dumping every file name into the output.
print(len(filenames), filenames[:5])

Generating intermediate tensors...


5386it [00:33, 162.77it/s]

000000425226.jpg





In [105]:
# Show the file name stored at index 1 of the collected list.
print(filenames[1])

000000425226.jpg


In [123]:
# Show the first two collected samples (image record plus annotation list).
print(samples2[0:2]) 

[{'img': {'license': 1, 'file_name': '000000425226.jpg', 'coco_url': 'http://images.cocodataset.org/val2017/000000425226.jpg', 'height': 640, 'width': 480, 'date_captured': '2013-11-14 21:48:51', 'flickr_url': 'http://farm5.staticflickr.com/4055/4546463824_bc40e0752b_z.jpg', 'id': 425226}, 'anns': [{'segmentation': [[125.12, 539.69, 140.94, 522.43, 100.67, 496.54, 84.85, 469.21, 73.35, 450.52, 104.99, 342.65, 168.27, 290.88, 179.78, 288, 189.84, 286.56, 191.28, 260.67, 202.79, 240.54, 221.48, 237.66, 248.81, 243.42, 257.44, 256.36, 253.12, 262.11, 253.12, 275.06, 299.15, 233.35, 329.35, 207.46, 355.24, 206.02, 363.87, 206.02, 365.3, 210.34, 373.93, 221.84, 363.87, 226.16, 363.87, 237.66, 350.92, 237.66, 332.22, 234.79, 314.97, 249.17, 271.82, 313.89, 253.12, 326.83, 227.24, 352.72, 214.29, 357.03, 212.85, 372.85, 208.54, 395.87, 228.67, 414.56, 245.93, 421.75, 266.07, 424.63, 276.13, 437.57, 266.07, 450.52, 284.76, 464.9, 286.2, 479.28, 291.96, 489.35, 310.65, 512.36, 284.76, 549.75, 2

In [115]:
# Show the image record of each of the first three collected samples.
for sample_index in (0, 1, 2):
    print(samples2[sample_index]['img'])

{'license': 1, 'file_name': '000000425226.jpg', 'coco_url': 'http://images.cocodataset.org/val2017/000000425226.jpg', 'height': 640, 'width': 480, 'date_captured': '2013-11-14 21:48:51', 'flickr_url': 'http://farm5.staticflickr.com/4055/4546463824_bc40e0752b_z.jpg', 'id': 425226}
{'license': 1, 'file_name': '000000425226.jpg', 'coco_url': 'http://images.cocodataset.org/val2017/000000425226.jpg', 'height': 640, 'width': 480, 'date_captured': '2013-11-14 21:48:51', 'flickr_url': 'http://farm5.staticflickr.com/4055/4546463824_bc40e0752b_z.jpg', 'id': 425226}
{'license': 3, 'file_name': '000000440475.jpg', 'coco_url': 'http://images.cocodataset.org/val2017/000000440475.jpg', 'height': 427, 'width': 640, 'date_captured': '2013-11-14 22:53:18', 'flickr_url': 'http://farm1.staticflickr.com/54/186534636_d975224e88_z.jpg', 'id': 440475}


In [121]:
# Show the annotation list of each of the first three collected samples.
for sample_index in (0, 1, 2):
    print(samples2[sample_index]['anns'])

[{'segmentation': [[125.12, 539.69, 140.94, 522.43, 100.67, 496.54, 84.85, 469.21, 73.35, 450.52, 104.99, 342.65, 168.27, 290.88, 179.78, 288, 189.84, 286.56, 191.28, 260.67, 202.79, 240.54, 221.48, 237.66, 248.81, 243.42, 257.44, 256.36, 253.12, 262.11, 253.12, 275.06, 299.15, 233.35, 329.35, 207.46, 355.24, 206.02, 363.87, 206.02, 365.3, 210.34, 373.93, 221.84, 363.87, 226.16, 363.87, 237.66, 350.92, 237.66, 332.22, 234.79, 314.97, 249.17, 271.82, 313.89, 253.12, 326.83, 227.24, 352.72, 214.29, 357.03, 212.85, 372.85, 208.54, 395.87, 228.67, 414.56, 245.93, 421.75, 266.07, 424.63, 276.13, 437.57, 266.07, 450.52, 284.76, 464.9, 286.2, 479.28, 291.96, 489.35, 310.65, 512.36, 284.76, 549.75, 244.49, 522.43, 215.73, 546.88, 199.91, 558.38, 204.22, 565.57, 189.84, 568.45, 184.09, 575.64, 172.58, 578.52, 145.26, 567.01, 117.93, 551.19, 133.75, 532.49]], 'num_keypoints': 10, 'area': 47803.27955, 'iscrowd': 0, 'keypoints': [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 142, 309, 1, 177, 320, 