# Loading packages

In [1]:
import json
import os
import cv2
import zipfile
import requests
import numpy as np
import shutil

# Seed NumPy's global RNG so the np.random.choice sample drawn later is reproducible.
np.random.seed(10)

# Downloading annotations and dataset

In [2]:
# Zip archive containing the val2017 images.
DATASET_URL = "http://images.cocodataset.org/zips/val2017.zip"
# Zip archive containing the train/val 2017 annotation files.
ANNOTATION_URL = "http://images.cocodataset.org/annotations/annotations_trainval2017.zip"
# Number of image ids drawn at random (without replacement) from the eligible ones.
SAMPLED_SIZE = 100

In [3]:
def download_zip(url, path, chunk_size=1 << 20, timeout=60):
    """Download a zip archive to ``path`` and extract it into the working directory.

    Args:
        url: Address of the archive to fetch.
        path: Local file the archive is written to. The file is kept after
            extraction so that the caller decides when to delete it.
        chunk_size: Number of bytes requested per streamed chunk.
        timeout: Timeout in seconds handed to ``requests.get``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    # Stream to disk so a large archive is never held in memory in one piece.
    with requests.get(url, stream=True, timeout=timeout) as response:
        # Fail loudly instead of saving an error page under the archive name.
        response.raise_for_status()
        with open(path, "wb") as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                f.write(chunk)
    # The context manager releases the archive handle once extraction is done.
    with zipfile.ZipFile(path, "r") as zip_ref:
        zip_ref.extractall()


# Fetch and unpack the image archive first, then the annotation archive.
for archive_url, archive_path in (
    (DATASET_URL, "val2017.zip"),
    (ANNOTATION_URL, "annotations_trainval2017.zip"),
):
    download_zip(archive_url, archive_path)

In [4]:
def load_json(path):
    """Read the JSON document stored at ``path`` and return the decoded object.

    Args:
        path: Location of the JSON file.

    Returns:
        The Python object produced by ``json.load`` for the file's content.
    """
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open(path, encoding="utf-8") as f:
        return json.load(f)

# Parse the instance (detection) annotations of the val2017 split and list the top-level sections.
data_detection = load_json('annotations/instances_val2017.json')
data_detection.keys()

dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])

In [5]:
# Dataset-level metadata stored in the detection annotation file.
data_detection['info']

{'description': 'COCO 2017 Dataset',
 'url': 'http://cocodataset.org',
 'version': '1.0',
 'year': 2017,
 'contributor': 'COCO Consortium',
 'date_created': '2017/09/01'}

In [6]:
# Category definitions (supercategory, id, name) of the detection annotation file.
data_detection['categories']

[{'supercategory': 'person', 'id': 1, 'name': 'person'},
 {'supercategory': 'vehicle', 'id': 2, 'name': 'bicycle'},
 {'supercategory': 'vehicle', 'id': 3, 'name': 'car'},
 {'supercategory': 'vehicle', 'id': 4, 'name': 'motorcycle'},
 {'supercategory': 'vehicle', 'id': 5, 'name': 'airplane'},
 {'supercategory': 'vehicle', 'id': 6, 'name': 'bus'},
 {'supercategory': 'vehicle', 'id': 7, 'name': 'train'},
 {'supercategory': 'vehicle', 'id': 8, 'name': 'truck'},
 {'supercategory': 'vehicle', 'id': 9, 'name': 'boat'},
 {'supercategory': 'outdoor', 'id': 10, 'name': 'traffic light'},
 {'supercategory': 'outdoor', 'id': 11, 'name': 'fire hydrant'},
 {'supercategory': 'outdoor', 'id': 13, 'name': 'stop sign'},
 {'supercategory': 'outdoor', 'id': 14, 'name': 'parking meter'},
 {'supercategory': 'outdoor', 'id': 15, 'name': 'bench'},
 {'supercategory': 'animal', 'id': 16, 'name': 'bird'},
 {'supercategory': 'animal', 'id': 17, 'name': 'cat'},
 {'supercategory': 'animal', 'id': 18, 'name': 'dog'},

In [7]:
# First image record, to see which fields describe an image.
data_detection['images'][0]

{'license': 4,
 'file_name': '000000397133.jpg',
 'coco_url': 'http://images.cocodataset.org/val2017/000000397133.jpg',
 'height': 427,
 'width': 640,
 'date_captured': '2013-11-14 17:02:52',
 'flickr_url': 'http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg',
 'id': 397133}

In [8]:
# First annotation record, to see which fields describe one annotated object.
data_detection['annotations'][0]

{'segmentation': [[510.66,
   423.01,
   511.72,
   420.03,
   510.45,
   416.0,
   510.34,
   413.02,
   510.77,
   410.26,
   510.77,
   407.5,
   510.34,
   405.16,
   511.51,
   402.83,
   511.41,
   400.49,
   510.24,
   398.16,
   509.39,
   397.31,
   504.61,
   399.22,
   502.17,
   399.64,
   500.89,
   401.66,
   500.47,
   402.08,
   499.09,
   401.87,
   495.79,
   401.98,
   490.59,
   401.77,
   488.79,
   401.77,
   485.39,
   398.58,
   483.9,
   397.31,
   481.56,
   396.35,
   478.48,
   395.93,
   476.68,
   396.03,
   475.4,
   396.77,
   473.92,
   398.79,
   473.28,
   399.96,
   473.49,
   401.87,
   474.56,
   403.47,
   473.07,
   405.59,
   473.39,
   407.71,
   476.68,
   409.41,
   479.23,
   409.73,
   481.56,
   410.69,
   480.4,
   411.85,
   481.35,
   414.93,
   479.86,
   418.65,
   477.32,
   420.03,
   476.04,
   422.58,
   479.02,
   422.58,
   480.29,
   423.01,
   483.79,
   419.93,
   486.66,
   416.21,
   490.06,
   415.57,
   492.18,
   416.85,

In [9]:
# Number of image records and number of annotation records in the detection file.
len(data_detection['images']), len(data_detection['annotations'])

(5000, 36781)

In [10]:
# Parse the person-keypoint annotations of the val2017 split and list the top-level sections.
data_pose = load_json('annotations/person_keypoints_val2017.json')
data_pose.keys()

dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])

In [11]:
# Category definition of the keypoint file; it also carries the keypoint names and the skeleton.
data_pose['categories']

[{'supercategory': 'person',
  'id': 1,
  'name': 'person',
  'keypoints': ['nose',
   'left_eye',
   'right_eye',
   'left_ear',
   'right_ear',
   'left_shoulder',
   'right_shoulder',
   'left_elbow',
   'right_elbow',
   'left_wrist',
   'right_wrist',
   'left_hip',
   'right_hip',
   'left_knee',
   'right_knee',
   'left_ankle',
   'right_ankle'],
  'skeleton': [[16, 14],
   [14, 12],
   [17, 15],
   [15, 13],
   [12, 13],
   [6, 12],
   [7, 13],
   [6, 7],
   [6, 8],
   [7, 9],
   [8, 10],
   [9, 11],
   [2, 3],
   [1, 2],
   [1, 3],
   [2, 4],
   [3, 5],
   [4, 6],
   [5, 7]]}]

In [12]:
# First image record of the keypoint file.
data_pose['images'][0]

{'license': 4,
 'file_name': '000000397133.jpg',
 'coco_url': 'http://images.cocodataset.org/val2017/000000397133.jpg',
 'height': 427,
 'width': 640,
 'date_captured': '2013-11-14 17:02:52',
 'flickr_url': 'http://farm7.staticflickr.com/6116/6255196340_da26cf2c9e_z.jpg',
 'id': 397133}

In [13]:
# First annotation record of the keypoint file.
data_pose['annotations'][0]

{'segmentation': [[125.12,
   539.69,
   140.94,
   522.43,
   100.67,
   496.54,
   84.85,
   469.21,
   73.35,
   450.52,
   104.99,
   342.65,
   168.27,
   290.88,
   179.78,
   288,
   189.84,
   286.56,
   191.28,
   260.67,
   202.79,
   240.54,
   221.48,
   237.66,
   248.81,
   243.42,
   257.44,
   256.36,
   253.12,
   262.11,
   253.12,
   275.06,
   299.15,
   233.35,
   329.35,
   207.46,
   355.24,
   206.02,
   363.87,
   206.02,
   365.3,
   210.34,
   373.93,
   221.84,
   363.87,
   226.16,
   363.87,
   237.66,
   350.92,
   237.66,
   332.22,
   234.79,
   314.97,
   249.17,
   271.82,
   313.89,
   253.12,
   326.83,
   227.24,
   352.72,
   214.29,
   357.03,
   212.85,
   372.85,
   208.54,
   395.87,
   228.67,
   414.56,
   245.93,
   421.75,
   266.07,
   424.63,
   276.13,
   437.57,
   266.07,
   450.52,
   284.76,
   464.9,
   286.2,
   479.28,
   291.96,
   489.35,
   310.65,
   512.36,
   284.76,
   549.75,
   244.49,
   522.43,
   215.73,
   546.88,
  

In [14]:
# Number of image records and number of annotation records in the keypoint file.
len(data_pose['images']), len(data_pose['annotations'])

(5000, 11004)

# Getting people bounding boxes

In [15]:
# Group the usable person annotations by the image they belong to.
PERSON_CATEGORY_ID = 1  # id of the 'person' category in data_pose['categories']
MIN_KEYPOINTS = 7       # annotations with fewer labelled keypoints are dropped

people_annotations = {}
for annotation in data_pose['annotations']:
    is_person = annotation['category_id'] == PERSON_CATEGORY_ID
    if not (is_person and annotation['num_keypoints'] >= MIN_KEYPOINTS):
        continue
    # Create the per-image list on first use, then add the annotation to it.
    people_annotations.setdefault(annotation['image_id'], []).append(annotation)
len(people_annotations)

2101

In [16]:
# Draw SAMPLED_SIZE distinct image ids from the images that have usable person annotations.
candidate_image_ids = list(people_annotations)
sampled_people_images = np.random.choice(
    candidate_image_ids, size=SAMPLED_SIZE, replace=False
).tolist()
len(sampled_people_images)

100

In [17]:
# Map each sampled image id to its image record from the keypoint file.
sampled_id_lookup = set(sampled_people_images)
samples_images_info = {
    image_record['id']: image_record
    for image_record in data_pose['images']
    if image_record['id'] in sampled_id_lookup
}

len(samples_images_info)

100

In [18]:
# Image record of the first sampled image id.
samples_images_info[sampled_people_images[0]]

{'license': 4,
 'file_name': '000000574702.jpg',
 'coco_url': 'http://images.cocodataset.org/val2017/000000574702.jpg',
 'height': 500,
 'width': 333,
 'date_captured': '2013-11-15 12:24:51',
 'flickr_url': 'http://farm3.staticflickr.com/2035/2252476376_bfc3cda192_z.jpg',
 'id': 574702}

In [19]:
# Person annotations kept for the first sampled image id.
people_annotations[sampled_people_images[0]]

[{'segmentation': [[251.11,
    266.12,
    243.68,
    266.65,
    234.14,
    286.8,
    232.55,
    288.39,
    228.3,
    294.76,
    223.53,
    302.71,
    221.94,
    306.42,
    222.47,
    313.32,
    226.71,
    319.15,
    238.38,
    321.8,
    249.52,
    319.68,
    258,
    319.15,
    258.53,
    319.15,
    267.02,
    314.91,
    277.09,
    312.79,
    280.8,
    309.6,
    281.86,
    305.89,
    282.93,
    297.94,
    281.33,
    293.7,
    277.09,
    288.39,
    273.38,
    285.74,
    271.26,
    278.85,
    267.02,
    269.3,
    261.71,
    265.06,
    253.76,
    264.53,
    251.11,
    265.06,
    251.11,
    265.06],
   [287.17,
    323.39,
    294.59,
    324.98,
    294.59,
    328.17,
    292.47,
    334,
    288.23,
    339.3,
    287.7,
    350.97,
    288.23,
    353.62,
    293.53,
    358.39,
    298.83,
    363.7,
    300.43,
    367.94,
    300.43,
    376.42,
    298.83,
    378.54,
    291.94,
    370.06,
    284.52,
    363.7,
    281.86,
    

In [20]:
# Build one payload entry per sampled person, keyed "<image_id>_<annotation_id>",
# with the keypoints expressed relative to the person's bounding-box origin.
payload = {}

for sampled_image_id in sampled_people_images:
    for person in people_annotations[sampled_image_id]:
        # Use the integer-truncated top-left corner of the bbox as the new origin.
        x, y, _, _ = person['bbox']
        x, y = int(x), int(y)
        # Work on a copy: shifting the original list in place would rewrite the
        # annotation held by people_annotations / data_pose, so running this
        # cell a second time would shift the keypoints twice.
        keypoints = list(person['keypoints'])
        # Keypoints are stored flat as (x, y, v) triplets.
        for i in range(0, len(keypoints), 3):
            key_x, key_y, key_v = keypoints[i:i+3]
            if key_v == 0:
                # Triplets whose third value is 0 are left exactly as they are.
                continue
            keypoints[i] = key_x - x
            keypoints[i+1] = key_y - y
        # Shallow copy of the annotation carrying the shifted keypoints.
        payload[f"{person['image_id']}_{person['id']}"] = {**person, 'keypoints': keypoints}

# Keep the category definition next to the per-person entries.
payload["__metadata__"] = data_pose['categories']
len(payload)

274

In [21]:
# Category metadata stored under the reserved "__metadata__" key.
payload["__metadata__"]

[{'supercategory': 'person',
  'id': 1,
  'name': 'person',
  'keypoints': ['nose',
   'left_eye',
   'right_eye',
   'left_ear',
   'right_ear',
   'left_shoulder',
   'right_shoulder',
   'left_elbow',
   'right_elbow',
   'left_wrist',
   'right_wrist',
   'left_hip',
   'right_hip',
   'left_knee',
   'right_knee',
   'left_ankle',
   'right_ankle'],
  'skeleton': [[16, 14],
   [14, 12],
   [17, 15],
   [15, 13],
   [12, 13],
   [6, 12],
   [7, 13],
   [6, 7],
   [6, 8],
   [7, 9],
   [8, 10],
   [9, 11],
   [2, 3],
   [1, 2],
   [1, 3],
   [2, 4],
   [3, 5],
   [4, 6],
   [5, 7]]}]

In [22]:
# First entry stored in the payload.
payload[list(payload.keys())[0]]

{'segmentation': [[251.11,
   266.12,
   243.68,
   266.65,
   234.14,
   286.8,
   232.55,
   288.39,
   228.3,
   294.76,
   223.53,
   302.71,
   221.94,
   306.42,
   222.47,
   313.32,
   226.71,
   319.15,
   238.38,
   321.8,
   249.52,
   319.68,
   258,
   319.15,
   258.53,
   319.15,
   267.02,
   314.91,
   277.09,
   312.79,
   280.8,
   309.6,
   281.86,
   305.89,
   282.93,
   297.94,
   281.33,
   293.7,
   277.09,
   288.39,
   273.38,
   285.74,
   271.26,
   278.85,
   267.02,
   269.3,
   261.71,
   265.06,
   253.76,
   264.53,
   251.11,
   265.06,
   251.11,
   265.06],
  [287.17,
   323.39,
   294.59,
   324.98,
   294.59,
   328.17,
   292.47,
   334,
   288.23,
   339.3,
   287.7,
   350.97,
   288.23,
   353.62,
   293.53,
   358.39,
   298.83,
   363.7,
   300.43,
   367.94,
   300.43,
   376.42,
   298.83,
   378.54,
   291.94,
   370.06,
   284.52,
   363.7,
   281.86,
   359.45,
   280.8,
   348.85,
   280.8,
   345.13,
   281.86,
   336.65,
   277.62,
 

In [23]:
# Directory that receives the image crops; exist_ok=True makes the call safe to repeat.
os.makedirs('sampled_images', exist_ok=True)

def save_json(data, path):
    """Serialise ``data`` as JSON into the file at ``path``.

    Args:
        data: JSON-serialisable object to store.
        path: Destination file; it is opened in write mode, so existing
            content is replaced.
    """
    with open(path, 'w') as out_file:
        json.dump(data, out_file)

In [24]:
# Write one crop per sampled person, then store the matching annotation payload.
for image_id, image_info in samples_images_info.items():
    image_path = os.path.join('val2017', image_info['file_name'])
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports an unreadable or missing file by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    for detec in people_annotations[image_id]:
        x, y, w, h = detec['bbox']
        # Truncate the origin and pad width/height by one pixel.
        x, y, w, h = int(x), int(y), int(w) + 1, int(h) + 1
        # Slice into a separate variable: every person must be cut from the
        # full frame, not from the crop of the previous person in the image.
        crop = image[y:y+h, x:x+w]
        # Skip empty crops rather than writing them.
        if crop.shape[0] > 0 and crop.shape[1] > 0:
            cv2.imwrite(f"sampled_images/{detec['image_id']}_{detec['id']}.jpg", crop)

save_json(payload, 'samples_info.json')

# Clean up

In [25]:
# Delete the downloaded archives, then the directories extracted from them.
for archive_file in ("val2017.zip", "annotations_trainval2017.zip"):
    os.remove(archive_file)
for extracted_dir in ("annotations", "val2017"):
    shutil.rmtree(extracted_dir)

In [27]:
# Gather the crops and the annotation file under a single 'sampled' directory.
os.makedirs('sampled', exist_ok=True)
shutil.move('sampled_images', 'sampled/sampled_images')
# Last expression of the cell: the destination path returned by shutil.move is displayed.
shutil.move('samples_info.json', 'sampled/samples_info.json')

'sampled/samples_info.json'