In [1]:
import os
import json
import random
import numpy as np

# Seed the global `random` generator so the validation sample drawn later
# with random.sample() is the same on every fresh Restart & Run All.
random.seed(10)

def convert(o):
    """JSON ``default`` hook that maps NumPy values to native Python values.

    Intended to be passed as ``json.dump(..., default=convert)``; the
    encoder calls it only for objects it cannot serialise on its own.

    Parameters
    ----------
    o : object
        The value the JSON encoder could not handle.

    Returns
    -------
    object
        ``o.item()`` for a NumPy scalar (``np.generic``), or ``o.tolist()``
        for an ``np.ndarray``.

    Raises
    ------
    TypeError
        If ``o`` is neither a NumPy scalar nor a NumPy array. The message
        names the offending type so the failure can be diagnosed.
    """
    if isinstance(o, np.generic):
        return o.item()
    if isinstance(o, np.ndarray):
        # tolist() recursively converts the elements to built-in scalars.
        return o.tolist()
    raise TypeError(
        f"Object of type {type(o).__name__} is not JSON serializable"
    )

In [2]:
# Global Files

# Directory that the annotation files are read from and written to.
ROOT_DIR = "./data"

# Full annotation sets (read by the "Read Files" cell).
COCO_FILE, PANOPTIC_FILE = "coco.json", "panoptic.json"

# Validation subsets (written by the "Write Files" cell).
VAL_COCO_FILE, VAL_PANOPTIC_FILE = "val_coco.json", "val_panoptic.json"

In [3]:
# Read Files

# The encoding is given explicitly so decoding does not fall back to the
# platform's locale default (which is not UTF-8 on every system).
with open(os.path.join(ROOT_DIR, COCO_FILE), "r", encoding="utf-8") as coco_file:
    coco_data = json.load(coco_file)

with open(os.path.join(ROOT_DIR, PANOPTIC_FILE), "r", encoding="utf-8") as panoptic_file:
    panoptic_data = json.load(panoptic_file)

In [4]:
total_images = len(coco_data['images'])

# Sample 500 validation ids from the ids that actually occur in the file
# rather than from range(1, N+1): if ids are sparse or do not start at 1,
# the range would yield ids with no matching image. When the ids are
# exactly 1..N in file order, this draws the identical sample.
# random.sample raises ValueError if fewer than 500 images are available.
# NOTE: the draw is reproducible only when this cell runs right after the
# random.seed(10) call in the first cell; re-running this cell on its own
# produces a different split.
val_images = random.sample([im['id'] for im in coco_data['images']], 500)

# Summary: dataset size, validation size, and their ratio.
total_images, len(val_images), total_images/len(val_images)

(10146, 500, 20.292)

In [5]:
# Display the sampled validation image ids as one string.
str(val_images)

'[9362, 534, 7027, 7907, 9472, 244, 3377, 7579, 8050, 4547, 2626, 564, 8531, 8030, 5371, 1247, 4096, 5918, 731, 6888, 2273, 9883, 5818, 6253, 6902, 4647, 4298, 7485, 2862, 4967, 5941, 2179, 7487, 3923, 7204, 10049, 6148, 726, 9551, 67, 3861, 2196, 3195, 4963, 8788, 5999, 3936, 5151, 8995, 7384, 7144, 7701, 1065, 9578, 5320, 8224, 2563, 3675, 6765, 3906, 605, 521, 8137, 4935, 9940, 1181, 8741, 1327, 2453, 6299, 9294, 6131, 9852, 1851, 1583, 7235, 2726, 3129, 5713, 7099, 6785, 7301, 4022, 4501, 2352, 10114, 8551, 2924, 1958, 4375, 7467, 4958, 2703, 2856, 2886, 7867, 5695, 5375, 7123, 3665, 83, 8927, 716, 5434, 5234, 3976, 1310, 4289, 7341, 6633, 9550, 2593, 6400, 8123, 3956, 8608, 4452, 8522, 9865, 8191, 1029, 2725, 8019, 7561, 6573, 2225, 6889, 8869, 9584, 5722, 8826, 6355, 8027, 2717, 9155, 7232, 1617, 6803, 579, 47, 7186, 8773, 1064, 829, 5729, 1520, 2491, 1782, 9966, 7440, 7948, 2283, 7515, 7152, 8211, 7326, 5624, 4246, 7508, 6936, 5381, 8515, 8576, 2518, 3889, 5294, 10144, 540, 3205

In [6]:
# Compare the top-level sections of the two annotation files.
coco_data.keys(), panoptic_data.keys()

(dict_keys(['licenses', 'info', 'categories', 'annotations', 'images']),
 dict_keys(['licenses', 'info', 'categories', 'annotations', 'images']))

In [7]:
# Inspect the fields of the first annotation record in each file.
panoptic_data['annotations'][0].keys(), coco_data['annotations'][0].keys(), 

(dict_keys(['segments_info', 'file_name', 'image_id']),
 dict_keys(['id', 'image_id', 'category_id', 'segmentation', 'area', 'bbox', 'iscrowd', 'attributes']))

In [8]:
# Skeletons for the validation subsets: the licenses / info / categories
# sections are carried over from the source data as-is, while the images
# and annotations lists start empty and are populated by later cells.
coco_data_val = dict(
    licenses=coco_data["licenses"],
    info=coco_data["info"],
    categories=coco_data["categories"],
    images=[],
    annotations=[],
)

panoptic_data_val = dict(
    licenses=panoptic_data["licenses"],
    info=panoptic_data["info"],
    categories=panoptic_data["categories"],
    images=[],
    annotations=[],
)

In [9]:
# Build a set once: membership tests are then O(1) instead of scanning the
# whole val_images list for every image record.
val_image_ids = set(val_images)
val_image_records = [
    im for im in coco_data['images'] if im['id'] in val_image_ids
]

# Assign fresh lists instead of appending, so re-running this cell does not
# duplicate records in the validation sets.
coco_data_val['images'] = val_image_records
# NOTE(review): the panoptic subset receives the COCO image records even
# though panoptic_data has its own 'images' section. Confirm the two
# sections are interchangeable.
panoptic_data_val['images'] = list(val_image_records)

In [10]:
# Build a set once: membership tests are then O(1) instead of scanning the
# whole val_images list for every annotation.
val_image_ids = set(val_images)

# Keep only annotations that belong to a sampled validation image. The
# lists are assigned (not appended to), so re-running this cell does not
# duplicate annotations.
coco_data_val['annotations'] = [
    ann for ann in coco_data['annotations']
    if ann['image_id'] in val_image_ids
]

panoptic_data_val['annotations'] = [
    ann for ann in panoptic_data['annotations']
    if ann['image_id'] in val_image_ids
]

In [11]:
# Write Files

# Both dumps use the same settings: an explicit UTF-8 encoding, and the
# `convert` hook so NumPy scalars are serialised in either file. The hook
# is only invoked for values the encoder cannot handle itself, so it does
# not affect data that already consists of plain JSON types.
with open(os.path.join(ROOT_DIR, VAL_COCO_FILE), "w", encoding="utf-8") as coco_file_val:
    json.dump(coco_data_val, coco_file_val, default=convert)

with open(os.path.join(ROOT_DIR, VAL_PANOPTIC_FILE), "w", encoding="utf-8") as panoptic_file_val:
    json.dump(panoptic_data_val, panoptic_file_val, default=convert)