In [2]:
import os
import json
import random
import numpy as np

# Seed Python's `random` module so the later `random.sample` draw of
# validation image ids is repeatable from a fresh kernel.
random.seed(10)

def convert(o):
    """Fallback serializer meant to be passed as ``default=`` to ``json.dump``.

    Args:
        o: An object the JSON encoder could not serialize on its own.

    Returns:
        The native Python scalar for ``o`` when ``o`` is a NumPy scalar
        (any ``np.generic`` instance), obtained through ``o.item()``.

    Raises:
        TypeError: If ``o`` is not a NumPy scalar. The message names the
            offending type so a failed dump can be diagnosed.
    """
    if isinstance(o, np.generic):
        return o.item()
    raise TypeError(f"Object of type {type(o).__name__} is not JSON serializable")

In [3]:
# Global Files

# Directory the JSON annotation files are located in; file names below are
# joined onto it with os.path.join.
ROOT_DIR = './data'

# Full-dataset annotation files, loaded in the "Read Files" cell.
COCO_FILE = 'coco.json'
PANOPTIC_FILE = 'panoptic.json'

# Target name of the AAC-blocks-only export of PANOPTIC_FILE
# (only referenced from a commented-out cell).
AAC_PANOPTIC_FILE = 'aac_panoptic.json'

# Validation-split file names (only referenced from commented-out cells).
VAL_COCO_FILE = 'val_coco.json'
VAL_PANOPTIC_FILE = 'val_panoptic.json'

# Target name of the AAC-blocks-only export of VAL_PANOPTIC_FILE
# (only referenced from a commented-out cell).
AAC_VAL_PANOPTIC_FILE = 'aac_val_panoptic.json'

In [None]:
# Read Files

# JSON text is UTF-8 by specification. Pass the encoding explicitly so that
# parsing does not depend on the platform's locale default (e.g. cp1252 on
# Windows), which would corrupt or reject non-ASCII content.
with open(os.path.join(ROOT_DIR, COCO_FILE), "r", encoding="utf-8") as coco_file:
    coco_data = json.load(coco_file)

with open(os.path.join(ROOT_DIR, PANOPTIC_FILE), "r", encoding="utf-8") as panoptic_file:
    panoptic_data = json.load(panoptic_file)

In [None]:
# Number of image records in the COCO file.
total_images = len(coco_data['images'])

# Draw 500 distinct integers from 1..total_images (inclusive), without
# replacement, to serve as the validation image ids.
# NOTE(review): this assumes image ids are exactly 1..total_images — confirm.
val_images = random.sample(range(1, total_images + 1), k=500)

# Display: dataset size, validation size, and the ratio between them.
(total_images, len(val_images), total_images / len(val_images))

In [None]:
# Render the sampled ids as a single string for display. The long commented
# list that follows looks like a saved copy of this output — TODO confirm.
str(val_images)

# [9362, 534, 7027, 7907, 9472, 244, 3377, 7579, 8050, 4547, 2626, 564, 8531, 8030, 5371, 1247, 4096, 5918, 731, 6888, 2273, 9883, 5818, 6253, 6902, 4647, 4298, 7485, 2862, 4967, 5941, 2179, 7487, 3923, 7204, 10049, 6148, 726, 9551, 67, 3861, 2196, 3195, 4963, 8788, 5999, 3936, 5151, 8995, 7384, 7144, 7701, 1065, 9578, 5320, 8224, 2563, 3675, 6765, 3906, 605, 521, 8137, 4935, 9940, 1181, 8741, 1327, 2453, 6299, 9294, 6131, 9852, 1851, 1583, 7235, 2726, 3129, 5713, 7099, 6785, 7301, 4022, 4501, 2352, 10114, 8551, 2924, 1958, 4375, 7467, 4958, 2703, 2856, 2886, 7867, 5695, 5375, 7123, 3665, 83, 8927, 716, 5434, 5234, 3976, 1310, 4289, 7341, 6633, 9550, 2593, 6400, 8123, 3956, 8608, 4452, 8522, 9865, 8191, 1029, 2725, 8019, 7561, 6573, 2225, 6889, 8869, 9584, 5722, 8826, 6355, 8027, 2717, 9155, 7232, 1617, 6803, 579, 47, 7186, 8773, 1064, 829, 5729, 1520, 2491, 1782, 9966, 7440, 7948, 2283, 7515, 7152, 8211, 7326, 5624, 4246, 7508, 6936, 5381, 8515, 8576, 2518, 3889, 5294, 10144, 540, 3205, 9643, 9427, 7848, 409, 1907, 4855, 7133, 4040, 4741, 1459, 758, 9786, 9022, 1731, 4678, 2665, 7821, 2315, 5915, 9751, 6565, 3146, 5733, 2991, 1307, 9691, 3190, 6992, 3404, 1573, 5055, 7599, 8560, 6939, 7334, 9102, 3417, 4357, 9790, 5970, 1900, 127, 3434, 6945, 9991, 6693, 6257, 3273, 1150, 906, 5702, 4173, 5902, 7486, 6950, 3173, 4629, 7647, 4817, 8299, 8183, 8316, 4320, 6629, 7236, 5596, 5718, 4540, 7320, 7816, 9621, 7863, 1376, 8845, 6014, 6640, 2761, 9879, 6812, 9245, 1073, 9758, 5017, 268, 9696, 5369, 5278, 1211, 6497, 10000, 6128, 4626, 9925, 3054, 3947, 2762, 5042, 1564, 9112, 874, 3603, 9340, 6909, 3142, 9332, 9370, 7198, 9723, 263, 6431, 3299, 5560, 9409, 3126, 8566, 9242, 5353, 781, 9475, 822, 599, 9260, 5996, 6505, 5318, 9850, 2809, 2585, 1924, 9930, 5660, 2692, 511, 8794, 8375, 5724, 1446, 6512, 7197, 9469, 5697, 978, 2385, 7287, 7714, 9992, 7400, 7241, 9899, 2869, 5797, 6940, 5626, 8402, 7511, 6361, 3185, 1590, 7093, 2500, 678, 3072, 3116, 7025, 9161, 6163, 818, 9919, 
850, 2712, 5095, 3659, 812, 9503, 2896, 1633, 9250, 5906, 3041, 4959, 701, 6485, 9799, 1274, 5659, 3194, 8056, 2668, 2759, 6646, 7672, 5379, 612, 2837, 2887, 9526, 6892, 2509, 8411, 6422, 1545, 6498, 1940, 3852, 1751, 6925, 5093, 4751, 8743, 1624, 5663, 8550, 7786, 9959, 4007, 8725, 4762, 201, 2590, 6331, 3751, 1588, 8361, 7772, 1634, 9453, 6540, 9549, 3570, 4708, 5824, 1143, 3089, 9145, 8704, 428, 1227, 6557, 513, 284, 7055, 5546, 2920, 777, 5873, 5316, 7801, 2804, 309, 3369, 2086, 4739, 3303, 4046, 4037, 6330, 10072, 8930, 6861, 9030, 510, 2054, 90, 2019, 454, 902, 2915, 2043, 6313, 1105, 240, 7111, 2996, 6910, 784, 9039, 2778, 865, 6166, 9473, 7522, 7460, 5502, 3044, 566, 6535, 7534, 8009, 5974, 1156, 2279, 4347, 4149, 2885, 1494, 7549, 218, 2517, 4556, 1288, 8600, 2307, 1713, 8800, 1973, 4715, 10131, 5483, 58, 9434, 4255, 8479, 7007, 8352, 2392, 973, 928, 9975, 989, 3457, 8775, 3505, 2403, 5011, 6864, 2473, 442, 6130, 8458, 1894, 7829]

In [None]:
# Top-level keys of each annotation file (COCO first, then panoptic).
(
    coco_data.keys(),
    panoptic_data.keys(),
)

In [None]:
# Keys of the first annotation record in each file (panoptic first, then COCO).
(
    panoptic_data['annotations'][0].keys(),
    coco_data['annotations'][0].keys(),
)

In [None]:
# Skeletons for the validation split: metadata is shared (by reference) with
# the source dataset, while images/annotations start empty and are filled in
# by the cells below.
coco_data_val = {key: coco_data[key] for key in ('licenses', 'info', 'categories')}
coco_data_val.update(images=[], annotations=[])

panoptic_data_val = {key: panoptic_data[key] for key in ('licenses', 'info', 'categories')}
panoptic_data_val.update(images=[], annotations=[])

In [None]:
# Select the image records whose id was sampled for validation.
#
# A set gives O(1) membership tests; the original scanned the 500-element
# list once per image. Assigning fresh lists (instead of appending) keeps the
# cell idempotent: re-running it no longer duplicates entries.
val_image_ids = set(val_images)

selected_images = [im for im in coco_data['images'] if im['id'] in val_image_ids]

# Both validation datasets take their image records from the COCO file; each
# gets its own list object holding the same record dicts.
coco_data_val['images'] = selected_images
panoptic_data_val['images'] = list(selected_images)

In [None]:
# Select the annotations that belong to a sampled validation image.
#
# A set gives O(1) membership tests; the original scanned the 500-element
# list once per annotation. Assigning fresh lists (instead of appending) keeps
# the cell idempotent: re-running it no longer duplicates entries.
val_image_ids = set(val_images)

coco_data_val['annotations'] = [
    ann for ann in coco_data['annotations'] if ann['image_id'] in val_image_ids
]

panoptic_data_val['annotations'] = [
    ann for ann in panoptic_data['annotations'] if ann['image_id'] in val_image_ids
]

In [None]:
# # Write Files

# with open(os.path.join(ROOT_DIR, VAL_COCO_FILE), "w") as coco_file_val:
#     json.dump(coco_data_val, coco_file_val)
    
# with open(os.path.join(ROOT_DIR, VAL_PANOPTIC_FILE), "w") as panoptic_file_val:
#     json.dump(panoptic_data_val, panoptic_file_val, default=convert)

## Prepare Dataset for AAC Blocks Class

In [None]:
# ## Prepare only one class DS

# with open(os.path.join(ROOT_DIR, PANOPTIC_FILE), "r") as panoptic_file:
#     panoptic_data = json.load(panoptic_file)
    
# annotations = []
# images = []

# # Remove those images where we don't have any segmentations
# for ann, img in zip(panoptic_data["annotations"], panoptic_data["images"]):
#     if "aac_blocks" in ann["file_name"] and len(ann["segments_info"]) > 0:
#         annotations.append(ann)
#         images.append(img)

# panoptic_data["images"] = images
# panoptic_data["annotations"] = annotations

# with open(os.path.join(ROOT_DIR, AAC_PANOPTIC_FILE), "w") as aac_panoptic_file:
#     json.dump(panoptic_data, aac_panoptic_file, default=convert)

In [None]:
# ## Prepare only one class DS

# with open(os.path.join(ROOT_DIR, VAL_PANOPTIC_FILE), "r") as val_panoptic_file:
#     val_panoptic_data = json.load(val_panoptic_file)

# annotations = []
# images = []

# # Remove those images where we don't have any segmentations
# for ann, img in zip(val_panoptic_data["annotations"], val_panoptic_data["images"]):
#     if "aac_blocks" in ann["file_name"] and len(ann["segments_info"]) > 0:
#         annotations.append(ann)
#         images.append(img)

# val_panoptic_data["images"] = images
# val_panoptic_data["annotations"] = annotations

# with open(os.path.join(ROOT_DIR, AAC_VAL_PANOPTIC_FILE), "w") as aac_val_panoptic_file:
#     json.dump(val_panoptic_data, aac_val_panoptic_file, default=convert)

## Rename iscroud to iscrowd

In [4]:
# # Rename iscroud to iscrowd

# with open(os.path.join(ROOT_DIR, VAL_PANOPTIC_FILE), "r") as val_panoptic_file:
#     val_panoptic_data = json.load(val_panoptic_file)
    
# for ann in val_panoptic_data['annotations']:
#     for segm in ann["segments_info"]:
#         segm['iscrowd'] = segm['iscroud']
        
# with open(os.path.join(ROOT_DIR, VAL_PANOPTIC_FILE), "w") as val_panoptic_file:
#     json.dump(val_panoptic_data, val_panoptic_file, default=convert)

In [6]:
# with open(os.path.join(ROOT_DIR, PANOPTIC_FILE), "r") as panoptic_file:
#     panoptic_data = json.load(panoptic_file)
    
# for ann in panoptic_data['annotations']:
#     for segm in ann["segments_info"]:
#         segm['iscrowd'] = segm['iscroud']
        
# with open(os.path.join(ROOT_DIR, PANOPTIC_FILE), "w") as panoptic_file:
#     json.dump(panoptic_data, panoptic_file, default=convert)

## Remove annotations and images whose `segments_info` is empty (`[]`)

In [9]:
# # Remove Empty Segmentation From Dataset

# filtered_ann = []
# filtered_im = []

# with open(os.path.join(ROOT_DIR, VAL_PANOPTIC_FILE), "r") as val_panoptic_file:
#     val_panoptic_data = json.load(val_panoptic_file)
    
# for ann, im in zip(val_panoptic_data['annotations'], val_panoptic_data['images']):
#     if len(ann["segments_info"]):
#         filtered_ann.append(ann)
#         filtered_im.append(im)
#     else:
#         print(im)
        
# val_panoptic_data['annotations'], val_panoptic_data['images'] = filtered_ann, filtered_im
        
# with open(os.path.join(ROOT_DIR, VAL_PANOPTIC_FILE), "w") as val_panoptic_file:
#     json.dump(val_panoptic_data, val_panoptic_file, default=convert)

In [12]:
# # Remove Empty Segmentation From Dataset

# filtered_ann = []
# filtered_im = []

# with open(os.path.join(ROOT_DIR, PANOPTIC_FILE), "r") as panoptic_file:
#     panoptic_data = json.load(panoptic_file)
    
# for ann, im in zip(panoptic_data['annotations'], panoptic_data['images']):
#     if len(ann["segments_info"]):
#         filtered_ann.append(ann)
#         filtered_im.append(im)
#     else:
#         print(im)
        
# panoptic_data['annotations'], panoptic_data['images'] = filtered_ann, filtered_im
        
# with open(os.path.join(ROOT_DIR, PANOPTIC_FILE), "w") as panoptic_file:
#     json.dump(panoptic_data, panoptic_file, default=convert)

## Update Masks with correct colour codes

In [None]:
# import os
# import json
# from panopticapi.utils import rgb2id, IdGenerator, id2rgb
# from categories_meta import NEW_CATEGORIES
# import numpy as np

# def convert(o):
#     if isinstance(o, np.generic): return o.item()  
#     raise TypeError

# id2catdetail = {category['id']: category for category in NEW_CATEGORIES}

# id_generator = IdGenerator(id2catdetail)

# with open(os.path.join(ROOT_DIR, VAL_PANOPTIC_FILE), "r") as val_panoptic_file:
#     val_panoptic_data = json.load(val_panoptic_file)
    
# annotations = []
# images = []

# for ann, img in zip(val_panoptic_data['annotations'], val_panoptic_data['images']):
#     # Check if both image names are same
#     assert ann['file_name'].split('.')[0] == img['file_name'].split('.')[0]
    
#     # Read original Mask Image
#     panoptic_seg = np.array(Image.open(f"../data/masks_orignal/{ann['file_name']}"), dtype=np.uint8).copy()
    
#     # Convert Mask RGB image to id array
#     panoptic_seg_id = rgb2id(panoptic_seg)
    
#     # Create a new empty id array
#     panoptic_seg_id_corrected = np.zeros(panoptic_seg_id.shape)

#     # All unique segments in original id image
#     unique_panoptic_seg_id = np.unique(panoptic_seg_id)
    
#     # All segments for which masks are available
#     segms_corrected = []

#     # Finally we get id for each mask individually
#     for segm in ann["segments_info"]:
#         # Original Segment id
#         segment_id = segm["segment_id"]
        
#         if segment_id in unique_panoptic_seg_id:
#             # Get new Segment Id
#             proper_id = id_generator.get_id(segm['category_id'])
#             # Populate these new segments id
#             panoptic_seg_id_corrected[panoptic_seg_id==segm["segment_id"]] = proper_id
#             # Update segm annotation json
#             segm['id'] = proper_id
#             # if segm id is less than max value for original mask print it
#             if proper_id < panoptic_seg_id.max()+1:
#                 print(proper_id)
                
#             # Append this Segment
#             segms_corrected.append(segm)
    
#     # Update original segments_info
#     ann["segments_info"] = segms_corrected
    
#     # Only consider images where segment info is correctly available
#     if len(segms_corrected):
#         annotations.append(ann)
#         images.append(img)
    
#     # Save Image
#     Image.fromarray(id2rgb(panoptic_seg_id_corrected), 'RGB').save(f"../data/val_panoptic/{ann['file_name']}")

# val_panoptic_data['annotations'], val_panoptic_data['images'] = annotations, images

# with open(os.path.join(ROOT_DIR, VAL_PANOPTIC_FILE), "w") as val_panoptic_file:
#     json.dump(val_panoptic_data, val_panoptic_file, default=convert)

In [None]:
# import os
# import json
# from panopticapi.utils import rgb2id, IdGenerator, id2rgb
# from categories_meta import NEW_CATEGORIES
# import numpy as np

# def convert(o):
#     if isinstance(o, np.generic): return o.item()  
#     raise TypeError

# id2catdetail = {category['id']: category for category in NEW_CATEGORIES}

# id_generator = IdGenerator(id2catdetail)

# with open(os.path.join(ROOT_DIR, PANOPTIC_FILE), "r") as panoptic_file:
#     panoptic_data = json.load(panoptic_file)
    
# annotations = []
# images = []

# for ann, img in zip(panoptic_data['annotations'], panoptic_data['images']):
#     # Check if both image names are same
#     assert ann['file_name'].split('.')[0] == img['file_name'].split('.')[0]
    
#     # Read original Mask Image
#     panoptic_seg = np.array(Image.open(f"../data/masks_orignal/{ann['file_name']}"), dtype=np.uint8).copy()
    
#     # Convert Mask RGB image to id array
#     panoptic_seg_id = rgb2id(panoptic_seg)
    
#     # Create a new empty id array
#     panoptic_seg_id_corrected = np.zeros(panoptic_seg_id.shape)

#     # All unique segments in original id image
#     unique_panoptic_seg_id = np.unique(panoptic_seg_id)
    
#     # All segments for which masks are available
#     segms_corrected = []

#     # Finally we get id for each mask individually
#     for segm in ann["segments_info"]:
#         # Original Segment id
#         segment_id = segm["segment_id"]
        
#         if segment_id in unique_panoptic_seg_id:
#             # Get new Segment Id
#             proper_id = id_generator.get_id(segm['category_id'])
#             # Populate these new segments id
#             panoptic_seg_id_corrected[panoptic_seg_id==segm["segment_id"]] = proper_id
#             # Update segm annotation json
#             segm['id'] = proper_id
#             # if segm id is less than max value for original mask print it
#             if proper_id < panoptic_seg_id.max()+1:
#                 print(proper_id)
                
#             # Append this Segment
#             segms_corrected.append(segm)
    
#     # Update original segments_info
#     ann["segments_info"] = segms_corrected
    
#     # Only consider images where segment info is correctly available
#     if len(segms_corrected):
#         annotations.append(ann)
#         images.append(img)
    
#     # Save Image
#     Image.fromarray(id2rgb(panoptic_seg_id_corrected), 'RGB').save(f"../data/panoptic/{ann['file_name']}")

# panoptic_data['annotations'], panoptic_data['images'] = annotations, images

# with open(os.path.join(ROOT_DIR, PANOPTIC_FILE), "w") as panoptic_file:
#     json.dump(panoptic_data, panoptic_file, default=convert)