In [1]:
import json
import os
from pathlib import Path
import glob
from copy import deepcopy
import shutil
import random

In [30]:
# Collect every '.json' file found below the dataset root.
# glob returns matches in arbitrary, filesystem-dependent order, and the
# global image ids assigned when the files are merged follow the order of this
# list, so the paths are sorted to make re-runs reproducible.
json_paths = sorted(
    file_path
    for file_path in map(Path, glob.glob('./detr/datasets/Dataset/**', recursive=True))
    if file_path.suffix == '.json'
)


In [31]:
# Merge every per-class JSON in `json_paths` into one combined JSON.
#
# The template file supplies all top-level keys other than 'images' and
# 'annotations'.
# NOTE(review): this means shared metadata (presumably the category list) is
# taken from this single file only -- confirm every class JSON agrees on it.
TEMPLATE_JSON_PATH = Path('./detr/datasets/Dataset/aac_blocks/annotations/aac_blocks.json')

# Separator between the class-folder name and the original file name.
# Images and annotations must use the SAME separator: the copy cells further
# down recover the class folder with `.split('_____')`.  Annotation names were
# previously joined with only three underscores, which that split cannot parse.
CLASS_NAME_SEPARATOR = '_____'

with open(TEMPLATE_JSON_PATH, encoding='utf-8') as f:
    initial_json = json.load(f)

COMBINED_JSON = deepcopy(initial_json)
IMAGE_ID = 1  # next free global image id (1-based)

IMAGES = []
ANNOTATIONS = []
for json_path in json_paths:
    with open(json_path, encoding='utf-8') as f:
        # Freshly loaded data is private to this iteration, so it can be
        # edited in place without a defensive copy.
        json_data = json.load(f)

    # The class name is the folder two levels above the JSON file.
    class_name = json_path.parent.parent.name

    # Image ids are only unique within one file; map them to global ids.
    localid2globalid = {}
    for image_det in json_data['images']:
        localid2globalid[image_det['id']] = IMAGE_ID
        image_det['id'] = IMAGE_ID
        IMAGE_ID += 1

        image_det['file_name'] = class_name + CLASS_NAME_SEPARATOR + image_det['file_name']

        IMAGES.append(image_det)

    for ann_det in json_data['annotations']:
        # Raises KeyError if an annotation points at an image id that is not
        # listed in the same file.
        ann_det['image_id'] = localid2globalid[ann_det['image_id']]
        ann_det['file_name'] = class_name + CLASS_NAME_SEPARATOR + ann_det['file_name']

        ANNOTATIONS.append(ann_det)

COMBINED_JSON['images'] = IMAGES
COMBINED_JSON['annotations'] = ANNOTATIONS


In [32]:
# Persist the merged JSON.  The context manager closes the file even when
# serialisation raises part-way through.
with open('./detr/datasets/combined.json', "w", encoding='utf-8') as out_file:
    json.dump(COMBINED_JSON, out_file, indent = 4)

## Segmentation JSON to BBox JSON

In [33]:
# Flatten the combined annotations into one annotation per object: every entry
# of an annotation's 'segments_info' becomes its own record with a fresh,
# globally unique 1-based id and the id of the image it belongs to.
with open(Path('./detr/datasets/combined.json'), encoding='utf-8') as f:
    json_data = json.load(f)

annotations = []
ANN_ID = 1
for ann in json_data['annotations']:
    for seg in ann['segments_info']:
        obj_dic = deepcopy(seg)
        obj_dic['id'] = ANN_ID
        ANN_ID += 1
        obj_dic['image_id'] = ann['image_id']
        annotations.append(obj_dic)

# Shallow copy of the top level with 'annotations' swapped out.  The original
# annotations are discarded anyway, so deep-copying the whole structure first
# would only cost time and memory; key order is unchanged.
json_data_bbox = {**json_data, 'annotations': annotations}

with open('./detr/datasets/combined_bbox.json', "w", encoding='utf-8') as out_file:
    json.dump(json_data_bbox, out_file, indent = 4)

## Creating Test and Train JSONs

In [34]:
from sklearn.model_selection import train_test_split

# Load the bbox JSON; the context manager closes the file even if parsing fails.
with open('./detr/datasets/combined_bbox.json', encoding='utf-8') as f:
    new_json_data = json.load(f)

# Split a copy of the image records: they are renumbered in place afterwards,
# while new_json_data must keep the original ids so the annotations can still
# be matched to their images.
all_images = deepcopy(new_json_data['images'])

# 90 % train / 10 % test, with a fixed seed for a repeatable split.
train_images, test_images = train_test_split(all_images, test_size=0.1, random_state=42)


# Per-split lookup table: original image id -> new 1-based id within the split.
actual_image2id = {'train': {}, 'test': {}}


def new_ids(list_data, test_train):
    """Renumber the entries of ``list_data`` consecutively, starting at 1.

    Each entry's 'id' is overwritten in place and the old -> new id pair is
    recorded in ``actual_image2id[test_train]``.

    Args:
        list_data: list of dicts, each carrying an 'id' key.
        test_train: key into ``actual_image2id`` ('train' or 'test').

    Returns:
        The same ``list_data`` object, now holding the new ids.
    """
    for position, entry in enumerate(list_data, start=1):
        actual_image2id[test_train][entry['id']] = position
        entry['id'] = position
    return list_data

# Renumber each split independently; the train split is processed first.
train_data, test_data = new_ids(train_images, 'train'), new_ids(test_images, 'test')

In [35]:
# Display the number of image records in the train and test splits.
len(train_data), len(test_data)

(9008, 1001)

In [36]:
# Build the per-split JSONs: same top-level content as the bbox JSON, but with
# the split's renumbered images and only the annotations that belong to them.
train_json, test_json = deepcopy(new_json_data), deepcopy(new_json_data)
train_json['images'], test_json['images'] = train_data, test_data

train_annotations, test_annotations = [], []

# Checked in this order: 'train' first, then 'test'.
split_buckets = (('train', train_annotations), ('test', test_annotations))

for ann in deepcopy(new_json_data['annotations']):
    original_image_id = ann['image_id']
    for split_name, bucket in split_buckets:
        id_lookup = actual_image2id[split_name]
        if original_image_id in id_lookup:
            # Point the annotation at the image's new id within its split.
            ann['image_id'] = id_lookup[original_image_id]
            bucket.append(ann)
            break
    else:
        # The annotation's image is in neither split.
        print('SOMETHING IS WRONG', ann)

train_json['annotations'] = train_annotations
test_json['annotations'] = test_annotations

In [37]:
# Write the train and test JSONs.  Each file is closed by its context manager
# even when serialisation raises.
with open('./detr/datasets/combined_train.json', "w", encoding='utf-8') as out_file:
    json.dump(train_json, out_file, indent = 4)

with open('./detr/datasets/combined_test.json', "w", encoding='utf-8') as out_file:
    json.dump(test_json, out_file, indent = 4)

## Copying All Images into One Directory

In [38]:
# Load the bbox JSON; the context manager closes the file even if parsing fails.
with open('./detr/datasets/combined_bbox.json', encoding='utf-8') as f:
    final_json_data = json.load(f)

In [39]:
# Copy every image from its per-class folder into one flat directory, keeping
# the class-prefixed name from the JSON as the new file name.
IMAGES_OUT_DIR = 'detr/datasets/images'
# shutil.copy does not create missing directories.
os.makedirs(IMAGES_OUT_DIR, exist_ok=True)

# The records are only read here, so no copy of the JSON is needed.
for image_det in final_json_data['images']:
    # maxsplit=1: only the first separator divides class from file name, so an
    # image name that itself contains '_____' no longer breaks the unpacking.
    class_name, image_name = image_det['file_name'].split('_____', 1)
    from_path = f'detr/datasets/Dataset/{class_name}/images/{image_name}'
    to_path = f"{IMAGES_OUT_DIR}/{image_det['file_name']}"

    shutil.copy(from_path, to_path)

In [None]:
# Copy every annotation file from its per-class folder into one flat directory.
#
# This reads the combined (panoptic) JSON rather than the bbox JSON: the bbox
# JSON's annotations are the flattened 'segments_info' entries, not the
# per-image records that carry 'file_name'.
with open('./detr/datasets/combined.json', encoding='utf-8') as f:
    panoptic_json = json.load(f)

ANNOTATIONS_OUT_DIR = 'detr/datasets/annotations'
# shutil.copy does not create missing directories.
os.makedirs(ANNOTATIONS_OUT_DIR, exist_ok=True)

for ann_det in panoptic_json['annotations']:
    prefixed_name = ann_det['file_name']
    # A combined JSON may join class name and file name with either five or
    # three underscores; accept both so an already-written file still works.
    separator = '_____' if '_____' in prefixed_name else '___'
    class_name, image_name = prefixed_name.split(separator, 1)
    from_path = f'detr/datasets/Dataset/{class_name}/annotations/{image_name}'
    to_path = f"{ANNOTATIONS_OUT_DIR}/{prefixed_name}"

    shutil.copy(from_path, to_path)

In [5]:
# Copy a reproducible random sample of test images into a separate folder.
with open('./detr/datasets/combined_test.json', encoding='utf-8') as f:
    final_json_data = json.load(f)

test_image_records = final_json_data['images']
total_len = len(test_image_records)

SAMPLE_SIZE = 300
SAMPLE_SEED = 42

# A dedicated, seeded generator makes the sample repeatable without reseeding
# the global `random` state.  Sampling 500 and then 300 of those selects the
# same kind of uniform 300-subset as sampling 300 directly, so one draw is
# enough.  The size is clamped because random.sample raises ValueError when
# asked for more items than the population holds.
sample_rng = random.Random(SAMPLE_SEED)
random_list = sample_rng.sample(test_image_records, min(SAMPLE_SIZE, total_len))

SAMPLE_OUT_DIR = 'detr/datasets/sample_test_images'
# shutil.copy does not create missing directories.
os.makedirs(SAMPLE_OUT_DIR, exist_ok=True)

for image_det in random_list:
    # maxsplit=1 keeps an image name that itself contains '_____' whole.
    class_name, image_name = image_det['file_name'].split('_____', 1)
    from_path = f'detr/datasets/Dataset/{class_name}/images/{image_name}'
    to_path = f"{SAMPLE_OUT_DIR}/{image_det['file_name']}"

    shutil.copy(from_path, to_path)