### Merge object-detection and determiner label files

In [4]:
import os 
import cv2
import pandas as pd 
import math
from collections import defaultdict
import inflection as inf
import json

#######################################################################################
############################# CHANGE FILENAMES HERE ###################################
#######################################################################################

# Input locations. `ann_dir` holds both label exports and is also where the
# merged output file is written at the end of this cell.
ann_dir = "../../annotations/real/smallv1"
objdet_filename = "iccv-real-small-objdet.json"
det_filename = "iccv-real-small-detlabels.json"
orig_annotations_filepath = "../../annotations/annotations_val.json"

# Load each JSON file inside a context manager so the file handle is closed
# as soon as parsing finishes instead of being left to the garbage collector.
with open(os.path.join(ann_dir, objdet_filename)) as f:
    objdet_json = json.load(f)
with open(os.path.join(ann_dir, det_filename)) as f:
    det_json = json.load(f)
with open(orig_annotations_filepath) as f:
    orig_json = json.load(f)

# Category list taken from the original annotation file, plus a lookup from
# category name to its id in that file.
categories = orig_json["categories"]
orig_cat_map = {entry["name"]: entry["id"] for entry in categories}
print(categories)

# Pull the pieces of the two label exports that the rest of the cell uses.
images = []
objdet_images = objdet_json["images"]
det_images = det_json["images"]
det_categories = det_json["categories"]
oracle_annotations = objdet_json["annotations"]

# Category id -> category name, built from the determiner label file.
cat_map = {entry["id"]: entry["name"] for entry in det_categories}

# Image id -> position of that image inside `images`.
id_map = {}

for position, record in enumerate(det_images):
    # Work on a shallow copy so the dicts inside det_json are not modified.
    entry = record.copy()
    key = entry["id"]
    # Keep only the text before the first "-" of the exported file name and
    # rewrite it as "images/<stem>.jpg".
    stem = entry["file_name"].split("-")[0]
    entry["file_name"] = "images/" + stem + ".jpg"
    id_map[key] = position
    images.append(entry)

# Attach a caption to each labelled image and remap every determiner
# annotation onto the category ids of the original annotation file.
# Note: the annotation dicts from det_json are modified in place.
annotations = []
for det_ann in det_json["annotations"]:
    slot = id_map[det_ann["image_id"]]
    caption = cat_map[det_ann["category_id"]]
    if "juice" in caption:
        # Any label mentioning "juice" is rewritten to
        # "<first word> papaya juice".
        first_word = caption.split()[0]
        caption = first_word + " papaya juice"
    images[slot]["caption"] = caption
    # Drop the first word of the caption (presumably the determiner --
    # confirm against the label set) and singularize the remainder to get
    # the category name used in the original annotations.
    remainder = " ".join(caption.split()[1:])
    name = inf.singularize(remainder)
    det_ann["category_id"] = orig_cat_map[name]
    annotations.append(det_ann)

# Keep only the images that ended up with a non-empty "caption" entry.
new_images = [image for image in images if image.get("caption")]

# Remap the object-detection ("oracle") annotations, in place, onto the
# category ids of the original annotation file.
# NOTE(review): cat_map is built from the determiner file's categories, while
# these annotations come from the object-detection file -- confirm that both
# exports share the same category ids.
for oracle_ann in oracle_annotations:
    label = cat_map[oracle_ann["category_id"]]
    # Any label mentioning "juice" is treated as "papaya juice".
    label = "papaya juice" if "juice" in label else label
    oracle_ann["category_id"] = orig_cat_map[inf.singularize(label)]
    
# Assemble the merged annotation structure: captioned images, remapped
# determiner annotations, remapped object-detection annotations, and the
# category list of the original annotation file.
new_annotations = {
    "images": new_images,
    "annotations": annotations,
    "input_oracle_annotations": oracle_annotations,
    "categories": categories,
}

# Write through a context manager so the output is flushed and the handle is
# closed as soon as the dump finishes.
with open(os.path.join(ann_dir, "merged.json"), "w") as f:
    json.dump(new_annotations, f)

[{'id': 0, 'name': 'apple', 'supercategory': 'countable'}, {'id': 1, 'name': 'onion', 'supercategory': 'countable'}, {'id': 2, 'name': 'avocado', 'supercategory': 'countable'}, {'id': 3, 'name': 'orange', 'supercategory': 'countable'}, {'id': 4, 'name': 'egg', 'supercategory': 'countable'}, {'id': 5, 'name': 'carrot', 'supercategory': 'countable'}, {'id': 6, 'name': 'cucumber', 'supercategory': 'countable'}, {'id': 7, 'name': 'lemon', 'supercategory': 'countable'}, {'id': 8, 'name': 'garlic', 'supercategory': 'countable'}, {'id': 9, 'name': 'banana', 'supercategory': 'countable'}, {'id': 10, 'name': 'grape juice', 'supercategory': 'uncountable_liquid'}, {'id': 11, 'name': 'cranberry juice', 'supercategory': 'uncountable_liquid'}, {'id': 12, 'name': 'blueberry juice', 'supercategory': 'uncountable_liquid'}, {'id': 13, 'name': 'papaya juice', 'supercategory': 'uncountable_liquid'}, {'id': 14, 'name': 'vegetable juice', 'supercategory': 'uncountable_liquid'}, {'id': 15, 'name': 'tray', 's