### Merge object-detection and determiner label files

In [13]:
import os 
import cv2
import pandas as pd 
import math
from collections import defaultdict
import inflection as inf
import json

#######################################################################################
############################# CHANGE FILENAMES HERE ###################################
#######################################################################################

# Directory holding the two label exports that get merged.
ann_dir = "../../annotations/real/smallv1"
# Object-detection label file (inside ann_dir).
objdet_filename = "iccv-real-small-objdet.json"
# Determiner label file (inside ann_dir).
det_filename = "iccv-real-small-detlabels.json"
# Original annotation file; its "categories" list is reused for the merge.
orig_annotations_filepath = "../../annotations/annotations_val.json"


def _load_json(path):
    """Parse the JSON file at *path*, closing the file handle afterwards."""
    # A context manager releases the handle deterministically instead of
    # leaving it to the garbage collector.
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


objdet_json = _load_json(os.path.join(ann_dir, objdet_filename))
det_json = _load_json(os.path.join(ann_dir, det_filename))
orig_json = _load_json(orig_annotations_filepath)

categories = orig_json["categories"]

# Category name -> category id, as defined by the original annotation file.
orig_cat_map = {entry["name"]: entry["id"] for entry in categories}

objdet_images = objdet_json["images"]
det_images = det_json["images"]
det_categories = det_json["categories"]
oracle_annotations = objdet_json["annotations"]

# Determiner-file category id -> category name (used as the caption text).
cat_map = {entry["id"]: entry["name"] for entry in det_categories}
counts = defaultdict(int)

# Copy every determiner image record, rewrite its file name to
# "images/<text before the first '-'>.jpg", and remember the list position
# of each image id so annotations can find their image again.
images = []
id_map = {}
for position, source_image in enumerate(det_images):
    record = source_image.copy()
    stem = record["file_name"].split("-")[0]
    record["file_name"] = "images/" + stem + ".jpg"
    id_map[record["id"]] = position
    images.append(record)

# Turn each determiner annotation into (a) a caption stored on its image and
# (b) an annotation whose category id is taken from the original file.
annotations = []
for det_ann in det_json["annotations"]:
    position = id_map[det_ann["image_id"]]
    caption = cat_map[det_ann["category_id"]]
    if "juice" in caption:
        # Any juice caption keeps its first word and becomes "papaya juice".
        first_word = caption.split()[0]
        caption = first_word + " papaya juice"
    images[position]["caption"] = caption

    # Drop the caption's first word and singularize the remainder to obtain
    # the category name to look up in the original file.
    remainder = " ".join(caption.split()[1:])
    name = inf.singularize(remainder)
    det_ann["category_id"] = orig_cat_map[name]
    counts[det_ann["category_id"]] += 1
    annotations.append(det_ann)

# Keep only the images that ended up with a non-empty "caption" entry.
new_images = [image for image in images if image.get("caption")]

oracle_anns_map = defaultdict(list)       # oracle image id -> its annotations
oracle_file_name_map = defaultdict(int)   # file base name -> oracle image id
oracle_id_map = defaultdict(int)          # oracle image id -> index in objdet_images

for i, img in enumerate(objdet_images):
    # Key by base name: every lookup below (and in the merge step) uses the
    # base name, so keying by the full path would silently miss and fall back
    # to the defaultdict's 0 whenever file_name contains a directory.
    oracle_file_name_map[img["file_name"].split("/")[-1]] = img["id"]
    oracle_id_map[img["id"]] = i

# Group the object-detection annotations by the image they belong to.
for ann in oracle_annotations:
    oracle_anns_map[ann["image_id"]].append(ann)

# Normalise each object-detection image record: id resolved through the
# base-name map, file name rewritten to "images/<base name>".
for img in objdet_images:
    file_name = img["file_name"].split("/")[-1]
    img["id"] = oracle_file_name_map[file_name]
    img["file_name"] = "images/" + file_name

# Attach the oracle (object-detection) annotations to every captioned image
# and write the merged annotation file.
#
# Several captioned images can resolve to the same oracle image (see the
# printed counts), so each one gets its OWN COPY of the oracle annotations.
# Mutating and re-appending the shared dicts would leave every annotation
# pointing at only the last image that used it.
oracle_anns_count = defaultdict(int)  # oracle image id -> number of captioned images using it
new_input_oracle_annotations = []
for img in new_images:
    file_name = img["file_name"].split("/")[-1]
    oracle_image_id = oracle_file_name_map[file_name]
    oracle_anns_count[oracle_image_id] += 1
    for ann in oracle_anns_map[oracle_image_id]:
        # NOTE(review): cat_map is built from the determiner file's
        # categories; this assumes the object-detection file shares that
        # category id space -- confirm.
        caption = cat_map[ann["category_id"]]
        if "juice" in caption:
            caption = "papaya juice"
        name = inf.singularize(caption)

        new_ann = dict(ann)  # shallow copy; the source annotation stays untouched
        new_ann["image_id"] = img["id"]
        new_ann["id"] = len(new_input_oracle_annotations)
        new_ann["category_id"] = orig_cat_map[name]
        new_input_oracle_annotations.append(new_ann)

print(oracle_anns_count)

new_annotations = {
    "images": new_images,
    "annotations": annotations,
    # Only the per-image copies built above, not the raw unfiltered list.
    "input_oracle_annotations": new_input_oracle_annotations,
    "categories": categories,
}

# The context manager guarantees the output is flushed and closed.
with open(os.path.join(ann_dir, "merged.json"), "w", encoding="utf-8") as merged_file:
    json.dump(new_annotations, merged_file)

defaultdict(<class 'int'>, {2: 10, 31: 5, 12: 4, 4: 2, 17: 18, 23: 6, 13: 5, 3: 5, 5: 2, 15: 5})


In [14]:
oracle_anns_count

defaultdict(int,
            {2: 10,
             31: 5,
             12: 4,
             4: 2,
             17: 18,
             23: 6,
             13: 5,
             3: 5,
             5: 2,
             15: 5})