In [1]:
# Imports
from pathlib import Path
import glob
import json
import os.path

from gat_vqa.config.gqa import GQASplit, GQAVersion

In [2]:
# Files Loading
def load_and_apply_to_file(name, apply):
    """Load a JSON file (or its chunked equivalent) and feed it to ``apply``.

    If ``name`` is an existing file, it is parsed as JSON and handed to
    ``apply`` once. Otherwise, if the parent directory of ``name`` exists, the
    chunks matching ``<parent>/<stem>/<stem>_*<ext>`` are parsed one by one, in
    sorted path order, and each is handed to ``apply``.

    Args:
        name: Path of the JSON file to load.
        apply: Callable invoked with each parsed JSON document.

    Raises:
        FileNotFoundError: If neither the file nor any chunk can be found.
            (``FileNotFoundError`` is a subclass of ``Exception``, so callers
            catching ``Exception`` keep working.)
    """
    print(name)
    # load standard json file
    if os.path.isfile(name):
        with open(name) as file:
            apply(json.load(file))
        return

    parent = os.path.dirname(name)
    if not os.path.isdir(parent):
        raise FileNotFoundError("Can't find {}".format(name))

    # load file chunks if too big
    stem, ext = os.path.splitext(os.path.basename(name))
    # Escape the literal path parts so that glob metacharacters in directory or
    # file names are not interpreted as wildcards; only "*" is a real wildcard.
    pattern = os.path.join(
        glob.escape(parent),
        "{dir}/{dir}_*{ext}".format(dir=glob.escape(stem), ext=glob.escape(ext)),
    )
    # glob returns matches in arbitrary order; sort for reproducible processing
    chunks = sorted(glob.glob(pattern))
    print(chunks)
    if not chunks:
        # Previously an empty match silently applied nothing.
        raise FileNotFoundError("Can't find {}".format(name))
    for chunk in chunks:
        with open(chunk) as file:
            apply(json.load(file))

class ImageIDExtractor:
    """Callable that accumulates the unique image ids referenced by questions.

    Each call receives a mapping whose values are dicts containing an
    ``'imageId'`` entry. The ids are merged into ``self.ids``, which always
    holds every id exactly once, in first-seen order (deterministic across
    runs, unlike a ``set`` round-trip).
    """

    def __init__(self):
        # Unique image ids collected so far, in first-seen order.
        self.ids = []

    def __call__(self, questions):
        """Merge the image ids of ``questions`` into ``self.ids``.

        Args:
            questions: Mapping whose values are dicts with an ``'imageId'`` key.

        Raises:
            KeyError: If a question entry has no ``'imageId'`` key.
        """
        # dicts preserve insertion order, so this de-duplicates while keeping
        # previously collected ids first and new ids in encounter order.
        merged = dict.fromkeys(self.ids)
        merged.update(
            dict.fromkeys(q_data['imageId'] for q_data in questions.values())
        )
        self.ids = list(merged)

In [3]:
# Extract image ids: for every GQA split, collect the unique image ids
# referenced by its question file(s) and dump them to temp/<tier>_ids.json.
tiers = [split.value for split in GQASplit]

# Make sure the output directory exists so the dumps below do not fail on a
# fresh checkout.
Path("temp").mkdir(parents=True, exist_ok=True)

for tier in tiers:
    question_path = f"data/gqa/questions/{tier}_all_questions.json"
    # A fresh extractor per tier keeps the id lists of the tiers separate.
    id_extractor = ImageIDExtractor()
    load_and_apply_to_file(question_path, id_extractor)
    with open(f"temp/{tier}_ids.json", "w") as f:
        json.dump(id_extractor.ids, f)

data/gqa/questions/train_all_questions.json
['data/gqa/questions/train_all_questions/train_all_questions_0.json', 'data/gqa/questions/train_all_questions/train_all_questions_1.json', 'data/gqa/questions/train_all_questions/train_all_questions_2.json', 'data/gqa/questions/train_all_questions/train_all_questions_3.json', 'data/gqa/questions/train_all_questions/train_all_questions_4.json', 'data/gqa/questions/train_all_questions/train_all_questions_5.json', 'data/gqa/questions/train_all_questions/train_all_questions_6.json', 'data/gqa/questions/train_all_questions/train_all_questions_7.json', 'data/gqa/questions/train_all_questions/train_all_questions_8.json', 'data/gqa/questions/train_all_questions/train_all_questions_9.json']
data/gqa/questions/val_all_questions.json
data/gqa/questions/testdev_all_questions.json
data/gqa/questions/test_all_questions.json
data/gqa/questions/challenge_all_questions.json


In [4]:
# Verify length of ids: report how many image ids were stored for each tier.
tiers = [split.value for split in GQASplit]
for tier in tiers:
    ids_path = f"temp/{tier}_ids.json"
    with open(ids_path, "r") as ids_file:
        id_count = len(json.load(ids_file))
    print(f"{tier}: {id_count}")

train: 74256
val: 10564
testdev: 398
test: 2993
challenge: 1590


In [5]:
# Get GQA vocab: read the preprocessed scene-graph vocabularies.
gqa_sg_root = Path("./artifacts/gqa-preprocessed:v11")
with (gqa_sg_root / "preprocessors.json").open("r") as preprocessors_file:
    data = json.load(preprocessors_file)

# Ground-truth scene-graph lookups for attributes, objects and relations.
scene_graph_vocabs = data["scene_graphs"]
gt_sg_attr_vocab = scene_graph_vocabs["attr_to_index"]
gt_sg_obj_vocab = scene_graph_vocabs["object_to_index"]
gt_sg_rel_vocab = scene_graph_vocabs["rel_to_index"]

In [6]:
# Load image ids and build, per tier, the scene graphs predicted for them.
tiers = [split.value for split in GQASplit]
sgs = {tier: {} for tier in tiers}

# Map every image id to its tier (an id listed in several tiers keeps the
# last tier in `tiers` order).
id_to_tier = {}
for tier in tiers:
    with open(f"temp/{tier}_ids.json", "r") as f:
        tier_ids = json.load(f)
    id_to_tier.update({i: tier for i in tier_ids})

skipped = 0

obj_vocab = None
rel_vocab = None

oov_obj_idxs = None
oov_rel_idxs = None
# Out-of-vocabulary indices start right after the combined sizes of the
# ground-truth object, attribute and relation vocabularies.
oov_offset = len(gt_sg_obj_vocab) + len(gt_sg_attr_vocab) + len(gt_sg_rel_vocab)
print(f"{oov_offset=}")

inference_root = Path("./data/gqa/inference")
# iterdir() yields entries in arbitrary order; sort so that processing (and
# which entry wins on a duplicate image id) is reproducible.
for child in sorted(inference_root.iterdir()):
    if not child.is_dir():
        continue
    print(child)
    with open(child / "custom_data_info.json", "r") as f:
        meta = json.load(f)
    idx_to_img_id = [Path(p).stem for p in meta["idx_to_files"]]

    if obj_vocab is None and rel_vocab is None:
        # First directory: fix the predictor vocab and assign indices to the
        # labels that are missing from the ground-truth vocab.
        obj_vocab = meta["ind_to_classes"]
        rel_vocab = meta["ind_to_predicates"]
        oov_objs = [
            obj for obj in obj_vocab
            if obj not in gt_sg_obj_vocab and obj != "__background__"
        ]
        oov_rels = [
            rel for rel in rel_vocab
            if rel not in gt_sg_rel_vocab and rel != "__background__"
        ]
        oov_obj_idxs = {v: i + oov_offset for i, v in enumerate(oov_objs)}
        # Relation OOV indices continue after the object OOV indices.
        oov_rel_idxs = {
            v: i + oov_offset + len(oov_obj_idxs) for i, v in enumerate(oov_rels)
        }
        print(oov_obj_idxs)
        print(oov_rel_idxs)

    # Assert vocab doesnt change between children directories
    assert obj_vocab == meta["ind_to_classes"]
    assert rel_vocab == meta["ind_to_predicates"]

    for i, img_id in enumerate(idx_to_img_id):
        # Decide whether the image is needed *before* touching its prediction
        # file, so images outside every tier cost no file read / JSON parse.
        tier = id_to_tier.get(img_id)
        if tier is None:
            # print(f"skipping image: {img_id}")
            skipped += 1
            continue

        with open(child / "inference" / f"custom_prediction_{i}.json", "r") as f:
            sg = json.load(f)[str(i)]
        # Ensure there are no __background__ tags (verified and commented out for efficiency)
        # assert 0 not in sg["bbox_labels"] and 0 not in sg["rel_labels"]

        # Keys: bbox, bbox_labels, bbox_scores, rel_pairs, rel_labels, rel_scores
        if img_id in sgs[tier]:
            print(f"Duplicate image id: {img_id}")

        labels = [obj_vocab[label_idx] for label_idx in sg["bbox_labels"]]
        relations = [rel_vocab[label_idx] for label_idx in sg["rel_labels"]]
        sgs[tier][img_id] = {
            "imageId": img_id,
            "boxes": sg["bbox"],
            "box_scores": sg["bbox_scores"],
            "labels": labels,
            "attributes": [],
            "relations": relations,
            "relation_scores": sg["rel_scores"],
            # Transpose the list of pairs into one list per pair position.
            "coos": [list(tup) for tup in zip(*sg["rel_pairs"])],
            # Determine OOV objects and relations: ground-truth index when the
            # label is known, otherwise its OOV index.
            "indexed_labels": [
                gt_sg_obj_vocab[lbl] if lbl in gt_sg_obj_vocab else oov_obj_idxs[lbl]
                for lbl in labels
            ],
            "indexed_attributes": [],
            "indexed_relations": [
                gt_sg_rel_vocab[lbl] if lbl in gt_sg_rel_vocab else oov_rel_idxs[lbl]
                for lbl in relations
            ],
        }

# Re-read the preprocessors and extend them with the OOV index tables.
with open(gqa_sg_root / "preprocessors.json", "r") as f:
    preprocessor_data = json.load(f)

preprocessor_data["scene_graphs"]["oov_attr_to_index"] = {}
preprocessor_data["scene_graphs"]["oov_object_to_index"] = oov_obj_idxs
preprocessor_data["scene_graphs"]["oov_rel_to_index"] = oov_rel_idxs

print("Saving preprocessors:")
with open("temp/preprocessors.json", "w") as f:
    json.dump(preprocessor_data, f)

for tier in tiers:
    print(f"Saving tier: {tier}")
    with open(f"temp/{tier}_sceneGraphs.json", "w") as f:
        json.dump(sgs[tier], f)

print(f"{skipped=}")

oov_offset=2630
data/gqa/inference/0
{'board': 2630, 'engine': 2631, 'handle': 2632, 'jean': 2633, 'kid': 2634, 'light': 2635, 'pant': 2636, 'plane': 2637, 'railing': 2638, 'short': 2639, 'sneaker': 2640, 'stand': 2641, 'tile': 2642, 'track': 2643, 'wave': 2644}
{'across': 2645, 'against': 2646, 'along': 2647, 'and': 2648, 'belonging to': 2649, 'for': 2650, 'from': 2651, 'has': 2652, 'laying on': 2653, 'made of': 2654, 'on back of': 2655, 'over': 2656, 'part of': 2657, 'says': 2658, 'to': 2659, 'wears': 2660}
data/gqa/inference/1
data/gqa/inference/10
data/gqa/inference/11
data/gqa/inference/12
data/gqa/inference/13
data/gqa/inference/14
data/gqa/inference/2
data/gqa/inference/3
data/gqa/inference/4
data/gqa/inference/5
data/gqa/inference/6
data/gqa/inference/7
data/gqa/inference/8
data/gqa/inference/9
Saving preprocessors:
Saving tier: train
Saving tier: val
Saving tier: testdev
Saving tier: test
Saving tier: challenge
skipped=59053


In [7]:
# Check vocab overlap with GQA: list every OOV label together with its
# position in the predictor vocabulary (objects first, then relations).

for vocab, oov_index in ((obj_vocab, oov_obj_idxs), (rel_vocab, oov_rel_idxs)):
    print([f"{vocab.index(name)}: {name}" for name in oov_index])


# obj_class_remap = {obj: obj for obj in all_obj_vocab if obj in gt_sg_obj_vocab.keys()}
# rel_class_remap = {rel: rel for rel in all_rel_vocab if rel not in gt_sg_rel_vocab.keys()}

# obj_class_remap.update({
#     "stand": "nightstand",  # Not ideal, as there are "fruit stands", etc.
#     "kid": "child",
#     "plane": "airplane",
#     "light": "lamp",
#     "short": "shorts",
#     "pant": "pants",
#     "sneaker": "sneakers",
#     "jean": "jeans",
#     "__background__": None
# })


# from torchtext.vocab import GloVe
# import torch

# embeddings = GloVe(dim=300, name="6B")
# oov_obj_embs = embeddings.get_vecs_by_tokens(oov_objs)
# gqa_gt_obj_embs = embeddings.get_vecs_by_tokens(list(gt_sg_obj_vocab.keys()))
# print(oov_obj_embs.size())
# print(gqa_gt_obj_embs.size())
# sim = torch.nn.CosineSimilarity()
# sim = sim(oov_obj_embs.unsqueeze(-1), gqa_gt_obj_embs.t().unsqueeze(0))
# vals, idxs = torch.max(sim, dim=1)
# idxs = idxs.tolist()
# gt_obj_idx_to_key = {v: k for k, v in gt_sg_obj_vocab.items()}
# print([gt_obj_idx_to_key[i] for i in idxs])


['13: board', '42: engine', '59: handle', '67: jean', '68: kid', '76: light', '87: pant', '95: plane', '103: railing', '113: short', '120: sneaker', '123: stand', '129: tile', '134: track', '143: wave']
['2: across', '3: against', '4: along', '5: and', '9: belonging to', '16: for', '17: from', '20: has', '24: laying on', '27: made of', '32: on back of', '33: over', '36: part of', '39: says', '42: to', '49: wears']
