In [1]:
import json
import os
import networkx as nx
import spacy
from tqdm import tqdm

# Load COCO annotations.
# JSON text is UTF-8; the encoding is passed explicitly so the files are not
# decoded with the platform's default locale encoding (which open() falls back
# to when no encoding is given, and which is not UTF-8 on many Windows setups).
with open("E:/Download/annotations/instances_train2017.json", encoding="utf-8") as f:
    instance_data = json.load(f)

with open("E:/Download/annotations/captions_train2017.json", encoding="utf-8") as f:
    caption_data = json.load(f)

# Group caption strings by the image they describe:
# caption_dict[image_id] -> list of captions, in annotation order.
caption_dict = {}
for ann in caption_data["annotations"]:
    caption_dict.setdefault(ann["image_id"], []).append(ann["caption"])

# Category id -> category name lookup, used to label each annotated object.
categories = {cat["id"]: cat["name"] for cat in instance_data["categories"]}

# Group annotated objects by image:
# image_objects[image_id] -> list of {"label": <category name>, "bbox": <bbox>},
# in annotation order.
image_objects = {}
for ann in instance_data["annotations"]:
    record = {"label": categories[ann["category_id"]], "bbox": ann["bbox"]}
    image_objects.setdefault(ann["image_id"], []).append(record)

# Load spaCy NLP model
nlp = spacy.load("en_core_web_sm")

# Two objects whose bbox origins are closer than this get a "near" edge.
NEAR_DISTANCE_THRESHOLD = 75
# Dependency labels accepted for the subject / object of a caption's root verb.
SUBJECT_DEPS = ("nsubj", "nsubjpass")
OBJECT_DEPS = ("dobj", "pobj")

# Create full graph.
# Nodes are keyed "<image_id>_<label>", so every instance of a category inside
# one image shares a single node, and a caption word equal to a category name
# resolves to that same node.
# NOTE(review): DiGraph stores one edge per ordered node pair, so a caption edge
# replaces an existing edge (e.g. a "near" edge, or an earlier caption's verb)
# between the same two nodes. Switch to nx.MultiDiGraph if all must be kept.
G = nx.DiGraph()

print("Building KG from COCO...")
for image_id in tqdm(image_objects):
    objects = image_objects[image_id]

    # Add object nodes
    for obj in objects:
        G.add_node(f"{image_id}_{obj['label']}", label=obj["label"], image_id=image_id, type="object")

    # Add spatial edges between every unordered pair of objects in the image.
    # NOTE(review): the distance is taken between bbox[0], bbox[1] of each box
    # (presumably the box's top-left corner - confirm against the annotation
    # format), not between box centres, so box size is ignored.
    for i, first in enumerate(objects):
        x1, y1 = first["bbox"][0], first["bbox"][1]
        for second in objects[i + 1:]:
            # Same label means same node: linking them would only create a
            # meaningless "X near X" self-loop.
            if first["label"] == second["label"]:
                continue
            x2, y2 = second["bbox"][0], second["bbox"][1]
            dist = ((x1 - x2)**2 + (y1 - y2)**2) ** 0.5
            if dist < NEAR_DISTANCE_THRESHOLD:
                G.add_edge(f"{image_id}_{first['label']}", f"{image_id}_{second['label']}",
                           relation="near", source="bbox")

    # Add caption relations: (subject) -[root verb lemma]-> (object).
    # nlp.pipe processes the image's captions as a batch; the entity recognizer
    # is skipped for this call only, because no entity output is read here.
    for doc in nlp.pipe(caption_dict.get(image_id, ()), disable=["ner"]):
        for token in doc:
            if token.dep_ != "ROOT" or token.pos_ != "VERB":
                continue
            # NOTE(review): only direct children of the verb are inspected. A
            # "pobj" normally hangs off a preposition rather than the verb, so
            # it likely never matches here - confirm whether verb -> prep -> pobj
            # chains were meant to be followed.
            subj_token = next((w for w in token.lefts if w.dep_ in SUBJECT_DEPS), None)
            obj_token = next((w for w in token.rights if w.dep_ in OBJECT_DEPS), None)
            if subj_token is None or obj_token is None:
                continue
            subj_label = subj_token.text.lower()
            obj_label = obj_token.text.lower()
            node1 = f"{image_id}_{subj_label}"
            node2 = f"{image_id}_{obj_label}"
            # Only create nodes that do not exist yet: re-adding an existing
            # node would overwrite its attributes and turn a detected object
            # (type="object") into type="caption".
            if node1 not in G:
                G.add_node(node1, label=subj_label, image_id=image_id, type="caption")
            if node2 not in G:
                G.add_node(node2, label=obj_label, image_id=image_id, type="caption")
            G.add_edge(node1, node2, relation=token.lemma_, source="caption")

# Write the finished graph to a GraphML file (relative path, so it lands in the
# current working directory) and report completion.
graphml_path = "coco_kg.graphml"
nx.write_graphml(G, graphml_path)
print("KG saved to coco_kg.graphml")

Building KG from COCO...


100%|████████████████████████████████████████████████████████████████████████| 117266/117266 [1:07:36<00:00, 28.91it/s]


KG saved to coco_kg.graphml
