In [1]:
import json
from collections import defaultdict
from copy import deepcopy

In [2]:
def merge_annotations(
    annotation_lists,
    key_fields=("title", "location"),
    label_fields=(
        "id",
        "sentiment",
        "annotator",
        "annotation_id",
        "created_at",
        "updated_at",
        "lead_time",
    ),
):
    """Merge annotation records that share the same identifying fields.

    Records are grouped by the values stored under ``key_fields``. Each group
    produces one output record: a deep copy of the group's first record with
    every ``label_fields`` entry removed from the top level, plus a
    ``"labels"`` list holding one dict per record in the group. Each label
    dict carries that record's ``label_fields`` values, with ``None`` for any
    field the record does not have.

    Args:
        annotation_lists: Iterable of annotation dicts. Despite the name, this
            is a flat sequence of records, not a list of lists. Every record
            must contain each key named in ``key_fields``.
        key_fields: Names of the fields whose values identify a group.
        label_fields: Names of the per-annotation fields that are moved from
            the top level of the record into the entries of ``"labels"``.

    Returns:
        A list of merged dicts, ordered by the first appearance of each group
        in the input. The input records are not modified.

    Raises:
        KeyError: If a record lacks one of ``key_fields``.
        TypeError: If a key field value cannot be serialised by ``json.dumps``.
    """
    # Materialise once so one-shot iterables can be reused for every record.
    key_fields = tuple(key_fields)
    label_fields = tuple(label_fields)

    grouped = defaultdict(list)
    for annotation in annotation_lists:
        # Serialising the identifying values yields a hashable grouping key
        # even when those values are themselves lists or dicts.
        key = json.dumps(
            {field: annotation[field] for field in key_fields},
            ensure_ascii=False,
            sort_keys=True,
        )
        grouped[key].append(annotation)

    merged = []
    for group in grouped.values():
        # Deep copy so that the caller's records are never mutated.
        base = deepcopy(group[0])

        # Strip the per-annotation fields (including "id") from the top level;
        # they are re-attached per record inside "labels" below.
        for field in label_fields:
            base.pop(field, None)

        # The same tuple drives both removal and label construction, so the
        # two can never drift apart.
        base["labels"] = [
            {field: entry.get(field) for field in label_fields}
            for entry in group
        ]
        merged.append(base)

    return merged


In [4]:
# Annotation exports to combine; paths are relative to the working directory.
input_files = ["Annotation/Hosein-Output.json", "Annotation/Farnoosh-Output.json"]

# Pool the items of every export into one flat list, in file order.
all_annotations = []
for input_path in input_files:
    with open(input_path, mode="r", encoding="utf-8") as source:
        data = json.load(source)
    # The parsed payload stays usable after the file handle has been closed.
    all_annotations.extend(data)

# Group the pooled records and collect their per-annotation fields.
merged_output = merge_annotations(all_annotations)

with open("merged_output.json", mode="w", encoding="utf-8") as sink:
    # ensure_ascii=False writes non-ASCII characters as-is rather than as
    # \uXXXX escapes.
    json.dump(merged_output, sink, indent=2, ensure_ascii=False)