In [3]:
import json
import os
from pathlib import Path

# Import paths from your config.
from config import REP_BBOX_JSON, CANONICAL_MAPPING_PATH

def load_gt_annotations():
    """
    Load the original ground-truth (GT) bounding-box/label JSON.

    The file location is taken from ``REP_BBOX_JSON`` (imported from config).

    Returns:
        The parsed JSON content of the GT file.

    Raises:
        FileNotFoundError: If the path in ``REP_BBOX_JSON`` does not exist.
    """
    # Coerce to Path so the existence check also works when the config
    # value is a plain string rather than a Path object.
    json_path = Path(REP_BBOX_JSON)
    if not json_path.exists():
        raise FileNotFoundError(f"GT JSON not found: {json_path}")
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(json_path, "r", encoding="utf-8") as f:
        return json.load(f)

def load_canonical_mapping():
    """
    Load the canonical label mapping from its JSON file.

    The file location is taken from ``CANONICAL_MAPPING_PATH`` (imported
    from config).

    Returns:
        The parsed JSON content of the mapping file.

    Raises:
        FileNotFoundError: If the path in ``CANONICAL_MAPPING_PATH`` does
            not exist.
    """
    # Coerce to Path so the existence check also works when the config
    # value is a plain string rather than a Path object.
    mapping_path = Path(CANONICAL_MAPPING_PATH)
    if not mapping_path.exists():
        raise FileNotFoundError(f"Canonical mapping file not found: {mapping_path}")
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(mapping_path, "r", encoding="utf-8") as f:
        return json.load(f)

def convert_box(box):
    """
    Turn a box given as [x, y, w, h] into corner form [x, y, x+w, y+h].

    Args:
        box: A sequence of exactly four values (x, y, width, height).

    Returns:
        list: [x, y, x + width, y + height].
    """
    left, top, width, height = box
    right = left + width
    bottom = top + height
    return [left, top, right, bottom]

def preprocess_gt_annotations():
    """
    Build the processed GT structure from the raw GT JSON and the canonical mapping.

    For every frame of every sequence found under "multi_object" and
    "single_object" in the raw GT data:
      1. Each raw label has everything from its last underscore onwards removed
         (e.g. "person_1" becomes "person"). A label with no underscore is
         used unchanged.
      2. The resulting label is looked up in the canonical mapping; a label
         that is not in the mapping becomes -1.
      3. Each box is converted from [x, y, w, h] to [x, y, x+w, y+h].
      4. The frame is stored as a dictionary with:
            - "labels": list of numeric labels
            - "boxes": list of converted boxes, in the same order as "labels"
            - "mask_files": single-element list holding the frame name
              (the mask file is assumed to share the frame's name)

    Returns:
        processed (dict): A dictionary with keys "multi_object" and "single_object", each
                          mapping sequence names to a flat {frame name: frame dictionary}.
                          An object type missing from the raw data stays an empty dict.
    """
    gt_data = load_gt_annotations()
    canonical_mapping = load_canonical_mapping()

    processed = {"multi_object": {}, "single_object": {}}

    for obj_type in ("multi_object", "single_object"):
        # Object types absent from the raw data are simply left empty.
        if obj_type in gt_data:
            for seq_name, frames in gt_data[obj_type].items():
                per_frame = {}
                for frame_name, annotations in frames.items():
                    # One (numeric label, corner box) pair per annotated object.
                    # rsplit with maxsplit=1 returns the whole string when there
                    # is no underscore, so no separate membership check is needed.
                    converted = [
                        (
                            canonical_mapping.get(raw_label.rsplit("_", 1)[0], -1),
                            convert_box(bbox),
                        )
                        for raw_label, bbox in annotations.items()
                    ]
                    per_frame[frame_name] = {
                        "labels": [label for label, _ in converted],
                        "boxes": [box for _, box in converted],
                        "mask_files": [frame_name],
                    }
                processed[obj_type][seq_name] = per_frame
    return processed

def save_processed_gt(processed, output_dir):
    """
    Write every processed sequence to its own JSON file.

    Args:
        processed (dict): Processed GT annotations, keyed by object type, each value
                          mapping sequence names to that sequence's annotations.
        output_dir (str or Path): Destination directory; it is created (including
                                  parents) when missing. Each sequence is written to
                                  "<sequence name>_gt.json".

    Note:
        The object type is not part of the filename, so a sequence name that
        appears under more than one object type is written to the same file.
    """
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Only the per-type sequence dictionaries matter; the type key is unused.
    for per_type in processed.values():
        for seq_name in per_type:
            out_path = target_dir / f"{seq_name}_gt.json"
            with open(out_path, "w") as handle:
                json.dump(per_type[seq_name], handle, indent=4)
            print(f"Saved processed GT for sequence '{seq_name}' to {out_path}")

if __name__ == "__main__":
    # Build the processed GT annotations, then write one JSON per sequence
    # into the metrics artifacts directory.
    save_processed_gt(
        preprocess_gt_annotations(),
        "/Users/dd/PycharmProjects/CV-ObjectDetection_ImageSegmentation/metrics_artifacts/gt_annotations",
    )

Saved processed GT for sequence 'bike-packing' to /Users/dd/PycharmProjects/CV-ObjectDetection_ImageSegmentation/metrics_artifacts/gt_annotations/bike-packing_gt.json
Saved processed GT for sequence 'skate-park' to /Users/dd/PycharmProjects/CV-ObjectDetection_ImageSegmentation/metrics_artifacts/gt_annotations/skate-park_gt.json
Saved processed GT for sequence 'stroller' to /Users/dd/PycharmProjects/CV-ObjectDetection_ImageSegmentation/metrics_artifacts/gt_annotations/stroller_gt.json
Saved processed GT for sequence 'upside-down' to /Users/dd/PycharmProjects/CV-ObjectDetection_ImageSegmentation/metrics_artifacts/gt_annotations/upside-down_gt.json
Saved processed GT for sequence 'cat-girl' to /Users/dd/PycharmProjects/CV-ObjectDetection_ImageSegmentation/metrics_artifacts/gt_annotations/cat-girl_gt.json
Saved processed GT for sequence 'classic-car' to /Users/dd/PycharmProjects/CV-ObjectDetection_ImageSegmentation/metrics_artifacts/gt_annotations/classic-car_gt.json
Saved processed GT for