In [3]:
import json
import re
from pathlib import Path

def extract_base_label(label):
    """
    Extracts the base label from a label string.
    For example, converts 'dog_1' to 'dog' and 'person_2' to 'person'.
    """
    match = re.match(r"([a-zA-Z\-\_]+)", label)
    return match.group(1).lower() if match else label.lower()

def gather_unique_labels(json_paths):
    """
    Given a list of JSON file paths (bounding boxes+labels JSONs),
    returns a set of unique base label names.
    """
    unique_labels = set()
    for json_path in json_paths:
        with open(json_path, "r") as f:
            data = json.load(f)
        # Iterate over each sequence.
        for seq, frames in data.items():
            # Iterate over each frame.
            for frame, annotations in frames.items():
                # Each annotation key is a label (e.g., "dog_1", "person_2")
                for raw_label in annotations.keys():
                    base_label = extract_base_label(raw_label)
                    unique_labels.add(base_label)
    return unique_labels

# Paths to your bounding boxes+labels JSON files.
multi_obj_json_path = Path("output/representative_multi_object_boundingboxes_labels.json")
single_obj_json_path = Path("output/representative_single_object_boundingboxes_labels.json")

# Gather unique base labels from both JSON files.
unique_base_labels = gather_unique_labels([multi_obj_json_path, single_obj_json_path])
print("Unique base labels found:", unique_base_labels)

# Create a canonical mapping: assign a unique integer to each base label.
# Optionally, reserve 0 for background if needed.
canonical_mapping = {label: idx for idx, label in enumerate(sorted(unique_base_labels))}
print("Canonical mapping:")
for label, idx in canonical_mapping.items():
    print(f"  {label}: {idx}")

# Save the canonical mapping to a JSON file for later use.
mapping_output_path = Path("canonical_label_mapping.json")
with open(mapping_output_path, "w") as f:
    json.dump(canonical_mapping, f, indent=2)
print(f"Canonical mapping saved to {mapping_output_path.resolve()}")

Unique base labels found: {'paraglide_', 'person', 'person_', 'snowboard_', 'dog', 'pig_', 'car', 'airplane_', 'stick_', 'paper_', 'goat', 'dog_', 'skateboard_', 'horse_', 'varanus', 'elephant', 'car_', 'stroller_', 'cat_', 'bird', 'motorcycle_', 'puck_', 'cow'}
Canonical mapping:
  airplane_: 0
  bird: 1
  car: 2
  car_: 3
  cat_: 4
  cow: 5
  dog: 6
  dog_: 7
  elephant: 8
  goat: 9
  horse_: 10
  motorcycle_: 11
  paper_: 12
  paraglide_: 13
  person: 14
  person_: 15
  pig_: 16
  puck_: 17
  skateboard_: 18
  snowboard_: 19
  stick_: 20
  stroller_: 21
  varanus: 22
Canonical mapping saved to /Users/dd/PycharmProjects/CV-ObjectDetection_ImageSegmentation/canonical_label_mapping.json


In [5]:
import json
import re
from pathlib import Path

def extract_base_label(label):
    """
    Extracts the base label by keeping only alphabetic characters and dashes,
    thereby removing trailing underscores or digits.
    Examples:
      'dog_1' or 'dog_' -> 'dog'
      'person' -> 'person'
    """
    match = re.match(r"([a-zA-Z\-]+)", label)
    return match.group(1).lower() if match else label.lower()

def gather_unique_labels(json_paths):
    """
    Given a list of JSON file paths (bounding boxes+labels JSONs),
    returns a set of unique base label names.
    """
    unique_labels = set()
    for json_path in json_paths:
        with open(json_path, "r") as f:
            data = json.load(f)
        for seq, frames in data.items():
            for frame, annotations in frames.items():
                for raw_label in annotations.keys():
                    base_label = extract_base_label(raw_label)
                    unique_labels.add(base_label)
    return unique_labels

# Paths to your JSON files.
multi_obj_json_path = Path("output/representative_multi_object_boundingboxes_labels.json")
single_obj_json_path = Path("output/representative_single_object_boundingboxes_labels.json")

# Gather unique base labels from both JSON files.
unique_base_labels = gather_unique_labels([multi_obj_json_path, single_obj_json_path])
print("Unique base labels found:", unique_base_labels)

# Create a canonical mapping: assign a unique integer to each base label.
# For example, background is typically 0, and then each label gets a unique number.
canonical_mapping = {label: idx for idx, label in enumerate(sorted(unique_base_labels))}
print("Canonical mapping:")
for label, idx in canonical_mapping.items():
    print(f"  {label}: {idx}")

# Save the canonical mapping to a JSON file for consistency.
mapping_output_path = Path("canonical_label_mapping.json")
with open(mapping_output_path, "w") as f:
    json.dump(canonical_mapping, f, indent=2)
print(f"Canonical mapping saved to {mapping_output_path.resolve()}")

Unique base labels found: {'ball', 'paper', 'snowboard', 'person', 'stroller', 'cat', 'dog', 'bicycle', 'skateboard', 'car', 'motorcycle', 'pig', 'paraglide', 'goat', 'varanus', 'horse', 'stick', 'elephant', 'bird', 'puck', 'cow'}
Canonical mapping:
  ball: 0
  bicycle: 1
  bird: 2
  car: 3
  cat: 4
  cow: 5
  dog: 6
  elephant: 7
  goat: 8
  horse: 9
  motorcycle: 10
  paper: 11
  paraglide: 12
  person: 13
  pig: 14
  puck: 15
  skateboard: 16
  snowboard: 17
  stick: 18
  stroller: 19
  varanus: 20
Canonical mapping saved to /Users/dd/PycharmProjects/CV-ObjectDetection_ImageSegmentation/canonical_label_mapping.json
