In [None]:
import json

# Collapse every annotation in the tennis-ball test split onto a single
# category (id 0, "tennis ball") and rewrite the annotation file in place.
annotations_path = '/sports-vision/data/tennis_ball_detection/annotations/test.json'

# Load the COCO annotations file
with open(annotations_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Relabel every annotation, not only ids 0 and 1: the category list below is
# reduced to id 0 alone, so any other id would be left pointing at a category
# that no longer exists.
for ann in data['annotations']:
    ann['category_id'] = 0  # merge to a single category

# Keep only one category definition (category 0)
data['categories'] = [{
    "id": 0,
    "name": "tennis ball",
    "supercategory": "none"
}]

# Serialize before reopening the file: mode 'w' truncates immediately, so a
# failure during serialization would otherwise leave the file empty or partial.
serialized = json.dumps(data, indent=2)

# Save the updated annotations file
with open(annotations_path, 'w', encoding='utf-8') as f:
    f.write(serialized)

In [None]:
import os
import shutil
import json

# Which splits to merge, where the merged dataset goes, and which source
# datasets feed into it.
splits = ['train', 'val', 'test']
output_dir = 'combined_dataset'
dataset_dirs = [
    '/sports-vision/data/ball_detection',
    '/sports-vision/data/tennis_ball_detection',
]

# Lay out the destination tree: one image folder per split, followed by the
# shared annotations folder.
for subdir in splits + ['annotations']:
    os.makedirs(os.path.join(output_dir, subdir), exist_ok=True)

# Helper to remap IDs
def merge_split(split):
    """Merge one split of every source dataset into the combined dataset.

    Reads ``<dataset_dir>/annotations/<split>.json`` for each entry of the
    module-level ``dataset_dirs``, renumbers image and annotation ids so they
    are unique across datasets (both start at 1), forces every annotation to
    category 0, copies the referenced images into ``<output_dir>/<split>`` and
    writes the merged result to ``<output_dir>/annotations/<split>.json``.

    Images from different datasets that share a ``file_name`` are stored
    under a dataset-prefixed name so they do not overwrite each other; the
    merged JSON records the name actually used.

    Best-effort cases, reported on stdout instead of raising:
      * an image whose source file is missing keeps its JSON entry but is
        not copied;
      * an annotation whose ``image_id`` matches no image in its own file is
        dropped.

    Args:
        split: Name of the split to merge, e.g. ``'train'``.

    Raises:
        FileNotFoundError: If a dataset has no annotation file for ``split``.
    """
    merged = {
        "images": [],
        "annotations": [],
        "categories": [{
            "id": 0,
            "name": "tennis ball",
            "supercategory": "none"
        }]
    }

    out_img_dir = os.path.join(output_dir, split)
    placed = {}  # output file name -> source path that claimed it
    missing_images = 0
    orphan_annotations = 0
    next_image_id = 1
    next_ann_id = 1

    for dataset_dir in dataset_dirs:
        ann_path = os.path.join(dataset_dir, 'annotations', f'{split}.json')
        img_dir = os.path.join(dataset_dir, split)
        # Last path component of the dataset, used to disambiguate file names
        tag = os.path.basename(os.path.normpath(dataset_dir))

        with open(ann_path, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Image ids are only unique within one source file, so the mapping
        # is rebuilt for every dataset.
        id_map = {}
        for img in data['images']:
            src_path = os.path.join(img_dir, img['file_name'])
            out_name = _unique_file_name(img['file_name'], tag, src_path, placed)
            placed[out_name] = src_path

            id_map[img['id']] = next_image_id
            img['id'] = next_image_id
            img['file_name'] = out_name
            merged['images'].append(img)
            next_image_id += 1

            # Copy image
            if os.path.isfile(src_path):
                dst_path = os.path.join(out_img_dir, out_name)
                os.makedirs(os.path.dirname(dst_path), exist_ok=True)
                shutil.copy2(src_path, dst_path)
            else:
                missing_images += 1

        for ann in data['annotations']:
            new_image_id = id_map.get(ann['image_id'])
            if new_image_id is None:
                # References an image this file never declared
                orphan_annotations += 1
                continue
            ann['id'] = next_ann_id
            ann['image_id'] = new_image_id
            ann['category_id'] = 0  # force all to tennis ball
            merged['annotations'].append(ann)
            next_ann_id += 1

    # Save merged annotation
    out_json = os.path.join(output_dir, 'annotations', f'{split}.json')
    os.makedirs(os.path.dirname(out_json), exist_ok=True)
    with open(out_json, 'w', encoding='utf-8') as f:
        json.dump(merged, f, indent=2)

    print(f"Merged {split}.json with {len(merged['images'])} images and {len(merged['annotations'])} annotations.")
    if missing_images:
        print(f"Warning: {missing_images} image file(s) listed in {split}.json were not found and not copied.")
    if orphan_annotations:
        print(f"Warning: skipped {orphan_annotations} annotation(s) in {split} that reference an unknown image_id.")


def _unique_file_name(name, tag, src_path, placed):
    """Return an output name for ``name`` that no other source file occupies.

    ``placed`` maps output names already handed out to the source path that
    claimed them. The original name is kept when it is free or already
    belongs to ``src_path``; otherwise the base name is prefixed with ``tag``
    (plus a counter if that is taken as well).
    """
    if placed.get(name, src_path) == src_path:
        return name

    head, tail = os.path.split(name)
    candidate = os.path.join(head, f'{tag}_{tail}')
    counter = 1
    while placed.get(candidate, src_path) != src_path:
        candidate = os.path.join(head, f'{tag}_{counter}_{tail}')
        counter += 1
    return candidate

# Build the combined dataset: merge each configured split in turn
for split_name in splits:
    merge_split(split_name)