In [2]:
import json
import pandas as pd
import argparse

In [4]:
def process_coco(json_file, output_csv):
    """Count "clump" and "seals" annotations per image in a COCO file and save them as CSV.

    Parameters
    ----------
    json_file : str or path-like
        Path to a JSON file with top-level ``"images"``, ``"annotations"`` and
        ``"categories"`` lists. Images need ``"id"`` and ``"file_name"``,
        categories need ``"id"`` and ``"name"``, annotations need
        ``"image_id"`` and ``"category_id"``.
    output_csv : str or path-like
        Destination of the CSV file.

    Returns
    -------
    None
        The table is written to ``output_csv`` with the columns
        ``image_name``, ``clump_annotations`` and ``seal_annotations`` (one
        row per distinct image file name), and a confirmation line is printed.

    Raises
    ------
    KeyError
        If a required key is missing, or an annotation references an
        ``image_id`` / ``category_id`` that is not listed in the file.
    """
    # JSON is UTF-8 by specification; do not rely on the platform's locale encoding.
    with open(json_file, "r", encoding="utf-8") as file:
        data = json.load(file)

    image_dict = {img["id"]: img["file_name"] for img in data["images"]}
    category_mapping = {cat["id"]: cat["name"] for cat in data["categories"]}

    # Seed every image with zeros so images without annotations still get a row.
    annotation_counts = {img["file_name"]: {"clump": 0, "seal": 0} for img in data["images"]}

    # Category label in the file -> counter it feeds. Note the label is the
    # plural "seals" while the counter/column uses "seal"; any other category
    # is ignored.
    counter_for_category = {"clump": "clump", "seals": "seal"}

    for annotation in data["annotations"]:
        image_name = image_dict[annotation["image_id"]]
        category_name = category_mapping[annotation["category_id"]]

        counter = counter_for_category.get(category_name)
        if counter is not None:
            annotation_counts[image_name][counter] += 1

    # Build the frame with explicit columns so that an empty "images" list
    # produces a header-only CSV instead of a column-length ValueError.
    rows = [
        (image_name, counts["clump"], counts["seal"])
        for image_name, counts in annotation_counts.items()
    ]
    df = pd.DataFrame(rows, columns=["image_name", "clump_annotations", "seal_annotations"])

    df.to_csv(output_csv, index=False)
    print(f"Annotation counts saved to {output_csv}")

In [None]:
# Write per-image clump/seal annotation counts for the test annotations file.
process_coco('annotations/test_annotations.coco.json', 'data/test_anno.csv')

Annotation counts saved to data/test_anno.csv


In [6]:
# Write per-image clump/seal annotation counts for the train annotations file.
process_coco('annotations/train_annotations.coco.json', 'data/train_anno.csv')

Annotation counts saved to data/train_anno.csv


In [7]:
# Write per-image clump/seal annotation counts for the valid annotations file.
process_coco('annotations/valid_annotations.coco.json', 'data/valid_anno.csv')

Annotation counts saved to data/valid_anno.csv
