In [12]:
import json
from pycocotools.coco import COCO

# Purpose: keep only the images that carry at least one annotation for every
# required category, write them to a new COCO-style JSON file, then reload that
# file and report how many images are still missing each category.

# Path to the original annotation file
annotation_file = 'archive/turtles-data/data/annotations.json'
coco = COCO(annotation_file)

# Required categories
required_categories = ["turtle", "head", "flipper"]
cat_ids = coco.getCatIds(catNms=required_categories)

# getCatIds returns ids in the dataset's category order, not in the order of
# catNms, so zipping names with ids can pair a name with the wrong id. Build
# the mapping from the category records themselves instead.
found_categories = {cat["name"]: cat["id"] for cat in coco.loadCats(cat_ids)}
absent_categories = [name for name in required_categories if name not in found_categories]
if absent_categories:
    # Fail loudly rather than silently filtering on fewer categories.
    raise ValueError(f"Categories not present in {annotation_file}: {absent_categories}")
# Keep the order of required_categories so reports print in that order.
cat_ids_map = {name: found_categories[name] for name in required_categories}

# Identify images that have all required labels
valid_image_ids = [
    img_id
    for img_id in coco.getImgIds()
    if all(coco.getAnnIds(imgIds=img_id, catIds=cat_id) for cat_id in cat_ids_map.values())
]
# Set copy for O(1) membership tests in the comprehensions below.
valid_image_id_set = set(valid_image_ids)

# Create a new annotation dictionary containing only images with all labels
filtered_annotations = {
    "images": [img for img in coco.dataset["images"] if img["id"] in valid_image_id_set],
    "annotations": [ann for ann in coco.dataset["annotations"] if ann["image_id"] in valid_image_id_set],
    "categories": coco.dataset["categories"]
}

# Save the filtered annotations to a new JSON file
filtered_annotation_file = 'archive/turtles-data/data/filtered_annotations.json'
with open(filtered_annotation_file, 'w') as f:
    json.dump(filtered_annotations, f)
print(f"Filtered annotations saved to {filtered_annotation_file}")

# Verify the filtered result
filtered_coco = COCO(filtered_annotation_file)
missing_labels_images = {category: 0 for category in required_categories}

for img_id in filtered_coco.getImgIds():
    for category, cat_id in cat_ids_map.items():
        # No early exit: every category is counted independently, so an image
        # missing several labels contributes to each of their counters.
        if not filtered_coco.getAnnIds(imgIds=img_id, catIds=cat_id):
            missing_labels_images[category] += 1

# Print the verification results
print("\nValidation of filtered dataset:")
for category, count in missing_labels_images.items():
    print(f"Number of images missing '{category}' label after filtering: {count}")


loading annotations into memory...
Done (t=4.65s)
creating index...
index created!
Filtered annotations saved to archive/turtles-data/data/filtered_annotations.json
loading annotations into memory...
Done (t=2.86s)
creating index...
index created!

Validation of filtered dataset:
Number of images missing 'turtle' label after filtering: 0
Number of images missing 'head' label after filtering: 0
Number of images missing 'flipper' label after filtering: 0


In [10]:

# Purpose: for each required category, count how many images in the loaded
# dataset have no annotation of that category.
# NOTE: relies on `coco` having been created by the cell that loads the
# annotation file; run that cell first.
required_categories = ["turtle", "head", "flipper"]
cat_ids = coco.getCatIds(catNms=required_categories)

# getCatIds returns ids in the dataset's category order, not in the order of
# catNms, so build the name -> id mapping from the category records rather than
# zipping the two lists. (The previous code also zipped against an undefined
# name, `category_names`.)
found_categories = {cat["name"]: cat["id"] for cat in coco.loadCats(cat_ids)}
absent_categories = [name for name in required_categories if name not in found_categories]
if absent_categories:
    # Fail loudly rather than silently reporting on fewer categories.
    raise ValueError(f"Categories not present in the dataset: {absent_categories}")
# Keep the order of required_categories so the report prints in that order.
cat_ids_map = {name: found_categories[name] for name in required_categories}

missing_labels_images = {category: 0 for category in required_categories}
img_ids = coco.getImgIds()

for img_id in img_ids:
    for category, cat_id in cat_ids_map.items():
        # No early exit: every category is counted independently, so an image
        # missing several labels contributes to each of their counters.
        if not coco.getAnnIds(imgIds=img_id, catIds=cat_id):
            missing_labels_images[category] += 1

for category, count in missing_labels_images.items():
    print(f"\nNumber of images missing '{category}' label: {count}")


Number of images missing 'turtle' label: 0

Number of images missing 'head' label: 0

Number of images missing 'flipper' label: 0
