# COCO dataset creation

In this notebook we show how you can process the competition images into a [COCO Keypoint dataset](https://cocodataset.org/#format-data).

We convert the depth maps to images and train our keypoint detector on those. Note, however, that the color images can also be used for this.


In [None]:
import os
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
from cloth_tools.dataset.format import load_competition_observation
from airo_camera_toolkit.image_transforms.transforms.crop import Crop
from cloth_tools.annotation.grasp_annotation import GraspAnnotation


# Root folder holding the data, and the competition dataset folder processed in this notebook.
data_dir = Path("data")
dataset_dir = data_dir.joinpath("cloth_competition_dataset_0000")

In [None]:
# Sanity check: the cell output shows whether the dataset directory exists on disk.
os.path.exists(dataset_dir)

In [None]:
# Collect the paths of all sub-directories of the dataset directory.
subdirs = []
for entry in os.scandir(dataset_dir):
    if entry.is_dir():
        subdirs.append(entry.path)

# Keep the paths that contain "sample_" and sort them so the processing order is stable.
sample_dirs = sorted(subdir for subdir in subdirs if "sample_" in subdir)

# Peek at the first few sample directories.
sample_dirs[:5]

In [None]:
# Per-sample data; the three lists are index-aligned (one entry per sample directory).
images = []
depth_maps = []
clicked_points = []

for sample_dir in sample_dirs:
    sample_path = Path(sample_dir)

    # Observation stored in the sample's "observation_start" folder: keep `image_left` and `depth_map`.
    # Note: `observation` of the last sample is reused further down to read the image/depth shapes.
    observation = load_competition_observation(sample_path / "observation_start")
    images.append(observation.image_left)
    depth_maps.append(observation.depth_map)

    # Grasp annotation of the sample: keep its `clicked_point_frontal`.
    grasp_annotation_file = sample_path / "grasp" / "grasp_annotation.json"
    with grasp_annotation_file.open("r") as annotation_stream:
        annotation_json = annotation_stream.read()
    grasp_annotation = GraspAnnotation.model_validate_json(annotation_json)
    clicked_points.append(grasp_annotation.clicked_point_frontal)


# Number of samples shown in the preview figures.
N_VISUALIZE = 5

plt.figure(figsize=(20, 10))
for subplot_index, image in enumerate(images[:N_VISUALIZE], start=1):
    plt.subplot(1, N_VISUALIZE, subplot_index)
    plt.imshow(image)

print(clicked_points[:N_VISUALIZE])

In [None]:
# Fixed-size crop window whose x offset is chosen so the window is centered on the depth map width.
# NOTE(review): the offset is derived from the depth map width but the Crop is built from the
# image shape - this assumes both have the same resolution; confirm.
crop_width = 350
crop_height = 600

x_middle = observation.depth_map.shape[1] // 2
x = x_middle - crop_width // 2  # x offset handed to Crop
y = 240  # y offset handed to Crop

crop_rgb_left = Crop(observation.image_left.shape, x=x, y=y, w=crop_width, h=crop_height)

images_cropped = []
for image in images:
    images_cropped.append(crop_rgb_left.transform_image(image))

# Preview the cropped images with the clicked point mapped through the same crop.
plt.figure(figsize=(20, 10))
preview_pairs = zip(images_cropped[:N_VISUALIZE], clicked_points[:N_VISUALIZE])
for subplot_index, (image, clicked_point) in enumerate(preview_pairs, start=1):
    plt.subplot(1, N_VISUALIZE, subplot_index)

    point_in_crop = crop_rgb_left.transform_point(clicked_point)
    plt.scatter(*point_in_crop, c="lawngreen", s=100, marker="x")
    plt.imshow(image)

In [None]:
# Pixels with a depth below this threshold form the segmentation mask.
# NOTE(review): presumably meters, matching the "m" printed for depth values further down - confirm.
distance_max = 1.55

segmentation_masks = []
for depth_map in depth_maps:
    segmentation_masks.append(depth_map < distance_max)

# Crop the masks with the same crop that was used for the images.
segmentation_masks_cropped = list(map(crop_rgb_left.transform_image, segmentation_masks))

plt.figure(figsize=(20, 10))
for subplot_index, mask in enumerate(segmentation_masks_cropped[:N_VISUALIZE], start=1):
    plt.subplot(1, N_VISUALIZE, subplot_index)
    plt.imshow(mask)

In [None]:
# Apply the image crop to the raw depth maps as well.
depth_maps_cropped = list(map(crop_rgb_left.transform_image, depth_maps))

plt.figure(figsize=(20, 10))
for subplot_index, depth_cropped in enumerate(depth_maps_cropped[:N_VISUALIZE], start=1):
    plt.subplot(1, N_VISUALIZE, subplot_index)
    plt.imshow(depth_cropped)

In [None]:
# Rescale every cropped depth map using its own min/max; pixels outside the mask become 1.0.
depth_maps_scaled = []

for depth_cropped, segmentation_cropped in zip(depth_maps_cropped, segmentation_masks_cropped):
    # Multiplying by the mask zeroes the pixels outside it, so the maximum below only
    # considers masked pixels.
    depth_cropped_segmented = depth_cropped * segmentation_cropped

    # The minimum is taken over the unmasked crop (the zeroed pixels would otherwise be the minimum).
    distance_cloth_min = depth_cropped.min()
    distance_cloth_max = depth_cropped_segmented.max()
    distance_cloth_range = distance_cloth_max - distance_cloth_min

    print(f"Cloth distance range: {distance_cloth_min:.2f}-{distance_cloth_max:.2f} m ({distance_cloth_range:.2f} m)")

    # Shift and divide by the range, then overwrite everything outside the mask.
    depth_scaled = (depth_cropped_segmented - distance_cloth_min) / distance_cloth_range
    background = np.logical_not(segmentation_cropped)
    depth_scaled[background] = 1.0  # set background to max value

    depth_maps_scaled.append(depth_scaled)

first_scaled = depth_maps_scaled[0]
print(f"Scaled depth map value range: {np.min(first_scaled)}-{np.max(first_scaled)}")

In [None]:
# Preview the scaled depth maps with the clicked point, mapped through the crop, drawn on top.
plt.figure(figsize=(20, 10))
preview_pairs = zip(depth_maps_scaled[:N_VISUALIZE], clicked_points[:N_VISUALIZE])
for subplot_index, (depth_map, clicked_point) in enumerate(preview_pairs, start=1):
    plt.subplot(1, N_VISUALIZE, subplot_index)
    plt.imshow(depth_map)

    point_in_crop = crop_rgb_left.transform_point(clicked_point)
    plt.scatter(*point_in_crop, c='r', s=300, marker="x")

In [None]:
from airo_dataset_tools.data_parsers.coco import (
    CocoInfo,
    CocoKeypointCategory,
    CocoLicense,
    CocoImage,
    CocoKeypointAnnotation,
    CocoKeypointsDataset,
)


# Dataset-level metadata.
coco_info = CocoInfo(
    year=2024,
    version="0.1",
    description="Preprocessed depth maps from the ICRA 2024 Cloth Competition dataset.",
    contributor="Victor-Louis De Gusseme & Thomas Lips",
    url="https://github.com/Victorlouisdg/cloth-competition",
    date_created="2024/02/22",
)

# The single license; images refer to it through its id.
coco_license = CocoLicense(
    name="Attribution-NonCommercial-ShareAlike License",  # TODO change to a more suitable license
    url="http://creativecommons.org/licenses/by-nc-sa/2.0/",
    id=1,
)

# One category with a single keypoint: the annotated grasp point.
coco_keypoint_category = CocoKeypointCategory(
    id=0,
    name="tshirt",
    supercategory="cloth",
    keypoints=["grasp_annotated"],
)

for coco_object in (coco_info, coco_license, coco_keypoint_category):
    print(repr(coco_object))

In [None]:
# Make a directory to store the preprocessed images
import datetime

# Timestamp the output folder name so each run gets its own directory.
run_started_at = datetime.datetime.now()
datetime_str = run_started_at.strftime("%Y-%m-%d_%H-%M-%S")

preprocessed_dir = data_dir.joinpath(f"dataset_preprocessed_{datetime_str}")
images_preprocessed_dir = preprocessed_dir.joinpath("images")

# parents=True also creates `preprocessed_dir`; an already existing images directory raises.
images_preprocessed_dir.mkdir(parents=True)

In [None]:
import cv2

# Write every scaled depth map to the images folder as a PNG and remember where it went.
image_paths = []

for i, depth_map_scaled in enumerate(depth_maps_scaled):
    image_name = f"image_{i:06}.png"
    image_path = str(images_preprocessed_dir / image_name)
    image_paths.append(image_path)
    # The scaled values are multiplied by 255 before writing.
    # NOTE(review): a float array is handed to cv2.imwrite; presumably OpenCV converts it
    # to 8-bit for the PNG encoder - confirm against the imwrite documentation.
    cv2.imwrite(image_path, depth_map_scaled * 255.0)


# File names in the COCO file are stored relative to the preprocessed dataset directory.
image_paths_relative = [os.path.relpath(path, preprocessed_dir) for path in image_paths]

coco_images = []

for i, path in enumerate(image_paths_relative):
    coco_image = CocoImage(
        # Fix: the id was hard-coded to 1 for every image. Image ids must be unique, and
        # the annotations refer to images through `image_id=i` using this same ordering.
        id=i,
        width=crop_width,
        height=crop_height,
        file_name=path,
        license=1,
        date_captured="2024/02/22",
    )
    coco_images.append(coco_image)

# Show the first two entries (their ids should differ); also works with fewer than two images.
for coco_image_preview in coco_images[:2]:
    print(repr(coco_image_preview))

In [None]:
from airo_dataset_tools.segmentation_mask_converter import BinarySegmentationMask


# Build one keypoint annotation per image: the clicked grasp point plus the segmentation
# mask (from which the RLE segmentation, area and bbox are derived).
coco_annotations = []

for i, (clicked_point, segmentation_mask) in enumerate(zip(clicked_points, segmentation_masks_cropped)):
    # Fix: the clicked point is expressed in the uncropped image, while the exported images
    # and masks are cropped. Map the point through the crop (as the preview cells do) so the
    # keypoint lines up with the saved image.
    x, y = crop_rgb_left.transform_point(clicked_point)
    v = 2 # means labelled and visible

    mask = BinarySegmentationMask(segmentation_mask)
    coco_annotation = CocoKeypointAnnotation(
        id=i,
        image_id=i, # Note: image_id == id only when exactly one annotation per image
        category_id=coco_keypoint_category.id,
        segmentation=mask.as_compressed_rle,
        area=mask.area,
        bbox=mask.bbox,
        iscrowd=0,
        keypoints=[x, y, v],
        num_keypoints=1,
    )
    coco_annotations.append(coco_annotation)

print(repr(coco_annotations[0]))

In [None]:
# Bundle the metadata, category, images and annotations into a single dataset object.
coco_keypoints = CocoKeypointsDataset(
    categories=[coco_keypoint_category],
    licenses=[coco_license],
    info=coco_info,
    annotations=coco_annotations,
    images=coco_images,
)

In [None]:
import json
# Write the dataset as JSON into the preprocessed dataset directory; None-valued fields are left out.
annotations_file = preprocessed_dir / "annotations.json"

with annotations_file.open("w") as annotations_stream:
    dataset_dict = coco_keypoints.model_dump(exclude_none=True)
    json.dump(dataset_dict, annotations_stream, indent=4)
