# Grasp annotation example

In [None]:
from pathlib import Path
import cv2
from airo_camera_toolkit.point_clouds.conversions import open3d_to_point_cloud, point_cloud_to_open3d
from airo_camera_toolkit.point_clouds.operations import filter_point_cloud
from cloth_tools.dataset.format import load_competition_observation
import matplotlib.pyplot as plt
from airo_camera_toolkit.utils.image_converter import ImageConverter
import open3d as o3d

# Relative path to the data folder and to the dataset inside it that the cells below read from.
data_dir = Path("..", "data")
dataset_dir = data_dir.joinpath("dataset_dev_0000")

In [None]:
from cloth_tools.dataset.format import load_competition_observation


# Load the start observation stored under sample_000000 of the dataset.
observation_start_dir = dataset_dir.joinpath("sample_000000", "start_observation")
observation = load_competition_observation(observation_start_dir)

In [None]:
# Show the observation's left image in slot 1 of a 1x2 subplot grid (slot 2 stays empty).
figure = plt.figure(figsize=(20, 10))
figure.add_subplot(1, 2, 1)
plt.imshow(observation.image_left)

In [None]:
# Inputs taken from the loaded observation.
point_cloud = observation.point_cloud
confidence_map = observation.confidence_map

# Disabled: transform the point cloud to the world frame.
# X_W_C = observation.camera_pose_in_world  # X_LCB_C (camera pose in the left-arm base frame)
# pcd_in_camera = point_cloud_to_open3d(point_cloud_in_camera)  # X_C_PC, need X_W_C
# pcd = pcd_in_camera.transform(X_W_C)  # transform to world frame
# point_cloud = open3d_to_point_cloud(pcd)

# Filter out points with low depth confidence (i.e. with a high value in the confidence map).
confidence_threshold = 1.0
is_confident = confidence_map <= confidence_threshold
confidence_mask = is_confident.reshape(-1)  # flatten the thresholded map into a 1-D mask

point_cloud_filtered = filter_point_cloud(point_cloud, confidence_mask)
pcd_filtered = point_cloud_to_open3d(point_cloud_filtered)

# Cell output: the dtypes of the Open3D point cloud's positions and colors.
(pcd_filtered.point.positions.dtype, pcd_filtered.point.colors.dtype)

In [None]:
from cloth_tools.annotation.grasp_annotation import top_down_camera_pose

# Pose of the virtual camera used for the top-down view, built by top_down_camera_pose
# with height=1.5 (units are not visible here, presumably meters; TODO confirm).
virtual_camera_pose = top_down_camera_pose(height=1.5)

In [None]:
from cloth_tools.visualization.open3d import open3d_camera

# Camera poses (world frame) and the calibration shared by both camera drawings.
X_W_C = observation.camera_pose_in_world
X_W_VC = virtual_camera_pose
intrinsics = observation.camera_intrinsics
resolution = observation.camera_resolution

# Untransformed coordinate frame, marking the world origin.
world_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.3)

# Observation (frontal) camera: coordinate frame at its pose plus a yellow camera outline.
color_frontal_rgb = (1, 1, 0)
camera_frontal_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.3)
camera_frontal_frame.transform(X_W_C)
camera_frontal_lines = open3d_camera(X_W_C, intrinsics, resolution, color_frontal_rgb, scale=0.2)

# Virtual top-down camera: coordinate frame at its pose plus a cyan camera outline.
color_topdown_rgb = (0, 1, 1)
camera_top_down_frame = o3d.geometry.TriangleMesh.create_coordinate_frame(size=0.3)
camera_top_down_frame.transform(X_W_VC)
camera_topdown_lines = open3d_camera(X_W_VC, intrinsics, resolution, color_topdown_rgb, scale=0.2)

# Show the filtered point cloud together with the frames and camera outlines.
geometries = [
    pcd_filtered.to_legacy(),
    world_frame,
    camera_frontal_frame,
    camera_top_down_frame,
    camera_frontal_lines,
    camera_topdown_lines,
]
o3d.visualization.draw_geometries(geometries)

In [None]:
from cloth_tools.annotation.grasp_annotation import project_point_cloud_to_image

# Project the filtered point cloud into the virtual top-down camera, on a dark grey background.
topdown_background = (90, 90, 90)
image_topdown = project_point_cloud_to_image(
    point_cloud_filtered, X_W_VC, intrinsics, resolution, background_color=topdown_background
)

# Matplotlib seems to do some anti-aliasing, which makes the image look better than in OpenCV (without blurring).
image_topdown_for_matplotlib = ImageConverter.from_opencv_format(image_topdown).image_in_numpy_int_format
plt.figure(figsize=(12, 6))
plt.imshow(image_topdown_for_matplotlib)
plt.show()

In [None]:
# Some experiments with blurring to make the image look better in OpenCV.
# Four variants of the top-down image are shown side by side in a 2x2 grid of windows.
width = 800
height = 400
background_color = (120, 120, 120)

image_topdown = project_point_cloud_to_image(point_cloud_filtered, X_W_VC, intrinsics, resolution, background_color)

# The variants: 3x3 median blur, 3x3 box blur, and box blur followed by median blur.
image_median_blur = cv2.medianBlur(image_topdown, 3)
image_blur = cv2.blur(image_topdown, (3, 3))
image_blur_median = cv2.medianBlur(image_blur, 3)

window_name_original = "Original"
window_name_median_blur = "Median Blur"
window_name_blur = "Blur"
window_name_combined = "Blur + Median"

# (window name, left, top, image) for each window.
window_layout = [
    (window_name_original, 0, 0, image_topdown),
    (window_name_median_blur, width, 0, image_median_blur),
    (window_name_blur, 0, height, image_blur),
    (window_name_combined, width, height, image_blur_median),
]

for _window_name, _left, _top, _image in window_layout:
    cv2.namedWindow(_window_name, cv2.WINDOW_NORMAL)
    cv2.resizeWindow(_window_name, width, height)
    cv2.moveWindow(_window_name, _left, _top)
    cv2.imshow(_window_name, _image)

# Block until a key is pressed, then close all windows.
cv2.waitKey(0)
cv2.destroyAllWindows()

In [None]:
from cloth_tools.annotation.grasp_annotation import get_manual_grasp_annotation

# Request a manual grasp annotation from the observation's left image, its depth map and the
# filtered point cloud, using the observation camera's pose (X_W_C) and intrinsics.
# log_to_rerun=True is forwarded to the helper (presumably enables Rerun logging; confirm).
grasp_info = get_manual_grasp_annotation(
    observation.image_left, observation.depth_map, point_cloud_filtered, X_W_C, intrinsics, log_to_rerun=True
)

In [None]:
# Cell output: the attribute names stored on the annotation result.
vars(grasp_info).keys()

In [None]:
# Cell output: the grasp pose stored on the annotation result.
grasp_info.grasp_pose

In [None]:
from typing import Tuple
from pydantic import BaseModel
from cloth_tools.annotation.grasp_annotation import GraspAnnotationInfo
from airo_dataset_tools.data_parsers.pose import Pose
import json
import os


# class Keypoint2D(BaseModel):
#     x: float
#     y: float

#     @classmethod
#     def from_tuple(cls, point: Tuple[int, int]):
#         return cls(x=point[0], y=point[1])

# clicked_point_frontal=Keypoint2D.from_tuple(grasp_info.clicked_point_frontal),
# clicked_point_topdown=Keypoint2D.from_tuple(grasp_info.clicked_point_topdown),

class GraspAnnotation(BaseModel):
    """Serializable part of a grasp annotation, as stored in ``grasp_annotation.json``."""

    # Point clicked in the frontal image (presumably (x, y) pixel coordinates; TODO confirm).
    clicked_point_frontal: Tuple[int, int]
    # Point clicked in the top-down image (presumably (x, y) pixel coordinates; TODO confirm).
    clicked_point_topdown: Tuple[int, int]
    # Grasp depth copied from the annotation info (units are not visible here).
    grasp_depth: float

def save_grasp_info(dir: str, grasp_info: GraspAnnotationInfo):
    """Write a grasp annotation to ``dir`` as two JSON files and two JPEG images.

    Files written:
      - ``grasp_pose.json``: ``grasp_info.grasp_pose`` converted with ``Pose.from_homogeneous_matrix``.
      - ``grasp_annotation.json``: the two clicked points and the grasp depth.
      - ``frontal_image_grasp.jpg`` / ``topdown_image_grasp.jpg``: the two images with the
        grasp visualized.

    Args:
        dir: Output directory; created if it does not exist. The name shadows the builtin
            ``dir`` but is kept so that existing keyword callers keep working.
        grasp_info: The annotation to store.

    Raises:
        OSError: If OpenCV reports that one of the images could not be written.
    """
    os.makedirs(dir, exist_ok=True)

    # Build both models before opening any file, so a conversion or validation error
    # does not leave an empty JSON file behind.
    grasp_pose_model = Pose.from_homogeneous_matrix(grasp_info.grasp_pose)
    grasp_annotation = GraspAnnotation(
        clicked_point_frontal=grasp_info.clicked_point_frontal,
        clicked_point_topdown=grasp_info.clicked_point_topdown,
        grasp_depth=grasp_info.grasp_depth,
    )

    grasp_pose_file = os.path.join(dir, "grasp_pose.json")
    with open(grasp_pose_file, "w", encoding="utf-8") as f:
        json.dump(grasp_pose_model.model_dump(exclude_none=True), f, indent=4)

    grasp_annotation_file = os.path.join(dir, "grasp_annotation.json")
    with open(grasp_annotation_file, "w", encoding="utf-8") as f:
        json.dump(grasp_annotation.model_dump(exclude_none=True), f, indent=4)

    # Save the two images with the grasp visualized.
    images_by_file_name = {
        "frontal_image_grasp.jpg": grasp_info.image_frontal,
        "topdown_image_grasp.jpg": grasp_info.image_topdown,
    }
    for file_name, image in images_by_file_name.items():
        image_file = os.path.join(dir, file_name)
        # cv2.imwrite can signal failure by returning False instead of raising,
        # so the return value has to be checked explicitly.
        if not cv2.imwrite(image_file, image):
            raise OSError(f"Failed to write image: {image_file}")


# Relative output directory for the example; the last cell of the notebook removes it again.
grasp_dir = "grasp"
save_grasp_info(grasp_dir, grasp_info)

In [None]:
# TODO: verify the full save/load round trip; the images are stored as lossy JPEGs with the
# grasp drawn in, so the loaded images are not the clean originals.
def load_grasp_info(dir: str) -> GraspAnnotationInfo:
    """Load a grasp annotation from the files written by ``save_grasp_info``.

    Reads ``grasp_pose.json``, ``grasp_annotation.json``, ``frontal_image_grasp.jpg`` and
    ``topdown_image_grasp.jpg`` from ``dir``.

    Args:
        dir: Directory containing the four files. The name shadows the builtin ``dir`` but
            is kept so that existing keyword callers keep working.

    Returns:
        A ``GraspAnnotationInfo`` rebuilt from the stored pose, clicked points, grasp depth
        and images.

    Raises:
        FileNotFoundError: If one of the images is missing or cannot be decoded by OpenCV.
    """

    def _read_image(file_name: str):
        """Read one image from ``dir``, failing loudly instead of returning None."""
        image_file = os.path.join(dir, file_name)
        image = cv2.imread(image_file)
        # cv2.imread returns None (no exception) when the file is missing or unreadable.
        if image is None:
            raise FileNotFoundError(f"Could not read image: {image_file}")
        return image

    grasp_pose_file = os.path.join(dir, "grasp_pose.json")
    with open(grasp_pose_file, "r", encoding="utf-8") as f:
        grasp_pose_model = Pose(**json.load(f))

    grasp_annotation_file = os.path.join(dir, "grasp_annotation.json")
    with open(grasp_annotation_file, "r", encoding="utf-8") as f:
        grasp_annotation = GraspAnnotation(**json.load(f))

    image_frontal = _read_image("frontal_image_grasp.jpg")
    image_topdown = _read_image("topdown_image_grasp.jpg")

    # NOTE(review): assumes airo_dataset_tools' Pose exposes the matrix through the
    # as_homogeneous_matrix() method (counterpart of Pose.from_homogeneous_matrix) rather
    # than a `homogeneous_matrix` attribute; confirm against the installed version.
    grasp_pose = grasp_pose_model.as_homogeneous_matrix()

    return GraspAnnotationInfo(
        grasp_pose=grasp_pose,
        clicked_point_frontal=grasp_annotation.clicked_point_frontal,
        clicked_point_topdown=grasp_annotation.clicked_point_topdown,
        grasp_depth=grasp_annotation.grasp_depth,
        image_frontal=image_frontal,
        image_topdown=image_topdown,
    )

# Read the annotation back from the directory that save_grasp_info wrote to.
grasp_info_loaded = load_grasp_info(grasp_dir)

In [None]:
import shutil

# Remove the example output directory. Portable replacement for the shell escape
# `rm -rf grasp`: works without a POSIX shell and, like `-f`, ignores a missing directory.
shutil.rmtree("grasp", ignore_errors=True)