diff --git a/.gitignore b/.gitignore index bdc0a21f..971a12d1 100644 --- a/.gitignore +++ b/.gitignore @@ -23,10 +23,12 @@ *.csv *.log *.mp4 - +exp/ # Sphinx documentation docs/_build/ docs/build/ _build/ .doctrees/ + +jbwang_* diff --git a/docs/datasets/kitti-360.rst b/docs/datasets/kitti-360.rst index 42d549b6..d4fc8a9f 100644 --- a/docs/datasets/kitti-360.rst +++ b/docs/datasets/kitti-360.rst @@ -7,12 +7,12 @@ KiTTI-360 :alt: Dataset sample image :width: 290px - | **Paper:** `Name of Paper `_ - | **Download:** `Documentation `_ - | **Code:** [Code] - | **Documentation:** [License type] + | **Paper:** `KITTI-360: A Novel Dataset and Benchmarks for Urban Scene Understanding in 2D and 3D `_ + | **Download:** `www.cvlibs.net/datasets/kitti-360 `_ + | **Code:** `www.github.com/autonomousvision/kitti360Scripts `_ + | **Documentation:** `kitti-360 Document`_ | **License:** [License type] - | **Duration:** [Duration here] + | **Duration:** 320k images | **Supported Versions:** [Yes/No/Conditions] | **Redistribution:** [Yes/No/Conditions] diff --git a/notebooks/bev_matplotlib.ipynb b/notebooks/bev_matplotlib.ipynb index 910bf63a..94234bd5 100644 --- a/notebooks/bev_matplotlib.ipynb +++ b/notebooks/bev_matplotlib.ipynb @@ -12,7 +12,7 @@ "\n", "\n", "from py123d.common.multithreading.worker_sequential import Sequential\n", - "# from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType " + "# from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType " ] }, { "cell_type": "code", "execution_count": null, "id": "1", "metadata": {}, "outputs": [], "source": [ - "\n", - "# splits = [\"wopd_val\"]\n", + "splits = [\"kitti360\"]\n", + "# splits = [\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n", + "# splits = [\"nuplan-mini_test\", \"nuplan-mini_train\", \"nuplan-mini_val\"]\n", + "# splits = [\"nuplan_private_test\"]\n", "# splits = [\"carla_test\"]\n", - "splits = [\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n", - "# splits = [\"av2-sensor-mini_train\"]\n", - "# splits = [\"pandaset_train\"]\n", - "# log_names = None\n", - "\n", - "\n", - "\n", + "# splits = [\"wopd_val\"]\n", + "# splits = [\"av2-sensor_train\"]\n", + "# splits = [\"pandaset_test\", \"pandaset_val\", \"pandaset_train\"]\n", "log_names = None\n", "scene_uuids = None\n", "\n", @@ -39,18 +37,16 @@ " split_names=splits,\n", " log_names=log_names,\n", " scene_uuids=scene_uuids,\n", - " duration_s=None,\n", + " duration_s=30.0,\n", " history_s=0.0,\n", - " timestamp_threshold_s=20,\n", + " timestamp_threshold_s=30.0,\n", " shuffle=True,\n", - " # camera_types=[CameraType.CAM_F0],\n", + " # camera_types=[PinholeCameraType.CAM_F0],\n", ")\n", "scene_builder = ArrowSceneBuilder()\n", "worker = Sequential()\n", - "# worker = RayDistributed()\n", "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "\n", - "print(f\"Found {len(scenes)} scenes\")" + "print(f\"Found {len(scenes)} scenes\")\n" ] }, { @@ -117,9 +113,9 @@ ")\n", "\n", "ROAD_EDGE_CONFIG: PlotConfig = PlotConfig(\n", - " fill_color=DARKER_GREY.set_brightness(0.0),\n", + " fill_color=DARKER_GREY,\n", " fill_color_alpha=1.0,\n", - " line_color=DARKER_GREY.set_brightness(0.0),\n", + " line_color=DARKER_GREY,\n", " line_color_alpha=1.0,\n", " line_width=1.0,\n", " line_style=\"-\",\n", @@ -148,10 +144,10 @@ " # MapLayer.LANE,\n", " MapLayer.LANE_GROUP,\n", " MapLayer.GENERIC_DRIVABLE,\n", - " # MapLayer.CARPARK,\n", + " MapLayer.CARPARK,\n", " # MapLayer.CROSSWALK,\n", " # MapLayer.INTERSECTION,\n", - " # MapLayer.WALKWAY,\n", + " MapLayer.WALKWAY,\n", " MapLayer.ROAD_EDGE,\n", " 
MapLayer.ROAD_LINE,\n", " ]\n", @@ -220,10 +216,10 @@ "\n", " point_2d = ego_vehicle_state.bounding_box.center.state_se2.point_2d\n", " if map_api is not None:\n", - " # add_debug_map_on_ax(ax, scene.get_map_api(), point_2d, radius=radius, route_lane_group_ids=None)\n", + " add_debug_map_on_ax(ax, scene.get_map_api(), point_2d, radius=radius, route_lane_group_ids=None)\n", "\n", "\n", - " add_default_map_on_ax(ax, map_api, point_2d, radius=radius, route_lane_group_ids=None)\n", + " # add_default_map_on_ax(ax, map_api, point_2d, radius=radius, route_lane_group_ids=None)\n", " # add_traffic_lights_to_ax(ax, traffic_light_detections, scene.get_map_api())\n", "\n", " add_box_detections_to_ax(ax, box_detections)\n", @@ -256,7 +252,8 @@ "scene = np.random.choice(scenes)\n", "_plot_scene_on_ax(ax, scene, iteration, radius=80)\n", "# _plot_scene_on_ax(ax[1], scene, iteration, radius=50)\n", - "# _plot_scene_on_ax(ax[2], scene, iteration, radius=100)\n", + "# _plot_scene_on_ax(ax[2], scene, iteration,\n", + "# radius=100)\n", "\n", "plt.show()" ] @@ -285,115 +282,7 @@ "id": "4", "metadata": {}, "outputs": [], - "source": [ - "import shapely\n", - "from py123d.conversion.utils.map_utils.road_edge.road_edge_2d_utils import get_road_edge_linear_rings\n", - "\n", - "# from py123d.conversion.utils.map_utils.road_edge.road_edge_3d_utils import lift_road_edges_to_3d\n", - "from py123d.conversion.utils.map_utils.road_edge.road_edge_3d_utils import (\n", - " _interpolate_z_on_segment,\n", - " _split_continuous_segments,\n", - ")\n", - "from py123d.geometry.geometry_index import Point3DIndex\n", - "from py123d.geometry.occupancy_map import OccupancyMap2D\n", - "from py123d.geometry.polyline import Polyline3D\n", - "\n", - "\n", - "fix, ax = plt.subplots()\n", - "\n", - "\n", - "def lift_outlines_to_3d(\n", - " outlines_2d: List[shapely.LinearRing],\n", - " boundaries: List[Polyline3D],\n", - " max_distance: float = 10.0,\n", - ") -> List[Polyline3D]:\n", - " \"\"\"Lift 2D road edges to 3D by querying elevation from boundary segments.\n", - "\n", - " :param road_edges_2d: List of 2D road edge geometries.\n", - " :param boundaries: List of 3D boundary geometries.\n", - " :param max_distance: Maximum 2D distance for edge-boundary association.\n", - " :return: List of lifted 3D road edge geometries.\n", - " \"\"\"\n", - "\n", - " outlines_3d: List[Polyline3D] = []\n", - "\n", - " if len(outlines_2d) >= 1 and len(boundaries) >= 1:\n", - "\n", - " # 1. Build comprehensive spatial index with all boundary segments\n", - " # NOTE @DanielDauner: We split each boundary polyline into small segments.\n", - " # The spatial indexing uses axis-aligned bounding boxes, where small geometries lead to better performance.\n", - " boundary_segments = []\n", - " for boundary in boundaries:\n", - " coords = boundary.array.reshape(-1, 1, 3)\n", - " segment_coords_boundary = np.concatenate([coords[:-1], coords[1:]], axis=1)\n", - " boundary_segments.append(segment_coords_boundary)\n", - "\n", - " boundary_segments = np.concatenate(boundary_segments, axis=0)\n", - " boundary_segment_linestrings = shapely.creation.linestrings(boundary_segments)\n", - " occupancy_map = OccupancyMap2D(boundary_segment_linestrings)\n", - "\n", - " for linear_ring in outlines_2d:\n", - " points_2d = np.array(linear_ring.coords, dtype=np.float64)\n", - " points_3d = np.zeros((len(points_2d), len(Point3DIndex)), dtype=np.float64)\n", - " points_3d[..., Point3DIndex.XY] = points_2d\n", - "\n", - " # 3. 
Batch query for all points\n", - " query_points = shapely.creation.points(points_2d)\n", - " results = occupancy_map.query_nearest(query_points, max_distance=max_distance, exclusive=True)\n", - "\n", - " for query_idx, geometry_idx in zip(*results):\n", - " query_point = query_points[query_idx]\n", - " segment_coords = boundary_segments[geometry_idx]\n", - " best_z = _interpolate_z_on_segment(query_point, segment_coords)\n", - " points_3d[query_idx, 2] = best_z\n", - "\n", - " outlines_3d.append(Polyline3D.from_array(points_3d))\n", - "\n", - " return outlines_3d\n", - "\n", - "\n", - "def _extract_intersection_outline(lane_groups: List[AbstractLaneGroup], junction_id: str = 0) -> Polyline3D:\n", - " \"\"\"Helper method to extract intersection outline in 3D from lane group helpers.\"\"\"\n", - "\n", - " # 1. Extract the intersection outlines in 2D\n", - " intersection_polygons: List[shapely.Polygon] = [\n", - " lane_group_helper.shapely_polygon for lane_group_helper in lane_groups\n", - " ]\n", - " # for intersection_polygon in intersection_polygons:\n", - " # ax.plot(*intersection_polygon.exterior.xy)\n", - "\n", - " # for lane_group_helper in lane_groups:\n", - " # ax.plot(*lane_group_helper.outline.linestring.xy, color=\"blue\")\n", - " intersection_edges = get_road_edge_linear_rings(intersection_polygons, add_interiors=False)\n", - "\n", - " # for linear_ring in intersection_edges:\n", - " # ax.plot(*linear_ring.xy, color=\"blue\")\n", - "\n", - " # 2. Lift the 2D outlines to 3D\n", - " lane_group_outlines: List[Polyline3D] = [lane_group_helper.outline_3d for lane_group_helper in lane_groups]\n", - " intersection_outlines = lift_outlines_to_3d(intersection_edges, lane_group_outlines)\n", - "\n", - " print(len(intersection_outlines))\n", - "\n", - " # NOTE: When the intersection has multiple non-overlapping outlines, we cannot return a single outline in 3D.\n", - " # For now, we return the longest outline.\n", - "\n", - " valid_outlines = [outline for outline in intersection_outlines if outline.array.shape[0] > 2]\n", - " assert len(valid_outlines) > 0, f\"No valid intersection outlines found for Junction {junction_id}!\"\n", - "\n", - " longest_outline = max(valid_outlines, key=lambda outline: outline.length)\n", - "\n", - " # for linear_ring in intersection_outlines:\n", - " # ax.plot(*linear_ring.linestring.xy, color=\"red\")\n", - "\n", - " # ax.plot(*longest_outline.linestring.xy, color=\"red\")\n", - " # longest_outline.line\n", - " print(longest_outline.array[:, 2])\n", - " return longest_outline\n", - "\n", - "\n", - "_extract_intersection_outline(lane_groups)" - ] + "source": [] }, { "cell_type": "code", @@ -410,14 +299,6 @@ "metadata": {}, "outputs": [], "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/notebooks/bev_render.ipynb b/notebooks/bev_render.ipynb index 6e84c122..1bc41014 100644 --- a/notebooks/bev_render.ipynb +++ b/notebooks/bev_render.ipynb @@ -11,7 +11,7 @@ "from py123d.datatypes.scene.scene_filter import SceneFilter\n", "\n", "from py123d.common.multithreading.worker_sequential import Sequential\n", - "# from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType " + "# from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType " ] }, { @@ -21,35 +21,36 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "# splits = [\"wopd_val\"]\n", - "splits = [\"carla_test\"]\n", - "# splits = [\"nuplan-mini_test\"]\n", - "# 
splits = [\"av2-sensor-mini_train\"]\n", - "# splits = [\"pandaset_train\"]\n", - "# log_names = None\n", - "\n", - "\n", - "\n", + "# splits = [\"kitti360\"]\n", + "# splits = [\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n", + "# splits = [\"nuplan-mini_test\", \"nuplan-mini_train\", \"nuplan-mini_val\"]\n", + "# splits = [\"nuplan_private_test\"]\n", + "# splits = [\"carla_test\"]\n", + "splits = [\"wopd_val\"]\n", + "# splits = [\"av2-sensor_train\"]\n", + "# splits = [\"pandaset_test\", \"pandaset_val\", \"pandaset_train\"]\n", + "# log_names = [\"2021.08.24.13.12.55_veh-45_00386_00472\"]\n", + "# log_names = [\"2013_05_28_drive_0000_sync\"]\n", + "# log_names = [\"2013_05_28_drive_0000_sync\"]\n", "log_names = None\n", - "scene_uuids = None\n", + "scene_uuids = [\"9727e2b3-46b0-51bd-84a9-c516c0993045\"]\n", "\n", "scene_filter = SceneFilter(\n", " split_names=splits,\n", " log_names=log_names,\n", " scene_uuids=scene_uuids,\n", - " duration_s=20.0,\n", + " duration_s=None,\n", " history_s=0.0,\n", - " timestamp_threshold_s=20,\n", + " timestamp_threshold_s=None,\n", " shuffle=True,\n", - " # camera_types=[CameraType.CAM_F0],\n", + " # camera_types=[PinholeCameraType.CAM_F0],\n", ")\n", "scene_builder = ArrowSceneBuilder()\n", "worker = Sequential()\n", - "# worker = RayDistributed()\n", "scenes = scene_builder.get_scenes(scene_filter, worker)\n", "\n", - "print(f\"Found {len(scenes)} scenes\")" + "scenes = [scene for scene in scenes if scene.uuid in scene_uuids]\n", + "print(f\"Found {len(scenes)} scenes\")\n" ] }, { @@ -61,7 +62,7 @@ "source": [ "from py123d.visualization.matplotlib.plots import render_scene_animation\n", "\n", - "for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:\n", + "for i in [0]:\n", " render_scene_animation(scenes[i], output_path=\"test\", format=\"mp4\", fps=20, step=1, radius=50)" ] }, diff --git a/notebooks/camera_matplotlib.ipynb b/notebooks/camera_matplotlib.ipynb index f9a0433a..b33cfdd8 100644 --- a/notebooks/camera_matplotlib.ipynb +++ b/notebooks/camera_matplotlib.ipynb @@ -11,7 +11,7 @@ "from py123d.datatypes.scene.scene_filter import SceneFilter\n", "\n", "from py123d.common.multithreading.worker_sequential import Sequential\n", - "from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType" + "from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType" ] }, { @@ -42,7 +42,7 @@ " history_s=0.0,\n", " timestamp_threshold_s=20,\n", " shuffle=True,\n", - " camera_types=[PinholeCameraType.CAM_F0],\n", + " pinhole_camera_types=[PinholeCameraType.PCAM_F0],\n", ")\n", "scene_builder = ArrowSceneBuilder()\n", "worker = Sequential()\n", @@ -67,24 +67,24 @@ "scene = scenes[0]\n", "\n", "scene: AbstractScene\n", - "print(scene.uuid, scene.available_camera_types)\n", + "print(scene.uuid, scene.available_pinhole_camera_types)\n", "\n", "scale = 3.0\n", "fig, ax = plt.subplots(2, 3, figsize=(scale * 6, scale * 2.5))\n", "\n", "\n", "camera_ax_mapping = {\n", - " PinholeCameraType.CAM_L0: ax[0, 0],\n", - " PinholeCameraType.CAM_F0: ax[0, 1],\n", - " PinholeCameraType.CAM_R0: ax[0, 2],\n", - " PinholeCameraType.CAM_L1: ax[1, 0],\n", - " PinholeCameraType.CAM_B0: ax[1, 1],\n", - " PinholeCameraType.CAM_R1: ax[1, 2],\n", + " PinholeCameraType.PCAM_L0: ax[0, 0],\n", + " PinholeCameraType.PCAM_F0: ax[0, 1],\n", + " PinholeCameraType.PCAM_R0: ax[0, 2],\n", + " PinholeCameraType.PCAM_L1: ax[1, 0],\n", + " PinholeCameraType.PCAM_B0: ax[1, 1],\n", + " PinholeCameraType.PCAM_R1: ax[1, 2],\n", "}\n", "\n", "\n", "for camera_type, ax_ in 
camera_ax_mapping.items():\n", - " camera = scene.get_camera_at_iteration(iteration, camera_type)\n", + " camera = scene.get_pinhole_camera_at_iteration(iteration, camera_type)\n", " box_detections = scene.get_box_detections_at_iteration(iteration)\n", " ego_state = scene.get_ego_state_at_iteration(iteration)\n", "\n", diff --git a/notebooks/camera_render.ipynb b/notebooks/camera_render.ipynb new file mode 100644 index 00000000..4365c424 --- /dev/null +++ b/notebooks/camera_render.ipynb @@ -0,0 +1,165 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "from py123d.datatypes.scene.arrow.arrow_scene_builder import ArrowSceneBuilder\n", + "from py123d.datatypes.scene.scene_filter import SceneFilter\n", + "\n", + "from py123d.common.multithreading.worker_sequential import Sequential\n", + "from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType\n", + "\n", + "KITTI360_DATA_ROOT = \"/home/daniel/kitti_360/KITTI-360\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "# splits = [\"kitti360\"]\n", + "# splits = [\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n", + "# splits = [\"nuplan-mini_test\", \"nuplan-mini_train\", \"nuplan-mini_val\"]\n", + "# splits = [\"nuplan_private_test\"]\n", + "# splits = [\"carla_test\"]\n", + "splits = [\"wopd_val\"]\n", + "# splits = [\"av2-sensor_train\"]\n", + "# splits = [\"pandaset_test\", \"pandaset_val\", \"pandaset_train\"]\n", + "# log_names = [\"2021.08.24.13.12.55_veh-45_00386_00472\"]\n", + "# log_names = [\"2013_05_28_drive_0000_sync\"]\n", + "# log_names = [\"2013_05_28_drive_0000_sync\"]\n", + "log_names = None\n", + "scene_uuids = [\"9727e2b3-46b0-51bd-84a9-c516c0993045\"]\n", + "\n", + "scene_filter = SceneFilter(\n", + " split_names=splits,\n", + " log_names=log_names,\n", + " scene_uuids=scene_uuids,\n", + " duration_s=None,\n", + " history_s=0.0,\n", + " timestamp_threshold_s=None,\n", + " shuffle=True,\n", + " # camera_types=[PinholeCameraType.CAM_F0],\n", + ")\n", + "scene_builder = ArrowSceneBuilder()\n", + "worker = Sequential()\n", + "scenes = scene_builder.get_scenes(scene_filter, worker)\n", + "\n", + "scenes = [scene for scene in scenes if scene.uuid in scene_uuids]\n", + "print(f\"Found {len(scenes)} scenes\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib import pyplot as plt\n", + "from py123d.datatypes.scene.abstract_scene import AbstractScene\n", + "from py123d.visualization.matplotlib.camera import add_box_detections_to_camera_ax, add_camera_ax\n", + "import imageio\n", + "import numpy as np\n", + "\n", + "iteration = 0\n", + "scene = scenes[0]\n", + "\n", + "scene: AbstractScene\n", + "fps = 15 # frames per second\n", + "output_file = f\"camera_{scene.log_metadata.split}_{scene.uuid}.mp4\"\n", + "\n", + "writer = imageio.get_writer(output_file, fps=fps)\n", + "\n", + "scale = 3.0\n", + "fig, ax = plt.subplots(2, 3, figsize=(scale * 6, scale * 2.5))\n", + "\n", + "\n", + "camera_type = PinholeCameraType.PCAM_F0\n", + "\n", + "for i in range(scene.number_of_iterations):\n", + " camera = scene.get_pinhole_camera_at_iteration(i, camera_type)\n", + " box_detections = scene.get_box_detections_at_iteration(i)\n", + " ego_state = scene.get_ego_state_at_iteration(i)\n", + "\n", + " _, image = add_box_detections_to_camera_ax(\n", + " None,\n", + " camera,\n", + " 
box_detections,\n", + " ego_state,\n", + " return_image=True,\n", + " )\n", + " writer.append_data(image)\n", + "\n", + "writer.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py123d", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 9284be71..655a2612 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -105,10 +105,13 @@ nuscenes_expanded = [ "yourdfpy==0.0.58", ] waymo = [ - "protobuf==6.30.2", + "protobuf==4.21.0", "tensorflow==2.13.0", "waymo-open-dataset-tf-2-12-0==1.6.6", ] +ffmpeg = [ + "imageio[ffmpeg]", +] [tool.setuptools.packages.find] where = ["src"] diff --git a/scripts/conversion/kitti360_conversion.sh b/scripts/conversion/kitti360_conversion.sh new file mode 100644 index 00000000..1e939ad5 --- /dev/null +++ b/scripts/conversion/kitti360_conversion.sh @@ -0,0 +1,3 @@ +export KITTI360_DATA_ROOT="/home/daniel/kitti_360/KITTI-360" + +py123d-conversion datasets=["kitti360_dataset"] map_writer.remap_ids=true diff --git a/scripts/download/download_kitti_360.sh b/scripts/download/download_kitti_360.sh new file mode 100644 index 00000000..1cb3e540 --- /dev/null +++ b/scripts/download/download_kitti_360.sh @@ -0,0 +1,86 @@ +# 2D data & labels +# ---------------------------------------------------------------------------------------------------------------------- + +# Fisheye Images (355G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/a1d81d9f7fc7195c937f9ad12e2a2c66441ecb4e/download_2d_fisheye.zip + +# Fisheye Calibration Images (11G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/data_2d_raw/data_fisheye_calibration.zip + + +# Perspective Images for Train & Val (128G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/a1d81d9f7fc7195c937f9ad12e2a2c66441ecb4e/download_2d_perspective.zip + +# Test Semantic (1.5G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/data_2d_raw/data_2d_test.zip + +# Test NVS 50% Drop (0.3G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/71f967e900f4e7c2e036a542f150effa31909b53/data_2d_nvs_drop50.zip + +# Test NVS 90% Drop (0.2G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/71f967e900f4e7c2e036a542f150effa31909b53/data_2d_nvs_drop90.zip + +# Test SLAM (14G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/data_2d_raw/data_2d_test_slam.zip + + +# Semantics of Left Perspective Camera (1.8G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ed180d24c0a144f2f1ac71c2c655a3e986517ed8/data_2d_semantics.zip + +# Semantics of Right Perspective 
Camera (1.8G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ed180d24c0a144f2f1ac71c2c655a3e986517ed8/data_2d_semantics_image_01.zip + + +# Confidence of Left Perspective Camera (44G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ed180d24c0a144f2f1ac71c2c655a3e986517ed8/data_2d_confidence.zip + +# Confidence of Right Perspective Camera (44G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ed180d24c0a144f2f1ac71c2c655a3e986517ed8/data_2d_confidence_image_01.zip + + + +# 3D data & labels +# ---------------------------------------------------------------------------------------------------------------------- + +# Raw Velodyne Scans (119G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/a1d81d9f7fc7195c937f9ad12e2a2c66441ecb4e/download_3d_velodyne.zip + +# Test SLAM (12G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/data_3d_raw/data_3d_test_slam.zip + +# Test Completion (35M) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/6489aabd632d115c4280b978b2dcf72cb0142ad9/data_3d_ssc_test.zip + + +# Raw SICK Scans (0.4G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/a1d81d9f7fc7195c937f9ad12e2a2c66441ecb4e/download_3d_sick.zip + + +# Accumulated Point Clouds for Train & Val (12G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/6489aabd632d115c4280b978b2dcf72cb0142ad9/data_3d_semantics.zip + +# Test Semantic (1.2G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/6489aabd632d115c4280b978b2dcf72cb0142ad9/data_3d_semantics_test.zip + + +# 3D Bounding Boxes (30M) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ffa164387078f48a20f0188aa31b0384bb19ce60/data_3d_bboxes.zip + + + +# Calibrations & Poses +# ---------------------------------------------------------------------------------------------------------------------- + +# Calibrations (3K) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/384509ed5413ccc81328cf8c55cc6af078b8c444/calibration.zip + + +# Vehicle Poses (8.9M) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/89a6bae3c8a6f789e12de4807fc1e8fdcf182cf4/data_poses.zip + + +# OXTS Sync Measurements (37.3M) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/89a6bae3c8a6f789e12de4807fc1e8fdcf182cf4/data_poses_oxts.zip + +# OXTS Raw Measurements (0.4G) +wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/89a6bae3c8a6f789e12de4807fc1e8fdcf182cf4/data_poses_oxts_extract.zip diff --git a/src/py123d/common/utils/enums.py b/src/py123d/common/utils/enums.py index 33300c00..9f7d233e 100644 --- a/src/py123d/common/utils/enums.py +++ b/src/py123d/common/utils/enums.py @@ -2,6 +2,8 @@ from enum import IntEnum +from typing import Union + class classproperty(object): def __init__(self, f): @@ -27,3 +29,15 @@ def deserialize(cls, key: str) -> SerialIntEnum: def from_int(cls, value: int) -> SerialIntEnum: """Get the enum from an int.""" return cls(value) + + @classmethod + def from_arbitrary(cls, value: Union[int, str, SerialIntEnum]) -> SerialIntEnum: + """Get the enum from an int, string, or enum instance.""" + if isinstance(value, cls): + return value + elif isinstance(value, int): + return cls.from_int(value) + elif isinstance(value, str): + return cls.deserialize(value) + else: + raise ValueError(f"Invalid value for {cls.__name__}: {value}") diff --git a/src/py123d/conversion/dataset_converter_config.py 
b/src/py123d/conversion/dataset_converter_config.py index 6539e3e4..d4924b01 100644 --- a/src/py123d/conversion/dataset_converter_config.py +++ b/src/py123d/conversion/dataset_converter_config.py @@ -23,9 +23,13 @@ class DatasetConverterConfig: # Traffic Lights include_traffic_lights: bool = False - # Cameras - include_cameras: bool = False - camera_store_option: Literal["path", "binary", "mp4"] = "path" + # Pinhole Cameras + include_pinhole_cameras: bool = False + pinhole_camera_store_option: Literal["path", "binary", "mp4"] = "path" + + # Fisheye MEI Cameras + include_fisheye_mei_cameras: bool = False + fisheye_mei_camera_store_option: Literal["path", "binary", "mp4"] = "path" # LiDARs include_lidars: bool = False @@ -37,11 +41,13 @@ class DatasetConverterConfig: include_route: bool = False def __post_init__(self): - assert self.camera_store_option != "mp4", "MP4 format is not yet supported, but planned for future releases." - assert self.camera_store_option in [ + assert ( + self.pinhole_camera_store_option != "mp4" + ), "MP4 format is not yet supported, but planned for future releases." + assert self.pinhole_camera_store_option in [ "path", "binary", - ], f"Invalid camera store option, got {self.camera_store_option}." + ], f"Invalid camera store option, got {self.pinhole_camera_store_option}." assert self.lidar_store_option in [ "path", diff --git a/src/py123d/conversion/datasets/av2/utils/av2_map_conversion.py b/src/py123d/conversion/datasets/av2/av2_map_conversion.py similarity index 97% rename from src/py123d/conversion/datasets/av2/utils/av2_map_conversion.py rename to src/py123d/conversion/datasets/av2/av2_map_conversion.py index 41851c58..a55a9cf4 100644 --- a/src/py123d/conversion/datasets/av2/utils/av2_map_conversion.py +++ b/src/py123d/conversion/datasets/av2/av2_map_conversion.py @@ -113,6 +113,10 @@ def _get_centerline_from_boundaries( right_boundary=lane_dict["right_lane_boundary"], ) + # NOTE @DanielDauner: Some neighbor lane IDs might not be present in the dataset. 
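+ # Writing None for a missing neighbor keeps the converted lane graph free of dangling ID references.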
+ left_lane_id = lane_dict["left_neighbor_id"] if lane_dict["left_neighbor_id"] in lanes else None + right_lane_id = lane_dict["right_neighbor_id"] if lane_dict["right_neighbor_id"] in lanes else None + map_writer.write_lane( CacheLane( object_id=lane_id, @@ -120,8 +124,8 @@ def _get_centerline_from_boundaries( left_boundary=lane_dict["left_lane_boundary"], right_boundary=lane_dict["right_lane_boundary"], centerline=lane_centerline, - left_lane_id=lane_dict["left_neighbor_id"], - right_lane_id=lane_dict["right_neighbor_id"], + left_lane_id=left_lane_id, + right_lane_id=right_lane_id, predecessor_ids=lane_dict["predecessors"], successor_ids=lane_dict["successors"], speed_limit_mps=None, diff --git a/src/py123d/conversion/datasets/av2/av2_sensor_converter.py b/src/py123d/conversion/datasets/av2/av2_sensor_converter.py index 172954a9..aebebd19 100644 --- a/src/py123d/conversion/datasets/av2/av2_sensor_converter.py +++ b/src/py123d/conversion/datasets/av2/av2_sensor_converter.py @@ -6,6 +6,7 @@ from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter from py123d.conversion.dataset_converter_config import DatasetConverterConfig +from py123d.conversion.datasets.av2.av2_map_conversion import convert_av2_map from py123d.conversion.datasets.av2.utils.av2_constants import ( AV2_CAMERA_TYPE_MAPPING, AV2_SENSOR_SPLITS, @@ -18,21 +19,20 @@ find_closest_target_fpath, get_slice_with_timestamp_ns, ) -from py123d.conversion.datasets.av2.utils.av2_map_conversion import convert_av2_map from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter -from py123d.conversion.registry.lidar_index_registry import AVSensorLidarIndex +from py123d.conversion.registry.lidar_index_registry import AVSensorLiDARIndex from py123d.datatypes.detections.box_detection_types import BoxDetectionType from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper from py123d.datatypes.maps.map_metadata import MapMetadata from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import ( +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import ( PinholeCameraMetadata, PinholeCameraType, PinholeDistortion, PinholeIntrinsics, ) -from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import ( @@ -52,6 +52,7 @@ def __init__( dataset_converter_config: DatasetConverterConfig, ) -> None: super().__init__(dataset_converter_config) + assert av2_data_root is not None, "The variable `av2_data_root` must be provided." 
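+ # NOTE: a None root usually means `av2_data_root` was left unset in the conversion config.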
for split in splits: assert ( split in AV2_SENSOR_SPLITS @@ -118,7 +119,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: location=map_metadata.location, timestep_seconds=0.1, vehicle_parameters=get_av2_ford_fusion_hybrid_parameters(), - camera_metadata=_get_av2_camera_metadata(source_log_path, self.dataset_converter_config), + pinhole_camera_metadata=_get_av2_pinhole_camera_metadata(source_log_path, self.dataset_converter_config), lidar_metadata=_get_av2_lidar_metadata(source_log_path, self.dataset_converter_config), map_metadata=map_metadata, ) @@ -151,7 +152,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: timestamp=TimePoint.from_ns(int(lidar_timestamp_ns)), ego_state=ego_state, box_detections=_extract_av2_sensor_box_detections(annotations_df, lidar_timestamp_ns, ego_state), - cameras=_extract_av2_sensor_camera( + pinhole_cameras=_extract_av2_sensor_pinhole_cameras( lidar_timestamp_ns, egovehicle_se3_sensor_df, synchronization_df, @@ -185,27 +186,25 @@ def _get_av2_sensor_map_metadata(split: str, source_log_path: Path) -> MapMetada ) -def _get_av2_camera_metadata( +def _get_av2_pinhole_camera_metadata( source_log_path: Path, dataset_converter_config: DatasetConverterConfig ) -> Dict[PinholeCameraType, PinholeCameraMetadata]: - camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {} - - if dataset_converter_config.include_cameras: + pinhole_camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {} + if dataset_converter_config.include_pinhole_cameras: intrinsics_file = source_log_path / "calibration" / "intrinsics.feather" intrinsics_df = pd.read_feather(intrinsics_file) for _, row in intrinsics_df.iterrows(): row = row.to_dict() camera_type = AV2_CAMERA_TYPE_MAPPING[row["sensor_name"]] - camera_metadata[camera_type] = PinholeCameraMetadata( + pinhole_camera_metadata[camera_type] = PinholeCameraMetadata( camera_type=camera_type, width=row["width_px"], height=row["height_px"], intrinsics=PinholeIntrinsics(fx=row["fx_px"], fy=row["fy_px"], cx=row["cx_px"], cy=row["cy_px"]), distortion=PinholeDistortion(k1=row["k1"], k2=row["k2"], p1=0.0, p2=0.0, k3=row["k3"]), ) - - return camera_metadata + return pinhole_camera_metadata def _get_av2_lidar_metadata( @@ -226,7 +225,7 @@ def _get_av2_lidar_metadata( # top lidar: metadata[LiDARType.LIDAR_TOP] = LiDARMetadata( lidar_type=LiDARType.LIDAR_TOP, - lidar_index=AVSensorLidarIndex, + lidar_index=AVSensorLiDARIndex, extrinsic=_row_dict_to_state_se3( calibration_df[calibration_df["sensor_name"] == "up_lidar"].iloc[0].to_dict() ), @@ -234,7 +233,7 @@ def _get_av2_lidar_metadata( # down lidar: metadata[LiDARType.LIDAR_DOWN] = LiDARMetadata( lidar_type=LiDARType.LIDAR_DOWN, - lidar_index=AVSensorLidarIndex, + lidar_index=AVSensorLiDARIndex, extrinsic=_row_dict_to_state_se3( calibration_df[calibration_df["sensor_name"] == "down_lidar"].iloc[0].to_dict() ), @@ -321,7 +320,7 @@ def _extract_av2_sensor_ego_state(city_se3_egovehicle_df: pd.DataFrame, lidar_ti ) -def _extract_av2_sensor_camera( +def _extract_av2_sensor_pinhole_cameras( lidar_timestamp_ns: int, egovehicle_se3_sensor_df: pd.DataFrame, synchronization_df: pd.DataFrame, @@ -333,7 +332,7 @@ def _extract_av2_sensor_camera( split = source_log_path.parent.name log_id = source_log_path.name - if dataset_converter_config.include_cameras: + if dataset_converter_config.include_pinhole_cameras: av2_sensor_data_root = source_log_path.parent.parent for _, row in egovehicle_se3_sensor_df.iterrows(): @@ -341,15 +340,15 
@@ def _extract_av2_sensor_camera( if row["sensor_name"] not in AV2_CAMERA_TYPE_MAPPING: continue - camera_name = row["sensor_name"] - camera_type = AV2_CAMERA_TYPE_MAPPING[camera_name] + pinhole_camera_name = row["sensor_name"] + pinhole_camera_type = AV2_CAMERA_TYPE_MAPPING[pinhole_camera_name] relative_image_path = find_closest_target_fpath( split=split, log_id=log_id, src_sensor_name="lidar", src_timestamp_ns=lidar_timestamp_ns, - target_sensor_name=camera_name, + target_sensor_name=pinhole_camera_name, synchronization_df=synchronization_df, ) if relative_image_path is not None: @@ -359,12 +358,12 @@ def _extract_av2_sensor_camera( # TODO: Adjust for finer IMU timestamps to correct the camera extrinsic. camera_extrinsic = _row_dict_to_state_se3(row) camera_data = None - if dataset_converter_config.camera_store_option == "path": + if dataset_converter_config.pinhole_camera_store_option == "path": camera_data = str(relative_image_path) - elif dataset_converter_config.camera_store_option == "binary": + elif dataset_converter_config.pinhole_camera_store_option == "binary": with open(absolute_image_path, "rb") as f: camera_data = f.read() - camera_dict[camera_type] = camera_data, camera_extrinsic + camera_dict[pinhole_camera_type] = camera_data, camera_extrinsic return camera_dict diff --git a/src/py123d/conversion/datasets/av2/av2_sensor_io.py b/src/py123d/conversion/datasets/av2/av2_sensor_io.py index a17e4892..81a3de3a 100644 --- a/src/py123d/conversion/datasets/av2/av2_sensor_io.py +++ b/src/py123d/conversion/datasets/av2/av2_sensor_io.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType def load_av2_sensor_lidar_pcs_from_file(feather_path: Union[Path, str]) -> Dict[LiDARType, np.ndarray]: diff --git a/src/py123d/conversion/datasets/av2/utils/av2_constants.py b/src/py123d/conversion/datasets/av2/utils/av2_constants.py index 7f81f48c..5ac7af9d 100644 --- a/src/py123d/conversion/datasets/av2/utils/av2_constants.py +++ b/src/py123d/conversion/datasets/av2/utils/av2_constants.py @@ -3,7 +3,7 @@ from py123d.common.utils.enums import SerialIntEnum from py123d.datatypes.detections.box_detection_types import BoxDetectionType from py123d.datatypes.maps.map_datatypes import RoadLineType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType AV2_SENSOR_SPLITS: Set[str] = {"av2-sensor_train", "av2-sensor_val", "av2-sensor_test"} @@ -80,15 +80,15 @@ class AV2SensorBoxDetectionType(SerialIntEnum): AV2_CAMERA_TYPE_MAPPING: Dict[str, PinholeCameraType] = { - "ring_front_center": PinholeCameraType.CAM_F0, - "ring_front_left": PinholeCameraType.CAM_L0, - "ring_front_right": PinholeCameraType.CAM_R0, - "ring_side_left": PinholeCameraType.CAM_L1, - "ring_side_right": PinholeCameraType.CAM_R1, - "ring_rear_left": PinholeCameraType.CAM_L2, - "ring_rear_right": PinholeCameraType.CAM_R2, - "stereo_front_left": PinholeCameraType.CAM_STEREO_L, - "stereo_front_right": PinholeCameraType.CAM_STEREO_R, + "ring_front_center": PinholeCameraType.PCAM_F0, + "ring_front_left": PinholeCameraType.PCAM_L0, + "ring_front_right": PinholeCameraType.PCAM_R0, + "ring_side_left": PinholeCameraType.PCAM_L1, + "ring_side_right": PinholeCameraType.PCAM_R1, + "ring_rear_left": PinholeCameraType.PCAM_L2, + "ring_rear_right": PinholeCameraType.PCAM_R2, + "stereo_front_left": PinholeCameraType.PCAM_STEREO_L, + 
"stereo_front_right": PinholeCameraType.PCAM_STEREO_R, } # AV2_LIDAR_TYPES: Dict[str, str] = { diff --git a/src/py123d/conversion/log_writer/utils/__init__.py b/src/py123d/conversion/datasets/kitti360/__init__.py similarity index 100% rename from src/py123d/conversion/log_writer/utils/__init__.py rename to src/py123d/conversion/datasets/kitti360/__init__.py diff --git a/src/py123d/conversion/datasets/kitti360/kitti360_converter.py b/src/py123d/conversion/datasets/kitti360/kitti360_converter.py new file mode 100644 index 00000000..8bb9b497 --- /dev/null +++ b/src/py123d/conversion/datasets/kitti360/kitti360_converter.py @@ -0,0 +1,795 @@ +import datetime +import logging +import pickle +import re +import xml.etree.ElementTree as ET +from collections import defaultdict +from pathlib import Path +from typing import Any, Dict, Final, List, Optional, Tuple, Union + +import numpy as np +import yaml + +from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter +from py123d.conversion.dataset_converter_config import DatasetConverterConfig +from py123d.conversion.datasets.kitti360.kitti360_map_conversion import convert_kitti360_map_with_writer +from py123d.conversion.datasets.kitti360.utils.kitti360_helper import ( + KITTI3602NUPLAN_IMU_CALIBRATION, + KITTI360Bbox3D, + get_kitti360_lidar_extrinsic, +) +from py123d.conversion.datasets.kitti360.utils.kitti360_labels import ( + BBOX_LABLES_TO_DETECTION_NAME_DICT, + KITTI360_DETECTION_NAME_DICT, + kittiId2label, +) +from py123d.conversion.datasets.kitti360.utils.preprocess_detection import process_detection +from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData +from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from py123d.conversion.registry.lidar_index_registry import Kitti360LiDARIndex +from py123d.datatypes.detections.box_detections import ( + BoxDetectionMetadata, + BoxDetectionSE3, + BoxDetectionWrapper, +) +from py123d.datatypes.maps.map_metadata import MapMetadata +from py123d.datatypes.scene.scene_metadata import LogMetadata +from py123d.datatypes.sensors.fisheye_mei_camera import ( + FisheyeMEICameraMetadata, + FisheyeMEICameraType, + FisheyeMEIDistortion, + FisheyeMEIProjection, +) +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import ( + PinholeCameraMetadata, + PinholeCameraType, + PinholeDistortion, + PinholeIntrinsics, +) +from py123d.datatypes.time.time_point import TimePoint +from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 +from py123d.datatypes.vehicle_state.vehicle_parameters import ( + get_kitti360_vw_passat_parameters, + rear_axle_se3_to_center_se3, +) +from py123d.geometry import BoundingBoxSE3, Quaternion, StateSE3, Vector3D +from py123d.geometry.transform.transform_se3 import convert_se3_array_between_origins, translate_se3_along_body_frame + +KITTI360_DT: Final[float] = 0.1 + +KITTI360_PINHOLE_CAMERA_TYPES = { + PinholeCameraType.PCAM_STEREO_L: "image_00", + PinholeCameraType.PCAM_STEREO_R: "image_01", +} + +KITTI360_FISHEYE_MEI_CAMERA_TYPES = { + FisheyeMEICameraType.FCAM_L: "image_02", + FisheyeMEICameraType.FCAM_R: "image_03", +} + +KITTI360_SPLITS: List[str] = ["kitti360_train", "kitti360_val", "kitti360_test"] +KITTI360_ALL_SEQUENCES: Final[List[str]] = [ + "2013_05_28_drive_0000_sync", + "2013_05_28_drive_0002_sync", + "2013_05_28_drive_0003_sync", + "2013_05_28_drive_0004_sync", + "2013_05_28_drive_0005_sync", + 
"2013_05_28_drive_0006_sync", + "2013_05_28_drive_0007_sync", + "2013_05_28_drive_0008_sync", + "2013_05_28_drive_0009_sync", + "2013_05_28_drive_0010_sync", + "2013_05_28_drive_0018_sync", +] + +DIR_ROOT = "root" +DIR_2D_RAW = "data_2d_raw" +DIR_2D_SMT = "data_2d_semantics" +DIR_3D_RAW = "data_3d_raw" +DIR_3D_SMT = "data_3d_semantics" +DIR_3D_BBOX = "data_3d_bboxes" +DIR_POSES = "data_poses" +DIR_CALIB = "calibration" + + +def _get_kitti360_paths_from_root(kitti_data_root: Path) -> Dict[str, Path]: + return { + DIR_ROOT: kitti_data_root, + DIR_2D_RAW: kitti_data_root / DIR_2D_RAW, + DIR_2D_SMT: kitti_data_root / DIR_2D_SMT, + DIR_3D_RAW: kitti_data_root / DIR_3D_RAW, + DIR_3D_SMT: kitti_data_root / DIR_3D_SMT, + DIR_3D_BBOX: kitti_data_root / DIR_3D_BBOX, + DIR_POSES: kitti_data_root / DIR_POSES, + DIR_CALIB: kitti_data_root / DIR_CALIB, + } + + +def _get_kitti360_required_modality_roots(kitti360_folders: Dict[str, Path]) -> Dict[str, Path]: + return { + DIR_2D_RAW: kitti360_folders[DIR_2D_RAW], + DIR_3D_RAW: kitti360_folders[DIR_3D_RAW], + DIR_POSES: kitti360_folders[DIR_POSES], + DIR_3D_BBOX: kitti360_folders[DIR_3D_BBOX] / "train", + } + + +class Kitti360Converter(AbstractDatasetConverter): + def __init__( + self, + splits: List[str], + kitti360_data_root: Union[Path, str], + detection_cache_root: Union[Path, str], + detection_radius: float, + dataset_converter_config: DatasetConverterConfig, + train_sequences: List[str], + val_sequences: List[str], + test_sequences: List[str], + ) -> None: + assert kitti360_data_root is not None, "The variable `kitti360_data_root` must be provided." + super().__init__(dataset_converter_config) + for split in splits: + assert split in KITTI360_SPLITS, f"Split {split} is not available. Available splits: {KITTI360_SPLITS}" + + self._splits: List[str] = splits + self._kitti360_data_root: Path = Path(kitti360_data_root) + self._kitti360_folders: Dict[str, Path] = _get_kitti360_paths_from_root(self._kitti360_data_root) + + # NOTE: We preprocess detections into cache directory to speed up repeated conversions + # The bounding boxes are preprocessed into a per-frame format based on the ego distance and + # visibility based on the lidar point cloud. 
+ self._detection_cache_root: Path = Path(detection_cache_root) + self._detection_radius: float = detection_radius + + self._train_sequences: List[str] = train_sequences + self._val_sequences: List[str] = val_sequences + self._test_sequences: List[str] = test_sequences + + self._log_names_and_split: List[Tuple[str, str]] = self._collect_valid_logs() + self._total_maps = len(self._log_names_and_split) # Each log has its own map + self._total_logs = len(self._log_names_and_split) + + def _collect_valid_logs(self) -> List[Tuple[str, str]]: + """Helper function to collect valid KITTI sequences ("logs") from the dataset root + + :raises FileNotFoundError: If required modality roots are missing + :return: A list of tuples containing the log name and split name + """ + + def _has_modality(seq_name: str, modality_name: str, root: Path) -> bool: + if modality_name == DIR_3D_BBOX: + # expected: data_3d_bboxes/train/.xml + xml_path = root / f"{seq_name}.xml" + return xml_path.exists() + else: + return (root / seq_name).exists() + + required_modality_roots = _get_kitti360_required_modality_roots(self._kitti360_folders) + missing_roots = [str(p) for p in required_modality_roots.values() if not p.exists()] + if missing_roots: + raise FileNotFoundError(f"KITTI-360 required roots missing: {missing_roots}") + + # Find all sequences in the 2D raw data directory, and add to split + split_sequence_candidates: Dict[str, List[str]] = defaultdict(list) + for sequence_path in required_modality_roots[DIR_2D_RAW].iterdir(): + if sequence_path.is_dir() and sequence_path.name.endswith("_sync"): + seq_name = sequence_path.name + if seq_name in self._train_sequences: + split_sequence_candidates["kitti360_train"].append(seq_name) + elif seq_name in self._val_sequences: + split_sequence_candidates["kitti360_val"].append(seq_name) + elif seq_name in self._test_sequences: + split_sequence_candidates["kitti360_test"].append(seq_name) + + # Iterate all candidates, check that modalities available, and add to flat list + log_paths_and_split: List[Tuple[Path, str]] = [] + for split, sequence_names in split_sequence_candidates.items(): + if split not in self._splits: + continue + for sequence_name in sequence_names: + missing_modalities = [ + modality_name + for modality_name, root in required_modality_roots.items() + if not _has_modality(sequence_name, modality_name, root) + ] + if len(missing_modalities) == 0: + log_paths_and_split.append((sequence_name, split)) + else: + logging.info( + f"Sequence '{sequence_name}' skipped: missing modalities {missing_modalities}. " + f"Root: {self._kitti360_data_root}" + ) + + logging.info(f"Valid sequences found: {len(log_paths_and_split)}") + return log_paths_and_split + + def get_number_of_maps(self) -> int: + """Returns the number of available raw data maps for conversion.""" + return self._total_maps + + def get_number_of_logs(self) -> int: + """Returns the number of available raw data logs for conversion.""" + return self._total_logs + + def convert_map(self, map_index: int, map_writer: AbstractMapWriter) -> None: + """ + Convert a single map in raw data format to the uniform 123D format. + :param map_index: The index of the map to convert. + :param map_writer: The map writer to use for writing the converted map. 
+ """ + log_name, split = self._log_names_and_split[map_index] + map_metadata = _get_kitti360_map_metadata(split, log_name) + map_needs_writing = map_writer.reset(self.dataset_converter_config, map_metadata) + if map_needs_writing: + convert_kitti360_map_with_writer(log_name, map_writer) + map_writer.close() + + def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: + """ + Convert a single log in raw data format to the uniform 123D format. + :param log_index: The index of the log to convert. + :param log_writer: The log writer to use for writing the converted log. + """ + log_name, split = self._log_names_and_split[log_index] + + # Create log metadata + log_metadata = LogMetadata( + dataset="kitti360", + split=split, + log_name=log_name, + location=log_name, + timestep_seconds=KITTI360_DT, + vehicle_parameters=get_kitti360_vw_passat_parameters(), + pinhole_camera_metadata=_get_kitti360_pinhole_camera_metadata( + self._kitti360_folders, + self.dataset_converter_config, + ), + fisheye_mei_camera_metadata=_get_kitti360_fisheye_mei_camera_metadata( + self._kitti360_folders, + self.dataset_converter_config, + ), + lidar_metadata=_get_kitti360_lidar_metadata( + self._kitti360_folders, + self.dataset_converter_config, + ), + map_metadata=_get_kitti360_map_metadata(split, log_name), + ) + + log_needs_writing = log_writer.reset(self.dataset_converter_config, log_metadata) + + if log_needs_writing: + ts_list: List[TimePoint] = _read_timestamps(log_name, self._kitti360_folders) + ego_state_all, valid_timestamp = _extract_ego_state_all(log_name, self._kitti360_folders) + ego_states_xyz = np.array([ego_state.center.array[:3] for ego_state in ego_state_all], dtype=np.float64) + box_detection_wrapper_all = _extract_kitti360_box_detections_all( + log_name, + len(ts_list), + ego_states_xyz, + valid_timestamp, + self._kitti360_folders, + self._detection_cache_root, + self._detection_radius, + ) + camera_calibration = _load_kitti_360_calibration(self._kitti360_data_root) + logging.info(f"Number of valid timestamps with ego states: {len(valid_timestamp)}") + + for idx in range(len(valid_timestamp)): + valid_idx = valid_timestamp[idx] + + pinhole_cameras = _extract_kitti360_pinhole_cameras( + log_name, + valid_idx, + camera_calibration, + self._kitti360_folders, + self.dataset_converter_config, + ) + fisheye_cameras = _extract_kitti360_fisheye_mei_cameras( + log_name, + valid_idx, + camera_calibration, + self._kitti360_folders, + self.dataset_converter_config, + ) + lidars = _extract_kitti360_lidar( + log_name, + valid_idx, + self._kitti360_folders, + self.dataset_converter_config, + ) + + log_writer.write( + timestamp=ts_list[valid_idx], + ego_state=ego_state_all[idx], + box_detections=box_detection_wrapper_all[valid_idx], + traffic_lights=None, + pinhole_cameras=pinhole_cameras, + fisheye_mei_cameras=fisheye_cameras, + lidars=lidars, + scenario_tags=None, + route_lane_group_ids=None, + ) + + log_writer.close() + + +def _get_kitti360_pinhole_camera_metadata( + kitti360_folders: Dict[str, Path], + dataset_converter_config: DatasetConverterConfig, +) -> Dict[PinholeCameraType, PinholeCameraMetadata]: + + pinhole_cam_metadatas: Dict[PinholeCameraType, PinholeCameraMetadata] = {} + if dataset_converter_config.include_pinhole_cameras: + persp = kitti360_folders[DIR_CALIB] / "perspective.txt" + assert persp.exists() + persp_result = {"image_00": {}, "image_01": {}} + + with open(persp, "r") as f: + lines = [ln.strip() for ln in f if ln.strip()] + for ln in lines: + key, value = ln.split(" ", 
1) + cam_id = key.split("_")[-1][:2] + if key.startswith("P_rect_"): + persp_result[f"image_{cam_id}"]["intrinsic"] = _read_projection_matrix(ln) + elif key.startswith("S_rect_"): + persp_result[f"image_{cam_id}"]["wh"] = [int(round(float(x))) for x in value.split()] + elif key.startswith("D_"): + persp_result[f"image_{cam_id}"]["distortion"] = [float(x) for x in value.split()] + + for pcam_type, pcam_name in KITTI360_PINHOLE_CAMERA_TYPES.items(): + pinhole_cam_metadatas[pcam_type] = PinholeCameraMetadata( + camera_type=pcam_type, + width=persp_result[pcam_name]["wh"][0], + height=persp_result[pcam_name]["wh"][1], + intrinsics=PinholeIntrinsics.from_camera_matrix(np.array(persp_result[pcam_name]["intrinsic"])), + distortion=PinholeDistortion.from_array(np.array(persp_result[pcam_name]["distortion"])), + ) + + return pinhole_cam_metadatas + + +def _get_kitti360_fisheye_mei_camera_metadata( + kitti360_folders: Dict[str, Path], + dataset_converter_config: DatasetConverterConfig, +) -> Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata]: + fisheye_cam_metadatas: Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata] = {} + if dataset_converter_config.include_fisheye_mei_cameras: + + fisheye_camera02_path = kitti360_folders[DIR_CALIB] / "image_02.yaml" + fisheye_camera03_path = kitti360_folders[DIR_CALIB] / "image_03.yaml" + + assert fisheye_camera02_path.exists() and fisheye_camera03_path.exists() + fisheye02 = _readYAMLFile(fisheye_camera02_path) + fisheye03 = _readYAMLFile(fisheye_camera03_path) + fisheye_result = {"image_02": fisheye02, "image_03": fisheye03} + + for fcam_type, fcam_name in KITTI360_FISHEYE_MEI_CAMERA_TYPES.items(): + + distortion_params = fisheye_result[fcam_name]["distortion_parameters"] + distortion = FisheyeMEIDistortion( + k1=distortion_params["k1"], + k2=distortion_params["k2"], + p1=distortion_params["p1"], + p2=distortion_params["p2"], + ) + + projection_params = fisheye_result[fcam_name]["projection_parameters"] + projection = FisheyeMEIProjection( + gamma1=projection_params["gamma1"], + gamma2=projection_params["gamma2"], + u0=projection_params["u0"], + v0=projection_params["v0"], + ) + + fisheye_cam_metadatas[fcam_type] = FisheyeMEICameraMetadata( + camera_type=fcam_type, + width=fisheye_result[fcam_name]["image_width"], + height=fisheye_result[fcam_name]["image_height"], + mirror_parameter=fisheye_result[fcam_name]["mirror_parameters"], + distortion=distortion, + projection=projection, + ) + + return fisheye_cam_metadatas + + +def _get_kitti360_map_metadata(split: str, log_name: str) -> MapMetadata: + return MapMetadata( + dataset="kitti360", + split=split, + log_name=log_name, + location=log_name, + map_has_z=True, + map_is_local=True, + ) + + +def _read_projection_matrix(p_line: str) -> np.ndarray: + parts = p_line.split(" ", 1) + if len(parts) != 2: + raise ValueError(f"Bad projection line: {p_line}") + vals = [float(x) for x in parts[1].strip().split()] + P = np.array(vals, dtype=np.float64).reshape(3, 4) + K = P[:, :3] + return K + + +def _readYAMLFile(fileName: Path) -> Dict[str, Any]: + """make OpenCV YAML file compatible with python""" + ret = {} + skip_lines = 1 # Skip the first line which says "%YAML:1.0". Or replace it with "%YAML 1.0" + with open(fileName) as fin: + for i in range(skip_lines): + fin.readline() + yamlFileOut = fin.read() + myRe = re.compile(r":([^ ])") # Add space after ":", if it doesn't exist. 
Python yaml requirement + yamlFileOut = myRe.sub(r": \1", yamlFileOut) + ret = yaml.safe_load(yamlFileOut) + return ret + + +def _get_kitti360_lidar_metadata( + kitti360_folders: Dict[str, Path], + dataset_converter_config: DatasetConverterConfig, +) -> Dict[LiDARType, LiDARMetadata]: + metadata: Dict[LiDARType, LiDARMetadata] = {} + if dataset_converter_config.include_lidars: + extrinsic = get_kitti360_lidar_extrinsic(kitti360_folders[DIR_CALIB]) + extrinsic_state_se3 = StateSE3.from_transformation_matrix(extrinsic) + extrinsic_state_se3 = _extrinsic_from_imu_to_rear_axle(extrinsic_state_se3) + metadata[LiDARType.LIDAR_TOP] = LiDARMetadata( + lidar_type=LiDARType.LIDAR_TOP, + lidar_index=Kitti360LiDARIndex, + extrinsic=extrinsic_state_se3, + ) + return metadata + + +def _read_timestamps(log_name: str, kitti360_folders: Dict[str, Path]) -> Optional[List[TimePoint]]: + """ + Read KITTI-360 timestamps for the given sequence and return Unix epoch timestamps. + """ + ts_files = [ + kitti360_folders[DIR_3D_RAW] / log_name / "velodyne_points" / "timestamps.txt", + kitti360_folders[DIR_2D_RAW] / log_name / "image_00" / "timestamps.txt", + kitti360_folders[DIR_2D_RAW] / log_name / "image_01" / "timestamps.txt", + ] + + if log_name == "2013_05_28_drive_0002_sync": + ts_files = ts_files[1:] + + for ts_file in ts_files: + if ts_file.exists(): + tps: List[TimePoint] = [] + with open(ts_file, "r") as f: + for line in f: + s = line.strip() + if not s: + continue + dt_str, ns_str = s.split(".") + dt_obj = datetime.datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S") + dt_obj = dt_obj.replace(tzinfo=datetime.timezone.utc) + unix_epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc) + total_seconds = (dt_obj - unix_epoch).total_seconds() + ns_value = int(ns_str) + us_from_ns = ns_value // 1000 + total_us = int(total_seconds * 1_000_000) + us_from_ns + tps.append(TimePoint.from_us(total_us)) + return tps + return None + + +def _extract_ego_state_all(log_name: str, kitti360_folders: Dict[str, Path]) -> Tuple[List[EgoStateSE3], List[int]]: + + ego_state_all: List[List[float]] = [] + pose_file = kitti360_folders[DIR_POSES] / log_name / "poses.txt" + if not pose_file.exists(): + raise FileNotFoundError(f"Pose file not found: {pose_file}") + poses = np.loadtxt(pose_file) + poses_time = poses[:, 0].astype(np.int32) + valid_timestamp: List[int] = list(poses_time) + oxts_path = kitti360_folders[DIR_POSES] / log_name / "oxts" / "data" + + for idx in range(len(valid_timestamp)): + oxts_path_file = oxts_path / f"{int(valid_timestamp[idx]):010d}.txt" + oxts_data = np.loadtxt(oxts_path_file) + + vehicle_parameters = get_kitti360_vw_passat_parameters() + + pos = idx + if log_name == "2013_05_28_drive_0004_sync" and pos == 0: + pos = 1 + + # NOTE you can use oxts_data[3:6] as roll, pitch, yaw for simplicity + # roll, pitch, yaw = oxts_data[3:6] + r00, r01, r02 = poses[pos, 1:4] + r10, r11, r12 = poses[pos, 5:8] + r20, r21, r22 = poses[pos, 9:12] + R_mat = np.array([[r00, r01, r02], [r10, r11, r12], [r20, r21, r22]], dtype=np.float64) + R_mat_cali = R_mat @ KITTI3602NUPLAN_IMU_CALIBRATION[:3, :3] + + ego_quaternion = Quaternion.from_rotation_matrix(R_mat_cali) + imu_pose = StateSE3( + x=poses[pos, 4], + y=poses[pos, 8], + z=poses[pos, 12], + qw=ego_quaternion.qw, + qx=ego_quaternion.qx, + qy=ego_quaternion.qy, + qz=ego_quaternion.qz, + ) + + rear_axle_pose = translate_se3_along_body_frame( + imu_pose, + Vector3D(0.05, -0.32, 0.0), + ) + + center = rear_axle_se3_to_center_se3(rear_axle_se3=rear_axle_pose, 
vehicle_parameters=vehicle_parameters) + dynamic_state = DynamicStateSE3( + velocity=Vector3D( + x=oxts_data[8], + y=oxts_data[9], + z=oxts_data[10], + ), + acceleration=Vector3D( + x=oxts_data[14], + y=oxts_data[15], + z=oxts_data[16], + ), + angular_velocity=Vector3D( + x=oxts_data[20], + y=oxts_data[21], + z=oxts_data[22], + ), + ) + ego_state_all.append( + EgoStateSE3( + center_se3=center, + dynamic_state_se3=dynamic_state, + vehicle_parameters=vehicle_parameters, + timepoint=None, + ) + ) + return ego_state_all, valid_timestamp + + +def _extract_kitti360_box_detections_all( + log_name: str, + ts_len: int, + ego_states_xyz: np.ndarray, + valid_timestamp: List[int], + kitti360_folders: Dict[str, Path], + detection_cache_root: Path, + detection_radius: float, +) -> List[BoxDetectionWrapper]: + + detections_states: List[List[List[float]]] = [[] for _ in range(ts_len)] + detections_velocity: List[List[List[float]]] = [[] for _ in range(ts_len)] + detections_tokens: List[List[str]] = [[] for _ in range(ts_len)] + detections_types: List[List[int]] = [[] for _ in range(ts_len)] + + if log_name == "2013_05_28_drive_0004_sync": + bbox_3d_path = kitti360_folders[DIR_3D_BBOX] / "train_full" / f"{log_name}.xml" + else: + bbox_3d_path = kitti360_folders[DIR_3D_BBOX] / "train" / f"{log_name}.xml" + if not bbox_3d_path.exists(): + raise FileNotFoundError(f"BBox 3D file not found: {bbox_3d_path}") + + tree = ET.parse(bbox_3d_path) + root = tree.getroot() + + detection_preprocess_path = detection_cache_root / f"{log_name}_detection_preprocessed.pkl" + if not detection_preprocess_path.exists(): + process_detection( + kitti360_data_root=kitti360_folders[DIR_ROOT], + log_name=log_name, + radius_m=detection_radius, + output_dir=detection_cache_root, + ) + with open(detection_preprocess_path, "rb") as f: + detection_preprocess_result = pickle.load(f) + static_records_dict = { + record_item["global_id"]: record_item for record_item in detection_preprocess_result["static"] + } + logging.info(f"Loaded detection preprocess data from {detection_preprocess_path}") + + dynamic_objs: Dict[int, List[KITTI360Bbox3D]] = defaultdict(list) + + for child in root: + if child.find("semanticId") is not None: + semanticIdKITTI = int(child.find("semanticId").text) + name = kittiId2label[semanticIdKITTI].name + else: + label = child.find("label").text + name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(label, "unknown") + if child.find("transform") is None or name not in KITTI360_DETECTION_NAME_DICT.keys(): + continue + obj = KITTI360Bbox3D() + obj.parseBbox(child) + + # static object + if obj.timestamp == -1: + if detection_preprocess_result is None: + obj.filter_by_radius(ego_states_xyz, valid_timestamp, radius=50.0) + else: + obj.load_detection_preprocess(static_records_dict) + for record in obj.valid_frames["records"]: + frame = record["timestamp"] + detections_states[frame].append(obj.get_state_array()) + detections_velocity[frame].append(np.array([0.0, 0.0, 0.0])) + detections_tokens[frame].append(str(obj.globalID)) + detections_types[frame].append(KITTI360_DETECTION_NAME_DICT[obj.name]) + else: + global_ID = obj.globalID + dynamic_objs[global_ID].append(obj) + + # dynamic object + for global_id, obj_list in dynamic_objs.items(): + obj_list.sort(key=lambda obj: obj.timestamp) + num_frames = len(obj_list) + + positions = [obj.get_state_array()[:3] for obj in obj_list] + timestamps = [int(obj.timestamp) for obj in obj_list] + + velocities = [] + + for i in range(1, num_frames - 1): + dt_frames = timestamps[i + 1] - 
timestamps[i - 1] + if dt_frames > 0: + dt = dt_frames * KITTI360_DT + vel = (positions[i + 1] - positions[i - 1]) / dt + vel = KITTI3602NUPLAN_IMU_CALIBRATION[:3, :3] @ obj_list[i].Rm.T @ vel + else: + vel = np.zeros(3) + velocities.append(vel) + + if num_frames > 1: + # first and last frame + velocities.insert(0, velocities[0]) + velocities.append(velocities[-1]) + elif num_frames == 1: + velocities.append(np.zeros(3)) + + for obj, vel in zip(obj_list, velocities): + frame = obj.timestamp + detections_states[frame].append(obj.get_state_array()) + detections_velocity[frame].append(vel) + detections_tokens[frame].append(str(obj.globalID)) + detections_types[frame].append(KITTI360_DETECTION_NAME_DICT[obj.name]) + + box_detection_wrapper_all: List[BoxDetectionWrapper] = [] + for frame in range(ts_len): + box_detections: List[BoxDetectionSE3] = [] + for state, velocity, token, detection_type in zip( + detections_states[frame], + detections_velocity[frame], + detections_tokens[frame], + detections_types[frame], + ): + if state is None: + break + detection_metadata = BoxDetectionMetadata( + box_detection_type=detection_type, + timepoint=None, + track_token=token, + confidence=None, + ) + bounding_box_se3 = BoundingBoxSE3.from_array(state) + velocity_vector = Vector3D.from_array(velocity) + box_detection = BoxDetectionSE3( + metadata=detection_metadata, + bounding_box_se3=bounding_box_se3, + velocity=velocity_vector, + ) + box_detections.append(box_detection) + box_detection_wrapper_all.append(BoxDetectionWrapper(box_detections=box_detections)) + return box_detection_wrapper_all + + +def _extract_kitti360_lidar( + log_name: str, + idx: int, + kitti360_folders: Dict[str, Path], + data_converter_config: DatasetConverterConfig, +) -> List[LiDARData]: + + lidars: List[LiDARData] = [] + if data_converter_config.include_lidars: + # NOTE special case for sequence 2013_05_28_drive_0002_sync which has no lidar data before frame 4391 + if log_name == "2013_05_28_drive_0002_sync" and idx <= 4390: + return lidars + + lidar_full_path = kitti360_folders[DIR_3D_RAW] / log_name / "velodyne_points" / "data" / f"{idx:010d}.bin" + if lidar_full_path.exists(): + lidars.append( + LiDARData( + lidar_type=LiDARType.LIDAR_TOP, + timestamp=None, + iteration=idx, + dataset_root=kitti360_folders[DIR_ROOT], + relative_path=lidar_full_path.relative_to(kitti360_folders[DIR_ROOT]), + ) + ) + else: + raise FileNotFoundError(f"LiDAR file not found: {lidar_full_path}") + + return lidars + + +def _extract_kitti360_pinhole_cameras( + log_name: str, + idx: int, + camera_calibration: Dict[str, StateSE3], + kitti360_folders: Dict[str, Path], + data_converter_config: DatasetConverterConfig, +) -> Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]]: + + pinhole_camera_dict: Dict[PinholeCameraType, Optional[Tuple[Union[str, bytes], StateSE3]]] = {} + if data_converter_config.include_pinhole_cameras: + + for camera_type, cam_dir_name in KITTI360_PINHOLE_CAMERA_TYPES.items(): + img_path_png = kitti360_folders[DIR_2D_RAW] / log_name / cam_dir_name / "data_rect" / f"{idx:010d}.png" + camera_extrinsic = camera_calibration[cam_dir_name] + + if img_path_png.exists(): + if data_converter_config.pinhole_camera_store_option == "path": + camera_data = str(img_path_png) + elif data_converter_config.pinhole_camera_store_option == "binary": + with open(img_path_png, "rb") as f: + camera_data = f.read() + else: + camera_data = None + + pinhole_camera_dict[camera_type] = camera_data, camera_extrinsic + 
return pinhole_camera_dict + + +def _extract_kitti360_fisheye_mei_cameras( + log_name: str, + idx: int, + camera_calibration: Dict[str, StateSE3], + kitti360_folders: Dict[str, Path], + data_converter_config: DatasetConverterConfig, +) -> Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]]: + + fisheye_camera_dict: Dict[FisheyeMEICameraType, Optional[Tuple[Union[str, bytes], StateSE3]]] = {} + for camera_type, cam_dir_name in KITTI360_FISHEYE_MEI_CAMERA_TYPES.items(): + img_path_png = kitti360_folders[DIR_2D_RAW] / log_name / cam_dir_name / "data_rgb" / f"{idx:010d}.png" + camera_extrinsic = camera_calibration[cam_dir_name] + if img_path_png.exists(): + if data_converter_config.pinhole_camera_store_option == "path": + camera_data = str(img_path_png) + elif data_converter_config.pinhole_camera_store_option == "binary": + with open(img_path_png, "rb") as f: + camera_data = f.read() + else: + camera_data = None + fisheye_camera_dict[camera_type] = camera_data, camera_extrinsic + return fisheye_camera_dict + + +def _load_kitti_360_calibration(kitti_360_data_root: Path) -> Dict[str, StateSE3]: + calib_file = kitti_360_data_root / DIR_CALIB / "calib_cam_to_pose.txt" + if not calib_file.exists(): + raise FileNotFoundError(f"Calibration file not found: {calib_file}") + + lastrow = np.array([0, 0, 0, 1]).reshape(1, 4) + calib_dict: Dict[str, StateSE3] = {} + with open(calib_file, "r") as f: + for line in f: + parts = line.strip().split() + key = parts[0][:-1] + values = list(map(float, parts[1:])) + matrix = np.array(values).reshape(3, 4) + cam2pose = np.concatenate((matrix, lastrow)) + cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose + camera_extrinsic = StateSE3.from_transformation_matrix(cam2pose) + camera_extrinsic = _extrinsic_from_imu_to_rear_axle(camera_extrinsic) + calib_dict[key] = camera_extrinsic + return calib_dict + + +def _extrinsic_from_imu_to_rear_axle(extrinsic: StateSE3) -> StateSE3: + imu_se3 = StateSE3(x=-0.05, y=0.32, z=0.0, qw=1.0, qx=0.0, qy=0.0, qz=0.0) + rear_axle_se3 = StateSE3(x=0.0, y=0.0, z=0.0, qw=1.0, qx=0.0, qy=0.0, qz=0.0) + return StateSE3.from_array(convert_se3_array_between_origins(imu_se3, rear_axle_se3, extrinsic.array)) diff --git a/src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py b/src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py new file mode 100644 index 00000000..847250eb --- /dev/null +++ b/src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py @@ -0,0 +1,116 @@ +import os +import xml.etree.ElementTree as ET +from pathlib import Path +from typing import List + +import numpy as np +import shapely.geometry as geom + +from py123d.conversion.datasets.kitti360.utils.kitti360_helper import KITTI360_MAP_Bbox3D +from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from py123d.conversion.utils.map_utils.road_edge.road_edge_2d_utils import ( + get_road_edge_linear_rings, + split_line_geometry_by_max_length, +) +from py123d.conversion.utils.map_utils.road_edge.road_edge_3d_utils import lift_road_edges_to_3d +from py123d.datatypes.maps.cache.cache_map_objects import ( + CacheCarpark, + CacheGenericDrivable, + CacheRoadEdge, + CacheWalkway, +) +from py123d.datatypes.maps.map_datatypes import RoadEdgeType +from py123d.geometry.polyline import Polyline3D + +MAX_ROAD_EDGE_LENGTH = 100.0 # meters, used to filter out very long road edges + +KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"]) + +DIR_3D_BBOX = "data_3d_bboxes" + 
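+# NOTE: The KITTI-360 conversion code assumes the standard dataset layout under KITTI360_DATA_ROOT,
+# with sibling folders calibration/, data_poses/, data_2d_raw/, data_3d_raw/, and data_3d_bboxes/
+# (these directory names also appear as DIR_* constants in the converter module).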
+PATH_3D_BBOX_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_BBOX
+
+KITTI360_MAP_BBOX = [
+    "road",
+    "sidewalk",
+    # "railtrack",
+    # "ground",
+    "driveway",
+]
+
+
+def convert_kitti360_map_with_writer(log_name: str, map_writer: AbstractMapWriter) -> None:
+    """
+    Convert KITTI-360 map data using the provided map writer.
+    This function extracts map data from the KITTI-360 XML files and writes it through the map writer interface.
+
+    :param log_name: The name of the log to convert
+    :param map_writer: The map writer to use for writing the converted map
+    """
+    xml_path = PATH_3D_BBOX_ROOT / "train_full" / f"{log_name}.xml"
+    if not xml_path.exists():
+        xml_path = PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml"
+
+    if not xml_path.exists():
+        raise FileNotFoundError(f"BBox 3D file not found: {xml_path}")
+
+    tree = ET.parse(xml_path)
+    root = tree.getroot()
+    objs: List[KITTI360_MAP_Bbox3D] = []
+
+    for child in root:
+        label = child.find("label").text
+        if child.find("transform") is None or label not in KITTI360_MAP_BBOX:
+            continue
+        obj = KITTI360_MAP_Bbox3D()
+        obj.parseBbox(child)
+        objs.append(obj)
+
+    # 1. Write roads, sidewalks, driveways, and collect road geometries
+    road_outlines_3d: List[Polyline3D] = []
+    for obj in objs:
+        if obj.label == "road":
+            map_writer.write_generic_drivable(
+                CacheGenericDrivable(
+                    object_id=obj.id,
+                    outline=obj.vertices,
+                    geometry=geom.Polygon(obj.vertices.array[:, :3]),
+                )
+            )
+            # Append the first vertex again so the outline forms a closed ring.
+            road_outline_array = np.concatenate([obj.vertices.array[:, :3], obj.vertices.array[0:1, :3]])
+            road_outlines_3d.append(Polyline3D.from_array(road_outline_array))
+        elif obj.label == "sidewalk":
+            map_writer.write_walkway(
+                CacheWalkway(
+                    object_id=obj.id,
+                    outline=obj.vertices,
+                    geometry=geom.Polygon(obj.vertices.array[:, :3]),
+                )
+            )
+        elif obj.label == "driveway":
+            map_writer.write_carpark(
+                CacheCarpark(
+                    object_id=obj.id,
+                    outline=obj.vertices,
+                    geometry=geom.Polygon(obj.vertices.array[:, :3]),
+                )
+            )
+
+    # 2. Use road geometries to create road edges
+
+    # NOTE @DanielDauner: We merge all drivable areas in 2D and lift the outlines to 3D.
+    # Currently the method assumes that the drivable areas do not overlap and all road surfaces are included.
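+    # As a rough sketch, the 2D merge amounts to something like the following
+    # (illustrative only; the imported road_edge_2d_utils helpers may differ in detail):
+    #   merged = shapely.ops.unary_union(road_polygons_2d)
+    #   rings = [polygon.exterior for polygon in getattr(merged, "geoms", [merged])]
+    # after which each ring is matched back onto the 3D outlines to recover elevation.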
+ road_polygons_2d = [geom.Polygon(road_outline.array[:, :2]) for road_outline in road_outlines_3d] + road_edges_2d = get_road_edge_linear_rings(road_polygons_2d) + road_edges_3d = lift_road_edges_to_3d(road_edges_2d, road_outlines_3d) + road_edges_linestrings_3d = [polyline.linestring for polyline in road_edges_3d] + road_edges_linestrings_3d = split_line_geometry_by_max_length(road_edges_linestrings_3d, MAX_ROAD_EDGE_LENGTH) + + for idx in range(len(road_edges_linestrings_3d)): + map_writer.write_road_edge( + CacheRoadEdge( + object_id=idx, + road_edge_type=RoadEdgeType.ROAD_EDGE_BOUNDARY, + polyline=Polyline3D.from_linestring(road_edges_linestrings_3d[idx]), + ) + ) diff --git a/src/py123d/conversion/datasets/kitti360/kitti360_sensor_io.py b/src/py123d/conversion/datasets/kitti360/kitti360_sensor_io.py new file mode 100644 index 00000000..e58b165d --- /dev/null +++ b/src/py123d/conversion/datasets/kitti360/kitti360_sensor_io.py @@ -0,0 +1,29 @@ +import logging +from pathlib import Path +from typing import Dict + +import numpy as np + +from py123d.conversion.registry.lidar_index_registry import Kitti360LiDARIndex +from py123d.datatypes.scene.scene_metadata import LogMetadata +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.geometry.se import StateSE3 +from py123d.geometry.transform.transform_se3 import convert_points_3d_array_between_origins + + +def load_kitti360_lidar_pcs_from_file(filepath: Path, log_metadata: LogMetadata) -> Dict[LiDARType, np.ndarray]: + if not filepath.exists(): + logging.warning(f"LiDAR file does not exist: {filepath}. Returning empty point cloud.") + return {LiDARType.LIDAR_TOP: np.zeros((1, len(Kitti360LiDARIndex)), dtype=np.float32)} + + lidar_extrinsic = log_metadata.lidar_metadata[LiDARType.LIDAR_TOP].extrinsic + lidar_pc = np.fromfile(filepath, dtype=np.float32) + lidar_pc = np.reshape(lidar_pc, [-1, len(Kitti360LiDARIndex)]) + + lidar_pc[..., Kitti360LiDARIndex.XYZ] = convert_points_3d_array_between_origins( + from_origin=lidar_extrinsic, + to_origin=StateSE3(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0), + points_3d_array=lidar_pc[..., Kitti360LiDARIndex.XYZ], + ) + + return {LiDARType.LIDAR_TOP: lidar_pc} diff --git a/src/py123d/datatypes/sensors/camera/__init__.py b/src/py123d/conversion/datasets/kitti360/utils/__init__.py similarity index 100% rename from src/py123d/datatypes/sensors/camera/__init__.py rename to src/py123d/conversion/datasets/kitti360/utils/__init__.py diff --git a/src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py b/src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py new file mode 100644 index 00000000..ef3511c4 --- /dev/null +++ b/src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py @@ -0,0 +1,263 @@ +import copy +from pathlib import Path +from typing import Any, Dict, List, Tuple + +import numpy as np +from scipy.linalg import polar + +from py123d.conversion.datasets.kitti360.utils.kitti360_labels import BBOX_LABLES_TO_DETECTION_NAME_DICT, kittiId2label +from py123d.geometry import BoundingBoxSE3, StateSE3 +from py123d.geometry.polyline import Polyline3D +from py123d.geometry.rotation import EulerAngles + +# KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"]) +# DIR_CALIB = "calibration" +# PATH_CALIB_ROOT: Path = KITTI360_DATA_ROOT / DIR_CALIB + +KITTI3602NUPLAN_IMU_CALIBRATION = np.array( + [ + [1, 0, 0, 0], + [0, -1, 0, 0], + [0, 0, -1, 0], + [0, 0, 0, 1], + ], + dtype=np.float64, +) +MAX_N = 1000 + + +def local2global(semanticId: int, instanceId: int) -> int: + globalId = 
semanticId * MAX_N + instanceId
+    if isinstance(globalId, np.ndarray):
+        return globalId.astype(np.int32)
+    else:
+        return int(globalId)
+
+
+def global2local(globalId: int) -> Tuple[int, int]:
+    semanticId = globalId // MAX_N
+    instanceId = globalId % MAX_N
+    if isinstance(globalId, np.ndarray):
+        return semanticId.astype(np.int32), instanceId.astype(np.int32)
+    else:
+        return int(semanticId), int(instanceId)
+
+
+class KITTI360Bbox3D:
+
+    # global ID bases (only used for sequence 0004)
+    dynamic_global_id = 2000000
+    static_global_id = 1000000
+
+    # Constructor
+    def __init__(self):
+
+        # the ID of the corresponding object
+        self.semanticId = -1
+        self.instanceId = -1
+        self.annotationId = -1
+        self.globalID = -1
+
+        # the window that contains the bbox
+        self.start_frame = -1
+        self.end_frame = -1
+
+        # timestamp of the bbox (-1 if static)
+        self.timestamp = -1
+
+        # name
+        self.name = ""
+
+        # label
+        self.label = ""
+
+    def parseBbox(self, child):
+        self.timestamp = int(child.find("timestamp").text)
+
+        self.annotationId = int(child.find("index").text) + 1
+
+        self.label = child.find("label").text
+
+        if child.find("semanticId") is None:
+            self.name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(self.label, "unknown")
+            self.is_dynamic = int(child.find("dynamic").text)
+            if self.is_dynamic != 0:
+                dynamicSeq = int(child.find("dynamicSeq").text)
+                self.globalID = KITTI360Bbox3D.dynamic_global_id + dynamicSeq
+            else:
+                self.globalID = KITTI360Bbox3D.static_global_id
+                KITTI360Bbox3D.static_global_id += 1
+        else:
+            self.start_frame = int(child.find("start_frame").text)
+            self.end_frame = int(child.find("end_frame").text)
+
+            semanticIdKITTI = int(child.find("semanticId").text)
+            self.semanticId = kittiId2label[semanticIdKITTI].id
+            self.instanceId = int(child.find("instanceId").text)
+            self.name = kittiId2label[semanticIdKITTI].name
+
+            self.globalID = local2global(self.semanticId, self.instanceId)
+
+        self.valid_frames = {"global_id": self.globalID, "records": []}
+
+        self.parseVertices(child)
+        self.parse_scale_rotation()
+
+    def parseVertices(self, child):
+        transform = parseOpencvMatrix(child.find("transform"))
+        R = transform[:3, :3]
+        T = transform[:3, 3]
+        vertices = parseOpencvMatrix(child.find("vertices"))
+        self.vertices_template = copy.deepcopy(vertices)
+
+        vertices = np.matmul(R, vertices.transpose()).transpose() + T
+        self.vertices = vertices
+
+        self.R = R
+        self.T = T
+
+    def parse_scale_rotation(self):
+        # Polar decomposition splits the transform into a rotation (Rm) and a stretch (Sm).
+        Rm, Sm = polar(self.R)
+        if np.linalg.det(Rm) < 0:
+            Rm[0] = -Rm[0]
+        scale = np.diag(Sm)
+        # yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', degrees=False)
+        euler_angles = EulerAngles.from_rotation_matrix(Rm)
+        yaw, pitch, roll = euler_angles.yaw, euler_angles.pitch, euler_angles.roll
+        obj_quaternion = euler_angles.quaternion
+        # obj_quaternion = EulerAngles(roll=roll, pitch=pitch, yaw=yaw).quaternion
+
+        self.Rm = np.array(Rm)
+        self.Sm = np.array(Sm)
+        self.scale = scale
+        self.yaw = yaw
+        self.pitch = pitch
+        self.roll = roll
+        self.qw = obj_quaternion.qw
+        self.qx = obj_quaternion.qx
+        self.qy = obj_quaternion.qy
+        self.qz = obj_quaternion.qz
+
+    def get_state_array(self) -> np.ndarray:
+        center = StateSE3(
+            x=self.T[0],
+            y=self.T[1],
+            z=self.T[2],
+            qw=self.qw,
+            qx=self.qx,
+            qy=self.qy,
+            qz=self.qz,
+        )
+        scale = self.scale
+        bounding_box_se3 = BoundingBoxSE3(center, scale[0], scale[1], scale[2])
+
+        return bounding_box_se3.array
+
+    def filter_by_radius(self, ego_state_xyz: np.ndarray, valid_timestamp: List[int], radius: float = 50.0) -> None:
+        """first stage of 
detection, used to filter out detections by radius""" + d = np.linalg.norm(ego_state_xyz - self.T[None, :], axis=1) + idxs = np.where(d <= radius)[0] + for idx in idxs: + self.valid_frames["records"].append( + { + "timestamp": valid_timestamp[idx], + "points_in_box": None, + } + ) + + def box_visible_in_point_cloud(self, points: np.ndarray) -> Tuple[bool, int]: + """points: (N,3) , box: (8,3)""" + box = self.vertices.copy() + # avoid calculating ground point cloud + z_offset = 0.1 + box[:, 2] += z_offset + O, A, B, C = box[0], box[1], box[2], box[5] + OA = A - O + OB = B - O + OC = C - O + POA, POB, POC = (points @ OA[..., None])[:, 0], (points @ OB[..., None])[:, 0], (points @ OC[..., None])[:, 0] + mask = ( + (np.dot(O, OA) < POA) + & (POA < np.dot(A, OA)) + & (np.dot(O, OB) < POB) + & (POB < np.dot(B, OB)) + & (np.dot(O, OC) < POC) + & (POC < np.dot(C, OC)) + ) + + points_in_box = np.sum(mask) + visible = True if points_in_box > 40 else False + return visible, points_in_box + + def load_detection_preprocess(self, records_dict: Dict[int, Any]): + if self.globalID in records_dict: + self.valid_frames["records"] = records_dict[self.globalID]["records"] + + +class KITTI360_MAP_Bbox3D: + def __init__(self): + self.id = -1 + self.label = " " + + self.vertices: Polyline3D = None + self.R = None + self.T = None + + def parseVertices_plane(self, child): + transform = parseOpencvMatrix(child.find("transform")) + R = transform[:3, :3] + T = transform[:3, 3] + if child.find("transform_plane").find("rows").text == "0": + vertices = parseOpencvMatrix(child.find("vertices")) + else: + vertices = parseOpencvMatrix(child.find("vertices_plane")) + + vertices = np.matmul(R, vertices.transpose()).transpose() + T + self.vertices = Polyline3D.from_array(vertices) + + self.R = R + self.T = T + + def parseBbox(self, child): + self.id = int(child.find("index").text) + self.label = child.find("label").text + self.parseVertices_plane(child) + + +def parseOpencvMatrix(node): + rows = int(node.find("rows").text) + cols = int(node.find("cols").text) + data = node.find("data").text.split(" ") + + mat = [] + for d in data: + d = d.replace("\n", "") + if len(d) < 1: + continue + mat.append(float(d)) + mat = np.reshape(mat, [rows, cols]) + return mat + + +def get_kitti360_lidar_extrinsic(kitti360_calibration_root: Path) -> np.ndarray: + cam2pose_txt = kitti360_calibration_root / "calib_cam_to_pose.txt" + if not cam2pose_txt.exists(): + raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}") + + cam2velo_txt = kitti360_calibration_root / "calib_cam_to_velo.txt" + if not cam2velo_txt.exists(): + raise FileNotFoundError(f"calib_cam_to_velo.txt file not found: {cam2velo_txt}") + + lastrow = np.array([0, 0, 0, 1]).reshape(1, 4) + + with open(cam2pose_txt, "r") as f: + image_00 = next(f) + values = list(map(float, image_00.strip().split()[1:])) + matrix = np.array(values).reshape(3, 4) + cam2pose = np.concatenate((matrix, lastrow)) + cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose + + cam2velo = np.concatenate((np.loadtxt(cam2velo_txt).reshape(3, 4), lastrow)) + extrinsic = cam2pose @ np.linalg.inv(cam2velo) + + return extrinsic diff --git a/src/py123d/conversion/datasets/kitti360/utils/kitti360_labels.py b/src/py123d/conversion/datasets/kitti360/utils/kitti360_labels.py new file mode 100644 index 00000000..6feafc1d --- /dev/null +++ b/src/py123d/conversion/datasets/kitti360/utils/kitti360_labels.py @@ -0,0 +1,217 @@ +#!/usr/bin/python +# +# KITTI-360 labels +# + +from collections import 
namedtuple + +# -------------------------------------------------------------------------------- +# Definitions +# -------------------------------------------------------------------------------- + +# a label and all meta information +Label = namedtuple( + "Label", + [ + "name", # The identifier of this label, e.g. 'car', 'person', ... . + # We use them to uniquely name a class + "id", # An integer ID that is associated with this label. + # The IDs are used to represent the label in ground truth images + # An ID of -1 means that this label does not have an ID and thus + # is ignored when creating ground truth images (e.g. license plate). + # Do not modify these IDs, since exactly these IDs are expected by the + # evaluation server. + "kittiId", # An integer ID that is associated with this label for KITTI-360 + # NOT FOR RELEASING + "trainId", # Feel free to modify these IDs as suitable for your method. Then create + # ground truth images with train IDs, using the tools provided in the + # 'preparation' folder. However, make sure to validate or submit results + # to our evaluation server using the regular IDs above! + # For trainIds, multiple labels might have the same ID. Then, these labels + # are mapped to the same class in the ground truth images. For the inverse + # mapping, we use the label that is defined first in the list below. + # For example, mapping all void-type classes to the same ID in training, + # might make sense for some approaches. + # Max value is 255! + "category", # The name of the category that this label belongs to + "categoryId", # The ID of this category. Used to create ground truth images + # on category level. + "hasInstances", # Whether this label distinguishes between single instances or not + "ignoreInEval", # Whether pixels having this class as ground truth label are ignored + # during evaluations or not + "ignoreInInst", # Whether pixels having this class as ground truth label are ignored + # during evaluations of instance segmentation or not + "color", # The color of this label + ], +) + + +# -------------------------------------------------------------------------------- +# A list of all labels +# -------------------------------------------------------------------------------- + +# Please adapt the train IDs as appropriate for your approach. +# Note that you might want to ignore labels with ID 255 during training. +# Further note that the current train IDs are only a suggestion. You can use whatever you like. +# Make sure to provide your results using the original IDs and not the training IDs. +# Note that many IDs are ignored in evaluation and thus you never need to predict these! 
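+# Example lookups (using the dictionaries defined after the table):
+#   name2label["car"].id == 26, kittiId2label[13].name == "car", id2label[7].name == "road"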
+ +labels = [ + # name id kittiId, trainId category catId hasInstances ignoreInEval ignoreInInst color + Label("unlabeled", 0, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("ego vehicle", 1, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("rectification border", 2, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("out of roi", 3, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("static", 4, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("dynamic", 5, -1, 255, "void", 0, False, True, True, (111, 74, 0)), + Label("ground", 6, -1, 255, "void", 0, False, True, True, (81, 0, 81)), + Label("road", 7, 1, 0, "flat", 1, False, False, False, (128, 64, 128)), + Label("sidewalk", 8, 3, 1, "flat", 1, False, False, False, (244, 35, 232)), + Label("parking", 9, 2, 255, "flat", 1, False, True, True, (250, 170, 160)), + Label("rail track", 10, 10, 255, "flat", 1, False, True, True, (230, 150, 140)), + Label("building", 11, 11, 2, "construction", 2, True, False, False, (70, 70, 70)), + Label("wall", 12, 7, 3, "construction", 2, False, False, False, (102, 102, 156)), + Label("fence", 13, 8, 4, "construction", 2, False, False, False, (190, 153, 153)), + Label("guard rail", 14, 30, 255, "construction", 2, False, True, True, (180, 165, 180)), + Label("bridge", 15, 31, 255, "construction", 2, False, True, True, (150, 100, 100)), + Label("tunnel", 16, 32, 255, "construction", 2, False, True, True, (150, 120, 90)), + Label("pole", 17, 21, 5, "object", 3, True, False, True, (153, 153, 153)), + Label("polegroup", 18, -1, 255, "object", 3, False, True, True, (153, 153, 153)), + Label("traffic light", 19, 23, 6, "object", 3, True, False, True, (250, 170, 30)), + Label("traffic sign", 20, 24, 7, "object", 3, True, False, True, (220, 220, 0)), + Label("vegetation", 21, 5, 8, "nature", 4, False, False, False, (107, 142, 35)), + Label("terrain", 22, 4, 9, "nature", 4, False, False, False, (152, 251, 152)), + Label("sky", 23, 9, 10, "sky", 5, False, False, False, (70, 130, 180)), + Label("person", 24, 19, 11, "human", 6, True, False, False, (220, 20, 60)), + Label("rider", 25, 20, 12, "human", 6, True, False, False, (255, 0, 0)), + Label("car", 26, 13, 13, "vehicle", 7, True, False, False, (0, 0, 142)), + Label("truck", 27, 14, 14, "vehicle", 7, True, False, False, (0, 0, 70)), + Label("bus", 28, 34, 15, "vehicle", 7, True, False, False, (0, 60, 100)), + Label("caravan", 29, 16, 255, "vehicle", 7, True, True, True, (0, 0, 90)), + Label("trailer", 30, 15, 255, "vehicle", 7, True, True, True, (0, 0, 110)), + Label("train", 31, 33, 16, "vehicle", 7, True, False, False, (0, 80, 100)), + Label("motorcycle", 32, 17, 17, "vehicle", 7, True, False, False, (0, 0, 230)), + Label("bicycle", 33, 18, 18, "vehicle", 7, True, False, False, (119, 11, 32)), + Label("garage", 34, 12, 2, "construction", 2, True, True, True, (64, 128, 128)), + Label("gate", 35, 6, 4, "construction", 2, False, True, True, (190, 153, 153)), + Label("stop", 36, 29, 255, "construction", 2, True, True, True, (150, 120, 90)), + Label("smallpole", 37, 22, 5, "object", 3, True, True, True, (153, 153, 153)), + Label("lamp", 38, 25, 255, "object", 3, True, True, True, (0, 64, 64)), + Label("trash bin", 39, 26, 255, "object", 3, True, True, True, (0, 128, 192)), + Label("vending machine", 40, 27, 255, "object", 3, True, True, True, (128, 64, 0)), + Label("box", 41, 28, 255, "object", 3, True, True, True, (64, 64, 128)), + Label("unknown construction", 42, 35, 255, "void", 0, False, True, True, (102, 0, 0)), + 
Label("unknown vehicle", 43, 36, 255, "void", 0, False, True, True, (51, 0, 51)), + Label("unknown object", 44, 37, 255, "void", 0, False, True, True, (32, 32, 32)), + Label("license plate", -1, -1, -1, "vehicle", 7, False, True, True, (0, 0, 142)), +] + +# -------------------------------------------------------------------------------- +# Create dictionaries for a fast lookup +# -------------------------------------------------------------------------------- + +# Please refer to the main method below for example usages! + +# name to label object +name2label = {label.name: label for label in labels} +# id to label object +id2label = {label.id: label for label in labels} +# trainId to label object +trainId2label = {label.trainId: label for label in reversed(labels)} +# KITTI-360 ID to cityscapes ID +kittiId2label = {label.kittiId: label for label in labels} +# category to list of label objects +category2labels = {} +for label in labels: + category = label.category + if category in category2labels: + category2labels[category].append(label) + else: + category2labels[category] = [label] + +# -------------------------------------------------------------------------------- +# Assure single instance name +# -------------------------------------------------------------------------------- + + +# returns the label name that describes a single instance (if possible) +# e.g. input | output +# ---------------------- +# car | car +# cargroup | car +# foo | None +# foogroup | None +# skygroup | None +def assureSingleInstanceName(name): + # if the name is known, it is not a group + if name in name2label: + return name + # test if the name actually denotes a group + if not name.endswith("group"): + return None + # remove group + name = name[: -len("group")] + # test if the new name exists + if name not in name2label: + return None + # test if the new name denotes a label that actually has instances + if not name2label[name].hasInstances: + return None + # all good then + return name + + +from py123d.datatypes.detections.box_detection_types import BoxDetectionType + +BBOX_LABLES_TO_DETECTION_NAME_DICT = { + "car": "car", + "truck": "truck", + "bicycle": "bicycle", + "trafficLight": "traffic light", + "trailer": "trailer", + "bus": "bus", + "pedestrian": "person", + "motorcycle": "motorcycle", + "stop": "stop", + "trafficSign": "traffic sign", + "rider": "rider", + "caravan": "caravan", +} + +KITTI360_DETECTION_NAME_DICT = { + "traffic light": BoxDetectionType.SIGN, + "traffic sign": BoxDetectionType.SIGN, + "person": BoxDetectionType.PEDESTRIAN, + "rider": BoxDetectionType.BICYCLE, + "car": BoxDetectionType.VEHICLE, + "truck": BoxDetectionType.VEHICLE, + "bus": BoxDetectionType.VEHICLE, + "caravan": BoxDetectionType.VEHICLE, + "trailer": BoxDetectionType.VEHICLE, + "train": BoxDetectionType.VEHICLE, + "motorcycle": BoxDetectionType.BICYCLE, + "bicycle": BoxDetectionType.BICYCLE, + "stop": BoxDetectionType.SIGN, +} + +# KIITI360_DETECTION_NAME_DICT = { +# "pole": DetectionType.GENERIC_OBJECT, +# "traffic light": DetectionType.SIGN, +# "traffic sign": DetectionType.SIGN, +# "person": DetectionType.PEDESTRIAN, +# "rider": DetectionType.BICYCLE, +# "car": DetectionType.VEHICLE, +# "truck": DetectionType.VEHICLE, +# "bus": DetectionType.VEHICLE, +# "caravan": DetectionType.VEHICLE, +# "trailer": DetectionType.VEHICLE, +# "train": DetectionType.VEHICLE, +# "motorcycle": DetectionType.BICYCLE, +# "bicycle": DetectionType.BICYCLE, +# "stop": DetectionType.SIGN, +# "smallpole": DetectionType.GENERIC_OBJECT, +# 
"lamp": DetectionType.GENERIC_OBJECT, +# "trash bin": DetectionType.GENERIC_OBJECT, +# "vending machine": DetectionType.GENERIC_OBJECT, +# "box": DetectionType.GENERIC_OBJECT, +# } diff --git a/src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py b/src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py new file mode 100644 index 00000000..3f65b375 --- /dev/null +++ b/src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py @@ -0,0 +1,227 @@ +""" +This script precomputes static detection records for KITTI-360: + - Stage 1: radius filtering using ego positions (from poses.txt). + - Stage 2: LiDAR visibility check to fill per-frame point counts. +It writes a pickle containing, for each static object, all feasible frames and +their point counts to avoid recomputation in later pipelines. +We have precomputed and saved the pickle for all training logs, you can either +download them or run this script to generate +""" + +from __future__ import annotations + +import concurrent.futures +import logging +import os +import pickle +import xml.etree.ElementTree as ET +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import numpy as np +import numpy.typing as npt + +from py123d.conversion.datasets.kitti360.utils.kitti360_helper import ( + KITTI3602NUPLAN_IMU_CALIBRATION, + KITTI360Bbox3D, + get_kitti360_lidar_extrinsic, +) +from py123d.conversion.datasets.kitti360.utils.kitti360_labels import ( + BBOX_LABLES_TO_DETECTION_NAME_DICT, + KITTI360_DETECTION_NAME_DICT, + kittiId2label, +) + +# KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"]) +# DIR_3D_RAW = "data_3d_raw" +# DIR_3D_BBOX = "data_3d_bboxes" +# DIR_POSES = "data_poses" + +# PATH_3D_RAW_ROOT = KITTI360_DATA_ROOT / DIR_3D_RAW +# PATH_3D_BBOX_ROOT = KITTI360_DATA_ROOT / DIR_3D_BBOX +# PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES + + +def _bbox_xml_path(kitti360_dataset_root: Path, log_name: str) -> Path: + if log_name == "2013_05_28_drive_0004_sync": + return kitti360_dataset_root / "data_3d_bboxes" / "train_full" / f"{log_name}.xml" + return kitti360_dataset_root / "data_3d_bboxes" / "train" / f"{log_name}.xml" + + +def _lidar_frame_path(kitti360_dataset_root: Path, log_name: str, frame_idx: int) -> Path: + return kitti360_dataset_root / "data_3d_raw" / log_name / "velodyne_points" / "data" / f"{frame_idx:010d}.bin" + + +def _load_lidar_xyz(filepath: Path) -> np.ndarray: + """Load one LiDAR frame and return Nx3 xyz.""" + arr = np.fromfile(filepath, dtype=np.float32) + return arr.reshape(-1, 4)[:, :3] + + +def _collect_static_objects(kitti360_dataset_root: Path, log_name: str) -> List[KITTI360Bbox3D]: + """Parse XML and collect static objects with valid class names.""" + xml_path = _bbox_xml_path(kitti360_dataset_root, log_name) + if not xml_path.exists(): + raise FileNotFoundError(f"BBox 3D file not found: {xml_path}") + tree = ET.parse(xml_path) + root = tree.getroot() + + static_objs: List[KITTI360Bbox3D] = [] + + for child in root: + if child.find("semanticId") is not None: + semanticIdKITTI = int(child.find("semanticId").text) + name = kittiId2label[semanticIdKITTI].name + else: + lable = child.find("label").text + name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(lable, "unknown") + timestamp = int(child.find("timestamp").text) # -1 for static objects + if child.find("transform") is None or name not in KITTI360_DETECTION_NAME_DICT or timestamp != -1: + continue + obj = KITTI360Bbox3D() + obj.parseBbox(child) + static_objs.append(obj) + return static_objs + + 
+def _collect_ego_states(kitti360_data_root: Path, log_name: str) -> Tuple[npt.NDArray[np.float64], list[int]]: + """Load ego states from poses.txt.""" + + pose_file = kitti360_data_root / "data_poses" / log_name / "poses.txt" + if not pose_file.exists(): + raise FileNotFoundError(f"Pose file not found: {pose_file}") + + poses = np.loadtxt(pose_file) + poses_time = poses[:, 0].astype(np.int32) + valid_timestamp: List[int] = list(poses_time) + + ego_states = [] + for time_idx in range(len(valid_timestamp)): + pos = time_idx + state_item = np.eye(4) + r00, r01, r02 = poses[pos, 1:4] + r10, r11, r12 = poses[pos, 5:8] + r20, r21, r22 = poses[pos, 9:12] + R_mat = np.array([[r00, r01, r02], [r10, r11, r12], [r20, r21, r22]], dtype=np.float64) + R_mat_cali = R_mat @ KITTI3602NUPLAN_IMU_CALIBRATION[:3, :3] + ego_state_xyz = np.array( + [ + poses[pos, 4], + poses[pos, 8], + poses[pos, 12], + ] + ) + + state_item[:3, :3] = R_mat_cali + state_item[:3, 3] = ego_state_xyz + ego_states.append(state_item) + + # [N,4,4] + return np.array(ego_states), valid_timestamp + + +def process_detection( + kitti360_data_root: Path, + log_name: str, + radius_m: float = 60.0, + output_dir: Optional[Path] = None, +) -> None: + """ + Precompute detections filtering + for static objects: + 1) filter by ego-centered radius over all frames + 2) filter by LiDAR point cloud visibility + Save per-frame detections to a pickle to avoid recomputation. + """ + + lidar_dir = kitti360_data_root / "data_3d_raw" / log_name / "velodyne_points" / "data" + if not lidar_dir.exists(): + raise FileNotFoundError(f"LiDAR data folder not found: {lidar_dir}") + ts_len = len(list(lidar_dir.glob("*.bin"))) + logging.info(f"[preprocess] {log_name}: found {ts_len} lidar frames") + + # 1) Parse objects from XML + static_objs: List[KITTI360Bbox3D] = _collect_static_objects(kitti360_data_root, log_name) + logging.info(f"[preprocess] {log_name}: static objects = {len(static_objs)}") + + # 2) Filter static objs by ego-centered radius + ego_states, valid_timestamp = _collect_ego_states(kitti360_data_root, log_name) + logging.info(f"[preprocess] {log_name}: ego states = {len(ego_states)}") + for obj in static_objs: + obj.filter_by_radius(ego_states[:, :3, 3], valid_timestamp, radius_m) + + # 3) Filter static objs by LiDAR point cloud visibility + lidar_extrinsic = get_kitti360_lidar_extrinsic(kitti360_data_root / "calibration") + + def process_one_frame(time_idx: int) -> None: + valid_time_idx = valid_timestamp[time_idx] + logging.info(f"[preprocess] {log_name}: t={valid_time_idx}") + lidar_path = _lidar_frame_path(kitti360_data_root, log_name, valid_time_idx) + if not lidar_path.exists(): + logging.warning(f"[preprocess] {log_name}: LiDAR frame not found: {lidar_path}") + return + + lidar_xyz = _load_lidar_xyz(lidar_path) + + # lidar to pose + lidar_h = np.concatenate((lidar_xyz, np.ones((lidar_xyz.shape[0], 1), dtype=lidar_xyz.dtype)), axis=1) + lidar_in_imu = lidar_h @ lidar_extrinsic.T + lidar_in_imu = lidar_in_imu[:, :3] + + # pose to world + lidar_in_world = lidar_in_imu @ ego_states[time_idx][:3, :3].T + ego_states[time_idx][:3, 3] + + for obj in static_objs: + if not any(record["timestamp"] == valid_time_idx for record in obj.valid_frames["records"]): + continue + visible, points_in_box = obj.box_visible_in_point_cloud(lidar_in_world) + if not visible: + obj.valid_frames["records"] = [ + record for record in obj.valid_frames["records"] if record["timestamp"] != valid_time_idx + ] + else: + for record in obj.valid_frames["records"]: + if 
record["timestamp"] == valid_time_idx: + record["points_in_box"] = points_in_box + break + + max_workers = os.cpu_count() * 2 + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + list(executor.map(process_one_frame, range(len(valid_timestamp)))) + + # 4) Save pickle + static_records: List[Dict[str, Any]] = [] + for obj in static_objs: + static_records.append(obj.valid_frames) + + if output_dir is None: + output_dir = kitti360_data_root / "data_3d_bboxes" / "preprocess" + output_dir.mkdir(parents=True, exist_ok=True) + out_path = output_dir / f"{log_name}_detection_preprocessed.pkl" + + payload = { + "log_name": log_name, + "static": static_records, + } + with open(out_path, "wb") as f: + pickle.dump(payload, f) + logging.info(f"[preprocess] saved: {out_path}") + + +if __name__ == "__main__": + import argparse + + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser(description="Precompute KITTI-360 detections filters") + parser.add_argument("--kitti360_data_root", type=Path, default=".", help="KITTI-360 data root directory") + parser.add_argument("--log_name", default="2013_05_28_drive_0000_sync") + parser.add_argument("--radius", type=float, default=60.0) + parser.add_argument("--out", type=Path, default="detection_preprocess", help="output directory for pkl") + args = parser.parse_args() + + process_detection( + kitti360_data_root=args.kitti360_data_root, + log_name=args.log_name, + radius_m=args.radius, + output_dir=args.out, + ) diff --git a/src/py123d/conversion/datasets/kitti_360/.gitkeep b/src/py123d/conversion/datasets/kitti_360/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/src/py123d/conversion/datasets/nuplan/nuplan_converter.py b/src/py123d/conversion/datasets/nuplan/nuplan_converter.py index c837e559..8c77169c 100644 --- a/src/py123d/conversion/datasets/nuplan/nuplan_converter.py +++ b/src/py123d/conversion/datasets/nuplan/nuplan_converter.py @@ -24,18 +24,18 @@ ) from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter -from py123d.conversion.registry.lidar_index_registry import NuPlanLidarIndex +from py123d.conversion.registry.lidar_index_registry import NuPlanLiDARIndex from py123d.datatypes.detections.box_detections import BoxDetectionSE3, BoxDetectionWrapper from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetection, TrafficLightDetectionWrapper from py123d.datatypes.maps.map_metadata import MapMetadata from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import ( +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import ( PinholeCameraMetadata, PinholeCameraType, PinholeDistortion, PinholeIntrinsics, ) -from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import ( @@ -52,14 +52,14 @@ # NOTE: Leaving this constant here, to avoid having a nuplan dependency in nuplan_constants.py NUPLAN_CAMERA_MAPPING = { - PinholeCameraType.CAM_F0: CameraChannel.CAM_F0, - PinholeCameraType.CAM_B0: CameraChannel.CAM_B0, - PinholeCameraType.CAM_L0: CameraChannel.CAM_L0, - PinholeCameraType.CAM_L1: CameraChannel.CAM_L1, - 
PinholeCameraType.CAM_L2: CameraChannel.CAM_L2, - PinholeCameraType.CAM_R0: CameraChannel.CAM_R0, - PinholeCameraType.CAM_R1: CameraChannel.CAM_R1, - PinholeCameraType.CAM_R2: CameraChannel.CAM_R2, + PinholeCameraType.PCAM_F0: CameraChannel.CAM_F0, + PinholeCameraType.PCAM_B0: CameraChannel.CAM_B0, + PinholeCameraType.PCAM_L0: CameraChannel.CAM_L0, + PinholeCameraType.PCAM_L1: CameraChannel.CAM_L1, + PinholeCameraType.PCAM_L2: CameraChannel.CAM_L2, + PinholeCameraType.PCAM_R0: CameraChannel.CAM_R0, + PinholeCameraType.PCAM_R1: CameraChannel.CAM_R1, + PinholeCameraType.PCAM_R2: CameraChannel.CAM_R2, } TARGET_DT: Final[float] = 0.1 # TODO: make configurable @@ -84,7 +84,9 @@ def __init__( dataset_converter_config: DatasetConverterConfig, ) -> None: super().__init__(dataset_converter_config) - + assert nuplan_data_root is not None, "The variable `nuplan_data_root` must be provided." + assert nuplan_maps_root is not None, "The variable `nuplan_maps_root` must be provided." + assert nuplan_sensor_root is not None, "The variable `nuplan_sensor_root` must be provided." for split in splits: assert ( split in NUPLAN_DATA_SPLITS @@ -176,7 +178,9 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: location=nuplan_log_db.log.map_version, timestep_seconds=TARGET_DT, vehicle_parameters=get_nuplan_chrysler_pacifica_parameters(), - camera_metadata=_get_nuplan_camera_metadata(source_log_path, self.dataset_converter_config), + pinhole_camera_metadata=_get_nuplan_camera_metadata( + source_log_path, self._nuplan_sensor_root, self.dataset_converter_config + ), lidar_metadata=_get_nuplan_lidar_metadata( self._nuplan_sensor_root, log_name, self.dataset_converter_config ), @@ -196,7 +200,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: ego_state=_extract_nuplan_ego_state(nuplan_lidar_pc), box_detections=_extract_nuplan_box_detections(nuplan_lidar_pc, source_log_path), traffic_lights=_extract_nuplan_traffic_lights(nuplan_log_db, lidar_pc_token), - cameras=_extract_nuplan_cameras( + pinhole_cameras=_extract_nuplan_cameras( nuplan_log_db=nuplan_log_db, nuplan_lidar_pc=nuplan_lidar_pc, source_log_path=source_log_path, @@ -235,6 +239,7 @@ def _get_nuplan_map_metadata(location: str) -> MapMetadata: def _get_nuplan_camera_metadata( source_log_path: Path, + nuplan_sensor_root: Path, dataset_converter_config: DatasetConverterConfig, ) -> Dict[PinholeCameraType, PinholeCameraMetadata]: @@ -256,9 +261,12 @@ def _get_camera_metadata(camera_type: PinholeCameraType) -> PinholeCameraMetadat ) camera_metadata: Dict[str, PinholeCameraMetadata] = {} - if dataset_converter_config.include_cameras: - for camera_type in NUPLAN_CAMERA_MAPPING.keys(): - camera_metadata[camera_type] = _get_camera_metadata(camera_type) + if dataset_converter_config.include_pinhole_cameras: + log_name = source_log_path.stem + for camera_type, nuplan_camera_type in NUPLAN_CAMERA_MAPPING.items(): + camera_folder = nuplan_sensor_root / log_name / f"{nuplan_camera_type.value}" + if camera_folder.exists() and camera_folder.is_dir(): + camera_metadata[camera_type] = _get_camera_metadata(camera_type) return camera_metadata @@ -277,7 +285,7 @@ def _get_nuplan_lidar_metadata( for lidar_type in NUPLAN_LIDAR_DICT.values(): metadata[lidar_type] = LiDARMetadata( lidar_type=lidar_type, - lidar_index=NuPlanLidarIndex, + lidar_index=NuPlanLiDARIndex, extrinsic=None, # NOTE: LiDAR extrinsic are unknown ) return metadata @@ -350,7 +358,7 @@ def _extract_nuplan_cameras( camera_dict: Dict[str, Union[str, bytes]] = 
{} - if dataset_converter_config.include_cameras: + if dataset_converter_config.include_pinhole_cameras: log_cam_infos = {camera.token: camera for camera in nuplan_log_db.log.cameras} for camera_type, camera_channel in NUPLAN_CAMERA_MAPPING.items(): camera_data: Optional[Union[str, bytes]] = None @@ -387,9 +395,9 @@ def _extract_nuplan_cameras( # Store camera data, either as path or binary camera_data: Optional[Union[str, bytes]] = None - if dataset_converter_config.camera_store_option == "path": + if dataset_converter_config.pinhole_camera_store_option == "path": camera_data = str(filename_jpg) - elif dataset_converter_config.camera_store_option == "binary": + elif dataset_converter_config.pinhole_camera_store_option == "binary": with open(filename_jpg, "rb") as f: camera_data = f.read() diff --git a/src/py123d/conversion/datasets/nuplan/nuplan_map_conversion.py b/src/py123d/conversion/datasets/nuplan/nuplan_map_conversion.py index bff709be..b8b010cb 100644 --- a/src/py123d/conversion/datasets/nuplan/nuplan_map_conversion.py +++ b/src/py123d/conversion/datasets/nuplan/nuplan_map_conversion.py @@ -135,8 +135,8 @@ def _write_nuplan_lane_connectors(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_w # 1. predecessor_ids, successor_ids lane_connector_row = get_row_with_value(nuplan_gdf["lane_connectors"], "fid", str(lane_id)) - predecessor_ids = lane_connector_row["entry_lane_fid"] - successor_ids = lane_connector_row["exit_lane_fid"] + predecessor_ids = [lane_connector_row["entry_lane_fid"]] + successor_ids = [lane_connector_row["exit_lane_fid"]] # 2. left_boundaries, right_boundaries lane_connector_polygons_row = get_row_with_value( diff --git a/src/py123d/conversion/datasets/nuplan/nuplan_sensor_io.py b/src/py123d/conversion/datasets/nuplan/nuplan_sensor_io.py index fed2d508..8c2506f0 100644 --- a/src/py123d/conversion/datasets/nuplan/nuplan_sensor_io.py +++ b/src/py123d/conversion/datasets/nuplan/nuplan_sensor_io.py @@ -6,8 +6,8 @@ from py123d.common.utils.dependencies import check_dependencies from py123d.conversion.datasets.nuplan.utils.nuplan_constants import NUPLAN_LIDAR_DICT -from py123d.datatypes.sensors.lidar.lidar import LiDARType -from py123d.datatypes.sensors.lidar.lidar_index import NuPlanLidarIndex +from py123d.conversion.registry.lidar_index_registry import NuPlanLiDARIndex +from py123d.datatypes.sensors.lidar import LiDARType check_dependencies(["nuplan"], "nuplan") from nuplan.database.utils.pointclouds.lidar import LidarPointCloud @@ -23,7 +23,7 @@ def load_nuplan_lidar_pcs_from_file(pcd_path: Path) -> Dict[LiDARType, np.ndarra lidar_pcs_dict: Dict[LiDARType, np.ndarray] = {} for lidar_id, lidar_type in NUPLAN_LIDAR_DICT.items(): mask = merged_lidar_pc[-1, :] == lidar_id - lidar_pc = merged_lidar_pc[: len(NuPlanLidarIndex), mask].T.astype(np.float32) + lidar_pc = merged_lidar_pc[: len(NuPlanLiDARIndex), mask].T.astype(np.float32) lidar_pcs_dict[lidar_type] = lidar_pc return lidar_pcs_dict diff --git a/src/py123d/conversion/datasets/nuplan/utils/nuplan_constants.py b/src/py123d/conversion/datasets/nuplan/utils/nuplan_constants.py index 4b074d53..d1159dc4 100644 --- a/src/py123d/conversion/datasets/nuplan/utils/nuplan_constants.py +++ b/src/py123d/conversion/datasets/nuplan/utils/nuplan_constants.py @@ -4,7 +4,7 @@ from py123d.datatypes.detections.box_detection_types import BoxDetectionType from py123d.datatypes.detections.traffic_light_detections import TrafficLightStatus from py123d.datatypes.maps.map_datatypes import RoadLineType -from py123d.datatypes.sensors.lidar.lidar 
import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType from py123d.datatypes.time.time_point import TimePoint diff --git a/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py b/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py index c4e1627e..6c22d6ce 100644 --- a/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py +++ b/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py @@ -12,24 +12,23 @@ from py123d.conversion.datasets.nuscenes.nuscenes_map_conversion import NUSCENES_MAPS, write_nuscenes_map from py123d.conversion.datasets.nuscenes.utils.nuscenes_constants import ( NUSCENES_CAMERA_TYPES, - NUSCENES_DATA_ROOT, NUSCENES_DATA_SPLITS, NUSCENES_DETECTION_NAME_DICT, NUSCENES_DT, ) from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from py123d.conversion.registry.lidar_index_registry import NuScenesLiDARIndex from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper from py123d.datatypes.maps.map_metadata import MapMetadata from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import ( +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import ( PinholeCameraMetadata, PinholeCameraType, PinholeDistortion, PinholeIntrinsics, ) -from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType -from py123d.datatypes.sensors.lidar.lidar_index import NuScenesLidarIndex from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import get_nuscenes_renault_zoe_parameters @@ -56,11 +55,19 @@ def __init__( ) -> None: super().__init__(dataset_converter_config) + assert nuscenes_data_root is not None, "The variable `nuscenes_data_root` must be provided." + assert nuscenes_map_root is not None, "The variable `nuscenes_map_root` must be provided." for split in splits: assert ( split in NUSCENES_DATA_SPLITS ), f"Split {split} is not available. Available splits: {NUSCENES_DATA_SPLITS}" + if dataset_converter_config.include_lidars: + assert dataset_converter_config.lidar_store_option in ["path", "binary"], ( + f"Invalid lidar_store_option: {dataset_converter_config.lidar_store_option}. " + f"Supported options are 'path' and 'binary'." 
+ ) + self._splits: List[str] = splits self._nuscenes_data_root: Path = Path(nuscenes_data_root) @@ -148,7 +155,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: location=log_record["location"], timestep_seconds=TARGET_DT, vehicle_parameters=get_nuscenes_renault_zoe_parameters(), - camera_metadata=_get_nuscenes_camera_metadata(nusc, scene, self.dataset_converter_config), + pinhole_camera_metadata=_get_nuscenes_pinhole_camera_metadata(nusc, scene, self.dataset_converter_config), lidar_metadata=_get_nuscenes_lidar_metadata(nusc, scene, self.dataset_converter_config), map_metadata=_get_nuscenes_map_metadata(log_record["location"]), ) @@ -172,14 +179,16 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: timestamp=TimePoint.from_us(sample["timestamp"]), ego_state=_extract_nuscenes_ego_state(nusc, sample, can_bus), box_detections=_extract_nuscenes_box_detections(nusc, sample), - cameras=_extract_nuscenes_cameras( + pinhole_cameras=_extract_nuscenes_cameras( nusc=nusc, sample=sample, + nuscenes_data_root=self._nuscenes_data_root, dataset_converter_config=self.dataset_converter_config, ), lidars=_extract_nuscenes_lidars( nusc=nusc, sample=sample, + nuscenes_data_root=self._nuscenes_data_root, dataset_converter_config=self.dataset_converter_config, ), ) @@ -192,14 +201,14 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: gc.collect() -def _get_nuscenes_camera_metadata( +def _get_nuscenes_pinhole_camera_metadata( nusc: NuScenes, scene: Dict[str, Any], dataset_converter_config: DatasetConverterConfig, ) -> Dict[PinholeCameraType, PinholeCameraMetadata]: camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {} - if dataset_converter_config.include_cameras: + if dataset_converter_config.include_pinhole_cameras: first_sample_token = scene["first_sample_token"] first_sample = nusc.get("sample", first_sample_token) @@ -246,7 +255,7 @@ def _get_nuscenes_lidar_metadata( metadata[LiDARType.LIDAR_TOP] = LiDARMetadata( lidar_type=LiDARType.LIDAR_TOP, - lidar_index=NuScenesLidarIndex, + lidar_index=NuScenesLiDARIndex, extrinsic=extrinsic, ) @@ -385,11 +394,12 @@ def _extract_nuscenes_box_detections(nusc: NuScenes, sample: Dict[str, Any]) -> def _extract_nuscenes_cameras( nusc: NuScenes, sample: Dict[str, Any], + nuscenes_data_root: Path, dataset_converter_config: DatasetConverterConfig, ) -> Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]]: camera_dict: Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]] = {} - if dataset_converter_config.include_cameras: + if dataset_converter_config.include_pinhole_cameras: for camera_type, camera_channel in NUSCENES_CAMERA_TYPES.items(): cam_token = sample["data"][camera_channel] cam_data = nusc.get("sample_data", cam_token) @@ -407,12 +417,12 @@ def _extract_nuscenes_cameras( extrinsic_matrix[:3, 3] = translation extrinsic = StateSE3.from_transformation_matrix(extrinsic_matrix) - cam_path = NUSCENES_DATA_ROOT / cam_data["filename"] + cam_path = nuscenes_data_root / cam_data["filename"] if cam_path.exists() and cam_path.is_file(): - if dataset_converter_config.camera_store_option == "path": + if dataset_converter_config.pinhole_camera_store_option == "path": camera_data = str(cam_path) - elif dataset_converter_config.camera_store_option == "binary": + elif dataset_converter_config.pinhole_camera_store_option == "binary": with open(cam_path, "rb") as f: camera_data = f.read() else: @@ -426,6 +436,7 @@ def _extract_nuscenes_cameras( def 
_extract_nuscenes_lidars( nusc: NuScenes, sample: Dict[str, Any], + nuscenes_data_root: Path, dataset_converter_config: DatasetConverterConfig, ) -> List[LiDARData]: lidars: List[LiDARData] = [] @@ -433,15 +444,14 @@ def _extract_nuscenes_lidars( if dataset_converter_config.include_lidars: lidar_token = sample["data"]["LIDAR_TOP"] lidar_data = nusc.get("sample_data", lidar_token) - absolute_lidar_path = NUSCENES_DATA_ROOT / lidar_data["filename"] + absolute_lidar_path = nuscenes_data_root / lidar_data["filename"] if absolute_lidar_path.exists() and absolute_lidar_path.is_file(): lidar = LiDARData( - lidar_type=LiDARType.LIDAR_MERGED, - relative_path=absolute_lidar_path.relative_to(NUSCENES_DATA_ROOT), - dataset_root=NUSCENES_DATA_ROOT, + lidar_type=LiDARType.LIDAR_TOP, + relative_path=absolute_lidar_path.relative_to(nuscenes_data_root), + dataset_root=nuscenes_data_root, iteration=lidar_data.get("iteration"), ) lidars.append(lidar) - return lidars diff --git a/src/py123d/conversion/datasets/nuscenes/nuscenes_sensor_io.py b/src/py123d/conversion/datasets/nuscenes/nuscenes_sensor_io.py index eccf0124..e09caae6 100644 --- a/src/py123d/conversion/datasets/nuscenes/nuscenes_sensor_io.py +++ b/src/py123d/conversion/datasets/nuscenes/nuscenes_sensor_io.py @@ -3,21 +3,21 @@ import numpy as np +from py123d.conversion.registry.lidar_index_registry import NuScenesLiDARIndex from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.lidar.lidar import LiDARType -from py123d.datatypes.sensors.lidar.lidar_index import NuScenesLidarIndex +from py123d.datatypes.sensors.lidar import LiDARType from py123d.geometry.se import StateSE3 from py123d.geometry.transform.transform_se3 import convert_points_3d_array_between_origins def load_nuscenes_lidar_pcs_from_file(pcd_path: Path, log_metadata: LogMetadata) -> Dict[LiDARType, np.ndarray]: - lidar_pc = np.fromfile(pcd_path, dtype=np.float32).reshape(-1, len(NuScenesLidarIndex)) + lidar_pc = np.fromfile(pcd_path, dtype=np.float32).reshape(-1, len(NuScenesLiDARIndex)) # convert lidar to ego frame lidar_extrinsic = log_metadata.lidar_metadata[LiDARType.LIDAR_TOP].extrinsic - lidar_pc[..., NuScenesLidarIndex.XYZ] = convert_points_3d_array_between_origins( + lidar_pc[..., NuScenesLiDARIndex.XYZ] = convert_points_3d_array_between_origins( from_origin=lidar_extrinsic, to_origin=StateSE3(0, 0, 0, 1.0, 0, 0, 0), - points_3d_array=lidar_pc[..., NuScenesLidarIndex.XYZ], + points_3d_array=lidar_pc[..., NuScenesLiDARIndex.XYZ], ) return {LiDARType.LIDAR_TOP: lidar_pc} diff --git a/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py b/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py index dd04d91a..8878401d 100644 --- a/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py +++ b/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py @@ -1,9 +1,7 @@ -import os -from pathlib import Path from typing import Final, List from py123d.datatypes.detections.box_detection_types import BoxDetectionType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType NUSCENES_MAPS: List[str] = ["boston-seaport", "singapore-hollandvillage", "singapore-onenorth", "singapore-queenstown"] @@ -50,11 +48,10 @@ } NUSCENES_CAMERA_TYPES = { - PinholeCameraType.CAM_F0: "CAM_FRONT", - PinholeCameraType.CAM_B0: "CAM_BACK", - PinholeCameraType.CAM_L0: "CAM_FRONT_LEFT", - PinholeCameraType.CAM_L1: "CAM_BACK_LEFT", - 
PinholeCameraType.CAM_R0: "CAM_FRONT_RIGHT", - PinholeCameraType.CAM_R1: "CAM_BACK_RIGHT", + PinholeCameraType.PCAM_F0: "CAM_FRONT", + PinholeCameraType.PCAM_B0: "CAM_BACK", + PinholeCameraType.PCAM_L0: "CAM_FRONT_LEFT", + PinholeCameraType.PCAM_L1: "CAM_BACK_LEFT", + PinholeCameraType.PCAM_R0: "CAM_FRONT_RIGHT", + PinholeCameraType.PCAM_R1: "CAM_BACK_RIGHT", } -NUSCENES_DATA_ROOT = Path(os.environ["NUSCENES_DATA_ROOT"]) diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_converter.py b/src/py123d/conversion/datasets/pandaset/pandaset_converter.py index 49a81d19..0f177af1 100644 --- a/src/py123d/conversion/datasets/pandaset/pandaset_converter.py +++ b/src/py123d/conversion/datasets/pandaset/pandaset_converter.py @@ -6,7 +6,7 @@ from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter from py123d.conversion.dataset_converter_config import DatasetConverterConfig -from py123d.conversion.datasets.pandaset.pandaset_constants import ( +from py123d.conversion.datasets.pandaset.utils.pandaset_constants import ( PANDASET_BOX_DETECTION_FROM_STR, PANDASET_BOX_DETECTION_TO_DEFAULT, PANDASET_CAMERA_DISTORTIONS, @@ -16,7 +16,7 @@ PANDASET_LOG_NAMES, PANDASET_SPLITS, ) -from py123d.conversion.datasets.pandaset.pandaset_utlis import ( +from py123d.conversion.datasets.pandaset.utils.pandaset_utlis import ( main_lidar_to_rear_axle, pandaset_pose_dict_to_state_se3, read_json, @@ -25,15 +25,15 @@ ) from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter -from py123d.conversion.registry.lidar_index_registry import PandasetLidarIndex +from py123d.conversion.registry.lidar_index_registry import PandasetLiDARIndex from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import ( +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import ( PinholeCameraMetadata, PinholeCameraType, PinholeIntrinsics, ) -from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import ( @@ -62,6 +62,7 @@ def __init__( super().__init__(dataset_converter_config) for split in splits: assert split in PANDASET_SPLITS, f"Split {split} is not available. Available splits: {PANDASET_SPLITS}" + assert pandaset_data_root is not None, "The variable `pandaset_data_root` must be provided." self._splits: List[str] = splits self._pandaset_data_root: Path = Path(pandaset_data_root) @@ -114,7 +115,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: location=None, # TODO: Add location information. timestep_seconds=0.1, vehicle_parameters=get_pandaset_chrysler_pacifica_parameters(), - camera_metadata=_get_pandaset_camera_metadata(source_log_path, self.dataset_converter_config), + pinhole_camera_metadata=_get_pandaset_camera_metadata(source_log_path, self.dataset_converter_config), lidar_metadata=_get_pandaset_lidar_metadata(source_log_path, self.dataset_converter_config), map_metadata=None, # NOTE: Pandaset does not have maps. 
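The nuScenes constants no longer export a module-level `NUSCENES_DATA_ROOT = Path(os.environ[...])`; the converter now threads `nuscenes_data_root` through `_extract_nuscenes_cameras` and `_extract_nuscenes_lidars` instead. The old pattern raised `KeyError` at import time whenever the environment variable was unset, even for users who never convert nuScenes. A minimal sketch of the call-time alternative (the helper name is hypothetical, not repo code):

```python
import os
from pathlib import Path
from typing import Optional


def resolve_data_root(explicit_root: Optional[str], env_var: str = "NUSCENES_DATA_ROOT") -> Path:
    """Prefer an explicit argument; fall back to the environment with a clear error."""
    if explicit_root is not None:
        return Path(explicit_root)
    try:
        return Path(os.environ[env_var])
    except KeyError as exc:
        # Fails only when a root is actually needed, not at import time.
        raise ValueError(f"Pass a data root explicitly or set ${env_var}.") from exc
```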
) @@ -142,7 +143,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: timestamp=TimePoint.from_s(timestep_s), ego_state=ego_state, box_detections=_extract_pandaset_box_detections(source_log_path, iteration, ego_state), - cameras=_extract_pandaset_sensor_camera( + pinhole_cameras=_extract_pandaset_sensor_camera( source_log_path, iteration, ego_state, @@ -167,7 +168,7 @@ def _get_pandaset_camera_metadata( camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {} - if dataset_config.include_cameras: + if dataset_config.include_pinhole_cameras: all_cameras_folder = source_log_path / "camera" for camera_folder in all_cameras_folder.iterdir(): camera_name = camera_folder.name @@ -204,7 +205,7 @@ def _get_pandaset_lidar_metadata( for lidar_name, lidar_type in PANDASET_LIDAR_MAPPING.items(): lidar_metadata[lidar_type] = LiDARMetadata( lidar_type=lidar_type, - lidar_index=PandasetLidarIndex, + lidar_index=PandasetLiDARIndex, extrinsic=PANDASET_LIDAR_EXTRINSICS[ lidar_name ], # TODO: These extrinsics are incorrect, and need to be transformed correctly. @@ -343,7 +344,7 @@ def _extract_pandaset_sensor_camera( camera_dict: Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]] = {} iteration_str = f"{iteration:02d}" - if dataset_converter_config.include_cameras: + if dataset_converter_config.include_pinhole_cameras: for camera_name, camera_type in PANDASET_CAMERA_MAPPING.items(): @@ -359,10 +360,10 @@ def _extract_pandaset_sensor_camera( ) camera_data = None - if dataset_converter_config.camera_store_option == "path": + if dataset_converter_config.pinhole_camera_store_option == "path": pandaset_data_root = source_log_path.parent camera_data = str(image_abs_path.relative_to(pandaset_data_root)) - elif dataset_converter_config.camera_store_option == "binary": + elif dataset_converter_config.pinhole_camera_store_option == "binary": with open(image_abs_path, "rb") as f: camera_data = f.read() camera_dict[camera_type] = camera_data, camera_extrinsic diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py b/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py index 30fff374..14f1f236 100644 --- a/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py +++ b/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py @@ -4,14 +4,14 @@ import numpy as np import pandas as pd -from py123d.conversion.datasets.pandaset.pandaset_utlis import ( +from py123d.conversion.datasets.pandaset.utils.pandaset_utlis import ( main_lidar_to_rear_axle, pandaset_pose_dict_to_state_se3, read_json, read_pkl_gz, ) -from py123d.conversion.registry.lidar_index_registry import PandasetLidarIndex -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.conversion.registry.lidar_index_registry import PandasetLiDARIndex +from py123d.datatypes.sensors.lidar import LiDARType from py123d.geometry.transform.transform_se3 import convert_absolute_to_relative_points_3d_array @@ -46,9 +46,9 @@ def load_pandaset_lidars_pcs_from_file( ) for lidar_type in lidar_pc_dict.keys(): - lidar_pc_dict[lidar_type][..., PandasetLidarIndex.XYZ] = convert_absolute_to_relative_points_3d_array( + lidar_pc_dict[lidar_type][..., PandasetLiDARIndex.XYZ] = convert_absolute_to_relative_points_3d_array( ego_pose, - lidar_pc_dict[lidar_type][..., PandasetLidarIndex.XYZ], + lidar_pc_dict[lidar_type][..., PandasetLiDARIndex.XYZ], ) return lidar_pc_dict diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_constants.py 
b/src/py123d/conversion/datasets/pandaset/utils/pandaset_constants.py similarity index 95% rename from src/py123d/conversion/datasets/pandaset/pandaset_constants.py rename to src/py123d/conversion/datasets/pandaset/utils/pandaset_constants.py index 93ef4bc8..1e65b509 100644 --- a/src/py123d/conversion/datasets/pandaset/pandaset_constants.py +++ b/src/py123d/conversion/datasets/pandaset/utils/pandaset_constants.py @@ -2,19 +2,19 @@ from py123d.common.utils.enums import SerialIntEnum from py123d.datatypes.detections.box_detection_types import BoxDetectionType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType, PinholeDistortion, PinholeIntrinsics -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType, PinholeDistortion, PinholeIntrinsics from py123d.geometry import StateSE3 PANDASET_SPLITS: List[str] = ["pandaset_train", "pandaset_val", "pandaset_test"] PANDASET_CAMERA_MAPPING: Dict[str, PinholeCameraType] = { - "front_camera": PinholeCameraType.CAM_F0, - "back_camera": PinholeCameraType.CAM_B0, - "front_left_camera": PinholeCameraType.CAM_L0, - "front_right_camera": PinholeCameraType.CAM_R0, - "left_camera": PinholeCameraType.CAM_L1, - "right_camera": PinholeCameraType.CAM_R1, + "front_camera": PinholeCameraType.PCAM_F0, + "back_camera": PinholeCameraType.PCAM_B0, + "front_left_camera": PinholeCameraType.PCAM_L0, + "front_right_camera": PinholeCameraType.PCAM_R0, + "left_camera": PinholeCameraType.PCAM_L1, + "right_camera": PinholeCameraType.PCAM_R1, } PANDASET_LIDAR_MAPPING: Dict[str, LiDARType] = {"main_pandar64": LiDARType.LIDAR_TOP, "front_gt": LiDARType.LIDAR_FRONT} diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_utlis.py b/src/py123d/conversion/datasets/pandaset/utils/pandaset_utlis.py similarity index 89% rename from src/py123d/conversion/datasets/pandaset/pandaset_utlis.py rename to src/py123d/conversion/datasets/pandaset/utils/pandaset_utlis.py index e179a41c..68575e7e 100644 --- a/src/py123d/conversion/datasets/pandaset/pandaset_utlis.py +++ b/src/py123d/conversion/datasets/pandaset/utils/pandaset_utlis.py @@ -61,10 +61,6 @@ def rotate_pandaset_pose_to_iso_coordinates(pose: StateSE3) -> StateSE3: transformation_matrix = pose.transformation_matrix.copy() transformation_matrix[0:3, 0:3] = transformation_matrix[0:3, 0:3] @ F - # transformation_matrix[0, 3] = pose.y - # transformation_matrix[1, 3] = -pose.x - # transformation_matrix[2, 3] = pose.z - return StateSE3.from_transformation_matrix(transformation_matrix) @@ -78,15 +74,11 @@ def main_lidar_to_rear_axle(pose: StateSE3) -> StateSE3: ], dtype=np.float64, ).T - # F = np.eye(3, dtype=np.float64) transformation_matrix = pose.transformation_matrix.copy() transformation_matrix[0:3, 0:3] = transformation_matrix[0:3, 0:3] @ F rotated_pose = StateSE3.from_transformation_matrix(transformation_matrix) - imu_pose = translate_se3_along_body_frame( - rotated_pose, - vector_3d=Vector3D(x=-0.840, y=0.0, z=0.0), - ) + imu_pose = translate_se3_along_body_frame(rotated_pose, vector_3d=Vector3D(x=-0.840, y=0.0, z=0.0)) return imu_pose diff --git a/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py b/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py index 963a056d..82b0c891 100644 --- a/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py +++ b/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py @@ -2,8 +2,8 @@ from 
py123d.datatypes.detections.box_detection_types import BoxDetectionType from py123d.datatypes.maps.map_datatypes import LaneType, RoadEdgeType, RoadLineType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType WOPD_AVAILABLE_SPLITS: List[str] = [ "wopd_train", @@ -22,11 +22,11 @@ # https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L50 WOPD_CAMERA_TYPES: Dict[int, PinholeCameraType] = { - 1: PinholeCameraType.CAM_F0, # front_camera - 2: PinholeCameraType.CAM_L0, # front_left_camera - 3: PinholeCameraType.CAM_R0, # front_right_camera - 4: PinholeCameraType.CAM_L1, # left_camera - 5: PinholeCameraType.CAM_R1, # right_camera + 1: PinholeCameraType.PCAM_F0, # front_camera + 2: PinholeCameraType.PCAM_L0, # front_left_camera + 3: PinholeCameraType.PCAM_R0, # front_right_camera + 4: PinholeCameraType.PCAM_L1, # left_camera + 5: PinholeCameraType.PCAM_R1, # right_camera } # https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L66 diff --git a/src/py123d/conversion/datasets/wopd/waymo_map_utils/wopd_map_utils copy.py b/src/py123d/conversion/datasets/wopd/waymo_map_utils/wopd_map_utils copy.py deleted file mode 100644 index 0cc69d25..00000000 --- a/src/py123d/conversion/datasets/wopd/waymo_map_utils/wopd_map_utils copy.py +++ /dev/null @@ -1,390 +0,0 @@ -# from collections import defaultdict -# from pathlib import Path -# from typing import Dict, List, Optional - -# import geopandas as gpd -# import numpy as np -# import numpy.typing as npt -# import pandas as pd -# import shapely.geometry as geom - -# from py123d.common.utils.dependencies import check_dependencies -# from py123d.conversion.datasets.wopd.waymo_map_utils.womp_boundary_utils import extract_lane_boundaries -# from py123d.datatypes.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType -# from py123d.geometry import Point3DIndex, Polyline3D -# from py123d.geometry.utils.units import mph_to_mps - -# check_dependencies(modules=["waymo_open_dataset"], optional_name="waymo") -# from waymo_open_dataset import dataset_pb2 - -# # TODO: -# # - Implement stop signs -# # - Implement speed bumps -# # - Implement driveways with a different semantic type if needed -# # - Implement intersections and lane group logic - -# WAYMO_ROAD_LINE_CONVERSION = { -# 0: RoadLineType.UNKNOWN, # aka. UNKNOWN -# 1: RoadLineType.DASHED_WHITE, # aka. BROKEN_SINGLE_WHITE -# 2: RoadLineType.SOLID_WHITE, # aka. SOLID_SINGLE_WHITE -# 3: RoadLineType.DOUBLE_SOLID_WHITE, # aka. SOLID_DOUBLE_WHITE -# 4: RoadLineType.DASHED_YELLOW, # aka. BROKEN_SINGLE_YELLOW -# 5: RoadLineType.DOUBLE_DASH_YELLOW, # aka. BROKEN_DOUBLE_YELLOW -# 6: RoadLineType.SOLID_YELLOW, # aka. SOLID_SINGLE_YELLOW -# 7: RoadLineType.DOUBLE_SOLID_YELLOW, # aka. SOLID_DOUBLE_YELLOW -# 8: RoadLineType.DOUBLE_DASH_YELLOW, # aka. 
PASSING_DOUBLE_YELLOW -# } - -# WAYMO_ROAD_EDGE_CONVERSION = { -# 0: RoadEdgeType.UNKNOWN, -# 1: RoadEdgeType.ROAD_EDGE_BOUNDARY, -# 2: RoadEdgeType.ROAD_EDGE_MEDIAN, -# } - - -# def convert_wopd_map(frame: dataset_pb2.Frame, map_file_path: Path) -> None: - -# def _extract_polyline(data) -> npt.NDArray[np.float64]: -# polyline = np.array([[p.x, p.y, p.z] for p in data.polyline], dtype=np.float64) -# return polyline - -# def _extract_polygon(data) -> npt.NDArray[np.float64]: -# polygon = np.array([[p.x, p.y, p.z] for p in data.polygon], dtype=np.float64) -# assert polygon.shape[0] >= 3, "Polygon must have at least 3 points" -# assert polygon.shape[1] == 3, "Polygon must have 3 coordinates (x, y, z)" -# return polygon - -# def _extract_neighbors(data) -> List[Dict[str, int]]: -# neighbors = [] -# for neighbor in data: -# neighbors.append( -# { -# "lane_id": neighbor.feature_id, -# "self_start_index": neighbor.self_start_index, -# "self_end_index": neighbor.self_end_index, -# "neighbor_start_index": neighbor.neighbor_start_index, -# "neighbor_end_index": neighbor.neighbor_end_index, -# } -# ) -# return neighbors - -# lanes: Dict[int, npt.NDArray[np.float64]] = {} -# lanes_successors = defaultdict(list) -# lanes_predecessors = defaultdict(list) -# lanes_speed_limit_mps: Dict[int, float] = {} -# lanes_type: Dict[int, int] = {} -# lanes_left_neighbors: Dict[int, List[Dict[str, int]]] = {} -# lanes_right_neighbors: Dict[int, List[Dict[str, int]]] = {} - -# road_lines: Dict[int, npt.NDArray[np.float64]] = {} -# road_lines_type: Dict[int, RoadLineType] = {} - -# road_edges: Dict[int, npt.NDArray[np.float64]] = {} -# road_edges_type: Dict[int, int] = {} - -# crosswalks: Dict[int, npt.NDArray[np.float64]] = {} -# carparks: Dict[int, npt.NDArray[np.float64]] = {} - -# for map_feature in frame.map_features: -# if map_feature.HasField("lane"): -# polyline = _extract_polyline(map_feature.lane) -# # Ignore lanes with less than 2 points or not 2D -# if polyline.ndim != 2 or polyline.shape[0] < 2: -# continue -# lanes[map_feature.id] = polyline -# for lane_id_ in map_feature.lane.exit_lanes: -# lanes_successors[map_feature.id].append(lane_id_) -# for lane_id_ in map_feature.lane.exit_lanes: -# lanes_predecessors[map_feature.id].append(lane_id_) -# lanes_speed_limit_mps[map_feature.id] = mph_to_mps(map_feature.lane.speed_limit_mph) -# lanes_type[map_feature.id] = map_feature.lane.type -# lanes_left_neighbors[map_feature.id] = _extract_neighbors(map_feature.lane.left_neighbors) -# lanes_right_neighbors[map_feature.id] = _extract_neighbors(map_feature.lane.right_neighbors) -# elif map_feature.HasField("road_line"): -# polyline = _extract_polyline(map_feature.road_line) -# if polyline.ndim != 2 or polyline.shape[0] < 2: -# continue -# road_lines[map_feature.id] = polyline -# road_lines_type[map_feature.id] = WAYMO_ROAD_LINE_CONVERSION.get( -# map_feature.road_line.type, RoadLineType.UNKNOWN -# ) -# elif map_feature.HasField("road_edge"): -# polyline = _extract_polyline(map_feature.road_edge) -# if polyline.ndim != 2 or polyline.shape[0] < 2: -# continue -# road_edges[map_feature.id] = polyline -# road_edges_type[map_feature.id] = WAYMO_ROAD_EDGE_CONVERSION.get( -# map_feature.road_edge.type, RoadEdgeType.UNKNOWN -# ) -# elif map_feature.HasField("stop_sign"): -# # TODO: implement stop signs -# pass -# elif map_feature.HasField("crosswalk"): -# crosswalks[map_feature.id] = _extract_polygon(map_feature.crosswalk) -# elif map_feature.HasField("speed_bump"): -# # TODO: implement speed bumps -# pass -# elif 
map_feature.HasField("driveway"): -# # NOTE: Determine whether to use a different semantic type for driveways. -# carparks[map_feature.id] = _extract_polygon(map_feature.driveway) - -# lane_left_boundaries_3d, lane_right_boundaries_3d = extract_lane_boundaries( -# lanes, lanes_successors, lanes_predecessors, road_lines, road_edges -# ) - -# lane_df = get_lane_df( -# lanes, -# lanes_successors, -# lanes_predecessors, -# lanes_speed_limit_mps, -# lane_left_boundaries_3d, -# lane_right_boundaries_3d, -# lanes_type, -# lanes_left_neighbors, -# lanes_right_neighbors, -# ) -# lane_group_df = get_lane_group_df( -# lanes, -# lanes_successors, -# lanes_predecessors, -# lane_left_boundaries_3d, -# lane_right_boundaries_3d, -# ) -# intersection_df = get_intersections_df() -# crosswalk_df = get_crosswalk_df(crosswalks) -# walkway_df = get_walkway_df() -# carpark_df = get_carpark_df(carparks) -# generic_drivable_df = get_generic_drivable_df() -# road_edge_df = get_road_edge_df(road_edges, road_edges_type) -# road_line_df = get_road_line_df(road_lines, road_lines_type) - -# map_file_path.unlink(missing_ok=True) -# if not map_file_path.parent.exists(): -# map_file_path.parent.mkdir(parents=True, exist_ok=True) - -# lane_df.to_file(map_file_path, layer=MapLayer.LANE.serialize(), driver="GPKG") -# lane_group_df.to_file(map_file_path, layer=MapLayer.LANE_GROUP.serialize(), driver="GPKG", mode="a") -# intersection_df.to_file(map_file_path, layer=MapLayer.INTERSECTION.serialize(), driver="GPKG", mode="a") -# crosswalk_df.to_file(map_file_path, layer=MapLayer.CROSSWALK.serialize(), driver="GPKG", mode="a") -# walkway_df.to_file(map_file_path, layer=MapLayer.WALKWAY.serialize(), driver="GPKG", mode="a") -# carpark_df.to_file(map_file_path, layer=MapLayer.CARPARK.serialize(), driver="GPKG", mode="a") -# generic_drivable_df.to_file(map_file_path, layer=MapLayer.GENERIC_DRIVABLE.serialize(), driver="GPKG", mode="a") -# road_edge_df.to_file(map_file_path, layer=MapLayer.ROAD_EDGE.serialize(), driver="GPKG", mode="a") -# road_line_df.to_file(map_file_path, layer=MapLayer.ROAD_LINE.serialize(), driver="GPKG", mode="a") - - -# def get_lane_df( -# lanes: Dict[int, npt.NDArray[np.float64]], -# lanes_successors: Dict[int, List[int]], -# lanes_predecessors: Dict[int, List[int]], -# lanes_speed_limit_mps: Dict[int, float], -# lanes_left_boundaries_3d: Dict[int, Polyline3D], -# lanes_right_boundaries_3d: Dict[int, Polyline3D], -# lanes_type: Dict[int, int], -# lanes_left_neighbors: Dict[int, List[Dict[str, int]]], -# lanes_right_neighbors: Dict[int, List[Dict[str, int]]], -# ) -> gpd.GeoDataFrame: - -# ids = [] -# lane_types = [] -# lane_group_ids = [] -# speed_limits_mps = [] -# predecessor_ids = [] -# successor_ids = [] -# left_boundaries = [] -# right_boundaries = [] -# left_lane_ids = [] -# right_lane_ids = [] -# baseline_paths = [] -# geometries = [] - -# def _get_majority_neighbor(neighbors: List[Dict[str, int]]) -> Optional[int]: -# if len(neighbors) == 0: -# return None -# length = { -# neighbor["lane_id"]: neighbor["self_end_index"] - neighbor["self_start_index"] for neighbor in neighbors -# } -# return str(max(length, key=length.get)) - -# for lane_id, lane_centerline_array in lanes.items(): -# if lane_id not in lanes_left_boundaries_3d or lane_id not in lanes_right_boundaries_3d: -# continue -# lane_centerline = Polyline3D.from_array(lane_centerline_array) -# lane_speed_limit_mps = lanes_speed_limit_mps[lane_id] if lanes_speed_limit_mps[lane_id] > 0.0 else None - -# ids.append(lane_id) -# 
lane_types.append(lanes_type[lane_id]) -# lane_group_ids.append([lane_id]) -# speed_limits_mps.append(lane_speed_limit_mps) -# predecessor_ids.append(lanes_predecessors[lane_id]) -# successor_ids.append(lanes_successors[lane_id]) -# left_boundaries.append(lanes_left_boundaries_3d[lane_id].linestring) -# right_boundaries.append(lanes_right_boundaries_3d[lane_id].linestring) -# left_lane_ids.append(_get_majority_neighbor(lanes_left_neighbors[lane_id])) -# right_lane_ids.append(_get_majority_neighbor(lanes_right_neighbors[lane_id])) -# baseline_paths.append(lane_centerline.linestring) - -# geometry = geom.Polygon( -# np.vstack( -# [ -# lanes_left_boundaries_3d[lane_id].array[:, :2], -# lanes_right_boundaries_3d[lane_id].array[:, :2][::-1], -# ] -# ) -# ) -# geometries.append(geometry) - -# data = pd.DataFrame( -# { -# "id": ids, -# "lane_type": lane_types, -# "lane_group_id": lane_group_ids, -# "speed_limit_mps": speed_limits_mps, -# "predecessor_ids": predecessor_ids, -# "successor_ids": successor_ids, -# "left_boundary": left_boundaries, -# "right_boundary": right_boundaries, -# "left_lane_id": left_lane_ids, -# "right_lane_id": right_lane_ids, -# "baseline_path": baseline_paths, -# } -# ) - -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_lane_group_df( -# lanes: Dict[int, npt.NDArray[np.float64]], -# lanes_successors: Dict[int, List[int]], -# lanes_predecessors: Dict[int, List[int]], -# lanes_left_boundaries_3d: Dict[int, Polyline3D], -# lanes_right_boundaries_3d: Dict[int, Polyline3D], -# ) -> gpd.GeoDataFrame: - -# ids = [] -# lane_ids = [] -# intersection_ids = [] -# predecessor_lane_group_ids = [] -# successor_lane_group_ids = [] -# left_boundaries = [] -# right_boundaries = [] -# geometries = [] - -# # NOTE: WOPD does not provide lane groups, so we create a lane group for each lane. -# for lane_id in lanes.keys(): -# if lane_id not in lanes_left_boundaries_3d or lane_id not in lanes_right_boundaries_3d: -# continue -# ids.append(lane_id) -# lane_ids.append([lane_id]) -# intersection_ids.append(None) # WOPD does not provide intersections -# predecessor_lane_group_ids.append(lanes_predecessors[lane_id]) -# successor_lane_group_ids.append(lanes_successors[lane_id]) -# left_boundaries.append(lanes_left_boundaries_3d[lane_id].linestring) -# right_boundaries.append(lanes_right_boundaries_3d[lane_id].linestring) -# geometry = geom.Polygon( -# np.vstack( -# [ -# lanes_left_boundaries_3d[lane_id].array[:, :2], -# lanes_right_boundaries_3d[lane_id].array[:, :2][::-1], -# ] -# ) -# ) -# geometries.append(geometry) - -# data = pd.DataFrame( -# { -# "id": ids, -# "lane_ids": lane_ids, -# "intersection_id": intersection_ids, -# "predecessor_lane_group_ids": predecessor_lane_group_ids, -# "successor_lane_group_ids": successor_lane_group_ids, -# "left_boundary": left_boundaries, -# "right_boundary": right_boundaries, -# } -# ) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_intersections_df() -> gpd.GeoDataFrame: -# ids = [] -# lane_group_ids = [] -# geometries = [] - -# # NOTE: WOPD does not provide intersections, so we create an empty DataFrame. 
-# data = pd.DataFrame({"id": ids, "lane_group_ids": lane_group_ids}) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_carpark_df(carparks) -> gpd.GeoDataFrame: -# ids = list(carparks.keys()) -# outlines = [geom.LineString(outline) for outline in carparks.values()] -# geometries = [geom.Polygon(outline[..., Point3DIndex.XY]) for outline in carparks.values()] - -# data = pd.DataFrame({"id": ids, "outline": outlines}) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_walkway_df() -> gpd.GeoDataFrame: -# ids = [] -# geometries = [] - -# # NOTE: WOPD does not provide walkways, so we create an empty DataFrame. -# data = pd.DataFrame({"id": ids}) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_crosswalk_df(crosswalks: Dict[int, npt.NDArray[np.float64]]) -> gpd.GeoDataFrame: -# ids = list(crosswalks.keys()) -# outlines = [geom.LineString(outline) for outline in crosswalks.values()] -# geometries = [geom.Polygon(outline[..., Point3DIndex.XY]) for outline in crosswalks.values()] - -# data = pd.DataFrame({"id": ids, "outline": outlines}) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_generic_drivable_df() -> gpd.GeoDataFrame: -# ids = [] -# geometries = [] - -# # NOTE: WOPD does not provide generic drivable areas, so we create an empty DataFrame. -# data = pd.DataFrame({"id": ids}) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_road_edge_df( -# road_edges: Dict[int, npt.NDArray[np.float64]], road_edges_type: Dict[int, RoadEdgeType] -# ) -> gpd.GeoDataFrame: -# ids = list(road_edges.keys()) -# geometries = [Polyline3D.from_array(road_edge).linestring for road_edge in road_edges.values()] - -# data = pd.DataFrame( -# { -# "id": ids, -# "road_edge_type": [int(road_edge_type) for road_edge_type in road_edges_type.values()], -# } -# ) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_road_line_df( -# road_lines: Dict[int, npt.NDArray[np.float64]], road_lines_type: Dict[int, RoadLineType] -# ) -> gpd.GeoDataFrame: -# ids = list(road_lines.keys()) -# geometries = [Polyline3D.from_array(road_edge).linestring for road_edge in road_lines.values()] - -# data = pd.DataFrame( -# { -# "id": ids, -# "road_line_type": [int(road_line_type) for road_line_type in road_lines_type.values()], -# } -# ) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf diff --git a/src/py123d/conversion/datasets/wopd/waymo_sensor_io.py b/src/py123d/conversion/datasets/wopd/waymo_sensor_io.py index ca32c3d8..cf25274c 100644 --- a/src/py123d/conversion/datasets/wopd/waymo_sensor_io.py +++ b/src/py123d/conversion/datasets/wopd/waymo_sensor_io.py @@ -6,8 +6,8 @@ from py123d.common.utils.dependencies import check_dependencies from py123d.conversion.datasets.wopd.utils.wopd_constants import WOPD_CAMERA_TYPES, WOPD_LIDAR_TYPES from py123d.conversion.datasets.wopd.utils.wopd_utils import parse_range_image_and_camera_projection -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType check_dependencies(modules=["tensorflow", "waymo_open_dataset"], optional_name="waymo") import tensorflow as tf diff --git a/src/py123d/conversion/datasets/wopd/wopd_converter.py b/src/py123d/conversion/datasets/wopd/wopd_converter.py index 
c2fe667f..cc42ab4d 100644 --- a/src/py123d/conversion/datasets/wopd/wopd_converter.py +++ b/src/py123d/conversion/datasets/wopd/wopd_converter.py @@ -19,18 +19,18 @@ from py123d.conversion.datasets.wopd.waymo_map_utils.wopd_map_utils import convert_wopd_map from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter -from py123d.conversion.registry.lidar_index_registry import DefaultLidarIndex, WOPDLidarIndex +from py123d.conversion.registry.lidar_index_registry import DefaultLiDARIndex, WOPDLiDARIndex from py123d.conversion.utils.sensor_utils.camera_conventions import CameraConvention, convert_camera_convention from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper from py123d.datatypes.maps.map_metadata import MapMetadata from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import ( +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import ( PinholeCameraMetadata, PinholeCameraType, PinholeDistortion, PinholeIntrinsics, ) -from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import get_wopd_chrysler_pacifica_parameters @@ -143,7 +143,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: location=str(initial_frame.context.stats.location), timestep_seconds=0.1, vehicle_parameters=get_wopd_chrysler_pacifica_parameters(), - camera_metadata=_get_wopd_camera_metadata( + pinhole_camera_metadata=_get_wopd_camera_metadata( initial_frame, self.dataset_converter_config, ), @@ -178,7 +178,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: ego_state=_extract_wopd_ego_state(frame, map_pose_offset), box_detections=_extract_wopd_box_detections(frame, map_pose_offset, self._zero_roll_pitch), traffic_lights=None, # TODO: Check if WOPD has traffic light information - cameras=_extract_wopd_cameras(frame, self.dataset_converter_config), + pinhole_cameras=_extract_wopd_cameras(frame, self.dataset_converter_config), lidars=_extract_wopd_lidars( frame, self._keep_polar_features, @@ -232,7 +232,7 @@ def _get_wopd_camera_metadata( camera_metadata_dict: Dict[PinholeCameraType, PinholeCameraMetadata] = {} - if dataset_converter_config.camera_store_option is not None: + if dataset_converter_config.pinhole_camera_store_option is not None: for calibration in initial_frame.context.camera_calibrations: camera_type = WOPD_CAMERA_TYPES[calibration.name] # https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L96 @@ -261,7 +261,7 @@ def _get_wopd_lidar_metadata( laser_metadatas: Dict[LiDARType, LiDARMetadata] = {} # NOTE: Using - lidar_index = WOPDLidarIndex if keep_polar_features else DefaultLidarIndex + lidar_index = WOPDLiDARIndex if keep_polar_features else DefaultLiDARIndex if dataset_converter_config.lidar_store_option is not None: for laser_calibration in initial_frame.context.laser_calibrations: @@ -381,7 +381,7 @@ def _extract_wopd_cameras( camera_dict: Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]] = {} - if dataset_converter_config.include_cameras: + if 
dataset_converter_config.include_pinhole_cameras: # NOTE @DanielDauner: The extrinsic matrix in frame.context.camera_calibration is fixed to model the ego to camera transformation. # The poses in frame.images[idx] are the motion compensated ego poses when the camera triggers. diff --git a/src/py123d/conversion/log_writer/abstract_log_writer.py b/src/py123d/conversion/log_writer/abstract_log_writer.py index 6e5185a2..b367ea69 100644 --- a/src/py123d/conversion/log_writer/abstract_log_writer.py +++ b/src/py123d/conversion/log_writer/abstract_log_writer.py @@ -9,8 +9,9 @@ from py123d.datatypes.detections.box_detections import BoxDetectionWrapper from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 @@ -39,7 +40,8 @@ def write( ego_state: Optional[EgoStateSE3] = None, box_detections: Optional[BoxDetectionWrapper] = None, traffic_lights: Optional[TrafficLightDetectionWrapper] = None, - cameras: Optional[Dict[PinholeCameraType, Tuple[Any, ...]]] = None, + pinhole_cameras: Optional[Dict[PinholeCameraType, Tuple[Any, ...]]] = None, + fisheye_mei_cameras: Optional[Dict[FisheyeMEICameraType, Tuple[Any, ...]]] = None, lidars: Optional[List[LiDARData]] = None, scenario_tags: Optional[List[str]] = None, route_lane_group_ids: Optional[List[int]] = None, diff --git a/src/py123d/conversion/log_writer/arrow_log_writer.py b/src/py123d/conversion/log_writer/arrow_log_writer.py index f48a18e6..446b1126 100644 --- a/src/py123d/conversion/log_writer/arrow_log_writer.py +++ b/src/py123d/conversion/log_writer/arrow_log_writer.py @@ -15,8 +15,9 @@ from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper from py123d.datatypes.scene.arrow.utils.arrow_metadata_utils import add_log_metadata_to_arrow_schema from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3, EgoStateSE3Index from py123d.geometry import BoundingBoxSE3Index, StateSE3, StateSE3Index, Vector3DIndex @@ -83,7 +84,8 @@ def write( ego_state: Optional[EgoStateSE3] = None, box_detections: Optional[BoxDetectionWrapper] = None, traffic_lights: Optional[TrafficLightDetectionWrapper] = None, - cameras: Optional[Dict[PinholeCameraType, Tuple[Any, ...]]] = None, + pinhole_cameras: Optional[Dict[PinholeCameraType, Tuple[Any, ...]]] = None, + fisheye_mei_cameras: Optional[Dict[FisheyeMEICameraType, Tuple[Any, ...]]] = None, lidars: Optional[List[LiDARData]] = None, scenario_tags: Optional[List[str]] = None, route_lane_group_ids: Optional[List[int]] = None, @@ -158,34 +160,68 @@ def write( 
record_batch_data["traffic_light_types"] = [traffic_light_types] # -------------------------------------------------------------------------------------------------------------- - # Cameras + # Pinhole Cameras # -------------------------------------------------------------------------------------------------------------- - if self._dataset_converter_config.include_cameras: - assert cameras is not None, "Camera data is required but not provided." - provided_cameras = set(cameras.keys()) - expected_cameras = set(self._log_metadata.camera_metadata.keys()) - for camera_type in expected_cameras: - camera_name = camera_type.serialize() + if self._dataset_converter_config.include_pinhole_cameras: + assert pinhole_cameras is not None, "Pinhole camera data is required but not provided." + provided_pinhole_cameras = set(pinhole_cameras.keys()) + expected_pinhole_cameras = set(self._log_metadata.pinhole_camera_metadata.keys()) + for pinhole_camera_type in expected_pinhole_cameras: + pinhole_camera_name = pinhole_camera_type.serialize() # NOTE @DanielDauner: Missing cameras are allowed, e.g., for synchronization mismatches. # In this case, we write None/null to the arrow table. - camera_data: Optional[Any] = None - camera_pose: Optional[StateSE3] = None - if camera_type in provided_cameras: - camera_data, camera_pose = cameras[camera_type] + pinhole_camera_data: Optional[Any] = None + pinhole_camera_pose: Optional[StateSE3] = None + if pinhole_camera_type in provided_pinhole_cameras: + pinhole_camera_data, pinhole_camera_pose = pinhole_cameras[pinhole_camera_type] # TODO: Refactor how camera data handed to the writer. # This should be combined with configurations to write to log, sensor_root, or sensor_root as mp4. - if isinstance(camera_data, Path) or isinstance(camera_data, str): - camera_data = str(camera_data) - elif isinstance(camera_data, bytes): - camera_data = camera_data - elif isinstance(camera_data, np.ndarray): - _, encoded_img = cv2.imencode(".jpg", camera_data) - camera_data = encoded_img.tobytes() + if isinstance(pinhole_camera_data, Path) or isinstance(pinhole_camera_data, str): + pinhole_camera_data = str(pinhole_camera_data) + elif isinstance(pinhole_camera_data, bytes): + pinhole_camera_data = pinhole_camera_data + elif isinstance(pinhole_camera_data, np.ndarray): + _, encoded_img = cv2.imencode(".jpg", pinhole_camera_data) + pinhole_camera_data = encoded_img.tobytes() + + record_batch_data[f"{pinhole_camera_name}_data"] = [pinhole_camera_data] + record_batch_data[f"{pinhole_camera_name}_extrinsic"] = [ + pinhole_camera_pose.array if pinhole_camera_pose else None + ] + + # -------------------------------------------------------------------------------------------------------------- + # Fisheye MEI Cameras + # -------------------------------------------------------------------------------------------------------------- + if self._dataset_converter_config.include_fisheye_mei_cameras: + assert fisheye_mei_cameras is not None, "Fisheye MEI camera data is required but not provided." + provided_fisheye_mei_cameras = set(fisheye_mei_cameras.keys()) + expected_fisheye_mei_cameras = set(self._log_metadata.fisheye_mei_camera_metadata.keys()) + for fisheye_mei_camera_type in expected_fisheye_mei_cameras: + fisheye_mei_camera_name = fisheye_mei_camera_type.serialize() + + # NOTE @DanielDauner: Missing cameras are allowed, e.g., for synchronization mismatches. + # In this case, we write None/null to the arrow table. 
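Both camera branches of `ArrowLogWriter.write` normalize heterogeneous payloads the same way before the record batch is assembled: paths collapse to strings, raw bytes pass through, and in-memory images are JPEG-encoded. A condensed sketch of that shared logic (the function name is ours, not the writer's API):

```python
from pathlib import Path
from typing import Any, Optional, Union

import cv2
import numpy as np


def normalize_camera_payload(data: Optional[Any]) -> Optional[Union[str, bytes]]:
    if data is None:
        return None  # missing camera (e.g. synchronization mismatch) -> null column
    if isinstance(data, (Path, str)):
        return str(data)  # "path" store option
    if isinstance(data, bytes):
        return data  # already encoded, "binary" store option
    if isinstance(data, np.ndarray):
        ok, encoded = cv2.imencode(".jpg", data)  # encode raw image arrays as JPEG
        if not ok:
            raise ValueError("JPEG encoding failed")
        return encoded.tobytes()
    raise TypeError(f"Unsupported camera payload: {type(data)!r}")
```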
+ fisheye_mei_camera_data: Optional[Any] = None + fisheye_mei_camera_pose: Optional[StateSE3] = None + if fisheye_mei_camera_type in provided_fisheye_mei_cameras: + fisheye_mei_camera_data, fisheye_mei_camera_pose = fisheye_mei_cameras[fisheye_mei_camera_type] - record_batch_data[f"{camera_name}_data"] = [camera_data] - record_batch_data[f"{camera_name}_extrinsic"] = [camera_pose.array if camera_pose else None] + # TODO: Refactor how camera data is handed to the writer. + # This should be combined with configurations to write to log, sensor_root, or sensor_root as mp4. + if isinstance(fisheye_mei_camera_data, Path) or isinstance(fisheye_mei_camera_data, str): + fisheye_mei_camera_data = str(fisheye_mei_camera_data) + elif isinstance(fisheye_mei_camera_data, bytes): + fisheye_mei_camera_data = fisheye_mei_camera_data + elif isinstance(fisheye_mei_camera_data, np.ndarray): + _, encoded_img = cv2.imencode(".jpg", fisheye_mei_camera_data) + fisheye_mei_camera_data = encoded_img.tobytes() + + record_batch_data[f"{fisheye_mei_camera_name}_data"] = [fisheye_mei_camera_data] + record_batch_data[f"{fisheye_mei_camera_name}_extrinsic"] = [ + fisheye_mei_camera_pose.array if fisheye_mei_camera_pose else None + ] # -------------------------------------------------------------------------------------------------------------- # LiDARs @@ -285,21 +321,44 @@ def _build_schema(dataset_converter_config: DatasetConverterConfig, log_metadata ) # -------------------------------------------------------------------------------------------------------------- - # Cameras + # Pinhole Cameras # -------------------------------------------------------------------------------------------------------------- - if dataset_converter_config.include_cameras: - for camera_type in log_metadata.camera_metadata.keys(): - camera_name = camera_type.serialize() + if dataset_converter_config.include_pinhole_cameras: + for pinhole_camera_type in log_metadata.pinhole_camera_metadata.keys(): + pinhole_camera_name = pinhole_camera_type.serialize() # Depending on the storage option, define the schema for camera data - if dataset_converter_config.camera_store_option == "path": - schema_list.append((f"{camera_name}_data", pa.string())) + if dataset_converter_config.pinhole_camera_store_option == "path": + schema_list.append((f"{pinhole_camera_name}_data", pa.string())) + + elif dataset_converter_config.pinhole_camera_store_option == "binary": + schema_list.append((f"{pinhole_camera_name}_data", pa.binary())) + + elif dataset_converter_config.pinhole_camera_store_option == "mp4": + raise NotImplementedError("MP4 format is not yet supported, but planned for future releases.") + + # Add camera pose + schema_list.append((f"{pinhole_camera_name}_extrinsic", pa.list_(pa.float64(), len(StateSE3Index)))) + + # -------------------------------------------------------------------------------------------------------------- + # Fisheye MEI Cameras + # -------------------------------------------------------------------------------------------------------------- + if dataset_converter_config.include_fisheye_mei_cameras: + for fisheye_mei_camera_type in log_metadata.fisheye_mei_camera_metadata.keys(): + fisheye_mei_camera_name = fisheye_mei_camera_type.serialize() + + # Depending on the storage option, define the schema for camera data + if dataset_converter_config.fisheye_mei_camera_store_option == "path": + schema_list.append((f"{fisheye_mei_camera_name}_data", pa.string())) + + elif dataset_converter_config.fisheye_mei_camera_store_option == "binary": + 
schema_list.append((f"{fisheye_mei_camera_name}_data", pa.binary())) - elif dataset_converter_config.camera_store_option == "binary": - schema_list.append((f"{camera_name}_data", pa.binary())) + elif dataset_converter_config.fisheye_mei_camera_store_option == "mp4": + raise NotImplementedError("MP4 format is not yet supported, but planned for future releases.") # Add camera pose - schema_list.append((f"{camera_name}_extrinsic", pa.list_(pa.float64(), len(StateSE3Index)))) + schema_list.append((f"{fisheye_mei_camera_name}_extrinsic", pa.list_(pa.float64(), len(StateSE3Index)))) # -------------------------------------------------------------------------------------------------------------- # LiDARs diff --git a/src/py123d/conversion/map_writer/gpkg_map_writer.py b/src/py123d/conversion/map_writer/gpkg_map_writer.py index 5e68a411..289e1cc2 100644 --- a/src/py123d/conversion/map_writer/gpkg_map_writer.py +++ b/src/py123d/conversion/map_writer/gpkg_map_writer.py @@ -188,45 +188,70 @@ def _write_line_layer(self, layer: MapLayer, line_object: AbstractLineMapObject) self._map_data[layer]["geometry"].append(line_object.shapely_linestring) -def _map_ids_to_integer( - map_dfs: Dict[MapLayer, gpd.GeoDataFrame], -) -> None: +def _map_ids_to_integer(map_dfs: Dict[MapLayer, gpd.GeoDataFrame]) -> None: + """Helper function to remap string IDs to integers in the map dataframes.""" # initialize id mappings lane_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.LANE]["id"]) + lane_group_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.LANE_GROUP]["id"]) + intersection_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.INTERSECTION]["id"]) + walkway_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.WALKWAY]["id"]) carpark_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.CARPARK]["id"]) generic_drivable_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.GENERIC_DRIVABLE]["id"]) - lane_group_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.LANE_GROUP]["id"]) - - # Adjust cross reference in map_dfs[MapLayer.LANE] and map_dfs[MapLayer.LANE_GROUP] - map_dfs[MapLayer.LANE]["lane_group_id"] = map_dfs[MapLayer.LANE]["lane_group_id"].map( - lane_group_id_mapping.str_to_int - ) - map_dfs[MapLayer.LANE_GROUP]["lane_ids"] = map_dfs[MapLayer.LANE_GROUP]["lane_ids"].apply( - lambda x: lane_id_mapping.map_list(x) - ) - - # Adjust predecessor/successor in map_dfs[MapLayer.LANE] and map_dfs[MapLayer.LANE_GROUP] - for column in ["predecessor_ids", "successor_ids"]: - map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply(lambda x: lane_id_mapping.map_list(x)) - map_dfs[MapLayer.LANE_GROUP][column] = map_dfs[MapLayer.LANE_GROUP][column].apply( + road_line_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.ROAD_LINE]["id"]) + road_edge_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.ROAD_EDGE]["id"]) + + # 1. Remap lane ids in LANE layer + if len(map_dfs[MapLayer.LANE]) > 0: + map_dfs[MapLayer.LANE]["id"] = map_dfs[MapLayer.LANE]["id"].apply(lambda x: lane_id_mapping.map(x)) + map_dfs[MapLayer.LANE]["lane_group_id"] = map_dfs[MapLayer.LANE]["lane_group_id"].apply( + lambda x: lane_group_id_mapping.map(x) + ) + for column in ["predecessor_ids", "successor_ids"]: + map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply(lambda x: lane_id_mapping.map_list(x)) + for column in ["left_lane_id", "right_lane_id"]: + map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply(lambda x: lane_id_mapping.map(x)) + + # 2. 
Remap lane group ids in LANE_GROUP + if len(map_dfs[MapLayer.LANE_GROUP]) > 0: + map_dfs[MapLayer.LANE_GROUP]["id"] = map_dfs[MapLayer.LANE_GROUP]["id"].apply( + lambda x: lane_group_id_mapping.map(x) + ) + map_dfs[MapLayer.LANE_GROUP]["lane_ids"] = map_dfs[MapLayer.LANE_GROUP]["lane_ids"].apply( + lambda x: lane_id_mapping.map_list(x) + ) + map_dfs[MapLayer.LANE_GROUP]["intersection_id"] = map_dfs[MapLayer.LANE_GROUP]["intersection_id"].apply( + lambda x: intersection_id_mapping.map(x) + ) + for column in ["predecessor_ids", "successor_ids"]: + map_dfs[MapLayer.LANE_GROUP][column] = map_dfs[MapLayer.LANE_GROUP][column].apply( + lambda x: lane_group_id_mapping.map_list(x) + ) + + # 3. Remap lane group ids in INTERSECTION + if len(map_dfs[MapLayer.INTERSECTION]) > 0: + map_dfs[MapLayer.INTERSECTION]["id"] = map_dfs[MapLayer.INTERSECTION]["id"].apply( + lambda x: intersection_id_mapping.map(x) + ) + map_dfs[MapLayer.INTERSECTION]["lane_group_ids"] = map_dfs[MapLayer.INTERSECTION]["lane_group_ids"].apply( lambda x: lane_group_id_mapping.map_list(x) ) - for column in ["left_lane_id", "right_lane_id"]: - map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply( - lambda x: str(lane_id_mapping.str_to_int[x]) if pd.notna(x) and x is not None else x + # 4. Remap ids in other layers + if len(map_dfs[MapLayer.WALKWAY]) > 0: + map_dfs[MapLayer.WALKWAY]["id"] = map_dfs[MapLayer.WALKWAY]["id"].apply(lambda x: walkway_id_mapping.map(x)) + if len(map_dfs[MapLayer.CARPARK]) > 0: + map_dfs[MapLayer.CARPARK]["id"] = map_dfs[MapLayer.CARPARK]["id"].apply(lambda x: carpark_id_mapping.map(x)) + if len(map_dfs[MapLayer.GENERIC_DRIVABLE]) > 0: + map_dfs[MapLayer.GENERIC_DRIVABLE]["id"] = map_dfs[MapLayer.GENERIC_DRIVABLE]["id"].apply( + lambda x: generic_drivable_id_mapping.map(x) + ) + if len(map_dfs[MapLayer.ROAD_LINE]) > 0: + map_dfs[MapLayer.ROAD_LINE]["id"] = map_dfs[MapLayer.ROAD_LINE]["id"].apply( + lambda x: road_line_id_mapping.map(x) + ) + if len(map_dfs[MapLayer.ROAD_EDGE]) > 0: + map_dfs[MapLayer.ROAD_EDGE]["id"] = map_dfs[MapLayer.ROAD_EDGE]["id"].apply( + lambda x: road_edge_id_mapping.map(x) ) - - map_dfs[MapLayer.LANE]["id"] = map_dfs[MapLayer.LANE]["id"].map(lane_id_mapping.str_to_int) - map_dfs[MapLayer.WALKWAY]["id"] = map_dfs[MapLayer.WALKWAY]["id"].map(walkway_id_mapping.str_to_int) - map_dfs[MapLayer.CARPARK]["id"] = map_dfs[MapLayer.CARPARK]["id"].map(carpark_id_mapping.str_to_int) - map_dfs[MapLayer.GENERIC_DRIVABLE]["id"] = map_dfs[MapLayer.GENERIC_DRIVABLE]["id"].map( - generic_drivable_id_mapping.str_to_int - ) - map_dfs[MapLayer.LANE_GROUP]["id"] = map_dfs[MapLayer.LANE_GROUP]["id"].map(lane_group_id_mapping.str_to_int) - - map_dfs[MapLayer.INTERSECTION]["lane_group_ids"] = map_dfs[MapLayer.INTERSECTION]["lane_group_ids"].apply( - lambda x: lane_group_id_mapping.map_list(x) - ) diff --git a/src/py123d/conversion/map_writer/utils/gpkg_utils.py b/src/py123d/conversion/map_writer/utils/gpkg_utils.py index 6ad4e559..2b9ab334 100644 --- a/src/py123d/conversion/map_writer/utils/gpkg_utils.py +++ b/src/py123d/conversion/map_writer/utils/gpkg_utils.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional import pandas as pd @@ -16,14 +16,29 @@ def __post_init__(self): @classmethod def from_series(cls, series: pd.Series) -> IntIDMapping: - unique_ids = series.unique() + # Drop NaN values and convert all to strings + unique_ids = series.dropna().astype(str).unique() 
str_to_int = {str_id: idx for idx, str_id in enumerate(unique_ids)} return IntIDMapping(str_to_int) - def map_list(self, id_list: Optional[List[str]]) -> pd.Series: + def map(self, str_like: Any) -> Optional[int]: + # Handle NaN and None values + if pd.isna(str_like) or str_like is None: + return None + + # Convert to string for uniform handling + str_key = str(str_like) + return self.str_to_int.get(str_key, None) + + def map_list(self, id_list: Optional[List[str]]) -> List[int]: if id_list is None: return [] - return [self.str_to_int.get(id_str, -1) for id_str in id_list] + list_ = [] + for id_str in id_list: + mapped_id = self.map(id_str) + if mapped_id is not None: + list_.append(mapped_id) + return list_ class IncrementalIntIDMapping: diff --git a/src/py123d/conversion/registry/lidar_index_registry.py b/src/py123d/conversion/registry/lidar_index_registry.py index b58e4fce..a65903b4 100644 --- a/src/py123d/conversion/registry/lidar_index_registry.py +++ b/src/py123d/conversion/registry/lidar_index_registry.py @@ -28,14 +28,14 @@ def XYZ(self) -> slice: @register_lidar_index -class DefaultLidarIndex(LiDARIndex): +class DefaultLiDARIndex(LiDARIndex): X = 0 Y = 1 Z = 2 @register_lidar_index -class NuPlanLidarIndex(LiDARIndex): +class NuPlanLiDARIndex(LiDARIndex): X = 0 Y = 1 Z = 2 @@ -44,7 +44,7 @@ class NuPlanLidarIndex(LiDARIndex): @register_lidar_index -class CARLALidarIndex(LiDARIndex): +class CARLALiDARIndex(LiDARIndex): X = 0 Y = 1 Z = 2 @@ -52,7 +52,7 @@ class CARLALidarIndex(LiDARIndex): @register_lidar_index -class WOPDLidarIndex(LiDARIndex): +class WOPDLiDARIndex(LiDARIndex): RANGE = 0 INTENSITY = 1 ELONGATION = 2 @@ -62,7 +62,15 @@ class WOPDLidarIndex(LiDARIndex): @register_lidar_index -class AVSensorLidarIndex(LiDARIndex): +class Kitti360LiDARIndex(LiDARIndex): + X = 0 + Y = 1 + Z = 2 + INTENSITY = 3 + + +@register_lidar_index +class AVSensorLiDARIndex(LiDARIndex): """Argoverse Sensor LiDAR Indexing Scheme. NOTE: The LiDAR files also include, 'offset_ns', which we do not currently include. 
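A short usage sketch of the reworked `IntIDMapping` (assuming the import path from this diff): `from_series` drops NaNs and stringifies IDs before enumerating, `map` returns `None` for unknown or missing keys, and `map_list` now skips unmapped entries instead of inserting `-1` placeholders.

```python
import pandas as pd

from py123d.conversion.map_writer.utils.gpkg_utils import IntIDMapping

mapping = IntIDMapping.from_series(pd.Series(["101", "102", None, "101"]))
assert mapping.str_to_int == {"101": 0, "102": 1}  # NaN dropped, duplicates collapsed
assert mapping.map(101) == 0                       # non-string keys are stringified
assert mapping.map(None) is None                   # None/NaN map to None, not an error
assert mapping.map_list(["102", "999"]) == [1]     # unknown "999" is skipped, not -1
```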
@@ -75,7 +83,7 @@ class AVSensorLidarIndex(LiDARIndex): @register_lidar_index -class PandasetLidarIndex(LiDARIndex): +class PandasetLiDARIndex(LiDARIndex): """Pandaset LiDAR Indexing Scheme.""" X = 0 @@ -85,7 +93,7 @@ class PandasetLidarIndex(LiDARIndex): @register_lidar_index -class NuScenesLidarIndex(LiDARIndex): +class NuScenesLiDARIndex(LiDARIndex): X = 0 Y = 1 Z = 2 diff --git a/src/py123d/conversion/sensor_io/camera/jpeg_camera_io.py b/src/py123d/conversion/sensor_io/camera/jpeg_camera_io.py index 327db77c..4e9684e7 100644 --- a/src/py123d/conversion/sensor_io/camera/jpeg_camera_io.py +++ b/src/py123d/conversion/sensor_io/camera/jpeg_camera_io.py @@ -4,7 +4,7 @@ from omegaconf import DictConfig from pyparsing import Union -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraMetadata +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraMetadata from py123d.script.utils.dataset_path_utils import get_dataset_paths DATASET_PATHS: DictConfig = get_dataset_paths() diff --git a/src/py123d/conversion/sensor_io/lidar/draco_lidar_io.py b/src/py123d/conversion/sensor_io/lidar/draco_lidar_io.py index 80948ef3..61473f08 100644 --- a/src/py123d/conversion/sensor_io/lidar/draco_lidar_io.py +++ b/src/py123d/conversion/sensor_io/lidar/draco_lidar_io.py @@ -4,7 +4,7 @@ import numpy as np import numpy.typing as npt -from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata +from py123d.datatypes.sensors.lidar import LiDAR, LiDARMetadata # TODO: add to config DRACO_QUANTIZATION_BITS: Final[int] = 14 diff --git a/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py b/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py index 82c08efc..1a9e2583 100644 --- a/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py +++ b/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py @@ -6,7 +6,7 @@ from omegaconf import DictConfig from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType from py123d.script.utils.dataset_path_utils import get_dataset_paths DATASET_PATHS: DictConfig = get_dataset_paths() @@ -15,6 +15,7 @@ "av2-sensor": DATASET_PATHS.av2_sensor_data_root, "wopd": DATASET_PATHS.wopd_data_root, "pandaset": DATASET_PATHS.pandaset_data_root, + "kitti360": DATASET_PATHS.kitti360_data_root, "nuscenes": DATASET_PATHS.nuscenes_sensor_root, } @@ -35,7 +36,7 @@ def load_lidar_pcs_from_file( assert sensor_root is not None, f"Dataset path for sensor loading not found for dataset: {log_metadata.dataset}" full_lidar_path = Path(sensor_root) / relative_path - assert full_lidar_path.exists(), f"LiDAR file not found: {full_lidar_path}" + assert full_lidar_path.exists(), f"LiDAR file not found: {sensor_root} / {relative_path}" # NOTE: We move data specific import into if-else block, to avoid data specific import errors if log_metadata.dataset == "nuplan": @@ -58,10 +59,16 @@ def load_lidar_pcs_from_file( lidar_pcs_dict = load_pandaset_lidars_pcs_from_file(full_lidar_path, index) + elif log_metadata.dataset == "kitti360": + from py123d.conversion.datasets.kitti360.kitti360_sensor_io import load_kitti360_lidar_pcs_from_file + + lidar_pcs_dict = load_kitti360_lidar_pcs_from_file(full_lidar_path, log_metadata) + elif log_metadata.dataset == "nuscenes": from py123d.conversion.datasets.nuscenes.nuscenes_sensor_io import load_nuscenes_lidar_pcs_from_file lidar_pcs_dict = load_nuscenes_lidar_pcs_from_file(full_lidar_path, log_metadata) 
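The KITTI-360 branch imports `load_kitti360_lidar_pcs_from_file` lazily like the other datasets, but the loader itself is outside this diff. By analogy with the nuScenes loader above and the four-channel `Kitti360LiDARIndex` (X, Y, Z, INTENSITY), a plausible sketch looks as follows; the ego-frame transform is an assumption, mirroring the nuScenes version:

```python
from pathlib import Path
from typing import Dict

import numpy as np

from py123d.conversion.registry.lidar_index_registry import Kitti360LiDARIndex
from py123d.datatypes.scene.scene_metadata import LogMetadata
from py123d.datatypes.sensors.lidar import LiDARType


def load_kitti360_lidar_pcs_sketch(pcd_path: Path, log_metadata: LogMetadata) -> Dict[LiDARType, np.ndarray]:
    # KITTI-360 Velodyne scans are flat float32 binaries with 4 channels per point.
    lidar_pc = np.fromfile(pcd_path, dtype=np.float32).reshape(-1, len(Kitti360LiDARIndex))
    # The real loader may additionally move points into the ego frame using
    # log_metadata.lidar_metadata[LiDARType.LIDAR_TOP].extrinsic, as the nuScenes loader does.
    return {LiDARType.LIDAR_TOP: lidar_pc}
```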
+ else: raise NotImplementedError(f"Loading LiDAR data for dataset {log_metadata.dataset} is not implemented.") diff --git a/src/py123d/conversion/sensor_io/lidar/laz_lidar_io.py b/src/py123d/conversion/sensor_io/lidar/laz_lidar_io.py index cedfb2b6..b109c7ca 100644 --- a/src/py123d/conversion/sensor_io/lidar/laz_lidar_io.py +++ b/src/py123d/conversion/sensor_io/lidar/laz_lidar_io.py @@ -4,7 +4,7 @@ import numpy as np import numpy.typing as npt -from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata +from py123d.datatypes.sensors.lidar import LiDAR, LiDARMetadata def encode_lidar_pc_as_laz_binary(point_cloud: npt.NDArray[np.float32], lidar_metadata: LiDARMetadata) -> bytes: diff --git a/src/py123d/datatypes/maps/abstract_map_objects.py b/src/py123d/datatypes/maps/abstract_map_objects.py index de43bc81..baea8d87 100644 --- a/src/py123d/datatypes/maps/abstract_map_objects.py +++ b/src/py123d/datatypes/maps/abstract_map_objects.py @@ -2,11 +2,10 @@ import abc from typing import List, Optional, Tuple, Union -from typing_extensions import TypeAlias - import shapely.geometry as geom import trimesh +from typing_extensions import TypeAlias from py123d.datatypes.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType from py123d.geometry import Polyline2D, Polyline3D, PolylineSE2 diff --git a/src/py123d/datatypes/scene/abstract_scene.py b/src/py123d/datatypes/scene/abstract_scene.py index 0270db09..33611539 100644 --- a/src/py123d/datatypes/scene/abstract_scene.py +++ b/src/py123d/datatypes/scene/abstract_scene.py @@ -7,8 +7,9 @@ from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper from py123d.datatypes.maps.abstract_map import AbstractMap from py123d.datatypes.scene.scene_metadata import LogMetadata, SceneExtractionMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARType +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDAR, LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import VehicleParameters @@ -53,7 +54,15 @@ def get_route_lane_group_ids(self, iteration: int) -> Optional[List[int]]: raise NotImplementedError @abc.abstractmethod - def get_camera_at_iteration(self, iteration: int, camera_type: PinholeCameraType) -> Optional[PinholeCamera]: + def get_pinhole_camera_at_iteration( + self, iteration: int, camera_type: PinholeCameraType + ) -> Optional[PinholeCamera]: + raise NotImplementedError + + @abc.abstractmethod + def get_fisheye_mei_camera_at_iteration( + self, iteration: int, camera_type: FisheyeMEICameraType + ) -> Optional[FisheyeMEICamera]: raise NotImplementedError @abc.abstractmethod @@ -78,8 +87,12 @@ def vehicle_parameters(self) -> VehicleParameters: return self.log_metadata.vehicle_parameters @property - def available_camera_types(self) -> List[PinholeCameraType]: - return list(self.log_metadata.camera_metadata.keys()) + def available_pinhole_camera_types(self) -> List[PinholeCameraType]: + return list(self.log_metadata.pinhole_camera_metadata.keys()) + + @property + def available_fisheye_mei_camera_types(self) -> List[FisheyeMEICameraType]: + return 
list(self.log_metadata.fisheye_mei_camera_metadata.keys())

     @property
     def available_lidar_types(self) -> List[LiDARType]:
diff --git a/src/py123d/datatypes/scene/arrow/arrow_scene.py b/src/py123d/datatypes/scene/arrow/arrow_scene.py
index 7d89b786..79fd4d87 100644
--- a/src/py123d/datatypes/scene/arrow/arrow_scene.py
+++ b/src/py123d/datatypes/scene/arrow/arrow_scene.py
@@ -19,8 +19,9 @@
 )
 from py123d.datatypes.scene.arrow.utils.arrow_metadata_utils import get_log_metadata_from_arrow
 from py123d.datatypes.scene.scene_metadata import LogMetadata, SceneExtractionMetadata
-from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType
-from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARType
+from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType
+from py123d.datatypes.sensors.lidar import LiDAR, LiDARType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraType
 from py123d.datatypes.time.time_point import TimePoint
 from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3

@@ -37,11 +38,11 @@ def __init__(
         self._log_metadata: LogMetadata = get_log_metadata_from_arrow(arrow_file_path)

         with pa.memory_map(str(self._arrow_file_path), "r") as source:
-            reader = pa.ipc.open_file(source)
-            table = reader.read_all()
+            reader = pa.ipc.open_file(source)
+            table = reader.read_all()
             num_rows = table.num_rows
-            initial_uuid = table['uuid'][0].as_py()
-
+            initial_uuid = table["uuid"][0].as_py()
+
         if scene_extraction_metadata is None:
             scene_extraction_metadata = SceneExtractionMetadata(
                 initial_uuid=initial_uuid,
@@ -127,16 +128,31 @@ def get_route_lane_group_ids(self, iteration: int) -> Optional[List[int]]:
             route_lane_group_ids = table["route_lane_group_ids"][self._get_table_index(iteration)].as_py()
         return route_lane_group_ids

-    def get_camera_at_iteration(self, iteration: int, camera_type: PinholeCameraType) -> Optional[PinholeCamera]:
-        camera: Optional[PinholeCamera] = None
-        if camera_type in self.available_camera_types:
-            camera = get_camera_from_arrow_table(
+    def get_pinhole_camera_at_iteration(
+        self, iteration: int, camera_type: PinholeCameraType
+    ) -> Optional[PinholeCamera]:
+        pinhole_camera: Optional[PinholeCamera] = None
+        if camera_type in self.available_pinhole_camera_types:
+            pinhole_camera = get_camera_from_arrow_table(
+                self._get_recording_table(),
+                self._get_table_index(iteration),
+                camera_type,
+                self.log_metadata,
+            )
+        return pinhole_camera
+
+    def get_fisheye_mei_camera_at_iteration(
+        self, iteration: int, camera_type: FisheyeMEICameraType
+    ) -> Optional[FisheyeMEICamera]:
+        fisheye_mei_camera: Optional[FisheyeMEICamera] = None
+        # NOTE: check the fisheye availability list here, not the pinhole one.
+        if camera_type in self.available_fisheye_mei_camera_types:
+            fisheye_mei_camera = get_camera_from_arrow_table(
                 self._get_recording_table(),
                 self._get_table_index(iteration),
                 camera_type,
                 self.log_metadata,
             )
-        return camera
+        return fisheye_mei_camera

     def get_lidar_at_iteration(self, iteration: int, lidar_type: LiDARType) -> Optional[LiDAR]:
         lidar: Optional[LiDAR] = None
diff --git a/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py b/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py
index e5840f76..2afebbba 100644
--- a/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py
+++ b/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py
@@ -102,22 +102,23 @@ def _get_scene_extraction_metadatas(log_path: Union[str, Path], filter: SceneFil
    recording_table =
get_lru_cached_arrow_table(log_path) log_metadata = get_log_metadata_from_arrow(log_path) + start_idx = int(filter.history_s / log_metadata.timestep_seconds) if filter.history_s is not None else 0 + end_idx = ( + len(recording_table) - int(filter.duration_s / log_metadata.timestep_seconds) + if filter.duration_s is not None + else len(recording_table) + ) + # 1. Filter location if ( filter.locations is not None and log_metadata.map_metadata is not None and log_metadata.map_metadata.location not in filter.locations ): - return scene_extraction_metadatas + pass - start_idx = int(filter.history_s / log_metadata.timestep_seconds) if filter.history_s is not None else 0 - end_idx = ( - len(recording_table) - int(filter.duration_s / log_metadata.timestep_seconds) - if filter.duration_s is not None - else len(recording_table) - ) - if filter.duration_s is None: - return [ + elif filter.duration_s is None: + scene_extraction_metadatas.append( SceneExtractionMetadata( initial_uuid=str(recording_table["uuid"][start_idx].as_py()), initial_idx=start_idx, @@ -125,48 +126,75 @@ def _get_scene_extraction_metadatas(log_path: Union[str, Path], filter: SceneFil history_s=filter.history_s if filter.history_s is not None else 0.0, iteration_duration_s=log_metadata.timestep_seconds, ) - ] - - scene_uuid_set = set(filter.scene_uuids) if filter.scene_uuids is not None else None - - for idx in range(start_idx, end_idx): - scene_extraction_metadata: Optional[SceneExtractionMetadata] = None - - if scene_uuid_set is None: - scene_extraction_metadata = SceneExtractionMetadata( - initial_uuid=str(recording_table["uuid"][idx].as_py()), - initial_idx=idx, - duration_s=filter.duration_s, - history_s=filter.history_s, - iteration_duration_s=log_metadata.timestep_seconds, - ) - elif str(recording_table["uuid"][idx]) in scene_uuid_set: - scene_extraction_metadata = SceneExtractionMetadata( - initial_uuid=str(recording_table["uuid"][idx].as_py()), - initial_idx=idx, - duration_s=filter.duration_s, - history_s=filter.history_s, - iteration_duration_s=log_metadata.timestep_seconds, - ) + ) + else: + scene_uuid_set = set(filter.scene_uuids) if filter.scene_uuids is not None else None + for idx in range(start_idx, end_idx): + scene_extraction_metadata: Optional[SceneExtractionMetadata] = None + + if scene_uuid_set is None: + scene_extraction_metadata = SceneExtractionMetadata( + initial_uuid=str(recording_table["uuid"][idx].as_py()), + initial_idx=idx, + duration_s=filter.duration_s, + history_s=filter.history_s, + iteration_duration_s=log_metadata.timestep_seconds, + ) + elif str(recording_table["uuid"][idx]) in scene_uuid_set: + scene_extraction_metadata = SceneExtractionMetadata( + initial_uuid=str(recording_table["uuid"][idx].as_py()), + initial_idx=idx, + duration_s=filter.duration_s, + history_s=filter.history_s, + iteration_duration_s=log_metadata.timestep_seconds, + ) - if scene_extraction_metadata is not None: - # Check of timestamp threshold exceeded between previous scene, if specified in filter - if filter.timestamp_threshold_s is not None and len(scene_extraction_metadatas) > 0: - iteration_delta = idx - scene_extraction_metadatas[-1].initial_idx - if (iteration_delta * log_metadata.timestep_seconds) < filter.timestamp_threshold_s: + if scene_extraction_metadata is not None: + # Check of timestamp threshold exceeded between previous scene, if specified in filter + if filter.timestamp_threshold_s is not None and len(scene_extraction_metadatas) > 0: + iteration_delta = idx - scene_extraction_metadatas[-1].initial_idx + 
if (iteration_delta * log_metadata.timestep_seconds) < filter.timestamp_threshold_s: + continue + + scene_extraction_metadatas.append(scene_extraction_metadata) + + scene_extraction_metadatas_ = [] + for scene_extraction_metadata in scene_extraction_metadatas: + + add_scene = True + start_idx = scene_extraction_metadata.initial_idx + if filter.pinhole_camera_types is not None: + for pinhole_camera_type in filter.pinhole_camera_types: + column_name = f"{pinhole_camera_type.serialize()}_data" + + if ( + pinhole_camera_type in log_metadata.pinhole_camera_metadata + and column_name in recording_table.schema.names + and recording_table[column_name][start_idx].as_py() is not None + ): continue - - # Check if camera data is available for the scene, if specified in filter - # NOTE: We only check camera availability at the initial index of the scene. - if filter.camera_types is not None: - cameras_available = [ - recording_table[f"{camera_type.serialize()}_data"][start_idx].as_py() is not None - for camera_type in filter.camera_types - ] - if not all(cameras_available): + else: + add_scene = False + break + + if filter.fisheye_mei_camera_types is not None: + for fisheye_mei_camera_type in filter.fisheye_mei_camera_types: + column_name = f"{fisheye_mei_camera_type.serialize()}_data" + + if ( + fisheye_mei_camera_type in log_metadata.fisheye_mei_camera_metadata + and column_name in recording_table.schema.names + and recording_table[column_name][start_idx].as_py() is not None + ): continue + else: + add_scene = False + break + + if add_scene: + scene_extraction_metadatas_.append(scene_extraction_metadata) - scene_extraction_metadatas.append(scene_extraction_metadata) + scene_extraction_metadatas = scene_extraction_metadatas_ del recording_table, log_metadata return scene_extraction_metadatas diff --git a/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py b/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py index 6345df02..8aea7801 100644 --- a/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py +++ b/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union import cv2 import numpy as np @@ -7,6 +7,7 @@ import pyarrow as pa from omegaconf import DictConfig +from py123d.conversion.registry.lidar_index_registry import DefaultLiDARIndex from py123d.conversion.sensor_io.lidar.draco_lidar_io import load_lidar_from_draco_binary from py123d.conversion.sensor_io.lidar.file_lidar_io import load_lidar_pcs_from_file from py123d.conversion.sensor_io.lidar.laz_lidar_io import load_lidar_from_laz_binary @@ -23,9 +24,9 @@ TrafficLightStatus, ) from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata, LiDARType -from py123d.datatypes.sensors.lidar.lidar_index import DefaultLidarIndex +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDAR, LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import VehicleParameters @@ -39,6 +40,7 @@ "nuscenes": DATASET_PATHS.nuscenes_data_root, "wopd": 
DATASET_PATHS.wopd_data_root,
    "pandaset": DATASET_PATHS.pandaset_data_root,
+    "kitti360": DATASET_PATHS.kitti360_data_root,
 }

@@ -106,9 +108,9 @@ def get_traffic_light_detections_from_arrow_table(arrow_table: pa.Table, index:
 def get_camera_from_arrow_table(
     arrow_table: pa.Table,
     index: int,
-    camera_type: PinholeCameraType,
+    camera_type: Union[PinholeCameraType, FisheyeMEICameraType],
     log_metadata: LogMetadata,
-) -> PinholeCamera:
+) -> Union[PinholeCamera, FisheyeMEICamera]:
     camera_name = camera_type.serialize()
     table_data = arrow_table[f"{camera_name}_data"][index].as_py()
@@ -135,11 +137,19 @@
     else:
         raise NotImplementedError("Only string file paths for camera data are supported.")

-    return PinholeCamera(
-        metadata=log_metadata.camera_metadata[camera_type],
-        image=image,
-        extrinsic=extrinsic,
-    )
+    # NOTE: Pinhole and fisheye metadata live in separate dicts on LogMetadata,
+    # so dispatch on the camera type rather than probing for mirror_parameter.
+    if isinstance(camera_type, FisheyeMEICameraType):
+        return FisheyeMEICamera(
+            metadata=log_metadata.fisheye_mei_camera_metadata[camera_type],
+            image=image,
+            extrinsic=extrinsic,
+        )
+    else:
+        return PinholeCamera(
+            metadata=log_metadata.pinhole_camera_metadata[camera_type],
+            image=image,
+            extrinsic=extrinsic,
+        )

 def get_lidar_from_arrow_table(
@@ -159,7 +169,6 @@
     if lidar_column_name in arrow_table.schema.names:
         lidar_data = arrow_table[lidar_column_name][index].as_py()
-
     if isinstance(lidar_data, str):
         lidar_pc_dict = load_lidar_pcs_from_file(relative_path=lidar_data, log_metadata=log_metadata, index=index)
         if lidar_type == LiDARType.LIDAR_MERGED:
@@ -168,7 +177,7 @@
             lidar = LiDAR(
                 metadata=LiDARMetadata(
                     lidar_type=LiDARType.LIDAR_MERGED,
-                    lidar_index=DefaultLidarIndex,
+                    lidar_index=DefaultLiDARIndex,
                     extrinsic=None,
                 ),
                 point_cloud=merged_pc,
@@ -182,7 +191,7 @@
         lidar_metadata = log_metadata.lidar_metadata[lidar_type]
         if lidar_data.startswith(b"DRACO"):
             # NOTE: DRACO only allows XYZ compression, so we need to override the lidar index here.
-            lidar_metadata.lidar_index = DefaultLidarIndex
+            lidar_metadata.lidar_index = DefaultLiDARIndex

             lidar = load_lidar_from_draco_binary(lidar_data, lidar_metadata)
         elif lidar_data.startswith(b"LASF"):
diff --git a/src/py123d/datatypes/scene/scene_filter.py b/src/py123d/datatypes/scene/scene_filter.py
index 8d6cf102..62ad9301 100644
--- a/src/py123d/datatypes/scene/scene_filter.py
+++ b/src/py123d/datatypes/scene/scene_filter.py
@@ -1,7 +1,9 @@
 from dataclasses import dataclass
-from typing import List, Optional
+from typing import List, Optional, Type, Union

-from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType
+from py123d.common.utils.enums import SerialIntEnum
+from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType

 # TODO: Add more filter options (e.g. scene tags, ego movement, or similar criteria)
@@ -11,7 +13,6 @@ class SceneFilter:

     split_types: Optional[List[str]] = None
     split_names: Optional[List[str]] = None
-    # scene_tags: List[str] = None
     log_names: Optional[List[str]] = None
     locations: Optional[List[str]] = None  # TODO:

@@ -23,22 +24,21 @@
     duration_s: Optional[float] = 10.0
     history_s: Optional[float] = 3.0

-    camera_types: Optional[List[PinholeCameraType]] = None
+    pinhole_camera_types: Optional[List[PinholeCameraType]] = None
+    fisheye_mei_camera_types: Optional[List[FisheyeMEICameraType]] = None

     max_num_scenes: Optional[int] = None
     shuffle: bool = False

     def __post_init__(self):
-        if self.camera_types is not None:
-            assert isinstance(self.camera_types, list), "camera_types must be a list of CameraType"
-            camera_types = []
-            for camera_type in self.camera_types:
-                if isinstance(camera_type, str):
-                    camera_type = PinholeCameraType.deserialize[camera_type]
-                    camera_types.append(camera_type)
-                elif isinstance(camera_type, int):
-                    camera_type = PinholeCameraType(camera_type)
-                    camera_types.append(camera_type)
-                else:
-                    raise ValueError(f"Invalid camera type: {camera_type}")
-            self.camera_types = camera_types
+        def _resolve_enum_arguments(
+            serial_enum_cls: Type[SerialIntEnum], input: Optional[List[Union[int, str, SerialIntEnum]]]
+        ) -> Optional[List[SerialIntEnum]]:
+            if input is None:
+                return None
+            assert isinstance(input, list), f"input must be a list of {serial_enum_cls.__name__}"
+            return [serial_enum_cls.from_arbitrary(value) for value in input]
+
+        self.pinhole_camera_types = _resolve_enum_arguments(PinholeCameraType, self.pinhole_camera_types)
+        self.fisheye_mei_camera_types = _resolve_enum_arguments(FisheyeMEICameraType, self.fisheye_mei_camera_types)
diff --git a/src/py123d/datatypes/scene/scene_metadata.py b/src/py123d/datatypes/scene/scene_metadata.py
index eb42019c..751b9e04 100644
--- a/src/py123d/datatypes/scene/scene_metadata.py
+++ b/src/py123d/datatypes/scene/scene_metadata.py
@@ -5,8 +5,9 @@
 import py123d
 from py123d.datatypes.maps.map_metadata import MapMetadata
-from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraMetadata, PinholeCameraType
-from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType
+from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraMetadata, FisheyeMEICameraType
+from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCameraMetadata, PinholeCameraType
 from py123d.datatypes.vehicle_state.vehicle_parameters import VehicleParameters

@@ -20,7 +21,8 @@ class LogMetadata:
     timestep_seconds: float
     vehicle_parameters: Optional[VehicleParameters] = None
-    camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = field(default_factory=dict)
+    pinhole_camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = field(default_factory=dict)
+    fisheye_mei_camera_metadata: Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata] = field(default_factory=dict)
     lidar_metadata: Dict[LiDARType, LiDARMetadata] = field(default_factory=dict)
     map_metadata: Optional[MapMetadata] = None

@@ -29,17 +31,31 @@
     @classmethod
     def from_dict(cls, data_dict: Dict) -> LogMetadata:
+        # Ego Vehicle Parameters
         if data_dict["vehicle_parameters"] is not None:
             data_dict["vehicle_parameters"] = VehicleParameters.from_dict(data_dict["vehicle_parameters"])

-        data_dict["camera_metadata"] = {
-            PinholeCameraType.deserialize(key): PinholeCameraMetadata.from_dict(value)
-            for key, value in data_dict.get("camera_metadata", {}).items()
-        }
+        # Pinhole Camera Metadata
+        pinhole_camera_metadata = {}
+        for key, value in data_dict.get("pinhole_camera_metadata", {}).items():
+            pinhole_camera_metadata[PinholeCameraType.deserialize(key)] = PinholeCameraMetadata.from_dict(value)
+        data_dict["pinhole_camera_metadata"] = pinhole_camera_metadata
+
+        # Fisheye MEI Camera Metadata
+        fisheye_mei_camera_metadata = {}
+        for key, value in data_dict.get("fisheye_mei_camera_metadata", {}).items():
+            fisheye_mei_camera_metadata[FisheyeMEICameraType.deserialize(key)] = FisheyeMEICameraMetadata.from_dict(
+                value
+            )
+        data_dict["fisheye_mei_camera_metadata"] = fisheye_mei_camera_metadata
+
+        # LiDAR Metadata
         data_dict["lidar_metadata"] = {
             LiDARType.deserialize(key): LiDARMetadata.from_dict(value)
             for key, value in data_dict.get("lidar_metadata", {}).items()
         }
+
+        # Map Metadata
         if data_dict["map_metadata"] is not None:
             data_dict["map_metadata"] = MapMetadata.from_dict(data_dict["map_metadata"])

@@ -48,7 +64,12 @@
     def to_dict(self) -> Dict:
         data_dict = asdict(self)
         data_dict["vehicle_parameters"] = self.vehicle_parameters.to_dict() if self.vehicle_parameters else None
-        data_dict["camera_metadata"] = {key.serialize(): value.to_dict() for key, value in self.camera_metadata.items()}
+        data_dict["pinhole_camera_metadata"] = {
+            key.serialize(): value.to_dict() for key, value in self.pinhole_camera_metadata.items()
+        }
+        data_dict["fisheye_mei_camera_metadata"] = {
+            key.serialize(): value.to_dict() for key, value in self.fisheye_mei_camera_metadata.items()
+        }
         data_dict["lidar_metadata"] = {key.serialize(): value.to_dict() for key, value in self.lidar_metadata.items()}
         data_dict["map_metadata"] = self.map_metadata.to_dict() if self.map_metadata else None
         return data_dict
diff --git a/src/py123d/datatypes/sensors/__init__.py b/src/py123d/datatypes/sensors/__init__.py
index 89175f33..54cd70a1 100644
--- a/src/py123d/datatypes/sensors/__init__.py
+++ b/src/py123d/datatypes/sensors/__init__.py
@@ -1,4 +1,4 @@
-from py123d.datatypes.sensors.camera.pinhole_camera import (
+from py123d.datatypes.sensors.pinhole_camera import (
     PinholeCameraType,
     PinholeCamera,
     PinholeIntrinsicsIndex,
@@ -7,3 +7,16 @@
     PinholeDistortion,
     PinholeCameraMetadata,
 )
+from py123d.datatypes.sensors.fisheye_mei_camera import (
+    FisheyeMEICameraType,
+    FisheyeMEICamera,
+    FisheyeMEIDistortion,
+    FisheyeMEIDistortionIndex,
+    FisheyeMEIProjectionIndex,
+    FisheyeMEIProjection,
+    FisheyeMEICameraMetadata,
+)
+from py123d.datatypes.sensors.lidar import (
+    LiDARType,
+    LiDARMetadata,
+    LiDAR,
+)
diff --git a/src/py123d/datatypes/sensors/fisheye_mei_camera.py b/src/py123d/datatypes/sensors/fisheye_mei_camera.py
new file mode 100644
index 00000000..d8f53f14
--- /dev/null
+++ b/src/py123d/datatypes/sensors/fisheye_mei_camera.py
@@ -0,0 +1,193 @@
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from enum import IntEnum
+from typing import Any, Dict, Optional, Tuple
+
+import numpy as np
+import numpy.typing as npt
+
+from py123d.common.utils.enums import SerialIntEnum
+from py123d.common.utils.mixin import ArrayMixin
+from py123d.geometry.se import StateSE3
+
+
+class FisheyeMEICameraType(SerialIntEnum):
+    """
+    Enum for fisheye MEI cameras in py123d.
+ """ + + FCAM_L = 0 + FCAM_R = 1 + + +@dataclass +class FisheyeMEICamera: + + metadata: FisheyeMEICameraMetadata + image: npt.NDArray[np.uint8] + extrinsic: StateSE3 + + +class FisheyeMEIDistortionIndex(IntEnum): + + K1 = 0 + K2 = 1 + P1 = 2 + P2 = 3 + + +class FisheyeMEIDistortion(ArrayMixin): + _array: npt.NDArray[np.float64] + + def __init__(self, k1: float, k2: float, p1: float, p2: float) -> None: + array = np.zeros(len(FisheyeMEIDistortionIndex), dtype=np.float64) + array[FisheyeMEIDistortionIndex.K1] = k1 + array[FisheyeMEIDistortionIndex.K2] = k2 + array[FisheyeMEIDistortionIndex.P1] = p1 + array[FisheyeMEIDistortionIndex.P2] = p2 + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> FisheyeMEIDistortion: + assert array.ndim == 1 + assert array.shape[-1] == len(FisheyeMEIDistortionIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def array(self) -> npt.NDArray[np.float64]: + return self._array + + @property + def k1(self) -> float: + return self._array[FisheyeMEIDistortionIndex.K1] + + @property + def k2(self) -> float: + return self._array[FisheyeMEIDistortionIndex.K2] + + @property + def p1(self) -> float: + return self._array[FisheyeMEIDistortionIndex.P1] + + @property + def p2(self) -> float: + return self._array[FisheyeMEIDistortionIndex.P2] + + +class FisheyeMEIProjectionIndex(IntEnum): + + GAMMA1 = 0 + GAMMA2 = 1 + U0 = 2 + V0 = 3 + + +class FisheyeMEIProjection(ArrayMixin): + _array: npt.NDArray[np.float64] + + def __init__(self, gamma1: float, gamma2: float, u0: float, v0: float) -> None: + array = np.zeros(len(FisheyeMEIProjectionIndex), dtype=np.float64) + array[FisheyeMEIProjectionIndex.GAMMA1] = gamma1 + array[FisheyeMEIProjectionIndex.GAMMA2] = gamma2 + array[FisheyeMEIProjectionIndex.U0] = u0 + array[FisheyeMEIProjectionIndex.V0] = v0 + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> FisheyeMEIProjection: + assert array.ndim == 1 + assert array.shape[-1] == len(FisheyeMEIProjectionIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def array(self) -> npt.NDArray[np.float64]: + return self._array + + @property + def gamma1(self) -> float: + return self._array[FisheyeMEIProjectionIndex.GAMMA1] + + @property + def gamma2(self) -> float: + return self._array[FisheyeMEIProjectionIndex.GAMMA2] + + @property + def u0(self) -> float: + return self._array[FisheyeMEIProjectionIndex.U0] + + @property + def v0(self) -> float: + return self._array[FisheyeMEIProjectionIndex.V0] + + +@dataclass +class FisheyeMEICameraMetadata: + + camera_type: FisheyeMEICameraType + mirror_parameter: Optional[float] + distortion: Optional[FisheyeMEIDistortion] + projection: Optional[FisheyeMEIProjection] + width: int + height: int + + @classmethod + def from_dict(cls, data_dict: Dict[str, Any]) -> FisheyeMEICameraMetadata: + data_dict["camera_type"] = FisheyeMEICameraType(data_dict["camera_type"]) + data_dict["distortion"] = ( + FisheyeMEIDistortion.from_array(np.array(data_dict["distortion"])) + if data_dict["distortion"] is not None + else None + ) + data_dict["projection"] = ( + FisheyeMEIProjection.from_array(np.array(data_dict["projection"])) + if data_dict["projection"] is not None + else None + ) + return 
FisheyeMEICameraMetadata(**data_dict)
+
+    def to_dict(self) -> Dict[str, Any]:
+        data_dict = asdict(self)
+        data_dict["camera_type"] = int(self.camera_type)
+        data_dict["distortion"] = self.distortion.array.tolist() if self.distortion is not None else None
+        data_dict["projection"] = self.projection.array.tolist() if self.projection is not None else None
+        return data_dict
+
+    def cam2image(
+        self, points_3d: npt.NDArray[np.float64]
+    ) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64], npt.NDArray[np.float64]]:
+        """Project points from camera coordinates onto the image plane (unified/MEI model).
+
+        Returns per-point image coordinates (u, v) and the signed range along the optical axis.
+        """
+        norm = np.linalg.norm(points_3d, axis=1)
+
+        x = points_3d[:, 0] / norm
+        y = points_3d[:, 1] / norm
+        z = points_3d[:, 2] / norm
+
+        x /= z + self.mirror_parameter
+        y /= z + self.mirror_parameter
+
+        if self.distortion is not None:
+            k1 = self.distortion.k1
+            k2 = self.distortion.k2
+        else:
+            k1 = k2 = 0.0
+
+        if self.projection is not None:
+            gamma1 = self.projection.gamma1
+            gamma2 = self.projection.gamma2
+            u0 = self.projection.u0
+            v0 = self.projection.v0
+        else:
+            gamma1 = gamma2 = 1.0
+            u0 = v0 = 0.0
+
+        ro2 = x * x + y * y
+        x *= 1 + k1 * ro2 + k2 * ro2 * ro2
+        y *= 1 + k1 * ro2 + k2 * ro2 * ro2
+
+        x = gamma1 * x + u0
+        y = gamma2 * y + v0
+
+        return x, y, norm * points_3d[:, 2] / np.abs(points_3d[:, 2])
diff --git a/src/py123d/datatypes/sensors/lidar/lidar.py b/src/py123d/datatypes/sensors/lidar.py
similarity index 100%
rename from src/py123d/datatypes/sensors/lidar/lidar.py
rename to src/py123d/datatypes/sensors/lidar.py
diff --git a/src/py123d/datatypes/sensors/lidar/__init__.py b/src/py123d/datatypes/sensors/lidar/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/src/py123d/datatypes/sensors/lidar/lidar_index.py b/src/py123d/datatypes/sensors/lidar/lidar_index.py
deleted file mode 100644
index b4864cac..00000000
--- a/src/py123d/datatypes/sensors/lidar/lidar_index.py
+++ /dev/null
@@ -1,95 +0,0 @@
-from enum import IntEnum
-
-from py123d.common.utils.enums import classproperty
-
-LIDAR_INDEX_REGISTRY = {}
-
-
-def register_lidar_index(enum_class):
-    LIDAR_INDEX_REGISTRY[enum_class.__name__] = enum_class
-    return enum_class
-
-
-class LiDARIndex(IntEnum):
-
-    @classproperty
-    def XY(self) -> slice:
-        """
-        Returns a slice for the XY coordinates of the LiDAR point cloud.
-        """
-        return slice(self.X, self.Y + 1)
-
-    @classproperty
-    def XYZ(self) -> slice:
-        """
-        Returns a slice for the XYZ coordinates of the LiDAR point cloud.
-        """
-        return slice(self.X, self.Z + 1)
-
-
-@register_lidar_index
-class DefaultLidarIndex(LiDARIndex):
-    X = 0
-    Y = 1
-    Z = 2
-
-
-@register_lidar_index
-class NuPlanLidarIndex(LiDARIndex):
-    X = 0
-    Y = 1
-    Z = 2
-    INTENSITY = 3
-    RING = 4
-    ID = 5
-
-
-@register_lidar_index
-class CARLALidarIndex(LiDARIndex):
-    X = 0
-    Y = 1
-    Z = 2
-    INTENSITY = 3
-
-
-@register_lidar_index
-class WOPDLidarIndex(LiDARIndex):
-    RANGE = 0
-    INTENSITY = 1
-    ELONGATION = 2
-    X = 3
-    Y = 4
-    Z = 5
-
-
-@register_lidar_index
-class AVSensorLidarIndex(LiDARIndex):
-    """Argoverse Sensor LiDAR Indexing Scheme.
-
-    NOTE: The LiDAR files also include, 'offset_ns', which we do not currently include.
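To make the MEI projection above concrete, here is a small, self-contained usage sketch; every calibration value below is an invented placeholder, not real KITTI-360 calibration:

import numpy as np

from py123d.datatypes.sensors.fisheye_mei_camera import (
    FisheyeMEICameraMetadata,
    FisheyeMEICameraType,
    FisheyeMEIDistortion,
    FisheyeMEIProjection,
)

metadata = FisheyeMEICameraMetadata(
    camera_type=FisheyeMEICameraType.FCAM_L,
    mirror_parameter=1.0,  # xi in the unified model (placeholder value)
    distortion=FisheyeMEIDistortion(k1=-0.2, k2=0.05, p1=0.0, p2=0.0),
    projection=FisheyeMEIProjection(gamma1=800.0, gamma2=800.0, u0=700.0, v0=700.0),
    width=1400,
    height=1400,
)

# Two points in the camera frame (z pointing forward).
points_3d = np.array([[0.5, -0.2, 2.0], [1.0, 1.0, 4.0]])
u, v, depth = metadata.cam2image(points_3d)  # pixel coordinates and signed range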
- """ - - X = 0 - Y = 1 - Z = 2 - INTENSITY = 3 - - -@register_lidar_index -class PandasetLidarIndex(LiDARIndex): - """Pandaset LiDAR Indexing Scheme.""" - - X = 0 - Y = 1 - Z = 2 - INTENSITY = 3 - - -@register_lidar_index -class NuScenesLidarIndex(LiDARIndex): - - X = 0 - Y = 1 - Z = 2 - INTENSITY = 3 - RING = 4 diff --git a/src/py123d/datatypes/sensors/camera/pinhole_camera.py b/src/py123d/datatypes/sensors/pinhole_camera.py similarity index 96% rename from src/py123d/datatypes/sensors/camera/pinhole_camera.py rename to src/py123d/datatypes/sensors/pinhole_camera.py index 0bb99be6..beefa883 100644 --- a/src/py123d/datatypes/sensors/camera/pinhole_camera.py +++ b/src/py123d/datatypes/sensors/pinhole_camera.py @@ -13,20 +13,17 @@ class PinholeCameraType(SerialIntEnum): - """ - Enum for cameras in py123d. - """ - - CAM_F0 = 0 - CAM_B0 = 1 - CAM_L0 = 2 - CAM_L1 = 3 - CAM_L2 = 4 - CAM_R0 = 5 - CAM_R1 = 6 - CAM_R2 = 7 - CAM_STEREO_L = 8 - CAM_STEREO_R = 9 + + PCAM_F0 = 0 + PCAM_B0 = 1 + PCAM_L0 = 2 + PCAM_L1 = 3 + PCAM_L2 = 4 + PCAM_R0 = 5 + PCAM_R1 = 6 + PCAM_R2 = 7 + PCAM_STEREO_L = 8 + PCAM_STEREO_R = 9 @dataclass diff --git a/src/py123d/datatypes/vehicle_state/vehicle_parameters.py b/src/py123d/datatypes/vehicle_state/vehicle_parameters.py index 5e15c3b7..ca2a1944 100644 --- a/src/py123d/datatypes/vehicle_state/vehicle_parameters.py +++ b/src/py123d/datatypes/vehicle_state/vehicle_parameters.py @@ -93,6 +93,23 @@ def get_wopd_chrysler_pacifica_parameters() -> VehicleParameters: ) +def get_kitti360_vw_passat_parameters() -> VehicleParameters: + # The KITTI-360 dataset uses a 2006 VW Passat Variant B6. + # https://en.wikipedia.org/wiki/Volkswagen_Passat_(B6) + # [1] https://scispace.com/pdf/team-annieway-s-autonomous-system-18ql8b7kki.pdf + # NOTE: Parameters are estimated from the vehicle model. + # https://www.cvlibs.net/datasets/kitti-360/documentation.php + return VehicleParameters( + vehicle_name="kitti360_vw_passat", + width=1.820, + length=4.775, + height=1.516, + wheel_base=2.709, + rear_axle_to_center_vertical=1.516 / 2 - 0.9, + rear_axle_to_center_longitudinal=1.3369, + ) + + def get_av2_ford_fusion_hybrid_parameters() -> VehicleParameters: # NOTE: Parameters are estimated from the vehicle model. 
# https://en.wikipedia.org/wiki/Ford_Fusion_Hybrid#Second_generation diff --git a/src/py123d/geometry/transform/transform_se3.py b/src/py123d/geometry/transform/transform_se3.py index 8bf907ba..8f394772 100644 --- a/src/py123d/geometry/transform/transform_se3.py +++ b/src/py123d/geometry/transform/transform_se3.py @@ -206,10 +206,11 @@ def convert_points_3d_array_between_origins( assert points_3d_array.ndim >= 1 assert points_3d_array.shape[-1] == len(Point3DIndex) - abs_points = points_3d_array @ R_from.T + t_from - new_rel_points = (abs_points - t_to) @ R_to + R_rel = R_to.T @ R_from # Relative rotation matrix + t_rel = R_to.T @ (t_from - t_to) # Relative translation - return new_rel_points + conv_points_3d_array = (R_rel @ points_3d_array.T).T + t_rel + return conv_points_3d_array def translate_se3_along_z(state_se3: StateSE3, distance: float) -> StateSE3: diff --git a/src/py123d/script/config/common/default_dataset_paths.yaml b/src/py123d/script/config/common/default_dataset_paths.yaml index b4941707..81d37227 100644 --- a/src/py123d/script/config/common/default_dataset_paths.yaml +++ b/src/py123d/script/config/common/default_dataset_paths.yaml @@ -21,6 +21,9 @@ dataset_paths: # Pandaset defaults pandaset_data_root: ${oc.env:PANDASET_DATA_ROOT,null} + # KITTI360 defaults + kitti360_data_root: ${oc.env:KITTI360_DATA_ROOT,null} + # nuScenes defaults nuscenes_data_root: ${oc.env:NUSCENES_DATA_ROOT,null} nuscenes_map_root: ${dataset_paths.nuscenes_data_root} diff --git a/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml b/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml index 77445192..cf2e553a 100644 --- a/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml +++ b/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml @@ -1,4 +1,5 @@ _target_: py123d.datatypes.scene.arrow.arrow_scene_builder.ArrowSceneBuilder _convert_: 'all' -dataset_path: ${dataset_paths.py123d_data_root} +logs_root: ${dataset_paths.py123d_logs_root} +maps_root: ${dataset_paths.py123d_maps_root} diff --git a/src/py123d/script/config/conversion/datasets/av2_sensor_dataset.yaml b/src/py123d/script/config/conversion/datasets/av2_sensor_dataset.yaml index 1a121fd9..ff8a2433 100644 --- a/src/py123d/script/config/conversion/datasets/av2_sensor_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/av2_sensor_dataset.yaml @@ -21,18 +21,16 @@ av2_sensor_dataset: # Box Detections include_box_detections: true - # Traffic Lights - include_traffic_lights: false - - # Cameras - include_cameras: true - camera_store_option: "binary" # "path", "binary", "mp4" + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "binary" # "path", "binary", "mp4" # LiDARs include_lidars: true lidar_store_option: "binary" # "path", "path_merged", "binary" - # Scenario tag / Route - # NOTE: These are only supported for nuPlan. Consider removing or expanding support. 
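Returning to the transform_se3.py change above: composing a single relative rotation and translation is algebraically identical to the old two-step round trip through absolute coordinates. A minimal numpy check (a standalone sketch; the rot_z helper is ours, not part of py123d):

import numpy as np

def rot_z(a: float) -> np.ndarray:
    # Rotation matrix about +z by angle a.
    c, s = np.cos(a), np.sin(a)
    return np.array([[c, -s, 0.0], [s, c, 0.0], [0.0, 0.0, 1.0]])

rng = np.random.default_rng(0)
R_from, R_to = rot_z(0.3), rot_z(-1.1)
t_from, t_to = rng.normal(size=3), rng.normal(size=3)
points = rng.normal(size=(10, 3))

# Old formulation: lift into the absolute frame, then project into the target frame.
old = ((points @ R_from.T + t_from) - t_to) @ R_to

# New formulation: one composed relative rotation/translation.
R_rel = R_to.T @ R_from
t_rel = R_to.T @ (t_from - t_to)
new = (R_rel @ points.T).T + t_rel

assert np.allclose(old, new)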
+  # Not available:
+  include_traffic_lights: false
   include_scenario_tags: false
   include_route: false
+  include_fisheye_mei_cameras: false
diff --git a/src/py123d/script/config/conversion/datasets/carla_dataset.yaml b/src/py123d/script/config/conversion/datasets/carla_dataset.yaml
deleted file mode 100644
index c28ccb76..00000000
--- a/src/py123d/script/config/conversion/datasets/carla_dataset.yaml
+++ /dev/null
@@ -1,35 +0,0 @@
-carla_dataset:
-  _target_: py123d.conversion.datasets.carla.carla_data_converter.CarlaDataConverter
-  _convert_: 'all'
-
-  splits: ["carla"]
-  log_path: "${oc.env:HOME}/carla_workspace/data"
-
-  dataset_converter_config:
-    _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig
-    _convert_: 'all'
-
-    force_log_conversion: ${force_log_conversion}
-    force_map_conversion: ${force_map_conversion}
-
-    # Ego
-    include_ego: true
-
-    # Box Detections
-    include_box_detections: true
-
-    # Traffic Lights
-    include_traffic_lights: true
-
-    # Cameras
-    include_cameras: true
-    camera_store_option: "path" # "path", "binary", "mp4"
-
-    # LiDARs
-    include_lidars: true
-    lidar_store_option: "path" # "path", "binary"
-
-    # Scenario tag / Route
-    # NOTE: These are only supported for nuPlan. Consider removing or expanding support.
-    include_scenario_tags: true
-    include_route: true
diff --git a/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml
new file mode 100644
index 00000000..4919ed79
--- /dev/null
+++ b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml
@@ -0,0 +1,67 @@
+kitti360_dataset:
+  _target_: py123d.conversion.datasets.kitti360.kitti360_converter.Kitti360Converter
+  _convert_: 'all'
+
+  splits: ["kitti360_train", "kitti360_val", "kitti360_test"]
+
+  kitti360_data_root: ${dataset_paths.kitti360_data_root}
+
+  # NOTE: We preprocess detections into a cache directory to speed up repeated conversions.
+  # The bounding boxes are brought into a per-frame format, filtered by distance to the
+  # ego vehicle and by visibility estimated from the LiDAR point cloud.
+  detection_cache_root: ${dataset_paths.kitti360_data_root}/preprocessed_detections
+  detection_radius: 60.0
+
+  dataset_converter_config:
+    _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig
+    _convert_: 'all'
+
+    force_log_conversion: ${force_log_conversion}
+    force_map_conversion: ${force_map_conversion}
+
+    # Map
+    include_map: true
+
+    # Ego
+    include_ego: true
+
+    # Box Detections
+    include_box_detections: true
+
+    # Pinhole Cameras
+    include_pinhole_cameras: true
+    pinhole_camera_store_option: "path"
+
+    # Fisheye Cameras
+    include_fisheye_mei_cameras: false
+    fisheye_mei_camera_store_option: "path"
+
+    # LiDARs
+    include_lidars: true
+    lidar_store_option: "path"
+
+    # Not available:
+    include_traffic_lights: false
+    include_route: false
+    include_scenario_tags: false
+
+  # NOTE: KITTI-360 does not provide official train/val/test splits,
+  # so we define our own over the annotated sequences below.
+ train_sequences: + - "2013_05_28_drive_0000_sync" + - "2013_05_28_drive_0002_sync" + - "2013_05_28_drive_0003_sync" + + + val_sequences: + - "2013_05_28_drive_0004_sync" + - "2013_05_28_drive_0005_sync" + - "2013_05_28_drive_0006_sync" + - "2013_05_28_drive_0007_sync" + + test_sequences: + - "2013_05_28_drive_0008_sync" + - "2013_05_28_drive_0009_sync" + - "2013_05_28_drive_0010_sync" + - "2013_05_28_drive_0018_sync" diff --git a/src/py123d/script/config/conversion/datasets/nuplan_dataset.yaml b/src/py123d/script/config/conversion/datasets/nuplan_dataset.yaml index 671b960c..19b0d0f2 100644 --- a/src/py123d/script/config/conversion/datasets/nuplan_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/nuplan_dataset.yaml @@ -26,15 +26,17 @@ nuplan_dataset: # Traffic Lights include_traffic_lights: true - # Cameras - include_cameras: true - camera_store_option: "path" # "path", "binary", "mp4" + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "path" # "path", "binary", "mp4" # LiDARs include_lidars: true lidar_store_option: "path_merged" # "path", "path_merged", "binary" # Scenario tag / Route - # NOTE: These are only supported for nuPlan. Consider removing or expanding support. include_scenario_tags: true include_route: true + + # Not available: + include_fisheye_mei_cameras: false diff --git a/src/py123d/script/config/conversion/datasets/nuplan_mini_dataset.yaml b/src/py123d/script/config/conversion/datasets/nuplan_mini_dataset.yaml index a59e67a7..50aea778 100644 --- a/src/py123d/script/config/conversion/datasets/nuplan_mini_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/nuplan_mini_dataset.yaml @@ -26,15 +26,17 @@ nuplan_mini_dataset: # Traffic Lights include_traffic_lights: true - # Cameras - include_cameras: true - camera_store_option: "path" # "path", "binary", "mp4" + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "path" # "path", "binary", "mp4" # LiDARs include_lidars: true - lidar_store_option: "binary" # "path", "path_merged", "binary" + lidar_store_option: "path_merged" # "path", "path_merged", "binary" # Scenario tag / Route - # NOTE: These are only supported for nuPlan. Consider removing or expanding support. 
include_scenario_tags: true include_route: true + + # Not available: + include_fisheye_mei_cameras: false diff --git a/src/py123d/script/config/conversion/datasets/nuscenes_dataset.yaml b/src/py123d/script/config/conversion/datasets/nuscenes_dataset.yaml index 0f3ab95e..7ad5834f 100644 --- a/src/py123d/script/config/conversion/datasets/nuscenes_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/nuscenes_dataset.yaml @@ -24,13 +24,16 @@ nuscenes_dataset: # Box Detections include_box_detections: true - # Traffic Lights - include_traffic_lights: false - - # Cameras - include_cameras: true - camera_store_option: "path" + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "path" - #lidar + # LiDARs include_lidars: true lidar_store_option: "path" + + # Not available: + include_fisheye_mei_cameras: false + include_traffic_lights: false + include_scenario_tags: false + include_route: false diff --git a/src/py123d/script/config/conversion/datasets/nuscenes_mini_dataset.yaml b/src/py123d/script/config/conversion/datasets/nuscenes_mini_dataset.yaml index 4c9ba050..e7181c47 100644 --- a/src/py123d/script/config/conversion/datasets/nuscenes_mini_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/nuscenes_mini_dataset.yaml @@ -24,13 +24,16 @@ nuscenes_dataset: # Box Detections include_box_detections: true - # Traffic Lights - include_traffic_lights: false - - # Cameras - include_cameras: true - camera_store_option: "binary" + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "path" - #lidar + # LiDARs include_lidars: true - lidar_store_option: "binary" + lidar_store_option: "path" + + # Not available: + include_fisheye_mei_cameras: false + include_traffic_lights: false + include_scenario_tags: false + include_route: false diff --git a/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml b/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml index 51d8e18c..d70a2aab 100644 --- a/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml @@ -12,31 +12,28 @@ pandaset_dataset: force_log_conversion: ${force_log_conversion} force_map_conversion: ${force_map_conversion} - # Map - include_map: false - # Ego include_ego: true # Box Detections include_box_detections: true - # Traffic Lights - include_traffic_lights: false - - # Cameras - include_cameras: true - camera_store_option: "binary" # "path", "binary", "mp4" + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "path" # LiDARs include_lidars: true - lidar_store_option: "binary" # "path", "path_merged", "binary" + lidar_store_option: "binary" - # Scenario tag / Route - # NOTE: These are only supported for nuPlan. Consider removing or expanding support. + # Not available: + include_map: false + include_fisheye_mei_cameras: false + include_traffic_lights: false include_scenario_tags: false include_route: false + # NOTE: Pandaset does not have official splits, so we create our own here. # We use 80% of the logs for training, 10% for validation, and 10% for testing. 
train_log_names: diff --git a/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml b/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml index 441c4966..3fb5acee 100644 --- a/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml @@ -25,18 +25,15 @@ wopd_dataset: # Box Detections include_box_detections: true - # Traffic Lights - include_traffic_lights: false - - # Cameras - include_cameras: true - camera_store_option: "binary" # "path", "binary", "mp4" + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "binary" # "path", "binary", "mp4" # LiDARs include_lidars: true lidar_store_option: "binary" # "path", "path_merged", "binary" - # Scenario tag / Route - # NOTE: These are only supported for nuPlan. Consider removing or expanding support. + # Not available: + include_traffic_lights: false include_scenario_tags: false include_route: false diff --git a/src/py123d/script/config/conversion/default_conversion.yaml b/src/py123d/script/config/conversion/default_conversion.yaml index daa55f12..4adf788b 100644 --- a/src/py123d/script/config/conversion/default_conversion.yaml +++ b/src/py123d/script/config/conversion/default_conversion.yaml @@ -16,7 +16,7 @@ defaults: - log_writer: arrow_log_writer - map_writer: gpkg_map_writer - datasets: - - ??? + - kitti360_dataset - _self_ diff --git a/src/py123d/script/run_conversion.py b/src/py123d/script/run_conversion.py index bb4fe510..c2510b9b 100644 --- a/src/py123d/script/run_conversion.py +++ b/src/py123d/script/run_conversion.py @@ -37,10 +37,17 @@ def main(cfg: DictConfig) -> None: logger.info(f"Processing dataset: {dataset_converter.__class__.__name__}") map_args = [{"map_index": i} for i in range(dataset_converter.get_number_of_maps())] + logger.info( + f"Found maps: {dataset_converter.get_number_of_maps()} for dataset: {dataset_converter.__class__.__name__}" + ) + worker_map(worker, partial(_convert_maps, cfg=cfg, dataset_converter=dataset_converter), map_args) logger.info(f"Finished maps: {dataset_converter.__class__.__name__}") log_args = [{"log_index": i} for i in range(dataset_converter.get_number_of_logs())] + logger.info( + f"Found logs: {dataset_converter.get_number_of_logs()} for dataset: {dataset_converter.__class__.__name__}" + ) worker_map(worker, partial(_convert_logs, cfg=cfg, dataset_converter=dataset_converter), log_args) logger.info(f"Finished logs: {dataset_converter.__class__.__name__}") diff --git a/src/py123d/visualization/matplotlib/camera.py b/src/py123d/visualization/matplotlib/camera.py index 9126655d..aadd0baf 100644 --- a/src/py123d/visualization/matplotlib/camera.py +++ b/src/py123d/visualization/matplotlib/camera.py @@ -12,7 +12,7 @@ from py123d.datatypes.detections.box_detection_types import BoxDetectionType from py123d.datatypes.detections.box_detections import BoxDetectionSE3, BoxDetectionWrapper -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeIntrinsics +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeIntrinsics from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 from py123d.geometry import BoundingBoxSE3Index, Corners3DIndex from py123d.geometry.transform.transform_se3 import convert_absolute_to_relative_se3_array @@ -73,20 +73,9 @@ def add_box_detections_to_camera_ax( camera: PinholeCamera, box_detections: BoxDetectionWrapper, ego_state_se3: EgoStateSE3, + return_image: bool = False, ) -> plt.Axes: - # box_labels = 
annotations.names
-    # boxes = _transform_annotations_to_camera(
-    #     annotations.boxes,
-    #     camera.sensor2lidar_rotation,
-    #     camera.sensor2lidar_translation,
-    # )
-    # box_positions, box_dimensions, box_heading = (
-    #     boxes[:, BoundingBoxIndex.POSITION],
-    #     boxes[:, BoundingBoxIndex.DIMENSION],
-    #     boxes[:, BoundingBoxIndex.HEADING],
-    # )
-
     box_detection_array = np.zeros((len(box_detections.box_detections), len(BoundingBoxSE3Index)), dtype=np.float64)
     detection_types = np.array(
         [detection.metadata.box_detection_type for detection in box_detections.box_detections], dtype=object
@@ -123,6 +112,10 @@
     box_corners, detection_types = box_corners[valid_corners], detection_types[valid_corners]
     image = _plot_rect_3d_on_img(camera.image.copy(), box_corners, detection_types)

+    if return_image:
+        return ax, image
+
     ax.imshow(image)
     return ax
diff --git a/src/py123d/visualization/matplotlib/plots.py b/src/py123d/visualization/matplotlib/plots.py
index cbbdca61..01100f01 100644
--- a/src/py123d/visualization/matplotlib/plots.py
+++ b/src/py123d/visualization/matplotlib/plots.py
@@ -25,7 +25,8 @@ def _plot_scene_on_ax(ax: plt.Axes, scene: AbstractScene, iteration: int = 0, ra
     point_2d = ego_vehicle_state.bounding_box.center.state_se2.point_2d
     if map_api is not None:
         add_default_map_on_ax(ax, map_api, point_2d, radius=radius, route_lane_group_ids=route_lane_group_ids)
-        add_traffic_lights_to_ax(ax, traffic_light_detections, map_api)
+        if traffic_light_detections is not None:
+            add_traffic_lights_to_ax(ax, traffic_light_detections, map_api)
     add_box_detections_to_ax(ax, box_detections)
     add_ego_vehicle_to_ax(ax, ego_vehicle_state)
diff --git a/src/py123d/visualization/matplotlib/utils.py b/src/py123d/visualization/matplotlib/utils.py
index 3c552eac..81c60260 100644
--- a/src/py123d/visualization/matplotlib/utils.py
+++ b/src/py123d/visualization/matplotlib/utils.py
@@ -34,19 +34,20 @@ def _add_element_helper(element: geom.Polygon):
         # Create path with exterior and interior rings
         def create_polygon_path(polygon):
             # Get exterior coordinates
-            exterior_coords = list(polygon.exterior.coords)
+            # NOTE: Only take first two dimensions in case of 3D coords
+            exterior_coords = np.array(polygon.exterior.coords)[:, :2].tolist()

             # Start with exterior ring
-            vertices = exterior_coords
+            vertices_2d = exterior_coords
             codes = [Path.MOVETO] + [Path.LINETO] * (len(exterior_coords) - 2) + [Path.CLOSEPOLY]

             # Add interior rings (holes)
             for interior in polygon.interiors:
-                interior_coords = list(interior.coords)
-                vertices.extend(interior_coords)
+                interior_coords = np.array(interior.coords)[:, :2].tolist()
+                vertices_2d.extend(interior_coords)
                 codes.extend([Path.MOVETO] + [Path.LINETO] * (len(interior_coords) - 2) + [Path.CLOSEPOLY])

-            return Path(vertices, codes)
+            return Path(vertices_2d, codes)

         path = create_polygon_path(element)
diff --git a/src/py123d/visualization/viser/elements/render_elements.py b/src/py123d/visualization/viser/elements/render_elements.py
index 6df316b2..f807033e 100644
--- a/src/py123d/visualization/viser/elements/render_elements.py
+++ b/src/py123d/visualization/viser/elements/render_elements.py
@@ -1,7 +1,10 @@
+import numpy as np
+
 from py123d.conversion.utils.sensor_utils.camera_conventions import convert_camera_convention
 from py123d.datatypes.scene.abstract_scene import AbstractScene
 from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3
 from py123d.geometry.geometry_index import StateSE3Index
+from py123d.geometry.rotation import EulerAngles
 from py123d.geometry.se import StateSE3
 from py123d.geometry.transform.transform_se3 import translate_se3_along_body_frame
 from py123d.geometry.vector import Vector3D

@@ -16,10 +19,56 @@ def get_ego_3rd_person_view_position(
     ego_pose = scene.get_ego_state_at_iteration(iteration).rear_axle_se3.array
     ego_pose[StateSE3Index.XYZ] -= scene_center_array
     ego_pose_se3 = StateSE3.from_array(ego_pose)
-    ego_pose_se3 = translate_se3_along_body_frame(ego_pose_se3, Vector3D(-10.0, 0.0, 5.0))
+    ego_pose_se3 = translate_se3_along_body_frame(ego_pose_se3, Vector3D(-15.0, 0.0, 15.0))
+    ego_pose_se3 = _pitch_se3_by_degrees(ego_pose_se3, 30.0)

     return convert_camera_convention(
         ego_pose_se3,
         from_convention="pXpZmY",
         to_convention="pZmYpX",
     )
+
+
+def get_ego_bev_view_position(
+    scene: AbstractScene,
+    iteration: int,
+    initial_ego_state: EgoStateSE3,
+) -> StateSE3:
+    scene_center_array = initial_ego_state.center.point_3d.array
+    ego_center = scene.get_ego_state_at_iteration(iteration).center.array
+    ego_center[StateSE3Index.XYZ] -= scene_center_array
+    ego_center_planar = StateSE3.from_array(ego_center)
+
+    # Keep only the yaw component, so the BEV camera stays level.
+    planar_euler_angles = EulerAngles(0.0, 0.0, ego_center_planar.euler_angles.yaw)
+    quaternion = planar_euler_angles.quaternion
+    ego_center_planar._array[StateSE3Index.QUATERNION] = quaternion.array
+
+    ego_center_planar = translate_se3_along_body_frame(ego_center_planar, Vector3D(0.0, 0.0, 50.0))
+    ego_center_planar = _pitch_se3_by_degrees(ego_center_planar, 90.0)
+
+    return convert_camera_convention(
+        ego_center_planar,
+        from_convention="pXpZmY",
+        to_convention="pZmYpX",
+    )
+
+
+def _pitch_se3_by_degrees(state_se3: StateSE3, degrees: float) -> StateSE3:
+    quaternion = EulerAngles(0.0, np.deg2rad(degrees), state_se3.yaw).quaternion
+    return StateSE3(
+        x=state_se3.x,
+        y=state_se3.y,
+        z=state_se3.z,
+        qw=quaternion.qw,
+        qx=quaternion.qx,
+        qy=quaternion.qy,
+        qz=quaternion.qz,
+    )
diff --git a/src/py123d/visualization/viser/elements/sensor_elements.py b/src/py123d/visualization/viser/elements/sensor_elements.py
index 410cccb9..2dd02c23 100644
--- a/src/py123d/visualization/viser/elements/sensor_elements.py
+++ b/src/py123d/visualization/viser/elements/sensor_elements.py
@@ -7,8 +7,8 @@
 import viser

 from py123d.datatypes.scene.abstract_scene import AbstractScene
-from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType
-from py123d.datatypes.sensors.lidar.lidar import LiDARType
+from py123d.datatypes.sensors.lidar import LiDARType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraType
 from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3
 from py123d.geometry import StateSE3Index
 from py123d.geometry.transform.transform_se3 import (
@@ -34,7 +34,7 @@
     ego_pose[StateSE3Index.XYZ] -= scene_center_array

     def _add_camera_frustums_to_viser_server(camera_type: PinholeCameraType) -> None:
-        camera = scene.get_camera_at_iteration(scene_interation, camera_type)
+        camera = scene.get_pinhole_camera_at_iteration(scene_interation, camera_type)
         if camera is not None:
             camera_position, camera_quaternion, camera_image = _get_camera_values(
                 camera,
@@ -83,7 +83,7 @@ def add_camera_gui_to_viser_server(
 ) -> None:
     if viser_config.camera_gui_visible:
         for camera_type in viser_config.camera_gui_types:
-            camera =
scene.get_camera_at_iteration(scene_interation, camera_type) + camera = scene.get_pinhole_camera_at_iteration(scene_interation, camera_type) if camera is not None: if camera_type in camera_gui_handles: camera_gui_handles[camera_type].image = _rescale_image( diff --git a/src/py123d/visualization/viser/viser_config.py b/src/py123d/visualization/viser/viser_config.py index 384f8042..510151f2 100644 --- a/src/py123d/visualization/viser/viser_config.py +++ b/src/py123d/visualization/viser/viser_config.py @@ -1,19 +1,21 @@ from dataclasses import dataclass, field from typing import List, Literal, Optional, Tuple -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType from py123d.visualization.color.color import ELLIS_5 all_camera_types: List[PinholeCameraType] = [ - PinholeCameraType.CAM_F0, - PinholeCameraType.CAM_B0, - PinholeCameraType.CAM_L0, - PinholeCameraType.CAM_L1, - PinholeCameraType.CAM_L2, - PinholeCameraType.CAM_R0, - PinholeCameraType.CAM_R1, - PinholeCameraType.CAM_R2, + PinholeCameraType.PCAM_F0, + PinholeCameraType.PCAM_B0, + PinholeCameraType.PCAM_L0, + PinholeCameraType.PCAM_L1, + PinholeCameraType.PCAM_L2, + PinholeCameraType.PCAM_R0, + PinholeCameraType.PCAM_R1, + PinholeCameraType.PCAM_R2, + PinholeCameraType.PCAM_STEREO_L, + PinholeCameraType.PCAM_STEREO_R, ] all_lidar_types: List[LiDARType] = [ @@ -64,7 +66,7 @@ class ViserConfig: # -> GUI camera_gui_visible: bool = True - camera_gui_types: List[PinholeCameraType] = field(default_factory=lambda: [PinholeCameraType.CAM_F0].copy()) + camera_gui_types: List[PinholeCameraType] = field(default_factory=lambda: [PinholeCameraType.PCAM_F0].copy()) camera_gui_image_scale: float = 0.25 # Resize factor for the camera image shown in the GUI (<1.0 for speed) # LiDAR diff --git a/src/py123d/visualization/viser/viser_viewer.py b/src/py123d/visualization/viser/viser_viewer.py index f2ed1422..e6333f81 100644 --- a/src/py123d/visualization/viser/viser_viewer.py +++ b/src/py123d/visualization/viser/viser_viewer.py @@ -1,3 +1,4 @@ +import io import logging import time from typing import Dict, List, Optional @@ -9,8 +10,7 @@ from py123d.datatypes.maps.map_datatypes import MapLayer from py123d.datatypes.scene.abstract_scene import AbstractScene -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 from py123d.visualization.viser.elements import ( add_box_detections_to_viser_server, @@ -19,33 +19,15 @@ add_lidar_pc_to_viser_server, add_map_to_viser_server, ) -from py123d.visualization.viser.elements.render_elements import get_ego_3rd_person_view_position +from py123d.visualization.viser.elements.render_elements import ( + get_ego_3rd_person_view_position, + get_ego_bev_view_position, +) from py123d.visualization.viser.viser_config import ViserConfig logger = logging.getLogger(__name__) -all_camera_types: List[PinholeCameraType] = [ - PinholeCameraType.CAM_F0, - PinholeCameraType.CAM_B0, - PinholeCameraType.CAM_L0, - PinholeCameraType.CAM_L1, - PinholeCameraType.CAM_L2, - PinholeCameraType.CAM_R0, - PinholeCameraType.CAM_R1, - PinholeCameraType.CAM_R2, -] - -all_lidar_types: List[LiDARType] = [ - LiDARType.LIDAR_MERGED, - 
LiDARType.LIDAR_TOP, - LiDARType.LIDAR_FRONT, - LiDARType.LIDAR_SIDE_LEFT, - LiDARType.LIDAR_SIDE_RIGHT, - LiDARType.LIDAR_BACK, -] - - def _build_viser_server(viser_config: ViserConfig) -> viser.ViserServer: server = viser.ViserServer( host=viser_config.server_host, @@ -138,7 +120,12 @@ def set_scene(self, scene: AbstractScene) -> None: "FPS options", ("10", "25", "50", "75", "100") ) - button = self._viser_server.gui.add_button("Render Scene") + with self._viser_server.gui.add_folder("Render", expand_by_default=False): + render_format = self._viser_server.gui.add_dropdown("Format", ["gif", "mp4"], initial_value="mp4") + render_view = self._viser_server.gui.add_dropdown( + "View", ["3rd Person", "BEV", "Manual"], initial_value="3rd Person" + ) + button = self._viser_server.gui.add_button("Render Scene") # Frame step buttons. @gui_next_frame.on_click @@ -215,6 +202,7 @@ def _(_) -> None: @button.on_click def _(event: viser.GuiEvent) -> None: + nonlocal server_rendering client = event.client assert client is not None @@ -225,12 +213,24 @@ def _(event: viser.GuiEvent) -> None: for i in tqdm(range(scene.number_of_iterations)): gui_timestep.value = i - ego_view = get_ego_3rd_person_view_position(scene, i, initial_ego_state) - client.camera.position = ego_view.point_3d.array - client.camera.wxyz = ego_view.quaternion.array - images.append(client.get_render(height=720, width=1280)) - - client.send_file_download("image.mp4", iio.imwrite("", images, extension=".mp4", fps=30)) + if render_view.value == "BEV": + ego_view = get_ego_bev_view_position(scene, i, initial_ego_state) + client.camera.position = ego_view.point_3d.array + client.camera.wxyz = ego_view.quaternion.array + elif render_view.value == "3rd Person": + ego_view = get_ego_3rd_person_view_position(scene, i, initial_ego_state) + client.camera.position = ego_view.point_3d.array + client.camera.wxyz = ego_view.quaternion.array + images.append(client.get_render(height=1080, width=1920)) + format = render_format.value + buffer = io.BytesIO() + if format == "gif": + iio.imwrite(buffer, images, extension=".gif", loop=False) + elif format == "mp4": + iio.imwrite(buffer, images, extension=".mp4", fps=20) + content = buffer.getvalue() + scene_name = f"{scene.log_metadata.split}_{scene.uuid}" + client.send_file_download(f"{scene_name}.{format}", content, save_immediately=True) server_rendering = False camera_frustum_handles: Dict[PinholeCameraType, viser.CameraFrustumHandle] = {} @@ -282,6 +282,8 @@ def _(event: viser.GuiEvent) -> None: if gui_playing.value and not server_rendering: gui_timestep.value = (gui_timestep.value + 1) % num_frames + else: + time.sleep(0.1) self._viser_server.flush() self.next() diff --git a/test_viser.py b/test_viser.py index d5375bd7..3a1c887e 100644 --- a/test_viser.py +++ b/test_viser.py @@ -1,21 +1,23 @@ from py123d.common.multithreading.worker_sequential import Sequential from py123d.datatypes.scene.arrow.arrow_scene_builder import ArrowSceneBuilder from py123d.datatypes.scene.scene_filter import SceneFilter +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType from py123d.visualization.viser.viser_viewer import ViserViewer -# from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType - if __name__ == "__main__": - splits = ["nuscenes-mini_val", "nuscenes-mini_train"] + # splits = ["kitti360_train"] + # splits = ["nuscenes-mini_val", "nuscenes-mini_train"] # splits = ["nuplan-mini_test", "nuplan-mini_train", "nuplan-mini_val"] # splits = ["nuplan_private_test"] # splits = 
["carla_test"] - # splits = ["wopd_val"] + splits = ["wopd_val"] # splits = ["av2-sensor_train"] # splits = ["pandaset_test", "pandaset_val", "pandaset_train"] # log_names = ["2021.08.24.13.12.55_veh-45_00386_00472"] + # log_names = ["2013_05_28_drive_0000_sync"] + # log_names = ["2013_05_28_drive_0000_sync"] log_names = None - + # scene_uuids = ["60a37beb-6df4-5413-b753-9280125020cf"] scene_uuids = None scene_filter = SceneFilter( @@ -24,9 +26,9 @@ scene_uuids=scene_uuids, duration_s=None, history_s=0.0, - timestamp_threshold_s=10.0, + timestamp_threshold_s=None, shuffle=True, - # camera_types=[PinholeCameraType.CAM_F0], + pinhole_camera_types=[PinholeCameraType.PCAM_F0], ) scene_builder = ArrowSceneBuilder() worker = Sequential()