diff --git a/.github/workflows/deploy-docs.yaml b/.github/workflows/deploy-docs.yaml
new file mode 100644
index 00000000..aff1e7dc
--- /dev/null
+++ b/.github/workflows/deploy-docs.yaml
@@ -0,0 +1,52 @@
+name: docs
+
+on:
+  push:
+    branches:
+      - main # Change this to your branch name (e.g., docs, dev, etc.)
+  workflow_dispatch: # Allows manual triggering
+
+permissions:
+  contents: read
+  pages: write
+  id-token: write
+
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        run: |
+          pip install uv
+          uv pip install --system -e ".[docs]"
+
+      - name: Build Sphinx documentation
+        run: |
+          sphinx-build docs docs/_build -b dirhtml
+
+      - name: Upload artifact
+        uses: actions/upload-pages-artifact@v3
+        with:
+          path: 'docs/_build'
+
+  deploy:
+    environment:
+      name: github-pages
+      url: ${{ steps.deployment.outputs.page_url }}
+    runs-on: ubuntu-latest
+    needs: build
+    steps:
+      - name: Deploy to GitHub Pages
+        id: deployment
+        uses: actions/deploy-pages@v4
diff --git a/.gitignore b/.gitignore
index bdc0a21f..971a12d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -23,10 +23,12 @@
 *.csv
 *.log
 *.mp4
-
+exp/
 # Sphinx documentation
 docs/_build/
 docs/build/
 _build/
 .doctrees/
 
+
+jbwang_*
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 983998e5..18eca0ba 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -3,7 +3,7 @@ repos:
     rev: v5.0.0
     hooks:
       - id: check-added-large-files # prevents giant files from being committed.
-        args: ['--maxkb=1024']
+        args: ['--maxkb=5120']
       - id: check-case-conflict # checks for files that would conflict in case-insensitive filesystems.
       - id: check-merge-conflict # checks for files that contain merge conflict strings.
      - id: check-yaml # checks yaml files for parseable syntax.
@@ -24,6 +24,7 @@ repos:
       - id: isort
         name: isort (python)
         args: ["--profile", "black", "--filter-files", '--line-length', '120']
+        exclude: __init__.py$
   - repo: https://github.com/ambv/black
     rev: 25.1.0
     hooks:
@@ -36,6 +37,7 @@ repos:
     hooks:
       - id: autoflake
         args: ['--in-place', '--remove-all-unused-imports', '--remove-unused-variable']
+        exclude: __init__.py$
         language_version: python3.12
   - repo: https://github.com/pycqa/flake8
     rev: 7.3.0
diff --git a/README.md b/README.md
index ee04aedf..46440e04 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,8 @@
-# d123
+<div align="center">
+  <picture>
+    <source media="(prefers-color-scheme: dark)" srcset="assets/logo/123D_logo_transparent_white.svg">
+    <img alt="Logo" src="assets/logo/123D_logo_transparent_black.svg">
+  </picture>
+
+  <h1>123D: One Library for 2D and 3D Driving Datasets</h1>
+</div>
diff --git a/assets/logo/123D_logo_transparent_black.svg b/assets/logo/123D_logo_transparent_black.svg
new file mode 100644
index 00000000..b9214f93
--- /dev/null
+++ b/assets/logo/123D_logo_transparent_black.svg
@@ -0,0 +1,97 @@
[97 lines of SVG markup not reproduced here]
diff --git a/assets/logo/123D_logo_transparent_white.svg b/assets/logo/123D_logo_transparent_white.svg
new file mode 100644
index 00000000..f98386fa
--- /dev/null
+++ b/assets/logo/123D_logo_transparent_white.svg
@@ -0,0 +1,115 @@
[115 lines of SVG markup not reproduced here]
diff --git a/d123/__init__.py b/d123/__init__.py
deleted file mode 100644
index b1a19e32..00000000
--- a/d123/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-__version__ = "0.0.5"
diff --git a/d123/common/datatypes/detection/detection_types.py b/d123/common/datatypes/detection/detection_types.py
deleted file mode 100644
index a22a614c..00000000
--- a/d123/common/datatypes/detection/detection_types.py
+++ /dev/null
@@ -1,41 +0,0 @@
-from __future__ import annotations
-
-from d123.common.utils.enums import SerialIntEnum
-
-
-class DetectionType(SerialIntEnum):
-    """
-    Enum for agents in d123.
-    """
-
-    # TODO:
-    # - Add detection types compatible with other datasets
-    # - Add finer detection types (e.g. bicycle, motorcycle) and add generic types (e.g. two-wheeled vehicle) for general use.
-
-    # NOTE: Current types strongly aligned with nuPlan.
-
-    VEHICLE = 0  # Includes all four or more wheeled vehicles, as well as trailers.
-    BICYCLE = 1  # Includes bicycles, motorcycles and tricycles.
-    PEDESTRIAN = 2  # Pedestrians, incl. strollers and wheelchairs.
-
-    TRAFFIC_CONE = 3  # Cones that are temporarily placed to control the flow of traffic.
-    BARRIER = 4  # Solid barriers that can be either temporary or permanent.
-    CZONE_SIGN = 5  # Temporary signs that indicate construction zones.
-    GENERIC_OBJECT = 6  # Animals, debris, pushable/pullable objects, permanent poles.
-
-    EGO = 7
-    SIGN = 8  # TODO: Remove or extend
-
-
-DYNAMIC_DETECTION_TYPES: set[DetectionType] = {
-    DetectionType.VEHICLE,
-    DetectionType.BICYCLE,
-    DetectionType.PEDESTRIAN,
-}
-
-STATIC_DETECTION_TYPES: set[DetectionType] = {
-    DetectionType.TRAFFIC_CONE,
-    DetectionType.BARRIER,
-    DetectionType.CZONE_SIGN,
-    DetectionType.GENERIC_OBJECT,
-}
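Editor's note: the enum above is the main switch point for downstream filtering. A minimal sketch of the intended dynamic/static split, assuming the deleted d123 modules are still importable; the `detected` list is a hypothetical input:

from d123.common.datatypes.detection.detection_types import (
    DYNAMIC_DETECTION_TYPES,
    DetectionType,
)

# Hypothetical per-box types, e.g. collected from BoxDetection metadata.
detected = [DetectionType.VEHICLE, DetectionType.BARRIER, DetectionType.PEDESTRIAN]

# Keep only movable agents; static obstacles are filtered out.
dynamic_only = [t for t in detected if t in DYNAMIC_DETECTION_TYPES]
assert dynamic_only == [DetectionType.VEHICLE, DetectionType.PEDESTRIAN]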
diff --git a/d123/common/datatypes/recording/abstract_recording.py b/d123/common/datatypes/recording/abstract_recording.py
deleted file mode 100644
index 4554d10f..00000000
--- a/d123/common/datatypes/recording/abstract_recording.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import abc
-from dataclasses import dataclass
-from enum import IntEnum
-
-
-class RecordingType(IntEnum):
-    DETECTION = 0
-    # SENSOR = 1 NOTE: not used yet, but reserved for future use
-
-
-@dataclass
-class Recording(abc.ABC):
-    """
-    Abstract observation container.
-    """
-
-    # @classmethod
-    # @abc.abstractmethod
-    # def observation_type(cls) -> ObservationType:
-    #     """
-    #     Returns detection type of the observation.
-    #     """
-    #     raise NotImplementedError
diff --git a/d123/common/datatypes/recording/detection_recording.py b/d123/common/datatypes/recording/detection_recording.py
deleted file mode 100644
index 19b68827..00000000
--- a/d123/common/datatypes/recording/detection_recording.py
+++ /dev/null
@@ -1,15 +0,0 @@
-from dataclasses import dataclass
-
-from d123.common.datatypes.detection.detection import BoxDetectionWrapper, TrafficLightDetectionWrapper
-from d123.common.datatypes.recording.abstract_recording import Recording
-
-# TODO: Reconsider if these "wrapper" datatypes are necessary.
-# Might be needed to package multiple datatypes into a single object (e.g. as planner input)
-# On the other hand, an enum based dictionary might be more flexible.
-
-
-@dataclass
-class DetectionRecording(Recording):
-
-    box_detections: BoxDetectionWrapper
-    traffic_light_detections: TrafficLightDetectionWrapper
diff --git a/d123/common/datatypes/sensor/camera.py b/d123/common/datatypes/sensor/camera.py
deleted file mode 100644
index 56fe6f07..00000000
--- a/d123/common/datatypes/sensor/camera.py
+++ /dev/null
@@ -1,117 +0,0 @@
-from __future__ import annotations
-
-import json
-from dataclasses import dataclass
-from typing import Any, Dict
-
-import numpy as np
-import numpy.typing as npt
-
-from d123.common.utils.enums import SerialIntEnum
-
-
-class CameraType(SerialIntEnum):
-    """
-    Enum for cameras in d123.
-    """
-
-    CAM_F0 = 0
-    CAM_B0 = 1
-    CAM_L0 = 2
-    CAM_L1 = 3
-    CAM_L2 = 4
-    CAM_R0 = 5
-    CAM_R1 = 6
-    CAM_R2 = 7
-    CAM_STEREO_L = 8
-    CAM_STEREO_R = 9
-
-
-@dataclass
-class CameraMetadata:
-
-    camera_type: CameraType
-    width: int
-    height: int
-    intrinsic: npt.NDArray[np.float64]  # 3x3 matrix # TODO: don't store matrix but values.
-    distortion: npt.NDArray[np.float64]  # 5x1 vector # TODO: don't store matrix but values.
-
-    def to_dict(self) -> Dict[str, Any]:
-        # TODO: remove None types. Only a placeholder for now.
-        return {
-            "camera_type": int(self.camera_type),
-            "width": self.width,
-            "height": self.height,
-            "intrinsic": self.intrinsic.tolist() if self.intrinsic is not None else None,
-            "distortion": self.distortion.tolist() if self.distortion is not None else None,
-        }
-
-    @classmethod
-    def from_dict(cls, json_dict: Dict[str, Any]) -> CameraMetadata:
-        # TODO: remove None types. Only a placeholder for now.
-        return cls(
-            camera_type=CameraType(json_dict["camera_type"]),
-            width=json_dict["width"],
-            height=json_dict["height"],
-            intrinsic=np.array(json_dict["intrinsic"]) if json_dict["intrinsic"] is not None else None,
-            distortion=np.array(json_dict["distortion"]) if json_dict["distortion"] is not None else None,
-        )
-
-    @property
-    def aspect_ratio(self) -> float:
-        return self.width / self.height
-
-    @property
-    def fov_x(self) -> float:
-        """
-        Calculates the horizontal field of view (FOV) in radians.
-        """
-        fx = self.intrinsic[0, 0]
-        fov_x_rad = 2 * np.arctan(self.width / (2 * fx))
-        return fov_x_rad
-
-    @property
-    def fov_y(self) -> float:
-        """
-        Calculates the vertical field of view (FOV) in radians.
-        """
-        fy = self.intrinsic[1, 1]
-        fov_y_rad = 2 * np.arctan(self.height / (2 * fy))
-        return fov_y_rad
-
-
-def camera_metadata_dict_to_json(camera_metadata: Dict[CameraType, CameraMetadata]) -> str:
-    """
-    Converts a dictionary of CameraMetadata to a JSON string.
-    :param camera_metadata: Dictionary of CameraMetadata.
-    :return: JSON string.
-    """
-    camera_metadata_dict = {
-        camera_type.serialize(): metadata.to_dict() for camera_type, metadata in camera_metadata.items()
-    }
-    return json.dumps(camera_metadata_dict)
-
-
-def camera_metadata_dict_from_json(json_str: str) -> Dict[CameraType, CameraMetadata]:
-    """
-    Converts a JSON string back to a dictionary of CameraMetadata.
-    :param json_str: JSON string.
-    :return: Dictionary of CameraMetadata.
-    """
-    camera_metadata_dict = json.loads(json_str)
-    return {
-        CameraType.deserialize(camera_type): CameraMetadata.from_dict(metadata)
-        for camera_type, metadata in camera_metadata_dict.items()
-    }
-
-
-@dataclass
-class Camera:
-
-    metadata: CameraMetadata
-    image: npt.NDArray[np.uint8]
-    extrinsic: npt.NDArray[np.float64]  # 4x4 matrix
-
-    def get_view_matrix(self) -> np.ndarray:
-        # TODO: Compute the view matrix based on the camera's position and orientation
-        pass
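Editor's note: the fov_x/fov_y properties above use the standard pinhole relation fov = 2 * arctan(size / (2 * focal length)). A worked check with made-up intrinsics (fx and width are illustrative, not from any dataset):

import numpy as np

# Made-up pinhole intrinsics: fx = 960 px for a 1920-px-wide image.
fx, width = 960.0, 1920
fov_x = 2 * np.arctan(width / (2 * fx))  # same formula as CameraMetadata.fov_x
assert np.isclose(np.degrees(fov_x), 90.0)  # arctan(1) = 45 deg, doubled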
diff --git a/d123/common/geometry/base.py b/d123/common/geometry/base.py
deleted file mode 100644
index 7fdec5cc..00000000
--- a/d123/common/geometry/base.py
+++ /dev/null
@@ -1,282 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass
-from enum import IntEnum
-from typing import Iterable
-
-import numpy as np
-import numpy.typing as npt
-import shapely.geometry as geom
-
-# from d123.common.geometry.transform.se3 import get_rotation_matrix
-from d123.common.utils.enums import classproperty
-
-# TODO: Reconsider if 2D/3D or SE2/SE3 structure would be better hierarchical, e.g. inheritance or composition.
-
-
-class Point2DIndex(IntEnum):
-    X = 0
-    Y = 1
-
-    @classproperty
-    def XY(cls) -> slice:
-        return slice(cls.X, cls.Y + 1)
-
-
-@dataclass
-class Point2D:
-    """Class to represent 2D points."""
-
-    x: float  # [m] location
-    y: float  # [m] location
-    __slots__ = "x", "y"
-
-    @classmethod
-    def from_array(cls, array: npt.NDArray[np.float64]) -> Point2D:
-        assert array.ndim == 1
-        assert array.shape[0] == len(Point2DIndex)
-        return Point2D(array[Point2DIndex.X], array[Point2DIndex.Y])
-
-    @property
-    def array(self) -> npt.NDArray[np.float64]:
-        """
-        Convert point to array
-        :return: array containing [x, y]
-        """
-        array = np.zeros(len(Point2DIndex), dtype=np.float64)
-        array[Point2DIndex.X] = self.x
-        array[Point2DIndex.Y] = self.y
-        return array
-
-    @property
-    def shapely_point(self) -> geom.Point:
-        return geom.Point(self.x, self.y)
-
-    def __iter__(self) -> Iterable[float]:
-        """
-        :return: iterator of (x, y)
-        """
-        return iter((self.x, self.y))
-
-    def __hash__(self) -> int:
-        """Hash method"""
-        return hash((self.x, self.y))
-
-
-class StateSE2Index(IntEnum):
-    X = 0
-    Y = 1
-    YAW = 2
-
-    @classproperty
-    def XY(cls) -> slice:
-        return slice(cls.X, cls.Y + 1)
-
-
-@dataclass
-class StateSE2:
-    """Class to represent 2D poses (SE2)."""
-
-    x: float  # [m] location
-    y: float  # [m] location
-    yaw: float  # [rad] heading
-    __slots__ = "x", "y", "yaw"
-
-    @classmethod
-    def from_array(cls, array: npt.NDArray[np.float64]) -> StateSE2:
-        assert array.ndim == 1
-        assert array.shape[0] == len(StateSE2Index)
-        return StateSE2(array[StateSE2Index.X], array[StateSE2Index.Y], array[StateSE2Index.YAW])
-
-    @property
-    def array(self) -> npt.NDArray[np.float64]:
-        """
-        Convert state to array
-        :return: array containing [x, y, yaw]
-        """
-        array = np.zeros(len(StateSE2Index), dtype=np.float64)
-        array[StateSE2Index.X] = self.x
-        array[StateSE2Index.Y] = self.y
-        array[StateSE2Index.YAW] = self.yaw
-        return array
-
-    @property
-    def point_2d(self) -> Point2D:
-        """
-        Convert SE2 state to 2D point (drops heading)
-        :return: Point2D dataclass
-        """
-        return Point2D(self.x, self.y)
-
-    @property
-    def shapely_point(self) -> geom.Point:
-        return geom.Point(self.x, self.y)
-
-    def __iter__(self) -> Iterable[float]:
-        """
-        :return: iterator of (x, y, yaw)
-        """
-        return iter((self.x, self.y, self.yaw))
-
-    def __hash__(self) -> int:
-        """Hash method"""
-        return hash((self.x, self.y, self.yaw))
-
-
-class Point3DIndex(IntEnum):
-
-    X = 0
-    Y = 1
-    Z = 2
-
-    @classproperty
-    def XY(cls) -> slice:
-        return slice(cls.X, cls.Y + 1)
-
-
-@dataclass
-class Point3D:
-    """Class to represent 3D points."""
-
-    x: float  # [m] location
-    y: float  # [m] location
-    z: float  # [m] location
-    __slots__ = "x", "y", "z"
-
-    @classmethod
-    def from_array(cls, array: npt.NDArray[np.float64]) -> "Point3D":
-        assert array.ndim == 1, f"Array must be 1-dimensional, got shape {array.shape}"
-        assert array.shape[0] == len(
-            Point3DIndex
-        ), f"Array must have the same length as Point3DIndex, got shape {array.shape}"
-        return cls(array[Point3DIndex.X], array[Point3DIndex.Y], array[Point3DIndex.Z])
-
-    @property
-    def array(self) -> npt.NDArray[np.float64]:
-        """
-        Convert point to array
-        :return: array containing [x, y, z]
-        """
-        array = np.zeros(len(Point3DIndex), dtype=np.float64)
-        array[Point3DIndex.X] = self.x
-        array[Point3DIndex.Y] = self.y
-        array[Point3DIndex.Z] = self.z
-        return array
-
-    @property
-    def point_2d(self) -> Point2D:
-        return Point2D(self.x, self.y)
-
-    @property
-    def shapely_point(self) -> geom.Point:
-        return geom.Point(self.x, self.y, self.z)
-
-    def __iter__(self) -> Iterable[float]:
-        """
-        :return: iterator of (x, y, z)
-        """
-        return iter((self.x, self.y, self.z))
-
-    def __hash__(self) -> int:
-        """Hash method"""
-        return hash((self.x, self.y, self.z))
-
-
-class StateSE3Index(IntEnum):
-
-    X = 0
-    Y = 1
-    Z = 2
-    ROLL = 3
-    PITCH = 4
-    YAW = 5
-
-    @classproperty
-    def XY(cls) -> slice:
-        return slice(cls.X, cls.Y + 1)
-
-    @classproperty
-    def XYZ(cls) -> slice:
-        return slice(cls.X, cls.Z + 1)
-
-    @classproperty
-    def ROTATION_XYZ(cls) -> slice:
-        return slice(cls.ROLL, cls.YAW + 1)
-
-
-@dataclass
-class StateSE3:
-    """Class to represent 3D poses (SE3)."""
-
-    x: float  # [m] location
-    y: float  # [m] location
-    z: float  # [m] location
-    roll: float  # [rad] rotation
-    pitch: float  # [rad] rotation
-    yaw: float  # [rad] rotation
-    __slots__ = "x", "y", "z", "roll", "pitch", "yaw"
-
-    @classmethod
-    def from_array(cls, array: npt.NDArray[np.float64]) -> StateSE3:
-        assert array.ndim == 1
-        assert array.shape[0] == len(StateSE3Index)
-        return StateSE3(
-            array[StateSE3Index.X],
-            array[StateSE3Index.Y],
-            array[StateSE3Index.Z],
-            array[StateSE3Index.ROLL],
-            array[StateSE3Index.PITCH],
-            array[StateSE3Index.YAW],
-        )
-
-    @classmethod
-    def from_matrix(cls, array: npt.NDArray[np.float64]) -> StateSE3:
-        assert array.ndim == 2
-        assert array.shape == (4, 4)
-        translation = array[:3, 3]
-        rotation = array[:3, :3]
-        return StateSE3(
-            x=translation[0],
-            y=translation[1],
-            z=translation[2],
-            roll=np.arctan2(rotation[2, 1], rotation[2, 2]),
-            pitch=np.arctan2(-rotation[2, 0], np.sqrt(rotation[2, 1] ** 2 + rotation[2, 2] ** 2)),
-            yaw=np.arctan2(rotation[1, 0], rotation[0, 0]),
-        )
-
-    @property
-    def array(self) -> npt.NDArray[np.float64]:
-        array = np.zeros(len(StateSE3Index), dtype=np.float64)
-        array[StateSE3Index.X] = self.x
-        array[StateSE3Index.Y] = self.y
-        array[StateSE3Index.Z] = self.z
-        array[StateSE3Index.ROLL] = self.roll
-        array[StateSE3Index.PITCH] = self.pitch
-        array[StateSE3Index.YAW] = self.yaw
-        return array
-
-    # @property
-    # def matrix(self) -> npt.NDArray[np.float64]:
-    #     """Convert SE3 state to 4x4 transformation matrix."""
-    #     R = get_rotation_matrix(self)
-    #     translation = np.array([self.x, self.y, self.z], dtype=np.float64)
-    #     matrix = np.eye(4, dtype=np.float64)
-    #     matrix[:3, :3] = R
-    #     matrix[:3, 3] = translation
-    #     return matrix
-
-    @property
-    def state_se2(self) -> StateSE2:
-        return StateSE2(self.x, self.y, self.yaw)
-
-    @property
-    def point_3d(self) -> Point3D:
-        return Point3D(self.x, self.y, self.z)
-
-    @property
-    def point_2d(self) -> Point2D:
-        return Point2D(self.x, self.y)
-
-    @property
-    def shapely_point(self) -> geom.Point:
-        return geom.Point(self.x, self.y, self.z)
diff --git a/d123/common/geometry/base_index.py b/d123/common/geometry/base_index.py
deleted file mode 100644
index cb258618..00000000
--- a/d123/common/geometry/base_index.py
+++ /dev/null
@@ -1 +0,0 @@
-# TODO: Move base index here to avoid circular imports.
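Editor's note: StateSE3.from_matrix above inverts the yaw-pitch-roll composition R = R_z @ R_y @ R_x, matching get_rotation_matrix from the transform module deleted further below. A minimal roundtrip sketch under that assumption; the 4x4 matrix is assembled by hand since the `matrix` property is commented out:

import numpy as np

from d123.common.geometry.base import StateSE3
from d123.common.geometry.transform.se3 import get_rotation_matrix

pose = StateSE3(x=1.0, y=2.0, z=0.5, roll=0.1, pitch=-0.2, yaw=0.3)

# Assemble a homogeneous transform from rotation and translation.
matrix = np.eye(4)
matrix[:3, :3] = get_rotation_matrix(pose)
matrix[:3, 3] = [pose.x, pose.y, pose.z]

# from_matrix recovers the pose exactly (pitch within [-pi/2, pi/2]).
recovered = StateSE3.from_matrix(matrix)
assert np.allclose(pose.array, recovered.array)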
diff --git a/d123/common/geometry/bounding_box/bounding_box.py b/d123/common/geometry/bounding_box/bounding_box.py
deleted file mode 100644
index 625d24d9..00000000
--- a/d123/common/geometry/bounding_box/bounding_box.py
+++ /dev/null
@@ -1,95 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass
-from functools import cached_property
-
-import numpy as np
-import numpy.typing as npt
-import shapely
-
-from d123.common.geometry.base import StateSE2, StateSE3
-from d123.common.geometry.bounding_box.bounding_box_index import BoundingBoxSE2Index, BoundingBoxSE3Index
-from d123.common.geometry.bounding_box.utils import bbse2_array_to_corners_array
-
-# TODO: Reconsider naming SE2 and SE3 hierarchies. E.g. would inheritance be a better approach?
-
-
-@dataclass
-class BoundingBoxSE2:
-
-    center: StateSE2
-    length: float
-    width: float
-
-    @cached_property
-    def shapely_polygon(self) -> shapely.geometry.Polygon:
-        return shapely.geometry.Polygon(self.corners_array)
-
-    @property
-    def array(self) -> npt.NDArray[np.float64]:
-        array = np.zeros(len(BoundingBoxSE2Index), dtype=np.float64)
-        array[BoundingBoxSE2Index.X] = self.center.x
-        array[BoundingBoxSE2Index.Y] = self.center.y
-        array[BoundingBoxSE2Index.YAW] = self.center.yaw
-        array[BoundingBoxSE2Index.LENGTH] = self.length
-        array[BoundingBoxSE2Index.WIDTH] = self.width
-        return array
-
-    @property
-    def bounding_box_se2(self) -> BoundingBoxSE2:
-        return self
-
-    @property
-    def corners_array(self) -> npt.NDArray[np.float64]:
-        return bbse2_array_to_corners_array(self.array)
-
-
-@dataclass
-class BoundingBoxSE3:
-
-    center: StateSE3
-    length: float
-    width: float
-    height: float
-
-    @classmethod
-    def from_array(cls, array: npt.NDArray[np.float64]) -> BoundingBoxSE3:
-        return cls(
-            center=StateSE3.from_array(array[BoundingBoxSE3Index.STATE_SE3]),
-            length=array[BoundingBoxSE3Index.LENGTH],
-            width=array[BoundingBoxSE3Index.WIDTH],
-            height=array[BoundingBoxSE3Index.HEIGHT],
-        )
-
-    @property
-    def bounding_box_se2(self) -> BoundingBoxSE2:
-        return BoundingBoxSE2(
-            center=StateSE2(self.center.x, self.center.y, self.center.yaw),
-            length=self.length,
-            width=self.width,
-        )
-
-    @property
-    def center_se3(self) -> StateSE3:
-        return self.center
-
-    @property
-    def array(self) -> npt.NDArray[np.float64]:
-        array = np.zeros(len(BoundingBoxSE3Index), dtype=np.float64)
-        array[BoundingBoxSE3Index.X] = self.center.x
-        array[BoundingBoxSE3Index.Y] = self.center.y
-        array[BoundingBoxSE3Index.Z] = self.center.z
-        array[BoundingBoxSE3Index.ROLL] = self.center.roll
-        array[BoundingBoxSE3Index.PITCH] = self.center.pitch
-        array[BoundingBoxSE3Index.YAW] = self.center.yaw
-        array[BoundingBoxSE3Index.LENGTH] = self.length
-        array[BoundingBoxSE3Index.WIDTH] = self.width
-        array[BoundingBoxSE3Index.HEIGHT] = self.height
-        return array
-
-    @property
-    def shapely_polygon(self) -> shapely.geometry.Polygon:
-        return self.bounding_box_se2.shapely_polygon
-
-
-BoundingBox = BoundingBoxSE2 | BoundingBoxSE3
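Editor's note: a small sketch of the SE2 box container above, assuming the deleted modules are importable; the corner order follows Corners2DIndex (front-left, front-right, back-right, back-left):

import numpy as np

from d123.common.geometry.base import StateSE2
from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2

# Axis-aligned 4 m x 2 m box at the origin.
box = BoundingBoxSE2(center=StateSE2(0.0, 0.0, 0.0), length=4.0, width=2.0)

assert np.allclose(box.corners_array, [[2, 1], [2, -1], [-2, -1], [-2, 1]])
assert np.isclose(box.shapely_polygon.area, 8.0)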
diff --git a/d123/common/geometry/bounding_box/bounding_box_index.py b/d123/common/geometry/bounding_box/bounding_box_index.py
deleted file mode 100644
index c282b07e..00000000
--- a/d123/common/geometry/bounding_box/bounding_box_index.py
+++ /dev/null
@@ -1,69 +0,0 @@
-from enum import IntEnum
-
-from d123.common.utils.enums import classproperty
-
-
-class BoundingBoxSE2Index(IntEnum):
-    X = 0
-    Y = 1
-    YAW = 2
-    LENGTH = 3
-    WIDTH = 4
-
-    @classproperty
-    def XY(cls) -> slice:
-        return slice(cls.X, cls.Y + 1)
-
-    @classproperty
-    def SE2(cls) -> slice:
-        return slice(cls.X, cls.YAW + 1)
-
-
-class Corners2DIndex(IntEnum):
-    FRONT_LEFT = 0
-    FRONT_RIGHT = 1
-    BACK_RIGHT = 2
-    BACK_LEFT = 3
-
-
-class BoundingBoxSE3Index(IntEnum):
-    X = 0
-    Y = 1
-    Z = 2
-    ROLL = 3
-    PITCH = 4
-    YAW = 5
-    LENGTH = 6
-    WIDTH = 7
-    HEIGHT = 8
-
-    @classproperty
-    def XYZ(cls) -> slice:
-        return slice(cls.X, cls.Z + 1)
-
-    @classproperty
-    def STATE_SE3(cls) -> slice:
-        return slice(cls.X, cls.YAW + 1)
-
-    @classproperty
-    def ROTATION_XYZ(cls) -> slice:
-        return slice(cls.ROLL, cls.YAW + 1)
-
-
-class Corners3DIndex(IntEnum):
-    FRONT_LEFT_BOTTOM = 0
-    FRONT_RIGHT_BOTTOM = 1
-    BACK_RIGHT_BOTTOM = 2
-    BACK_LEFT_BOTTOM = 3
-    FRONT_LEFT_TOP = 4
-    FRONT_RIGHT_TOP = 5
-    BACK_RIGHT_TOP = 6
-    BACK_LEFT_TOP = 7
-
-    @classproperty
-    def BOTTOM(cls) -> slice:
-        return slice(cls.FRONT_LEFT_BOTTOM, cls.BACK_LEFT_BOTTOM + 1)
-
-    @classproperty
-    def TOP(cls) -> slice:
-        return slice(cls.FRONT_LEFT_TOP, cls.BACK_LEFT_TOP + 1)
diff --git a/d123/common/geometry/bounding_box/utils.py b/d123/common/geometry/bounding_box/utils.py
deleted file mode 100644
index df205565..00000000
--- a/d123/common/geometry/bounding_box/utils.py
+++ /dev/null
@@ -1,64 +0,0 @@
-import numpy as np
-import numpy.typing as npt
-import shapely
-
-from d123.common.geometry.base import Point2DIndex
-from d123.common.geometry.bounding_box.bounding_box_index import BoundingBoxSE2Index, Corners2DIndex
-
-
-def bbse2_array_to_corners_array(bbse2: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
-    """
-    Converts an array of BoundingBoxSE2 parameters to a corner-coordinates array.
-    :param bbse2: array of bounding boxes with BoundingBoxSE2Index layout in the last dim.
-    :return: corners array of shape (n, 4, 2) where n is the number of bounding boxes.
-    """
-    assert bbse2.shape[-1] == len(BoundingBoxSE2Index)
-
-    ndim_one: bool = bbse2.ndim == 1
-    if ndim_one:
-        bbse2 = bbse2[None, :]
-
-    corners_array = np.zeros((*bbse2.shape[:-1], len(Corners2DIndex), len(Point2DIndex)), dtype=np.float64)
-
-    centers = bbse2[..., BoundingBoxSE2Index.XY]
-    yaws = bbse2[..., BoundingBoxSE2Index.YAW]
-    half_length = bbse2[..., BoundingBoxSE2Index.LENGTH] / 2.0
-    half_width = bbse2[..., BoundingBoxSE2Index.WIDTH] / 2.0
-
-    corners_array[..., Corners2DIndex.FRONT_LEFT, :] = translate_along_yaw_array(centers, yaws, half_length, half_width)
-    corners_array[..., Corners2DIndex.FRONT_RIGHT, :] = translate_along_yaw_array(
-        centers, yaws, half_length, -half_width
-    )
-    corners_array[..., Corners2DIndex.BACK_RIGHT, :] = translate_along_yaw_array(
-        centers, yaws, -half_length, -half_width
-    )
-    corners_array[..., Corners2DIndex.BACK_LEFT, :] = translate_along_yaw_array(centers, yaws, -half_length, half_width)
-
-    return corners_array.squeeze(axis=0) if ndim_one else corners_array
-
-
-def corners_array_to_polygon_array(corners_array: npt.NDArray[np.float64]) -> npt.NDArray[np.object_]:
-    polygons = shapely.creation.polygons(corners_array)
-    return polygons
-
-
-def bbse2_array_to_polygon_array(bbse2: npt.NDArray[np.float64]) -> npt.NDArray[np.object_]:
-    return corners_array_to_polygon_array(bbse2_array_to_corners_array(bbse2))
-
-
-def translate_along_yaw_array(
-    points_2d: npt.NDArray[np.float64],
-    headings: npt.NDArray[np.float64],
-    lon: npt.NDArray[np.float64],
-    lat: npt.NDArray[np.float64],
-) -> npt.NDArray[np.float64]:
-    assert points_2d.shape[-1] == len(Point2DIndex)
-    half_pi = np.pi / 2.0
-    translation: npt.NDArray[np.float64] = np.stack(
-        [
-            (lat * np.cos(headings + half_pi)) + (lon * np.cos(headings)),
-            (lat * np.sin(headings + half_pi)) + (lon * np.sin(headings)),
-        ],
-        axis=-1,
-    )
-    return points_2d + translation
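Editor's note: translate_along_yaw_array is the workhorse behind the corner conversion; `lon` moves along the heading and `lat` to the left of it. A worked check at a 90-degree heading (the values are illustrative):

import numpy as np

from d123.common.geometry.bounding_box.utils import translate_along_yaw_array

# A point at the origin heading +90 degrees: +lon moves along +y, +lat toward -x.
moved = translate_along_yaw_array(
    points_2d=np.array([0.0, 0.0]),
    headings=np.array(np.pi / 2),
    lon=np.array(1.0),
    lat=np.array(0.5),
)
assert np.allclose(moved, [-0.5, 1.0])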
diff --git a/d123/common/geometry/line/polylines.py b/d123/common/geometry/line/polylines.py
deleted file mode 100644
index 37b799b5..00000000
--- a/d123/common/geometry/line/polylines.py
+++ /dev/null
@@ -1,196 +0,0 @@
-from __future__ import annotations
-
-from dataclasses import dataclass
-from typing import List, Optional, Union
-
-import numpy as np
-import numpy.typing as npt
-import shapely.creation as geom_creation
-import shapely.geometry as geom
-from scipy.interpolate import interp1d
-
-from d123.common.geometry.base import Point2D, Point2DIndex, Point3D, Point3DIndex, StateSE2, StateSE2Index
-from d123.common.geometry.constants import DEFAULT_Z
-from d123.common.geometry.line.helper import get_linestring_yaws, get_path_progress
-from d123.common.geometry.utils import normalize_angle
-
-# TODO: Implement PolylineSE3
-# TODO: Benchmark interpolation performance and reconsider reliance on LineString
-
-
-@dataclass
-class Polyline2D:
-
-    linestring: geom.LineString
-
-    @classmethod
-    def from_linestring(cls, linestring: geom.LineString) -> Polyline2D:
-        if linestring.has_z:
-            linestring_ = geom_creation.linestrings(*linestring.xy)
-        else:
-            linestring_ = linestring
-        return Polyline2D(linestring_)
-
-    @classmethod
-    def from_array(cls, polyline_array: npt.NDArray[np.float32]) -> Polyline2D:
-        assert polyline_array.ndim == 2
-        linestring: Optional[geom.LineString] = None
-        if polyline_array.shape[-1] == len(Point2DIndex):
-            linestring = geom_creation.linestrings(polyline_array)
-        elif polyline_array.shape[-1] == len(Point3DIndex):
-            linestring = geom_creation.linestrings(polyline_array[:, Point3DIndex.XY])
-        else:
-            raise ValueError("Array must have shape (N, 2) or (N, 3) for Point2D or Point3D respectively.")
-        return Polyline2D(linestring)
-
-    @property
-    def array(self) -> npt.NDArray[np.float64]:
-        return np.array(self.linestring.coords, dtype=np.float64)
-
-    @property
-    def polyline_se2(self) -> PolylineSE2:
-        return PolylineSE2.from_linestring(self.linestring)
-
-    @property
-    def length(self) -> float:
-        return self.linestring.length
-
-    def interpolate(self, distances: Union[float, npt.NDArray[np.float64]]) -> Union[Point2D, npt.NDArray[np.float64]]:
-        if isinstance(distances, float) or isinstance(distances, int):
-            point = self.linestring.interpolate(distances)
-            return Point2D(point.x, point.y)
-        else:
-            distances = np.asarray(distances, dtype=np.float64)
-            points = self.linestring.interpolate(distances)
-            return np.array([[p.x, p.y] for p in points], dtype=np.float64)
-
-    def project(self, point: Union[Point2D, npt.NDArray[np.float64]]) -> Union[float, npt.NDArray[np.float64]]:
-        if isinstance(point, Point2D):
-            point_ = point.shapely_point
-        else:
-            point_ = geom_creation.points(np.array(point, dtype=np.float64))
-        return self.linestring.project(point_)
-
-
-@dataclass
-class PolylineSE2:
-
-    se2_array: npt.NDArray[np.float64]
-    linestring: Optional[geom.LineString] = None
-
-    _progress: Optional[npt.NDArray[np.float64]] = None
-    _interpolator: Optional[interp1d] = None
-
-    def __post_init__(self):
-        assert self.se2_array is not None
-
-        if self.linestring is None:
-            self.linestring = geom_creation.linestrings(self.se2_array[..., StateSE2Index.XY])
-
-        self.se2_array[:, StateSE2Index.YAW] = np.unwrap(self.se2_array[:, StateSE2Index.YAW], axis=0)
-        self._progress = get_path_progress(self.se2_array)
-        self._interpolator = interp1d(self._progress, self.se2_array, axis=0, bounds_error=False, fill_value=0.0)
-
-    @classmethod
-    def from_linestring(cls, linestring: geom.LineString) -> PolylineSE2:
-        points_2d = np.array(linestring.coords, dtype=np.float64)[..., StateSE2Index.XY]
-        se2_array = np.zeros((len(points_2d), len(StateSE2Index)), dtype=np.float64)
-        se2_array[:, StateSE2Index.XY] = points_2d
-        se2_array[:, StateSE2Index.YAW] = get_linestring_yaws(linestring)
-        return PolylineSE2(se2_array, linestring)
-
-    @classmethod
-    def from_array(cls, polyline_array: npt.NDArray[np.float32]) -> PolylineSE2:
-        assert polyline_array.ndim == 2
-        if polyline_array.shape[-1] == len(Point2DIndex):
-            se2_array = np.zeros((len(polyline_array), len(StateSE2Index)), dtype=np.float64)
-            se2_array[:, StateSE2Index.XY] = polyline_array
-            se2_array[:, StateSE2Index.YAW] = get_linestring_yaws(geom_creation.linestrings(*polyline_array.T))
-        elif polyline_array.shape[-1] == len(StateSE2Index):
-            se2_array = np.array(polyline_array, dtype=np.float64)
-        else:
-            raise ValueError
-        return PolylineSE2(se2_array)
-
-    @classmethod
-    def from_discrete_se2(cls, discrete_se2: List[StateSE2]) -> PolylineSE2:
-        return PolylineSE2(np.array([se2.array for se2 in discrete_se2], dtype=np.float64))
-
-    @property
-    def length(self) -> float:
-        return float(self._progress[-1])
-
-    def interpolate(self, distances: Union[float, npt.NDArray[np.float64]]) -> Union[StateSE2, npt.NDArray[np.float64]]:
-        clipped_distances = np.clip(distances, 1e-8, self.length)
-        interpolated_se2_array = self._interpolator(clipped_distances)
-        interpolated_se2_array[..., StateSE2Index.YAW] = normalize_angle(interpolated_se2_array[..., StateSE2Index.YAW])
-
-        if clipped_distances.ndim == 0:
-            return StateSE2(*interpolated_se2_array)
-        else:
-            return interpolated_se2_array
-
-    def project(
-        self, point: Union[geom.Point, Point2D, npt.NDArray[np.float64]]
-    ) -> Union[float, npt.NDArray[np.float64]]:
-        if isinstance(point, Point2D):
-            point_ = geom.Point(point.x, point.y)
-        elif isinstance(point, np.ndarray) and point.shape[-1] == 2:
-            point_ = geom_creation.points(point)
-        elif isinstance(point, geom.Point):
-            point_ = point
-        else:
-            raise ValueError("Point must be a Point2D, geom.Point, or a 2D numpy array.")
-
-        return self.linestring.project(point_)
-
-
-@dataclass
-class Polyline3D:
-
-    linestring: geom.LineString
-
-    @classmethod
-    def from_linestring(cls, linestring: geom.LineString) -> Polyline3D:
-        return (
-            Polyline3D(linestring)
-            if linestring.has_z
-            else Polyline3D(geom_creation.linestrings(*linestring.xy, z=DEFAULT_Z))
-        )
-
-    @classmethod
-    def from_array(cls, array: npt.NDArray[np.float64]) -> Polyline3D:
-        assert array.ndim == 2 and array.shape[1] == 3, "Array must be 2D with shape (N, 3)"
-        linestring = geom_creation.linestrings(*array.T)
-        return Polyline3D(linestring)
-
-    @property
-    def polyline_2d(self) -> Polyline2D:
-        return Polyline2D(geom_creation.linestrings(*self.linestring.xy))
-
-    @property
-    def polyline_se2(self) -> PolylineSE2:
-        return PolylineSE2.from_linestring(self.linestring)
-
-    @property
-    def array(self) -> npt.NDArray[np.float64]:
-        return np.array(self.linestring.coords, dtype=np.float64)
-
-    @property
-    def length(self) -> float:
-        return self.linestring.length
-
-    def interpolate(self, distances: Union[float, npt.NDArray[np.float64]]) -> Union[Point3D, npt.NDArray[np.float64]]:
-        if isinstance(distances, float) or isinstance(distances, int):
-            point = self.linestring.interpolate(distances)
-            return Point3D(point.x, point.y, point.z)
-        else:
-            distances = np.asarray(distances, dtype=np.float64)
-            points = self.linestring.interpolate(distances)
-            return np.array([[p.x, p.y, p.z] for p in points], dtype=np.float64)
-
-
-@dataclass
-class PolylineSE3:
-    # TODO: implement this class
-    pass
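Editor's note: PolylineSE2 precomputes arc-length progress and interpolates full (x, y, yaw) states. A small sketch, assuming the deleted line helpers (get_linestring_yaws, get_path_progress) are importable alongside the module above:

import numpy as np

from d123.common.geometry.line.polylines import PolylineSE2

# A straight 10 m path along +x; yaw is recovered from the linestring segments.
xy = np.stack([np.linspace(0.0, 10.0, 11), np.zeros(11)], axis=-1)
path = PolylineSE2.from_array(xy)

state = path.interpolate(2.5)  # StateSE2 at 2.5 m arc length
assert np.allclose([state.x, state.y, state.yaw], [2.5, 0.0, 0.0])

# Array input returns an (N, 3) array of interpolated SE2 states.
states = path.interpolate(np.array([0.0, 5.0, 10.0]))
assert states.shape == (3, 3)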
diff --git a/d123/common/geometry/occupancy_map.py b/d123/common/geometry/occupancy_map.py
deleted file mode 100644
index 2a8085d6..00000000
--- a/d123/common/geometry/occupancy_map.py
+++ /dev/null
@@ -1,132 +0,0 @@
-from __future__ import annotations
-
-from typing import Dict, List, Literal, Optional, Sequence, Union
-
-import numpy as np
-import numpy.typing as npt
-import shapely.vectorized
-from shapely.geometry.base import BaseGeometry
-from shapely.strtree import STRtree
-
-# TODO: Figure out if a 3D equivalent is needed.
-
-
-class OccupancyMap2D:
-    def __init__(
-        self,
-        geometries: Sequence[BaseGeometry],
-        ids: Optional[Union[List[str], List[int]]] = None,
-        node_capacity: int = 10,
-    ):
-        """
-        Constructor of OccupancyMap2D
-        :param geometries: list/array of polygons
-        :param ids: optional list of geometry identifiers
-        :param node_capacity: max number of child nodes in str-tree, defaults to 10
-        """
-        assert ids is None or len(ids) == len(geometries), "Length of ids must match length of geometries"
-
-        self._ids: Union[List[str], List[int]] = (
-            ids if ids is not None else [str(idx) for idx in range(len(geometries))]
-        )
-        self._id_to_idx: Dict[Union[str, int], int] = {id: idx for idx, id in enumerate(self._ids)}
-
-        self._geometries = geometries
-        self._node_capacity = node_capacity
-        self._str_tree = STRtree(self._geometries, node_capacity)
-
-    @classmethod
-    def from_dict(cls, geometry_dict: Dict[Union[str, int], BaseGeometry], node_capacity: int = 10) -> OccupancyMap2D:
-        ids = list(geometry_dict.keys())
-        geometries = list(geometry_dict.values())
-        return cls(geometries=geometries, ids=ids, node_capacity=node_capacity)
-
-    def __getitem__(self, id: Union[str, int]) -> BaseGeometry:
-        """
-        Retrieves geometry of id.
-        :param id: geometry identifier
-        :return: geometry of id
-        """
-        return self._geometries[self._id_to_idx[id]]
-
-    def __len__(self) -> int:
-        """
-        Number of geometries in the occupancy map
-        :return: int
-        """
-        return len(self._ids)
-
-    @property
-    def ids(self) -> Union[List[str], List[int]]:
-        """
-        Getter for geometry identifiers in the occupancy map
-        :return: list of ids
-        """
-        return self._ids
-
-    @property
-    def geometries(self) -> Sequence[BaseGeometry]:
-
-        return self._geometries
-
-    @property
-    def token_to_idx(self) -> Dict[Union[int, str], int]:
-        """
-        Getter for the id-to-index mapping of the occupancy map
-        :return: dictionary of ids and indices
-        """
-        return self._id_to_idx
-
-    def intersects(self, geometry: BaseGeometry) -> Union[List[str], List[int]]:
-        """
-        Searches for intersecting geometries in the occupancy map
-        :param geometry: geometries to query
-        :return: list of ids for intersecting geometries
-        """
-        indices = self.query(geometry, predicate="intersects")
-        return [self._ids[idx] for idx in indices]
-
-    def query(
-        self,
-        geometry: Union[BaseGeometry, np.ndarray],
-        predicate: Optional[
-            Literal["intersects", "within", "contains", "overlaps", "crosses", "touches", "covers", "covered_by"]
-        ] = None,
-        distance: Optional[float] = None,
-    ) -> npt.NDArray[np.int64]:
-        """
-        Directly calls shapely's query function on the str-tree
-        :param geometry: geometries to query
-        :param predicate: see shapely, defaults to None
-        :param distance: distance for distance-based predicates, defaults to None
-        :return: query output
-        """
-        return self._str_tree.query(geometry, predicate=predicate, distance=distance)
-
-    def query_nearest(
-        self,
-        geometry: Union[BaseGeometry, np.ndarray],
-        max_distance: Optional[float] = None,
-        return_distance: bool = False,
-        exclusive: bool = False,
-        all_matches: bool = True,
-    ):
-        return self._str_tree.query_nearest(
-            geometry,
-            max_distance=max_distance,
-            return_distance=return_distance,
-            exclusive=exclusive,
-            all_matches=all_matches,
-        )
-
-    def points_in_polygons(self, points: npt.NDArray[np.float64]) -> npt.NDArray[np.bool_]:
-        """
-        Determines whether input points are in polygons of the occupancy map
-        :param points: input points
-        :return: boolean array of shape (polygons, input points)
-        """
-        output = np.zeros((len(self._geometries), len(points)), dtype=bool)
-        for i, polygon in enumerate(self._geometries):
-            output[i] = shapely.vectorized.contains(polygon, points[:, 0], points[:, 1])
-
-        return output
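Editor's note: a minimal usage sketch for the STRtree-backed map above, assuming the deleted module is importable:

import shapely.geometry as geom

from d123.common.geometry.occupancy_map import OccupancyMap2D

# Two unit squares keyed by string ids (ids default to "0", "1", ... if omitted).
boxes = {"a": geom.box(0, 0, 1, 1), "b": geom.box(5, 5, 6, 6)}
occupancy_map = OccupancyMap2D.from_dict(boxes)

# The intersection query returns the ids of the hit geometries.
assert occupancy_map.intersects(geom.Point(0.5, 0.5).buffer(0.1)) == ["a"]
assert occupancy_map["b"].area == 1.0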
diff --git a/d123/common/geometry/transform/rotation.py b/d123/common/geometry/transform/rotation.py
deleted file mode 100644
index 03072609..00000000
--- a/d123/common/geometry/transform/rotation.py
+++ /dev/null
@@ -1,27 +0,0 @@
-# import numpy as np
-# import numpy.typing as npt
-
-# from d123.common.geometry.base import Point3DIndex, StateSE3, StateSE3Index
-# from d123.common.geometry.vector import Vector3D
-
-
-# def get_roll_pitch_yaw_from_rotation_matrix(
-#     rotation_matrix: npt.NDArray[np.float64],
-# ) -> Vector3D:
-#     """Extract roll, pitch, and yaw angles from a rotation matrix."""
-#     assert rotation_matrix.shape == (3, 3), "Rotation matrix must be 3x3."
-
-#     sy = np.sqrt(rotation_matrix[0, 0] ** 2 + rotation_matrix[1, 0] ** 2)
-
-#     singular = sy < 1e-6
-
-#     if not singular:
-#         x = np.arctan2(rotation_matrix[2, 1], rotation_matrix[2, 2])
-#         y = np.arctan2(-rotation_matrix[2, 0], sy)
-#         z = np.arctan2(rotation_matrix[1, 0], rotation_matrix[0, 0])
-#     else:
-#         x = np.arctan2(-rotation_matrix[1, 2], rotation_matrix[1, 1])
-#         y = np.arctan2(-rotation_matrix[2, 0], sy)
-#         z = 0.0
-
-#     return Vector3D(x=x, y=y, z=z)
diff --git a/d123/common/geometry/transform/se2_array.py b/d123/common/geometry/transform/se2_array.py
deleted file mode 100644
index c225a325..00000000
--- a/d123/common/geometry/transform/se2_array.py
+++ /dev/null
@@ -1,114 +0,0 @@
-from typing import Union
-
-import numpy as np
-import numpy.typing as npt
-
-from d123.common.geometry.base import StateSE2, StateSE2Index
-from d123.common.geometry.line.polylines import normalize_angle
-
-# TODO: Refactor 2D and 3D transform functions in a more consistent and general way.
-
-
-def convert_absolute_to_relative_se2_array(
-    origin: Union[StateSE2, npt.NDArray[np.float64]], state_se2_array: npt.NDArray[np.float64]
-) -> npt.NDArray[np.float64]:
-    """
-    Converts a StateSE2 array from global to relative coordinates.
-    :param origin: origin pose of relative coords system
-    :param state_se2_array: array of SE2 states with (x,y,θ) in last dim
-    :return: SE2 coords array in relative coordinates
-    """
-    if isinstance(origin, StateSE2):
-        origin_array = origin.array
-    elif isinstance(origin, np.ndarray):
-        assert origin.ndim == 1 and origin.shape[-1] == len(StateSE2Index)
-        origin_array = origin
-    else:
-        raise TypeError(f"Expected StateSE2 or np.ndarray, got {type(origin)}")
-
-    rotate_rad = -origin_array[StateSE2Index.YAW]
-    cos, sin = np.cos(rotate_rad), np.sin(rotate_rad)
-    R = np.array([[cos, -sin], [sin, cos]])
-
-    state_se2_rel = state_se2_array - origin_array
-    state_se2_rel[..., :2] = state_se2_rel[..., :2] @ R.T
-    state_se2_rel[..., 2] = normalize_angle(state_se2_rel[..., 2])
-
-    return state_se2_rel
-
-
-def convert_absolute_to_relative_point_2d_array(
-    origin: Union[StateSE2, npt.NDArray[np.float64]], point_2d_array: npt.NDArray[np.float64]
-) -> npt.NDArray[np.float64]:
-    """
-    Converts an absolute 2D point array from global to relative coordinates.
-    :param origin: origin pose of relative coords system
-    :param point_2d_array: array of 2D points with (x,y) in last dim
-    :return: 2D points array in relative coordinates
-    """
-    if isinstance(origin, StateSE2):
-        origin_array = origin.array
-    elif isinstance(origin, np.ndarray):
-        assert origin.ndim == 1 and origin.shape[-1] == len(StateSE2Index)
-        origin_array = origin
-    else:
-        raise TypeError(f"Expected StateSE2 or np.ndarray, got {type(origin)}")
-
-    rotate_rad = -origin_array[StateSE2Index.YAW]
-    cos, sin = np.cos(rotate_rad), np.sin(rotate_rad)
-    R = np.array([[cos, -sin], [sin, cos]])
-
-    point_2d_rel = point_2d_array - origin_array[..., StateSE2Index.XY]
-    point_2d_rel = point_2d_rel @ R.T
-
-    return point_2d_rel
-
-
-def convert_relative_to_absolute_se2_array(
-    origin: Union[StateSE2, npt.NDArray[np.float64]], state_se2_array: npt.NDArray[np.float64]
-) -> npt.NDArray[np.float64]:
-    """
-    Converts a StateSE2 array from relative to global coordinates.
-    :param origin: origin pose of relative coords system
-    :param state_se2_array: array of SE2 states with (x,y,θ) in last dim
-    :return: SE2 coords array in absolute coordinates
-    """
-    if isinstance(origin, StateSE2):
-        origin_array = origin.array
-    elif isinstance(origin, np.ndarray):
-        assert origin.ndim == 1 and origin.shape[-1] == len(StateSE2Index)
-        origin_array = origin
-    else:
-        raise TypeError(f"Expected StateSE2 or np.ndarray, got {type(origin)}")
-
-    rotate_rad = origin_array[StateSE2Index.YAW]
-    cos, sin = np.cos(rotate_rad), np.sin(rotate_rad)
-    R = np.array([[cos, -sin], [sin, cos]])
-
-    state_se2_abs = np.zeros_like(state_se2_array)
-    state_se2_abs[..., :2] = state_se2_array[..., :2] @ R.T + origin_array[StateSE2Index.XY]
-    state_se2_abs[..., 2] = normalize_angle(state_se2_array[..., 2] + origin_array[StateSE2Index.YAW])
-
-    return state_se2_abs
-
-
-def convert_relative_to_absolute_point_2d_array(
-    origin: Union[StateSE2, npt.NDArray[np.float64]], point_2d_array: npt.NDArray[np.float64]
-) -> npt.NDArray[np.float64]:
-
-    if isinstance(origin, StateSE2):
-        origin_array = origin.array
-    elif isinstance(origin, np.ndarray):
-        assert origin.ndim == 1 and origin.shape[-1] == len(StateSE2Index)
-        origin_array = origin
-    else:
-        raise TypeError(f"Expected StateSE2 or np.ndarray, got {type(origin)}")
-
-    rotate_rad = origin_array[StateSE2Index.YAW]
-    cos, sin = np.cos(rotate_rad), np.sin(rotate_rad)
-    R = np.array([[cos, -sin], [sin, cos]])
-
-    point_2d_abs = point_2d_array @ R.T
-    point_2d_abs = point_2d_abs + origin_array[..., StateSE2Index.XY]
-
-    return point_2d_abs
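Editor's note: the two SE2 conversions above are inverses of one another: abs-to-rel subtracts the origin and rotates by -yaw, rel-to-abs rotates by +yaw and then adds the origin. A roundtrip sketch, assuming the deleted modules are importable:

import numpy as np

from d123.common.geometry.base import StateSE2
from d123.common.geometry.transform.se2_array import (
    convert_absolute_to_relative_se2_array,
    convert_relative_to_absolute_se2_array,
)

origin = StateSE2(10.0, 5.0, np.pi / 2)
poses = np.array([[11.0, 5.0, np.pi / 2], [10.0, 7.0, np.pi]])

# In the origin frame, the first pose sits 1 m to the right (-y), same heading.
relative = convert_absolute_to_relative_se2_array(origin, poses)
assert np.allclose(relative[0], [0.0, -1.0, 0.0])

# The inverse transform recovers the absolute poses.
assert np.allclose(convert_relative_to_absolute_se2_array(origin, relative), poses)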
diff --git a/d123/common/geometry/transform/se3.py b/d123/common/geometry/transform/se3.py
deleted file mode 100644
index e61451b4..00000000
--- a/d123/common/geometry/transform/se3.py
+++ /dev/null
@@ -1,207 +0,0 @@
-import numpy as np
-import numpy.typing as npt
-
-from d123.common.geometry.base import Point3DIndex, StateSE3, StateSE3Index
-from d123.common.geometry.vector import Vector3D
-
-# def get_rotation_matrix(state_se3: StateSE3) -> npt.NDArray[np.float64]:
-#     R_x = np.array(
-#         [
-#             [1, 0, 0],
-#             [0, np.cos(state_se3.roll), -np.sin(state_se3.roll)],
-#             [0, np.sin(state_se3.roll), np.cos(state_se3.roll)],
-#         ],
-#         dtype=np.float64,
-#     )
-#     R_y = np.array(
-#         [
-#             [np.cos(state_se3.pitch), 0, np.sin(state_se3.pitch)],
-#             [0, 1, 0],
-#             [-np.sin(state_se3.pitch), 0, np.cos(state_se3.pitch)],
-#         ],
-#         dtype=np.float64,
-#     )
-#     R_z = np.array(
-#         [
-#             [np.cos(state_se3.yaw), -np.sin(state_se3.yaw), 0],
-#             [np.sin(state_se3.yaw), np.cos(state_se3.yaw), 0],
-#             [0, 0, 1],
-#         ],
-#         dtype=np.float64,
-#     )
-#     return R_z @ R_y @ R_x
-
-
-def get_rotation_matrix(state_se3: StateSE3) -> npt.NDArray[np.float64]:
-    # Intrinsic Z-Y'-X'' (yaw-pitch-roll) rotation: R = R_z(yaw) @ R_y(pitch) @ R_x(roll)
-    R_x = np.array(
-        [
-            [1, 0, 0],
-            [0, np.cos(state_se3.roll), -np.sin(state_se3.roll)],
-            [0, np.sin(state_se3.roll), np.cos(state_se3.roll)],
-        ],
-        dtype=np.float64,
-    )
-    R_y = np.array(
-        [
-            [np.cos(state_se3.pitch), 0, np.sin(state_se3.pitch)],
-            [0, 1, 0],
-            [-np.sin(state_se3.pitch), 0, np.cos(state_se3.pitch)],
-        ],
-        dtype=np.float64,
-    )
-    R_z = np.array(
-        [
-            [np.cos(state_se3.yaw), -np.sin(state_se3.yaw), 0],
-            [np.sin(state_se3.yaw), np.cos(state_se3.yaw), 0],
-            [0, 0, 1],
-        ],
-        dtype=np.float64,
-    )
-    return R_z @ R_y @ R_x
-
-
-def translate_se3_along_z(state_se3: StateSE3, distance: float) -> StateSE3:
-
-    R = get_rotation_matrix(state_se3)
-    z_axis = R[:, 2]
-
-    new_x = state_se3.x + distance * z_axis[0]
-    new_y = state_se3.y + distance * z_axis[1]
-    new_z = state_se3.z + distance * z_axis[2]
-
-    return StateSE3(new_x, new_y, new_z, state_se3.roll, state_se3.pitch, state_se3.yaw)
-
-
-def translate_se3_along_y(state_se3: StateSE3, distance: float) -> StateSE3:
-
-    R = get_rotation_matrix(state_se3)
-    y_axis = R[:, 1]
-
-    new_x = state_se3.x + distance * y_axis[0]
-    new_y = state_se3.y + distance * y_axis[1]
-    new_z = state_se3.z + distance * y_axis[2]
-
-    return StateSE3(new_x, new_y, new_z, state_se3.roll, state_se3.pitch, state_se3.yaw)
-
-
-def translate_se3_along_x(state_se3: StateSE3, distance: float) -> StateSE3:
-
-    R = get_rotation_matrix(state_se3)
-    x_axis = R[:, 0]
-
-    new_x = state_se3.x + distance * x_axis[0]
-    new_y = state_se3.y + distance * x_axis[1]
-    new_z = state_se3.z + distance * x_axis[2]
-
-    return StateSE3(new_x, new_y, new_z, state_se3.roll, state_se3.pitch, state_se3.yaw)
-
-
-def translate_body_frame(state_se3: StateSE3, vector_3d: Vector3D) -> StateSE3:
-    R = get_rotation_matrix(state_se3)
-
-    body_translation = vector_3d.array
-
-    # Transform to world frame
-    world_translation = R @ body_translation
-
-    return StateSE3(
-        state_se3.x + world_translation[0],
-        state_se3.y + world_translation[1],
-        state_se3.z + world_translation[2],
-        state_se3.roll,
-        state_se3.pitch,
-        state_se3.yaw,
-    )
-
-
-def convert_relative_to_absolute_points_3d_array(
-    origin: StateSE3, points_3d_array: npt.NDArray[np.float64]
-) -> npt.NDArray[np.float64]:
-
-    # TODO: implement function for origin as np.ndarray
-
-    R = get_rotation_matrix(origin)
-    absolute_points = points_3d_array @ R.T + origin.point_3d.array
-    return absolute_points
-
-
-def convert_absolute_to_relative_se3_array(
-    origin: StateSE3, se3_array: npt.NDArray[np.float64]
-) -> npt.NDArray[np.float64]:
-    assert se3_array.shape[-1] == len(StateSE3Index)
-    # TODO: remove transform for-loop, use vectorized operations
-
-    # Extract rotation and translation of origin
-    R_origin = get_rotation_matrix(origin)
-    t_origin = origin.point_3d.array
-
-    # Prepare output array
-    rel_se3_array = np.empty_like(se3_array)
-
-    # For each SE3 in the array
-    for i in range(se3_array.shape[0]):
-        abs_se3 = se3_array[i]
-        abs_pos = abs_se3[StateSE3Index.XYZ]
-        abs_rpy = abs_se3[StateSE3Index.ROLL : StateSE3Index.YAW + 1]
-
-        # Relative position: rotate and translate
-        rel_pos = R_origin.T @ (abs_pos - t_origin)
-
-        # Relative orientation: subtract origin's rpy
-        rel_rpy = abs_rpy - np.array([origin.roll, origin.pitch, origin.yaw], dtype=np.float64)
-
-        rel_se3_array[i, StateSE3Index.X : StateSE3Index.Z + 1] = rel_pos
-        rel_se3_array[i, StateSE3Index.ROLL : StateSE3Index.YAW + 1] = rel_rpy
-
-    return rel_se3_array
-
-
-def convert_relative_to_absolute_se3_array(
-    origin: StateSE3, se3_array: npt.NDArray[np.float64]
-) -> npt.NDArray[np.float64]:
-    assert se3_array.shape[-1] == len(StateSE3Index)
-    # TODO: remove transform for-loop, use vectorized operations
-
-    # Extract rotation and translation of origin
-    R_origin = get_rotation_matrix(origin)
-    t_origin = origin.point_3d.array
-
-    # Prepare output array
-    abs_se3_array = np.empty_like(se3_array)
-
-    # For each SE3 in the array
-    for i in range(se3_array.shape[0]):
-        rel_se3 = se3_array[i]
-        rel_pos = rel_se3[StateSE3Index.XYZ]
-        rel_rpy = rel_se3[StateSE3Index.ROLL : StateSE3Index.YAW + 1]
-
-        # Absolute position: rotate and translate
-        abs_pos = R_origin @ rel_pos + t_origin
-
-        # Absolute orientation: add origin's rpy
-        abs_rpy = rel_rpy + np.array([origin.roll, origin.pitch, origin.yaw], dtype=np.float64)
-
-        abs_se3_array[i, StateSE3Index.X : StateSE3Index.Z + 1] = abs_pos
-        abs_se3_array[i, StateSE3Index.ROLL : StateSE3Index.YAW + 1] = abs_rpy
-
-    return abs_se3_array
-
-
-def translate_points_3d_along_z(
-    state_se3: StateSE3,
-    points_3d: npt.NDArray[np.float64],
-    distance: float,
-) -> npt.NDArray[np.float64]:
-    assert points_3d.shape[-1] == len(Point3DIndex)
-
-    R = get_rotation_matrix(state_se3)
-    z_axis = R[:, 2]
-
-    translated_points = np.zeros_like(points_3d)
-
-    translated_points[..., Point3DIndex.X] = points_3d[..., Point3DIndex.X] + distance * z_axis[0]
-    translated_points[..., Point3DIndex.Y] = points_3d[..., Point3DIndex.Y] + distance * z_axis[1]
-    translated_points[..., Point3DIndex.Z] = points_3d[..., Point3DIndex.Z] + distance * z_axis[2]
-
-    return translated_points
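Editor's note: the translate_se3_along_* helpers move a pose along its own body axes (the columns of the rotation matrix). A quick check under the R_z @ R_y @ R_x convention above:

import numpy as np

from d123.common.geometry.base import StateSE3
from d123.common.geometry.transform.se3 import translate_se3_along_x

# A pose yawed 90 degrees: its body x-axis points along world +y.
pose = StateSE3(x=0.0, y=0.0, z=0.0, roll=0.0, pitch=0.0, yaw=np.pi / 2)
moved = translate_se3_along_x(pose, distance=2.0)
assert np.allclose([moved.x, moved.y, moved.z], [0.0, 2.0, 0.0])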
diff --git a/d123/common/geometry/transform/tranform_2d.py b/d123/common/geometry/transform/tranform_2d.py
deleted file mode 100644
index fa8151bc..00000000
--- a/d123/common/geometry/transform/tranform_2d.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import numpy as np
-import numpy.typing as npt
-
-from d123.common.geometry.base import StateSE2
-from d123.common.geometry.vector import Vector2D
-
-# TODO: Refactor 2D and 3D transform functions in a more consistent and general way.
-
-
-def translate(pose: StateSE2, translation: Vector2D) -> StateSE2:
-    return StateSE2(pose.x + translation.x, pose.y + translation.y, pose.yaw)
-
-
-def translate_along_yaw(pose: StateSE2, translation: Vector2D) -> StateSE2:
-    half_pi = np.pi / 2.0
-    translation_array: npt.NDArray[np.float64] = np.array(
-        [
-            (translation.y * np.cos(pose.yaw + half_pi)) + (translation.x * np.cos(pose.yaw)),
-            (translation.y * np.sin(pose.yaw + half_pi)) + (translation.x * np.sin(pose.yaw)),
-        ]
-    )
-    return translate(pose, Vector2D.from_array(translation_array))
diff --git a/d123/common/geometry/utils.py b/d123/common/geometry/utils.py
deleted file mode 100644
index 4fa2e458..00000000
--- a/d123/common/geometry/utils.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import numpy as np
-
-# TODO: move this somewhere else
-# TODO: Maybe rename wrap angle?
-# TODO: Add implementation for torch, jax, or whatever else is needed.
-
-
-def normalize_angle(angle):
-    """
-    Map an angle to the range [-π, π]
-    :param angle: any angle as float
-    :return: normalized angle
-    """
-    return np.arctan2(np.sin(angle), np.cos(angle))
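Editor's note: normalize_angle wraps via atan2(sin, cos), which is branch-free and vectorizes over arrays:

import numpy as np

from d123.common.geometry.utils import normalize_angle

assert np.isclose(normalize_angle(3 * np.pi / 2), -np.pi / 2)
assert np.allclose(normalize_angle(np.array([2 * np.pi, -3 * np.pi])), [0.0, -np.pi])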
diff --git a/d123/common/geometry/vector.py b/d123/common/geometry/vector.py
deleted file mode 100644
index ef0e4cfa..00000000
--- a/d123/common/geometry/vector.py
+++ /dev/null
@@ -1,70 +0,0 @@
-from __future__ import annotations
-
-from enum import IntEnum
-
-import numpy as np
-import numpy.typing as npt
-
-from d123.common.geometry.base import Point2D, Point3D, Point3DIndex
-
-
-class Vector2DIndex(IntEnum):
-    X = 0
-    Y = 1
-
-
-class Vector2D(Point2D):
-    def __add__(self, other: Vector2D) -> Vector2D:
-        return Vector2D(self.x + other.x, self.y + other.y)
-
-    def __sub__(self, other: Vector2D) -> Vector2D:
-        return Vector2D(self.x - other.x, self.y - other.y)
-
-    def __mul__(self, scalar: float) -> Vector2D:
-        return Vector2D(self.x * scalar, self.y * scalar)
-
-    def __truediv__(self, scalar: float) -> Vector2D:
-        return Vector2D(self.x / scalar, self.y / scalar)
-
-    def magnitude(self) -> float:
-        """Calculate the magnitude of the vector."""
-        return float(np.linalg.norm(self.array))
-
-    @property
-    def vector_2d(self) -> Vector2D:
-        return self
-
-
-class Vector3DIndex(IntEnum):
-    X = 0
-    Y = 1
-    Z = 2
-
-
-class Vector3D(Point3D):
-
-    @classmethod
-    def from_array(cls, array: npt.NDArray[np.float64]) -> Vector3D:
-        assert array.ndim == 1
-        assert array.shape[0] == len(Point3DIndex)
-        return cls(array[Point3DIndex.X], array[Point3DIndex.Y], array[Point3DIndex.Z])
-
-    def __add__(self, other: Vector3D) -> Vector3D:
-        return Vector3D(self.x + other.x, self.y + other.y, self.z + other.z)
-
-    def __sub__(self, other: Vector3D) -> Vector3D:
-        return Vector3D(self.x - other.x, self.y - other.y, self.z - other.z)
-
-    def __mul__(self, scalar: float) -> Vector3D:
-        return Vector3D(self.x * scalar, self.y * scalar, self.z * scalar)
-
-    def __truediv__(self, scalar: float) -> Vector3D:
-        return Vector3D(self.x / scalar, self.y / scalar, self.z / scalar)
-
-    def magnitude(self) -> float:
-        """Calculate the magnitude of the vector."""
-        return float(np.linalg.norm(self.array))
-
-    @property
-    def vector_2d(self) -> Vector2D:
-        return Vector2D(self.x, self.y)
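Editor's note: the vector types above are thin Point subclasses with elementwise operators; a tiny sketch, assuming the deleted module is importable:

from d123.common.geometry.vector import Vector2D, Vector3D

v = Vector3D(1.0, 2.0, 2.0)
assert v.magnitude() == 3.0               # sqrt(1 + 4 + 4)
assert (v * 2).z == 4.0                   # scalar ops return new vectors
assert isinstance(v.vector_2d, Vector2D)  # drops the z component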
diff --git a/d123/common/visualization/matplotlib/camera copy.py b/d123/common/visualization/matplotlib/camera copy.py
deleted file mode 100644
index ea758366..00000000
--- a/d123/common/visualization/matplotlib/camera copy.py
+++ /dev/null
@@ -1,330 +0,0 @@
-# from typing import List, Optional, Tuple
-
-from typing import List, Optional, Tuple
-
-import cv2
-import matplotlib.pyplot as plt
-import numpy as np
-import numpy.typing as npt
-
-# from PIL import ImageColor
-from pyquaternion import Quaternion
-
-from d123.common.datatypes.detection.detection import BoxDetectionSE3, BoxDetectionWrapper
-from d123.common.datatypes.detection.detection_types import DetectionType
-from d123.common.datatypes.sensor.camera import Camera
-from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3
-from d123.common.geometry.base import StateSE3
-from d123.common.geometry.bounding_box.bounding_box_index import BoundingBoxSE3Index, Corners3DIndex
-from d123.common.geometry.transform.se3 import convert_absolute_to_relative_se3_array, get_rotation_matrix
-from d123.common.visualization.color.default import BOX_DETECTION_CONFIG
-
-# from navsim.common.dataclasses import Annotations, Camera, Lidar
-# from navsim.common.enums import BoundingBoxIndex, LidarIndex
-# from navsim.planning.scenario_builder.navsim_scenario_utils import tracked_object_types
-# from navsim.visualization.config import AGENT_CONFIG
-# from navsim.visualization.lidar import filter_lidar_pc, get_lidar_pc_color
-
-
-def add_camera_ax(ax: plt.Axes, camera: Camera) -> plt.Axes:
-    """
-    Adds camera image to matplotlib ax object
-    :param ax: matplotlib ax object
-    :param camera: d123 camera dataclass
-    :return: ax object with image
-    """
-    ax.imshow(camera.image)
-    return ax
-
-
-# FIXME:
-# def add_lidar_to_camera_ax(ax: plt.Axes, camera: Camera, lidar: Lidar) -> plt.Axes:
-#     """
-#     Adds camera image with lidar point cloud on matplotlib ax object
-#     :param ax: matplotlib ax object
-#     :param camera: navsim camera dataclass
-#     :param lidar: navsim lidar dataclass
-#     :return: ax object with image
-#     """
-
-#     image, lidar_pc = camera.image.copy(), lidar.lidar_pc.copy()
-#     image_height, image_width = image.shape[:2]
-
-#     lidar_pc = filter_lidar_pc(lidar_pc)
-#     lidar_pc_colors = np.array(get_lidar_pc_color(lidar_pc))
-
-#     pc_in_cam, pc_in_fov_mask = _transform_pcs_to_images(
-#         lidar_pc,
-#         camera.sensor2lidar_rotation,
-#         camera.sensor2lidar_translation,
-#         camera.intrinsics,
-#         img_shape=(image_height, image_width),
-#     )
-
-#     for (x, y), color in zip(pc_in_cam[pc_in_fov_mask], lidar_pc_colors[pc_in_fov_mask]):
-#         color = (int(color[0]), int(color[1]), int(color[2]))
-#         cv2.circle(image, (int(x), int(y)), 5, color, -1)
-
-#     ax.imshow(image)
-#     return ax
-
-
-def add_box_detections_to_camera_ax(
-    ax: plt.Axes,
-    camera: Camera,
-    box_detections: BoxDetectionWrapper,
-    ego_state_se3: EgoStateSE3,
-) -> plt.Axes:
-
-    box_detection_array = np.zeros((len(box_detections.box_detections), len(BoundingBoxSE3Index)), dtype=np.float64)
-    detection_types = np.array(
-        [detection.metadata.detection_type for detection in box_detections.box_detections], dtype=object
-    )
-    for idx, box_detection in enumerate(box_detections.box_detections):
-        assert isinstance(
-            box_detection, BoxDetectionSE3
-        ), f"Box detection must be of type BoxDetectionSE3, got {type(box_detection)}"
-        box_detection_array[idx] = box_detection.bounding_box_se3.array
-
-    box_detection_array[..., BoundingBoxSE3Index.STATE_SE3] = convert_absolute_to_relative_se3_array(
-        ego_state_se3.rear_axle_se3, box_detection_array[..., BoundingBoxSE3Index.STATE_SE3]
-    )
-    detection_positions, detection_extents, detection_yaws = _transform_annotations_to_camera(
-        box_detection_array, camera.extrinsic
-    )
-
-    corners_norm = np.stack(np.unravel_index(np.arange(len(Corners3DIndex)), [2] * 3), axis=1)
-    corners_norm = corners_norm[[0, 1, 3, 2, 4, 5, 7, 6]]
-    corners_norm = corners_norm - np.array([0.5, 0.5, 0.5])
-    corners = detection_extents.reshape([-1, 1, 3]) * corners_norm.reshape([1, 8, 3])
-
-    corners = _rotation_3d_in_axis(corners, detection_yaws, axis=1)
-    corners += detection_positions.reshape(-1, 1, 3)
-
-    # Then project the corners to the image.
-    box_corners, corners_pc_in_fov = _transform_points_to_image(corners.reshape(-1, 3), camera.metadata.intrinsic)
-    box_corners = box_corners.reshape(-1, 8, 2)
-    corners_pc_in_fov = corners_pc_in_fov.reshape(-1, 8)
-    valid_corners = corners_pc_in_fov.any(-1)
-
-    box_corners, detection_types = box_corners[valid_corners], detection_types[valid_corners]
-    image = _plot_rect_3d_on_img(camera.image.copy(), box_corners, detection_types)
-
-    ax.imshow(image)
-    return ax
-
-
-def _transform_annotations_to_camera(
-    boxes: npt.NDArray[np.float32], extrinsic: npt.NDArray[np.float64]
-) -> Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32], npt.NDArray[np.float32]]:
-    """
-    Helper function to transform bounding boxes into the camera frame
-    TODO: Refactor
-    :param boxes: array representation of bounding boxes
-    :param extrinsic: 4x4 sensor-to-ego extrinsic matrix of the camera
-    :return: box centers, dimensions, and yaws in camera coordinates
-    """
-    sensor2ego_rotation = extrinsic[:3, :3]
-    sensor2ego_translation = extrinsic[:3, 3]
-
-    locs, rots = (
-        boxes[:, BoundingBoxSE3Index.XYZ],
-        boxes[:, BoundingBoxSE3Index.YAW],
-    )
-    dims_cam = boxes[
-        :, [BoundingBoxSE3Index.LENGTH, BoundingBoxSE3Index.HEIGHT, BoundingBoxSE3Index.WIDTH]
-    ]  # l, w, h -> l, h, w
-
-    rots_cam = np.zeros_like(rots)
-    for idx, state_se3_array in enumerate(boxes[:, BoundingBoxSE3Index.STATE_SE3]):
-        rot = Quaternion(matrix=get_rotation_matrix(StateSE3.from_array(state_se3_array)))
-        rot = Quaternion(matrix=sensor2ego_rotation).inverse * rot
-        rots_cam[idx] = -rot.yaw_pitch_roll[0]
-
-    lidar2cam_r = np.linalg.inv(sensor2ego_rotation)
-    lidar2cam_t = sensor2ego_translation @ lidar2cam_r.T
-    lidar2cam_rt = np.eye(4)
-    lidar2cam_rt[:3, :3] = lidar2cam_r.T
-    lidar2cam_rt[3, :3] = -lidar2cam_t
-
-    locs_cam = np.concatenate([locs, np.ones_like(locs)[:, :1]], -1)  # -1, 4
-    locs_cam = lidar2cam_rt.T @ locs_cam.T
-    locs_cam = locs_cam.T
-    locs_cam = locs_cam[:, :-1]
-    return locs_cam, dims_cam, rots_cam
-
-
-def _rotation_3d_in_axis(points: npt.NDArray[np.float32], angles: npt.NDArray[np.float32], axis: int = 0):
-    """
-    Rotate 3D points by angles according to axis.
-    TODO: Refactor
-    :param points: array of points
-    :param angles: array of angles
-    :param axis: axis to perform rotation, defaults to 0
-    :raises ValueError: if axis is invalid
-    :return: rotated points
-    """
-    rot_sin = np.sin(angles)
-    rot_cos = np.cos(angles)
-    ones = np.ones_like(rot_cos)
-    zeros = np.zeros_like(rot_cos)
-    if axis == 1:
-        rot_mat_T = np.stack(
-            [
-                np.stack([rot_cos, zeros, -rot_sin]),
-                np.stack([zeros, ones, zeros]),
-                np.stack([rot_sin, zeros, rot_cos]),
-            ]
-        )
-    elif axis == 2 or axis == -1:
-        rot_mat_T = np.stack(
-            [
-                np.stack([rot_cos, -rot_sin, zeros]),
-                np.stack([rot_sin, rot_cos, zeros]),
-                np.stack([zeros, zeros, ones]),
-            ]
-        )
-    elif axis == 0:
-        rot_mat_T = np.stack(
-            [
-                np.stack([zeros, rot_cos, -rot_sin]),
-                np.stack([zeros, rot_sin, rot_cos]),
-                np.stack([ones, zeros, zeros]),
-            ]
-        )
-    else:
-        raise ValueError(f"axis should be in [0, 1, 2], got {axis}")
-    return np.einsum("aij,jka->aik", points, rot_mat_T)
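Editor's note: since the deleted file's name ("camera copy.py") is not an importable module, here is the axis=1 branch of _rotation_3d_in_axis inlined for a single angle; rotating a camera-frame x-axis point by +90 degrees about y lands on -z:

import numpy as np

def rotate_about_y(points: np.ndarray, angle: float) -> np.ndarray:
    # Same math as _rotation_3d_in_axis(..., axis=1) above, for one angle.
    c, s = np.cos(angle), np.sin(angle)
    rot = np.array([[c, 0.0, -s], [0.0, 1.0, 0.0], [s, 0.0, c]])
    return points @ rot

assert np.allclose(rotate_about_y(np.array([[1.0, 0.0, 0.0]]), np.pi / 2), [[0.0, 0.0, -1.0]])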
-    :param detection_types: detection types of boxes for coloring
-    :param thickness: pixel width of lines, defaults to 1
-    :return: image with 3D bounding boxes
-    """
-    line_indices = (
-        (0, 1),
-        (0, 3),
-        (0, 4),
-        (1, 2),
-        (1, 5),
-        (3, 2),
-        (3, 7),
-        (4, 5),
-        (4, 7),
-        (2, 6),
-        (5, 6),
-        (6, 7),
-    )
-    for i in range(len(box_corners)):
-        color = BOX_DETECTION_CONFIG[detection_types[i]].fill_color.rgb
-        corners = box_corners[i].astype(np.int64)
-        for start, end in line_indices:
-            cv2.line(
-                image,
-                (corners[start, 0], corners[start, 1]),
-                (corners[end, 0], corners[end, 1]),
-                color,
-                thickness,
-                cv2.LINE_AA,
-            )
-    return image.astype(np.uint8)
-
-
-def _transform_points_to_image(
-    points: npt.NDArray[np.float32],
-    intrinsic: npt.NDArray[np.float32],
-    image_shape: Optional[Tuple[int, int]] = None,
-    eps: float = 1e-3,
-) -> Tuple[npt.NDArray[np.float32], npt.NDArray[np.bool_]]:
-    """
-    Transforms points in camera frame to image pixel coordinates
-    TODO: refactor
-    :param points: points in camera frame
-    :param intrinsic: camera intrinsics
-    :param image_shape: shape of image in pixels
-    :param eps: lower threshold on point depth, defaults to 1e-3
-    :return: points in pixel coordinates, mask of values in frame
-    """
-    points = points[:, :3]
-
-    viewpad = np.eye(4)
-    viewpad[: intrinsic.shape[0], : intrinsic.shape[1]] = intrinsic
-
-    pc_img = np.concatenate([points, np.ones_like(points)[:, :1]], -1)
-    pc_img = viewpad @ pc_img.T
-    pc_img = pc_img.T
-
-    cur_pc_in_fov = pc_img[:, 2] > eps
-    pc_img = pc_img[..., 0:2] / np.maximum(pc_img[..., 2:3], np.ones_like(pc_img[..., 2:3]) * eps)
-    if image_shape is not None:
-        img_h, img_w = image_shape
-        cur_pc_in_fov = (
-            cur_pc_in_fov
-            & (pc_img[:, 0] < (img_w - 1))
-            & (pc_img[:, 0] > 0)
-            & (pc_img[:, 1] < (img_h - 1))
-            & (pc_img[:, 1] > 0)
-        )
-    return pc_img, cur_pc_in_fov
-
-
-# def _transform_pcs_to_images(
-#     lidar_pc: npt.NDArray[np.float32],
-#     sensor2lidar_rotation: npt.NDArray[np.float32],
-#     sensor2lidar_translation: npt.NDArray[np.float32],
-#     intrinsic: npt.NDArray[np.float32],
-#     img_shape: Optional[Tuple[int, int]] = None,
-#     eps: float = 1e-3,
-# ) -> Tuple[npt.NDArray[np.float32], npt.NDArray[np.bool_]]:
-#     """
-#     Transforms a lidar point cloud to image pixel coordinates
-#     TODO: refactor
-#     :param lidar_pc: lidar point cloud
-#     :param sensor2lidar_rotation: camera rotation
-#     :param sensor2lidar_translation: camera translation
-#     :param intrinsic: camera intrinsics
-#     :param img_shape: image shape in pixels, defaults to None
-#     :param eps: threshold for lidar pc depth, defaults to 1e-3
-#     :return: lidar pc in pixel coordinates, mask of values in frame
-#     """
-#     pc_xyz = lidar_pc[LidarIndex.POSITION, :].T
-
-#     lidar2cam_r = np.linalg.inv(sensor2lidar_rotation)
-#     lidar2cam_t = sensor2lidar_translation @ lidar2cam_r.T
-#     lidar2cam_rt = np.eye(4)
-#     lidar2cam_rt[:3, :3] = lidar2cam_r.T
-#     lidar2cam_rt[3, :3] = -lidar2cam_t
-
-#     viewpad = np.eye(4)
-#     viewpad[: intrinsic.shape[0], : intrinsic.shape[1]] = intrinsic
-#     lidar2img_rt = viewpad @ lidar2cam_rt.T
-
-#     cur_pc_xyz = np.concatenate([pc_xyz, np.ones_like(pc_xyz)[:, :1]], -1)
-#     cur_pc_cam = lidar2img_rt @ cur_pc_xyz.T
-#     cur_pc_cam = cur_pc_cam.T
-#     cur_pc_in_fov = cur_pc_cam[:, 2] > eps
-#     cur_pc_cam = cur_pc_cam[..., 0:2] / np.maximum(cur_pc_cam[..., 2:3], np.ones_like(cur_pc_cam[..., 2:3]) * eps)
-
-#     if img_shape is not None:
-#         img_h, img_w = img_shape
-#         cur_pc_in_fov = (
-#             cur_pc_in_fov
-#             & (cur_pc_cam[:, 0] < (img_w - 1))
-#             & (cur_pc_cam[:, 0] > 0)
-#             & (cur_pc_cam[:, 1]
< (img_h - 1)) -# & (cur_pc_cam[:, 1] > 0) -# ) -# return cur_pc_cam, cur_pc_in_fov diff --git a/d123/common/visualization/viser/server.py b/d123/common/visualization/viser/server.py deleted file mode 100644 index 087a7e8b..00000000 --- a/d123/common/visualization/viser/server.py +++ /dev/null @@ -1,297 +0,0 @@ -import time -from typing import Dict, List, Literal - -import trimesh -import viser - -from d123.common.datatypes.sensor.camera import CameraType -from d123.common.datatypes.sensor.lidar import LiDARType -from d123.common.visualization.viser.utils import ( - get_bounding_box_meshes, - get_camera_values, - get_lidar_points, - get_map_meshes, -) -from d123.common.visualization.viser.utils_v2 import get_bounding_box_outlines -from d123.dataset.scene.abstract_scene import AbstractScene - -# TODO: Try to fix performance issues. -# TODO: Refactor this file. - -all_camera_types: List[CameraType] = [ - CameraType.CAM_F0, - CameraType.CAM_B0, - CameraType.CAM_L0, - CameraType.CAM_L1, - CameraType.CAM_L2, - CameraType.CAM_R0, - CameraType.CAM_R1, - CameraType.CAM_R2, -] - -# MISC config: -LINE_WIDTH: float = 4.0 - -# Bounding box config: -BOUNDING_BOX_TYPE: Literal["mesh", "lines"] = "lines" - -# Map config: -MAP_AVAILABLE: bool = True - - -# Cameras config: - -VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [CameraType.CAM_F0, CameraType.CAM_L0, CameraType.CAM_R0] -# VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = all_camera_types -# VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [CameraType.CAM_STEREO_L, CameraType.CAM_STEREO_R] -# VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [] -VISUALIZE_CAMERA_GUI: List[CameraType] = [CameraType.CAM_F0] -CAMERA_SCALE: float = 1.0 - -# Lidar config: -LIDAR_AVAILABLE: bool = False - -LIDAR_TYPES: List[LiDARType] = [ - LiDARType.LIDAR_MERGED, - LiDARType.LIDAR_TOP, - LiDARType.LIDAR_FRONT, - LiDARType.LIDAR_SIDE_LEFT, - LiDARType.LIDAR_SIDE_RIGHT, - LiDARType.LIDAR_BACK, -] -# LIDAR_TYPES: List[LiDARType] = [ -# LiDARType.LIDAR_TOP, -# ] -LIDAR_POINT_SIZE: float = 0.05 - - -class ViserVisualizationServer: - def __init__( - self, - scenes: List[AbstractScene], - scene_index: int = 0, - host: str = "localhost", - port: int = 8080, - label: str = "D123 Viser Server", - ): - assert len(scenes) > 0, "At least one scene must be provided." - self.scenes = scenes - self.scene_index = scene_index - - self.host = host - self.port = port - self.label = label - - self.server = viser.ViserServer(host=self.host, port=self.port, label=self.label) - self.set_scene(self.scenes[self.scene_index % len(self.scenes)]) - - def next(self) -> None: - self.server.flush() - self.server.gui.reset() - self.server.scene.reset() - self.scene_index = (self.scene_index + 1) % len(self.scenes) - print(f"Viser server started at {self.host}:{self.port}") - self.set_scene(self.scenes[self.scene_index]) - - def set_scene(self, scene: AbstractScene) -> None: - num_frames = scene.get_number_of_iterations() - # print(scene.available_camera_types) - - self.server.gui.configure_theme(dark_mode=False, control_width="large") - - # TODO: Fix lighting. Environment map can help, but cannot be freely configured. 
-        # self.server.scene.configure_environment_map(
-        #     hdri="warehouse",
-        #     background=False,
-        #     background_intensity=0.25,
-        #     environment_intensity=0.5,
-        # )
-
-        with self.server.gui.add_folder("Playback"):
-            server_playing = True
-
-            gui_timestep = self.server.gui.add_slider(
-                "Timestep",
-                min=0,
-                max=num_frames - 1,
-                step=1,
-                initial_value=0,
-                disabled=True,
-            )
-            gui_next_frame = self.server.gui.add_button("Next Frame", disabled=True)
-            gui_prev_frame = self.server.gui.add_button("Prev Frame", disabled=True)
-            gui_next_scene = self.server.gui.add_button("Next Scene", disabled=False)
-            gui_playing = self.server.gui.add_checkbox("Playing", True)
-            gui_framerate = self.server.gui.add_slider("FPS", min=1, max=60, step=0.1, initial_value=10)
-            gui_framerate_options = self.server.gui.add_button_group("FPS options", ("10", "20", "30", "60"))
-
-        # Frame step buttons.
-        @gui_next_frame.on_click
-        def _(_) -> None:
-            gui_timestep.value = (gui_timestep.value + 1) % num_frames
-
-        @gui_prev_frame.on_click
-        def _(_) -> None:
-            gui_timestep.value = (gui_timestep.value - 1) % num_frames
-
-        @gui_next_scene.on_click
-        def _(_) -> None:
-            nonlocal server_playing
-            server_playing = False
-
-        # Disable frame controls when we're playing.
-        @gui_playing.on_update
-        def _(_) -> None:
-            gui_timestep.disabled = gui_playing.value
-            gui_next_frame.disabled = gui_playing.value
-            gui_prev_frame.disabled = gui_playing.value
-
-        # Set the framerate when we click one of the options.
-        @gui_framerate_options.on_click
-        def _(_) -> None:
-            gui_framerate.value = int(gui_framerate_options.value)
-
-        prev_timestep = gui_timestep.value
-
-        # Rebuild the frame contents when the timestep slider changes.
-        @gui_timestep.on_update
-        def _(_) -> None:
-            nonlocal current_frame_handle, prev_timestep
-            current_timestep = gui_timestep.value
-
-            start = time.time()
-            # with self.server.atomic():
-            new_frame_handle = self.server.scene.add_frame(f"/frame{gui_timestep.value}", show_axes=False)
-            if BOUNDING_BOX_TYPE == "mesh":
-                meshes = []
-                for _, mesh in get_bounding_box_meshes(scene, gui_timestep.value).items():
-                    meshes.append(mesh)
-                self.server.scene.add_mesh_trimesh(
-                    f"/frame{gui_timestep.value}/detections",
-                    trimesh.util.concatenate(meshes),
-                    visible=True,
-                )
-            elif BOUNDING_BOX_TYPE == "lines":
-                lines, colors = get_bounding_box_outlines(scene, gui_timestep.value)
-                self.server.scene.add_line_segments(
-                    f"/frame{gui_timestep.value}/detections",
-                    points=lines,
-                    colors=colors,
-                    line_width=LINE_WIDTH,
-                )
-            else:
-                raise ValueError(f"Unknown bounding box type: {BOUNDING_BOX_TYPE}")
-
-            current_frame_handle.remove()
-            current_frame_handle = new_frame_handle
-
-            for camera_type in VISUALIZE_CAMERA_GUI:
-                if camera_type in scene.available_camera_types:
-                    camera_gui_handles[camera_type].image = scene.get_camera_at_iteration(
-                        gui_timestep.value, camera_type
-                    ).image
-
-            for camera_type in VISUALIZE_CAMERA_FRUSTUM:
-                if camera_type in scene.available_camera_types:
-                    camera_position, camera_quaternion, camera = get_camera_values(
-                        scene, camera_type, gui_timestep.value
-                    )
-
-                    camera_frustum_handles[camera_type].position = camera_position.array
-                    camera_frustum_handles[camera_type].wxyz = camera_quaternion.q
-                    camera_frustum_handles[camera_type].image = camera.image
-
-            if LIDAR_AVAILABLE:
-                points, colors = get_lidar_points(scene, gui_timestep.value, LIDAR_TYPES)
-                gui_lidar.points = points
-                gui_lidar.colors = colors
-
-            prev_timestep = current_timestep
-
-            rendering_time = time.time() -
start - sleep_time = 1.0 / gui_framerate.value - rendering_time - time.sleep(max(sleep_time, 0.0)) - self.server.flush() # Optional! - - # Load in frames. - current_frame_handle = self.server.scene.add_frame(f"/frame{gui_timestep.value}", show_axes=False) - self.server.scene.add_frame("/map", show_axes=False) - - camera_gui_handles: Dict[CameraType, viser.GuiImageHandle] = {} - camera_frustum_handles: Dict[CameraType, viser.CameraFrustumHandle] = {} - - for camera_type in VISUALIZE_CAMERA_GUI: - if camera_type in scene.available_camera_types: - with self.server.gui.add_folder(f"Camera {camera_type.serialize()}"): - camera_gui_handles[camera_type] = self.server.gui.add_image( - image=scene.get_camera_at_iteration(gui_timestep.value, camera_type).image, - label=camera_type.serialize(), - format="jpeg", - ) - - for camera_type in VISUALIZE_CAMERA_FRUSTUM: - if camera_type in scene.available_camera_types: - camera_position, camera_quaternion, camera = get_camera_values( - scene, camera_type, gui_timestep.value - ) - camera_frustum_handles[camera_type] = self.server.scene.add_camera_frustum( - f"camera_frustum_{camera_type.serialize()}", - fov=camera.metadata.fov_y, - aspect=camera.metadata.aspect_ratio, - scale=CAMERA_SCALE, - image=camera.image, - position=camera_position.array, - wxyz=camera_quaternion.q, - ) - - if LIDAR_AVAILABLE: - points, colors = get_lidar_points(scene, gui_timestep.value, LIDAR_TYPES) - gui_lidar = self.server.scene.add_point_cloud( - name="LiDAR", - points=points, - colors=colors, - point_size=LIDAR_POINT_SIZE, - point_shape="circle", - ) - - if MAP_AVAILABLE: - for name, mesh in get_map_meshes(scene).items(): - self.server.scene.add_mesh_trimesh(f"/map/{name}", mesh, visible=True) - - # centerlines, __, __, road_edges = get_map_lines(scene) - # for i, centerline in enumerate(centerlines): - # self.server.scene.add_line_segments( - # "/map/centerlines", - # centerlines, - # colors=[[BLACK.rgb]], - # line_width=LINE_WIDTH, - # ) - # self.server.scene.add_line_segments( - # "/map/left_boundary", - # left_boundaries, - # colors=[[TAB_10[2].rgb]], - # line_width=LINE_WIDTH, - # ) - # self.server.scene.add_line_segments( - # "/map/right_boundary",clear - # right_boundaries, - # colors=[[TAB_10[3].rgb]], - # line_width=LINE_WIDTH, - # ) - # print(centerlines.shape, road_edges.shape) - # self.server.scene.add_line_segments( - # "/map/road_edges", - # road_edges, - # colors=[[BLACK.rgb]], - # line_width=LINE_WIDTH, - # ) - - # Playback update loop. - prev_timestep = gui_timestep.value - while server_playing: - # Update the timestep if we're playing. 
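-            # `server_playing` is flipped to False by the "Next Scene" button callback above,
-            # which breaks this loop so that `next()` can reset the GUI and load the next scene.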
- if gui_playing.value: - gui_timestep.value = (gui_timestep.value + 1) % num_frames - - self.server.flush() - self.next() diff --git a/d123/common/visualization/viser/utils.py b/d123/common/visualization/viser/utils.py deleted file mode 100644 index 561c3676..00000000 --- a/d123/common/visualization/viser/utils.py +++ /dev/null @@ -1,317 +0,0 @@ -from typing import List, Tuple - -import numpy as np -import numpy.typing as npt -import trimesh -from pyquaternion import Quaternion -from typing_extensions import Final - -# from d123.common.datatypes.sensor.camera_parameters import get_nuplan_camera_parameters -from d123.common.datatypes.sensor.camera import Camera, CameraType -from d123.common.datatypes.sensor.lidar import LiDARType -from d123.common.geometry.base import Point3D, StateSE3 -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3 -from d123.common.geometry.line.polylines import Polyline3D -from d123.common.geometry.transform.se3 import convert_relative_to_absolute_points_3d_array -from d123.common.visualization.color.color import TAB_10, Color -from d123.common.visualization.color.config import PlotConfig -from d123.common.visualization.color.default import BOX_DETECTION_CONFIG, EGO_VEHICLE_CONFIG, MAP_SURFACE_CONFIG -from d123.dataset.maps.abstract_map import MapLayer -from d123.dataset.maps.abstract_map_objects import AbstractLane, AbstractSurfaceMapObject -from d123.dataset.scene.abstract_scene import AbstractScene - -# TODO: Refactor this file. -# TODO: Add general utilities for 3D primitives and mesh support. - -MAP_RADIUS: Final[float] = 500 -BRIGHTNESS_FACTOR: Final[float] = 1.0 - - -def configure_trimesh(mesh: trimesh.Trimesh, color: Color): - # base_color = [r / 255.0 for r in color.rgba] - mesh.visual.face_colors = color.rgba - - # pbr_material = trimesh.visual.material.PBRMaterial( - # baseColorFactor=base_color, # Your desired color (RGBA, 0-1 range) - # metallicFactor=0.0, # 0.0 = non-metallic (more matte) - # roughnessFactor=1.0, # 0.8 = quite rough (less shiny, 0=mirror, 1=completely rough) - # emissiveFactor=[0.0, 0.0, 0.0], # No emission - # alphaCutoff=0.9, # Alpha threshold for transparency - # doubleSided=True, # Single-sided material - # ) - # mesh.visual.material = pbr_material - # mesh.visual = mesh.visual.to_texture() - - return mesh - - -def bounding_box_to_trimesh(bbox: BoundingBoxSE3, plot_config: PlotConfig) -> trimesh.Trimesh: - - # Create a unit box centered at origin - box_mesh = trimesh.creation.box(extents=[bbox.length, bbox.width, bbox.height]) - - # Apply rotations in order: roll, pitch, yaw - box_mesh = box_mesh.apply_transform(trimesh.transformations.rotation_matrix(bbox.center.yaw, [0, 0, 1])) - box_mesh = box_mesh.apply_transform(trimesh.transformations.rotation_matrix(bbox.center.pitch, [0, 1, 0])) - box_mesh = box_mesh.apply_transform(trimesh.transformations.rotation_matrix(bbox.center.roll, [1, 0, 0])) - - # Apply translation - box_mesh = box_mesh.apply_translation([bbox.center.x, bbox.center.y, bbox.center.z]) - - return configure_trimesh(box_mesh, plot_config.fill_color) - - -def translate_bounding_box_se3(bounding_box_se3: BoundingBoxSE3, point_3d: Point3D) -> BoundingBoxSE3: - bounding_box_se3.center.x = bounding_box_se3.center.x - point_3d.x - bounding_box_se3.center.y = bounding_box_se3.center.y - point_3d.y - bounding_box_se3.center.z = bounding_box_se3.center.z - point_3d.z - return bounding_box_se3 - - -def get_bounding_box_meshes(scene: AbstractScene, iteration: int): - initial_ego_vehicle_state = 
scene.get_ego_state_at_iteration(0) - - ego_vehicle_state = scene.get_ego_state_at_iteration(iteration) - box_detections = scene.get_box_detections_at_iteration(iteration) - # traffic_light_detections = scene.get_traffic_light_detections_at_iteration(iteration) - # map_api = scene.map_api - - output = {} - for box_detection in box_detections: - bbox: BoundingBoxSE3 = box_detection.bounding_box - bbox = translate_bounding_box_se3(bbox, initial_ego_vehicle_state.center_se3) - plot_config = BOX_DETECTION_CONFIG[box_detection.metadata.detection_type] - trimesh_box = bounding_box_to_trimesh(bbox, plot_config) - output[f"{box_detection.metadata.detection_type.serialize()}/{box_detection.metadata.track_token}"] = ( - trimesh_box - ) - - ego_bbox = translate_bounding_box_se3(ego_vehicle_state.bounding_box, initial_ego_vehicle_state.center_se3) - trimesh_box = bounding_box_to_trimesh(ego_bbox, EGO_VEHICLE_CONFIG) - output["ego"] = trimesh_box - return output - - -def get_map_meshes(scene: AbstractScene): - initial_ego_vehicle_state = scene.get_ego_state_at_iteration(0) - center = initial_ego_vehicle_state.center_se3 - map_layers = [ - MapLayer.LANE_GROUP, - MapLayer.LANE, - MapLayer.WALKWAY, - MapLayer.CROSSWALK, - MapLayer.CARPARK, - MapLayer.GENERIC_DRIVABLE, - ] - - map_objects_dict = scene.map_api.get_proximal_map_objects(center.point_2d, radius=MAP_RADIUS, layers=map_layers) - print(map_objects_dict.keys()) - output = {} - - for map_layer in map_objects_dict.keys(): - surface_meshes = [] - for map_surface in map_objects_dict[map_layer]: - map_surface: AbstractSurfaceMapObject - trimesh_mesh = map_surface.trimesh_mesh - if map_layer in [ - MapLayer.WALKWAY, - MapLayer.CROSSWALK, - MapLayer.GENERIC_DRIVABLE, - MapLayer.CARPARK, - ]: - # Push meshes up by a few centimeters to avoid overlap with the ground in the visualization. 
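-                # Subtracting (center.z - 0.1) recenters the mesh on the initial ego position while
-                # lifting it by 0.1 m, so these overlay layers render above the road surface.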
- trimesh_mesh.vertices -= Point3D(x=center.x, y=center.y, z=center.z - 0.1).array - else: - trimesh_mesh.vertices -= Point3D(x=center.x, y=center.y, z=center.z).array - - if not scene.log_metadata.map_has_z: - trimesh_mesh.vertices += Point3D( - x=0, y=0, z=center.z - initial_ego_vehicle_state.vehicle_parameters.height / 2 - ).array - - trimesh_mesh = configure_trimesh(trimesh_mesh, MAP_SURFACE_CONFIG[map_layer].fill_color) - surface_meshes.append(trimesh_mesh) - output[f"{map_layer.serialize()}"] = trimesh.util.concatenate(surface_meshes) - return output - - -def get_map_lines(scene: AbstractScene): - map_layers = [MapLayer.LANE, MapLayer.ROAD_EDGE] - initial_ego_vehicle_state = scene.get_ego_state_at_iteration(0) - center = initial_ego_vehicle_state.center_se3 - map_objects_dict = scene.map_api.get_proximal_map_objects(center.point_2d, radius=MAP_RADIUS, layers=map_layers) - - def polyline_to_segments(polyline: Polyline3D) -> npt.NDArray[np.float64]: - polyline_array = polyline.array - center.point_3d.array - polyline_array = polyline_array.reshape(-1, 1, 3) - polyline_array = np.concatenate([polyline_array[:-1], polyline_array[1:]], axis=1) - return polyline_array - - centerlines, right_boundaries, left_boundaries, road_edges = [], [], [], [] - for lane in map_objects_dict[MapLayer.LANE]: - lane: AbstractLane - - centerlines.append(polyline_to_segments(lane.centerline)) - right_boundaries.append(polyline_to_segments(lane.right_boundary)) - left_boundaries.append(polyline_to_segments(lane.left_boundary)) - - for road_edge in map_objects_dict[MapLayer.ROAD_EDGE]: - road_edges.append(polyline_to_segments(road_edge.polyline_3d)) - - centerlines = np.concatenate(centerlines, axis=0) - left_boundaries = np.concatenate(left_boundaries, axis=0) - right_boundaries = np.concatenate(right_boundaries, axis=0) - road_edges = np.concatenate(road_edges, axis=0) - - if not scene.log_metadata.map_has_z: - # If the map does not have a z-coordinate, we set it to the height of the ego vehicle. 
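-        # More precisely, the polylines are placed on the ego ground plane, i.e. the ego center
-        # height minus half the vehicle height.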
- centerlines[:, :, 2] += center.z - initial_ego_vehicle_state.vehicle_parameters.height / 2 - left_boundaries[:, :, 2] += center.z - initial_ego_vehicle_state.vehicle_parameters.height / 2 - right_boundaries[:, :, 2] += center.z - initial_ego_vehicle_state.vehicle_parameters.height / 2 - road_edges[:, :, 2] += center.z - initial_ego_vehicle_state.vehicle_parameters.height / 2 - - return centerlines, left_boundaries, right_boundaries, road_edges - - -def get_trimesh_from_boundaries( - left_boundary: Polyline3D, right_boundary: Polyline3D, resolution: float = 1.0 -) -> trimesh.Trimesh: - resolution = 1.0 # [m] - - average_length = (left_boundary.length + right_boundary.length) / 2 - num_samples = int(average_length // resolution) + 1 - left_boundary_array = _interpolate_polyline(left_boundary, num_samples=num_samples) - right_boundary_array = _interpolate_polyline(right_boundary, num_samples=num_samples) - return _create_lane_mesh_from_boundary_arrays(left_boundary_array, right_boundary_array) - - -def _interpolate_polyline(polyline_3d: Polyline3D, num_samples: int = 20) -> npt.NDArray[np.float64]: - if num_samples < 2: - num_samples = 2 - distances = np.linspace(0, polyline_3d.length, num=num_samples, endpoint=True, dtype=np.float64) - return polyline_3d.interpolate(distances) - - -def _create_lane_mesh_from_boundary_arrays( - left_boundary_array: npt.NDArray[np.float64], - right_boundary_array: npt.NDArray[np.float64], -) -> trimesh.Trimesh: - - # Ensure both polylines have the same number of points - if left_boundary_array.shape[0] != right_boundary_array.shape[0]: - raise ValueError("Both polylines must have the same number of points") - - n_points = left_boundary_array.shape[0] - - # Combine vertices from both polylines - vertices = np.vstack([left_boundary_array, right_boundary_array]) - - # Create faces by connecting corresponding points on the two polylines - faces = [] - for i in range(n_points - 1): - faces.append([i, i + n_points, i + 1]) - faces.append([i + 1, i + n_points, i + n_points + 1]) - - faces = np.array(faces) - mesh = trimesh.Trimesh(vertices=vertices, faces=faces) - mesh.visual.face_colors = MAP_SURFACE_CONFIG[MapLayer.LANE].fill_color.rgba - return mesh - - -def get_camera_values( - scene: AbstractScene, camera_type: CameraType, iteration: int -) -> Tuple[Point3D, Quaternion, Camera]: - initial_point_3d = scene.get_ego_state_at_iteration(0).center_se3.point_3d - rear_axle = scene.get_ego_state_at_iteration(iteration).rear_axle_se3 - - camera = scene.get_camera_at_iteration(iteration, camera_type) - - rear_axle_array = rear_axle.array - rear_axle_array[:3] -= initial_point_3d.array - rear_axle = StateSE3.from_array(rear_axle_array) - - camera_to_ego = camera.extrinsic # 4x4 transformation from camera to ego frame - - # Get the rotation matrix of the rear axle pose - from d123.common.geometry.transform.se3 import get_rotation_matrix - - ego_transform = np.eye(4, dtype=np.float64) - ego_transform[:3, :3] = get_rotation_matrix(rear_axle) - ego_transform[:3, 3] = rear_axle.point_3d.array - - camera_transform = ego_transform @ camera_to_ego - - # Camera transformation in ego frame - camera_position = Point3D(*camera_transform[:3, 3]) - camera_rotation = Quaternion(matrix=camera_transform[:3, :3]) - - return camera_position, camera_rotation, camera - - -def _get_ego_frame_pose(scene: AbstractScene, iteration: int) -> StateSE3: - - initial_point_3d = scene.get_ego_state_at_iteration(0).center_se3.point_3d - state_se3 = scene.get_ego_state_at_iteration(iteration).center_se3 
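-    # Recenter on the first-frame ego position; keeping coordinates near the origin is presumably
-    # meant to avoid float32 precision artifacts in the viewer.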
- - state_se3.x = state_se3.x - initial_point_3d.x - state_se3.y = state_se3.y - initial_point_3d.y - state_se3.z = state_se3.z - initial_point_3d.z - - return state_se3 - - -def euler_to_quaternion_scipy(roll: float, pitch: float, yaw: float) -> npt.NDArray[np.float64]: - from scipy.spatial.transform import Rotation - - r = Rotation.from_euler("xyz", [roll, pitch, yaw], degrees=False) - quat = r.as_quat(scalar_first=True) - return quat - - -def get_lidar_points( - scene: AbstractScene, iteration: int, lidar_types: List[LiDARType] -) -> Tuple[npt.NDArray[np.float32], npt.NDArray[np.float32]]: - - initial_ego_vehicle_state = scene.get_ego_state_at_iteration(0) - current_ego_vehicle_state = scene.get_ego_state_at_iteration(iteration) - - def float_to_rgb(values: npt.NDArray[np.float32], cmap_name: str = "viridis") -> npt.NDArray[np.float32]: - """ - Converts an array of float values to RGB colors using a matplotlib colormap. - Normalizes values to [0, 1] using min-max scaling. - Returns an array of shape (N, 3) with RGB values in [0, 1]. - """ - import matplotlib.pyplot as plt - - vmin = np.min(values) - vmax = np.max(values) - if vmax > vmin: - normed = (values - vmin) / (vmax - vmin) - else: - normed = np.zeros_like(values) - cmap = plt.get_cmap(cmap_name) - rgb = cmap(normed)[:, :3] # Ignore alpha channel - return rgb.astype(np.float32) - - points_ = [] - colors_ = [] - for lidar_idx, lidar_type in enumerate(lidar_types): - if lidar_type not in scene.available_lidar_types: - continue - lidar = scene.get_lidar_at_iteration(iteration, lidar_type) - - # 1. convert points to ego frame - points = lidar.xyz - - # 2. convert points to world frame - origin = current_ego_vehicle_state.rear_axle_se3 - points = convert_relative_to_absolute_points_3d_array(origin, points) - points = points - initial_ego_vehicle_state.center_se3.point_3d.array - points_.append(points) - colors_.append([TAB_10[lidar_idx % len(TAB_10)].rgb] * points.shape[0]) - # colors_.append(float_to_rgb(lidar.intensity, cmap_name="viridis")) - - points_ = np.concatenate(points_, axis=0) if points_ else np.empty((0, 3), dtype=np.float32) - colors_ = np.concatenate(colors_, axis=0) if colors_ else np.empty((0, 3), dtype=np.float32) - - return points_, colors_ diff --git a/d123/common/visualization/viser/utils_v2.py b/d123/common/visualization/viser/utils_v2.py deleted file mode 100644 index 5db06ab8..00000000 --- a/d123/common/visualization/viser/utils_v2.py +++ /dev/null @@ -1,99 +0,0 @@ -import numpy as np -import numpy.typing as npt - -# from d123.common.datatypes.sensor.camera_parameters import get_nuplan_camera_parameters -from d123.common.geometry.base import Point3D, Point3DIndex -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3 -from d123.common.geometry.bounding_box.bounding_box_index import Corners3DIndex -from d123.common.geometry.transform.se3 import translate_body_frame -from d123.common.geometry.vector import Vector3D -from d123.common.visualization.color.default import BOX_DETECTION_CONFIG, EGO_VEHICLE_CONFIG -from d123.common.visualization.viser.utils import BRIGHTNESS_FACTOR -from d123.dataset.scene.abstract_scene import AbstractScene - -# TODO: Refactor this file. -# TODO: Add general utilities for 3D primitives and mesh support. - - -def _get_bounding_box_corners(bounding_box: BoundingBoxSE3) -> npt.NDArray[np.float64]: - """ - Get the vertices of a bounding box in 3D space. 
- """ - corner_extent_factors = { - Corners3DIndex.FRONT_LEFT_BOTTOM: Vector3D(+0.5, -0.5, -0.5), - Corners3DIndex.FRONT_RIGHT_BOTTOM: Vector3D(+0.5, +0.5, -0.5), - Corners3DIndex.BACK_RIGHT_BOTTOM: Vector3D(-0.5, +0.5, -0.5), - Corners3DIndex.BACK_LEFT_BOTTOM: Vector3D(-0.5, -0.5, -0.5), - Corners3DIndex.FRONT_LEFT_TOP: Vector3D(+0.5, -0.5, +0.5), - Corners3DIndex.FRONT_RIGHT_TOP: Vector3D(+0.5, +0.5, +0.5), - Corners3DIndex.BACK_RIGHT_TOP: Vector3D(-0.5, +0.5, +0.5), - Corners3DIndex.BACK_LEFT_TOP: Vector3D(-0.5, -0.5, +0.5), - } - corners = np.zeros((len(Corners3DIndex), len(Point3DIndex)), dtype=np.float64) - bounding_box_extent = np.array([bounding_box.length, bounding_box.width, bounding_box.height], dtype=np.float64) - for idx, vec in corner_extent_factors.items(): - vector_3d = Vector3D.from_array(bounding_box_extent * vec.array) - corners[idx] = translate_body_frame(bounding_box.center, vector_3d).point_3d.array - return corners - - -def _get_bounding_box_lines(bounding_box: BoundingBoxSE3) -> npt.NDArray[np.float64]: - """ - Get the edges of a bounding box in 3D space as a Polyline3D. - """ - corners = _get_bounding_box_corners(bounding_box) - index_pairs = [ - (Corners3DIndex.FRONT_LEFT_BOTTOM, Corners3DIndex.FRONT_RIGHT_BOTTOM), - (Corners3DIndex.FRONT_RIGHT_BOTTOM, Corners3DIndex.BACK_RIGHT_BOTTOM), - (Corners3DIndex.BACK_RIGHT_BOTTOM, Corners3DIndex.BACK_LEFT_BOTTOM), - (Corners3DIndex.BACK_LEFT_BOTTOM, Corners3DIndex.FRONT_LEFT_BOTTOM), - (Corners3DIndex.FRONT_LEFT_TOP, Corners3DIndex.FRONT_RIGHT_TOP), - (Corners3DIndex.FRONT_RIGHT_TOP, Corners3DIndex.BACK_RIGHT_TOP), - (Corners3DIndex.BACK_RIGHT_TOP, Corners3DIndex.BACK_LEFT_TOP), - (Corners3DIndex.BACK_LEFT_TOP, Corners3DIndex.FRONT_LEFT_TOP), - (Corners3DIndex.FRONT_LEFT_BOTTOM, Corners3DIndex.FRONT_LEFT_TOP), - (Corners3DIndex.FRONT_RIGHT_BOTTOM, Corners3DIndex.FRONT_RIGHT_TOP), - (Corners3DIndex.BACK_RIGHT_BOTTOM, Corners3DIndex.BACK_RIGHT_TOP), - (Corners3DIndex.BACK_LEFT_BOTTOM, Corners3DIndex.BACK_LEFT_TOP), - ] - lines = np.zeros((len(index_pairs), 2, len(Point3DIndex)), dtype=np.float64) - for i, (start_idx, end_idx) in enumerate(index_pairs): - lines[i, 0] = corners[start_idx] - lines[i, 1] = corners[end_idx] - return lines - - -def translate_points_3d(points_3d: npt.NDArray[np.float64], point_3d: Point3D) -> npt.NDArray[np.float64]: - return points_3d - point_3d.array - - -def get_bounding_box_outlines(scene: AbstractScene, iteration: int): - - initial_ego_vehicle_state = scene.get_ego_state_at_iteration(0) - ego_vehicle_state = scene.get_ego_state_at_iteration(iteration) - box_detections = scene.get_box_detections_at_iteration(iteration) - - lines = [] - colors = [] - for box_detection in box_detections: - bbox: BoundingBoxSE3 = box_detection.bounding_box_se3 - bbox_lines = _get_bounding_box_lines(bbox) - bbox_lines = translate_points_3d(bbox_lines, initial_ego_vehicle_state.center_se3.point_3d) - bbox_color = np.zeros(bbox_lines.shape, dtype=np.float32) - bbox_color[..., :] = ( - BOX_DETECTION_CONFIG[box_detection.metadata.detection_type] - .fill_color.set_brightness(BRIGHTNESS_FACTOR) - .rgb_norm - ) - - lines.append(bbox_lines) - colors.append(bbox_color) - - ego_bbox_lines = _get_bounding_box_lines(ego_vehicle_state.bounding_box_se3) - ego_bbox_lines = translate_points_3d(ego_bbox_lines, initial_ego_vehicle_state.center_se3.point_3d) - ego_bbox_color = np.zeros(ego_bbox_lines.shape, dtype=np.float32) - ego_bbox_color[..., :] = EGO_VEHICLE_CONFIG.fill_color.set_brightness(BRIGHTNESS_FACTOR).rgb_norm - - 
lines.append(ego_bbox_lines) - colors.append(ego_bbox_color) - return np.concatenate(lines, axis=0), np.concatenate(colors, axis=0) diff --git a/d123/dataset/arrow/__init__.py b/d123/dataset/arrow/__init__.py deleted file mode 100644 index 406e0d32..00000000 --- a/d123/dataset/arrow/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -class SceneBuilder: - pass diff --git a/d123/dataset/arrow/conversion.py b/d123/dataset/arrow/conversion.py deleted file mode 100644 index 68251f82..00000000 --- a/d123/dataset/arrow/conversion.py +++ /dev/null @@ -1,167 +0,0 @@ -# TODO: rename this file and potentially move somewhere more appropriate. - -import io -import os -from pathlib import Path -from typing import Dict, Optional - -import numpy as np -import numpy.typing as npt -import pyarrow as pa -from PIL import Image - -from d123.common.datatypes.detection.detection import ( - BoxDetection, - BoxDetectionMetadata, - BoxDetectionSE3, - BoxDetectionWrapper, - TrafficLightDetection, - TrafficLightDetectionWrapper, - TrafficLightStatus, -) -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.sensor.camera import Camera, CameraMetadata -from d123.common.datatypes.sensor.lidar import LiDAR, LiDARMetadata -from d123.common.datatypes.time.time_point import TimePoint -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3 -from d123.common.datatypes.vehicle_state.vehicle_parameters import VehicleParameters -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3 -from d123.common.geometry.vector import Vector3D -from d123.dataset.logs.log_metadata import LogMetadata -from d123.dataset.maps.abstract_map import List - -DATASET_SENSOR_ROOT: Dict[str, Path] = { - "nuplan": Path(os.environ["NUPLAN_DATA_ROOT"]) / "nuplan-v1.1" / "sensor_blobs", - "carla": Path(os.environ["CARLA_DATA_ROOT"]) / "sensor_blobs", - # "av2-sensor": Path(os.environ["AV2_SENSOR_DATA_ROOT"]) / "sensor", -} - - -def get_timepoint_from_arrow_table(arrow_table: pa.Table, index: int) -> TimePoint: - return TimePoint.from_us(arrow_table["timestamp"][index].as_py()) - - -def get_ego_vehicle_state_from_arrow_table( - arrow_table: pa.Table, index: int, vehicle_parameters: VehicleParameters -) -> EgoStateSE3: - timepoint = get_timepoint_from_arrow_table(arrow_table, index) - return EgoStateSE3.from_array( - array=pa.array(arrow_table["ego_states"][index]).to_numpy(), - vehicle_parameters=vehicle_parameters, - timepoint=timepoint, - ) - - -def get_box_detections_from_arrow_table(arrow_table: pa.Table, index: int) -> BoxDetectionWrapper: - timepoint = get_timepoint_from_arrow_table(arrow_table, index) - box_detections: List[BoxDetection] = [] - - for detection_state, detection_velocity, detection_token, detection_type in zip( - arrow_table["detections_state"][index].as_py(), - arrow_table["detections_velocity"][index].as_py(), - arrow_table["detections_token"][index].as_py(), - arrow_table["detections_type"][index].as_py(), - ): - box_detection = BoxDetectionSE3( - metadata=BoxDetectionMetadata( - detection_type=DetectionType(detection_type), - timepoint=timepoint, - track_token=detection_token, - confidence=None, - ), - bounding_box_se3=BoundingBoxSE3.from_array(np.array(detection_state)), - velocity=Vector3D.from_array(np.array(detection_velocity)) if detection_velocity else None, - ) - box_detections.append(box_detection) - return BoxDetectionWrapper(box_detections=box_detections) - - -def get_traffic_light_detections_from_arrow_table(arrow_table: pa.Table, index: int) -> 
TrafficLightDetectionWrapper: - timepoint = get_timepoint_from_arrow_table(arrow_table, index) - traffic_light_detections: List[TrafficLightDetection] = [] - - for lane_id, status in zip( - arrow_table["traffic_light_ids"][index].as_py(), arrow_table["traffic_light_types"][index].as_py() - ): - traffic_light_detection = TrafficLightDetection( - timepoint=timepoint, - lane_id=lane_id, - status=TrafficLightStatus(status), - ) - traffic_light_detections.append(traffic_light_detection) - - return TrafficLightDetectionWrapper(traffic_light_detections=traffic_light_detections) - - -def get_camera_from_arrow_table( - arrow_table: pa.Table, - index: int, - camera_metadata: CameraMetadata, - log_metadata: LogMetadata, -) -> Camera: - - table_data = arrow_table[camera_metadata.camera_type.serialize()][index].as_py() - extrinsic = arrow_table[f"{camera_metadata.camera_type.serialize()}_extrinsic"][index].as_py() - extrinsic = np.array(extrinsic).reshape((4, 4)) if extrinsic else None - - if table_data is None or extrinsic is None: - return None - - image: Optional[npt.NDArray[np.uint8]] = None - - if isinstance(table_data, str): - sensor_root = DATASET_SENSOR_ROOT[log_metadata.dataset] - full_image_path = sensor_root / table_data - assert full_image_path.exists(), f"Camera file not found: {full_image_path}" - img = Image.open(full_image_path) - img.load() - image = np.asarray(img, dtype=np.uint8) - elif isinstance(table_data, bytes): - image = np.array(Image.open(io.BytesIO(table_data))) - else: - raise NotImplementedError("Only string file paths for camera data are supported.") - - return Camera( - metadata=camera_metadata, - image=image, - extrinsic=extrinsic, - ) - - -def get_lidar_from_arrow_table( - arrow_table: pa.Table, - index: int, - lidar_metadata: LiDARMetadata, - log_metadata: LogMetadata, -) -> LiDAR: - assert ( - lidar_metadata.lidar_type.serialize() in arrow_table.schema.names - ), f'"{lidar_metadata.lidar_type.serialize()}" field not found in Arrow table schema.' 
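-    # The Arrow cell either stores a relative file path (str) that is resolved against the
-    # dataset's sensor root, or (for wopd) the point cloud inline; both cases are handled below.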
- lidar_data = arrow_table[lidar_metadata.lidar_type.serialize()][index].as_py() - if isinstance(lidar_data, str): - sensor_root = DATASET_SENSOR_ROOT[log_metadata.dataset] - full_lidar_path = sensor_root / lidar_data - assert full_lidar_path.exists(), f"LiDAR file not found: {full_lidar_path}" - - # NOTE: We move data specific import into if-else block, to avoid data specific import errors - if log_metadata.dataset == "nuplan": - from d123.dataset.dataset_specific.nuplan.load_sensor import load_nuplan_lidar_from_path - - lidar = load_nuplan_lidar_from_path(full_lidar_path, lidar_metadata) - elif log_metadata.dataset == "carla": - from d123.dataset.dataset_specific.carla.load_sensor import load_carla_lidar_from_path - - lidar = load_carla_lidar_from_path(full_lidar_path, lidar_metadata) - elif log_metadata.dataset == "wopd": - raise NotImplementedError - else: - raise NotImplementedError(f"Loading LiDAR data for dataset {log_metadata.dataset} is not implemented.") - - else: - if log_metadata.dataset == "wopd": - lidar_data = np.array(lidar_data, dtype=np.float64) - lidar = LiDAR(metadata=lidar_metadata, point_cloud=lidar_data.T) - else: - raise NotImplementedError("Only string file paths for lidar data are supported.") - - return lidar diff --git a/d123/dataset/arrow/helper.py b/d123/dataset/arrow/helper.py deleted file mode 100644 index 6f68c152..00000000 --- a/d123/dataset/arrow/helper.py +++ /dev/null @@ -1,16 +0,0 @@ -from pathlib import Path -from typing import Union - -import pyarrow as pa - - -def open_arrow_table(arrow_file_path: Union[str, Path]) -> pa.Table: - with pa.memory_map(str(arrow_file_path), "rb") as source: - table: pa.Table = pa.ipc.open_file(source).read_all() - return table - - -def write_arrow_table(table: pa.Table, arrow_file_path: Union[str, Path]) -> None: - with pa.OSFile(str(arrow_file_path), "wb") as sink: - with pa.ipc.new_file(sink, table.schema) as writer: - writer.write_table(table) diff --git a/d123/dataset/conversion/map/opendrive/opendrive_map_conversion.py b/d123/dataset/conversion/map/opendrive/opendrive_map_conversion.py deleted file mode 100644 index 56dfa65c..00000000 --- a/d123/dataset/conversion/map/opendrive/opendrive_map_conversion.py +++ /dev/null @@ -1,422 +0,0 @@ -import logging -import os -import warnings -from pathlib import Path -from typing import Dict, Final, List - -import geopandas as gpd -import numpy as np -import pandas as pd -import shapely -from shapely.ops import polygonize, unary_union - -from d123.dataset.conversion.map.opendrive.parser.opendrive import Junction, OpenDrive -from d123.dataset.conversion.map.opendrive.utils.collection import collect_element_helpers -from d123.dataset.conversion.map.opendrive.utils.id_mapping import IntIDMapping -from d123.dataset.conversion.map.opendrive.utils.lane_helper import ( - OpenDriveLaneGroupHelper, - OpenDriveLaneHelper, -) -from d123.dataset.conversion.map.opendrive.utils.objects_helper import ( - OpenDriveObjectHelper, -) -from d123.dataset.conversion.map.road_edge.road_edge_2d_utils import split_line_geometry_by_max_length -from d123.dataset.conversion.map.road_edge.road_edge_3d_utils import get_road_edges_3d_from_gdf -from d123.dataset.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType - -logger = logging.getLogger(__name__) -D123_MAPS_ROOT = Path(os.environ.get("D123_MAPS_ROOT")) - -MAX_ROAD_EDGE_LENGTH: Final[float] = 100.0 # [m] - - -def convert_from_xodr( - xordr_file: Path, - map_name: str, - interpolation_step_size: float, - connection_distance_threshold: float, 
-) -> None: - - opendrive = OpenDrive.parse_from_file(xordr_file) - - _, junction_dict, lane_helper_dict, lane_group_helper_dict, object_helper_dict = collect_element_helpers( - opendrive, interpolation_step_size, connection_distance_threshold - ) - - # Collect data frames and store - dataframes: Dict[MapLayer, gpd.GeoDataFrame] = {} - dataframes[MapLayer.LANE] = _extract_lane_dataframe(lane_group_helper_dict) - dataframes[MapLayer.LANE_GROUP] = _extract_lane_group_dataframe(lane_group_helper_dict) - dataframes[MapLayer.WALKWAY] = _extract_walkways_dataframe(lane_helper_dict) - dataframes[MapLayer.CARPARK] = _extract_carpark_dataframe(lane_helper_dict) - dataframes[MapLayer.GENERIC_DRIVABLE] = _extract_generic_drivable_dataframe(lane_helper_dict) - dataframes[MapLayer.INTERSECTION] = _extract_intersections_dataframe(junction_dict, lane_group_helper_dict) - dataframes[MapLayer.CROSSWALK] = _extract_crosswalk_dataframe(object_helper_dict) - - _convert_ids_to_int( - dataframes[MapLayer.LANE], - dataframes[MapLayer.WALKWAY], - dataframes[MapLayer.CARPARK], - dataframes[MapLayer.GENERIC_DRIVABLE], - dataframes[MapLayer.LANE_GROUP], - dataframes[MapLayer.INTERSECTION], - dataframes[MapLayer.CROSSWALK], - ) - dataframes[MapLayer.ROAD_EDGE] = _extract_road_edge_df( - dataframes[MapLayer.LANE], - dataframes[MapLayer.CARPARK], - dataframes[MapLayer.GENERIC_DRIVABLE], - dataframes[MapLayer.LANE_GROUP], - ) - dataframes[MapLayer.ROAD_LINE] = _extract_road_line_df( - dataframes[MapLayer.LANE], - dataframes[MapLayer.LANE_GROUP], - ) - map_file_name = D123_MAPS_ROOT / f"{map_name}.gpkg" - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="'crs' was not provided") - for layer, gdf in dataframes.items(): - gdf.to_file(map_file_name, layer=layer.serialize(), driver="GPKG", mode="a") - - -def _extract_lane_dataframe(lane_group_helper_dict: Dict[str, OpenDriveLaneGroupHelper]) -> gpd.GeoDataFrame: - - ids = [] - lane_group_ids = [] - speed_limits_mps = [] - predecessor_ids = [] - successor_ids = [] - left_boundaries = [] - right_boundaries = [] - left_lane_ids = [] - right_lane_ids = [] - baseline_paths = [] - geometries = [] - - for lane_group_helper in lane_group_helper_dict.values(): - lane_group_id = lane_group_helper.lane_group_id - lane_helpers = lane_group_helper.lane_helpers - num_lanes = len(lane_helpers) - # NOTE: Lanes are going left to right, ie. 
inner to outer - for lane_idx, lane_helper in enumerate(lane_helpers): - ids.append(lane_helper.lane_id) - lane_group_ids.append(lane_group_id) - speed_limits_mps.append(lane_helper.speed_limit_mps) - predecessor_ids.append(lane_helper.predecessor_lane_ids) - successor_ids.append(lane_helper.successor_lane_ids) - left_boundaries.append(shapely.LineString(lane_helper.inner_polyline_3d)) - right_boundaries.append(shapely.LineString(lane_helper.outer_polyline_3d)) - baseline_paths.append(shapely.LineString(lane_helper.center_polyline_3d)) - geometries.append(lane_helper.shapely_polygon) - left_lane_id = lane_helpers[lane_idx - 1].lane_id if lane_idx > 0 else None - right_lane_id = lane_helpers[lane_idx + 1].lane_id if lane_idx < num_lanes - 1 else None - left_lane_ids.append(left_lane_id) - right_lane_ids.append(right_lane_id) - - data = pd.DataFrame( - { - "id": ids, - "lane_group_id": lane_group_ids, - "speed_limit_mps": speed_limits_mps, - "predecessor_ids": predecessor_ids, - "successor_ids": successor_ids, - "left_boundary": left_boundaries, - "right_boundary": right_boundaries, - "left_lane_id": left_lane_ids, - "right_lane_id": right_lane_ids, - "baseline_path": baseline_paths, - } - ) - return gpd.GeoDataFrame(data, geometry=geometries) - - -def _extract_lane_group_dataframe(lane_group_helper_dict: Dict[str, OpenDriveLaneGroupHelper]) -> gpd.GeoDataFrame: - - ids = [] - lane_ids = [] - predecessor_lane_group_ids = [] - successor_lane_group_ids = [] - intersection_ids = [] - left_boundaries = [] - right_boundaries = [] - geometries = [] - - for lane_group_helper in lane_group_helper_dict.values(): - lane_group_helper: OpenDriveLaneGroupHelper - ids.append(lane_group_helper.lane_group_id) - lane_ids.append([lane_helper.lane_id for lane_helper in lane_group_helper.lane_helpers]) - predecessor_lane_group_ids.append(lane_group_helper.predecessor_lane_group_ids) - successor_lane_group_ids.append(lane_group_helper.successor_lane_group_ids) - intersection_ids.append(lane_group_helper.junction_id) - left_boundaries.append(shapely.LineString(lane_group_helper.inner_polyline_3d)) - right_boundaries.append(shapely.LineString(lane_group_helper.outer_polyline_3d)) - geometries.append(lane_group_helper.shapely_polygon) - - data = pd.DataFrame( - { - "id": ids, - "lane_ids": lane_ids, - "predecessor_ids": predecessor_lane_group_ids, - "successor_ids": successor_lane_group_ids, - "intersection_id": intersection_ids, - "left_boundary": left_boundaries, - "right_boundary": right_boundaries, - } - ) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - - return gdf - - -def _extract_walkways_dataframe(lane_helper_dict: Dict[str, OpenDriveLaneHelper]) -> gpd.GeoDataFrame: - - ids = [] - left_boundaries = [] - right_boundaries = [] - outlines = [] - geometries = [] - - for lane_helper in lane_helper_dict.values(): - if lane_helper.type == "sidewalk": - ids.append(lane_helper.lane_id) - left_boundaries.append(shapely.LineString(lane_helper.inner_polyline_3d)) - right_boundaries.append(shapely.LineString(lane_helper.outer_polyline_3d)) - outlines.append(shapely.LineString(lane_helper.outline_polyline_3d)) - geometries.append(lane_helper.shapely_polygon) - - data = pd.DataFrame( - { - "id": ids, - "left_boundary": left_boundaries, - "right_boundary": right_boundaries, - "outline": outlines, - } - ) - return gpd.GeoDataFrame(data, geometry=geometries) - - -def _extract_carpark_dataframe(lane_helper_dict: Dict[str, OpenDriveLaneHelper]) -> gpd.GeoDataFrame: - - ids = [] - left_boundaries = [] - right_boundaries 
= []
-    outlines = []
-    geometries = []
-
-    for lane_helper in lane_helper_dict.values():
-        if lane_helper.type == "parking":
-            ids.append(lane_helper.lane_id)
-            left_boundaries.append(shapely.LineString(lane_helper.inner_polyline_3d))
-            right_boundaries.append(shapely.LineString(lane_helper.outer_polyline_3d))
-            outlines.append(shapely.LineString(lane_helper.outline_polyline_3d))
-            geometries.append(lane_helper.shapely_polygon)
-
-    data = pd.DataFrame(
-        {
-            "id": ids,
-            "left_boundary": left_boundaries,
-            "right_boundary": right_boundaries,
-            "outline": outlines,
-        }
-    )
-    return gpd.GeoDataFrame(data, geometry=geometries)
-
-
-def _extract_generic_drivable_dataframe(lane_helper_dict: Dict[str, OpenDriveLaneHelper]) -> gpd.GeoDataFrame:
-
-    ids = []
-    left_boundaries = []
-    right_boundaries = []
-    outlines = []
-    geometries = []
-
-    for lane_helper in lane_helper_dict.values():
-        if lane_helper.type in ["none", "border", "bidirectional"]:
-            ids.append(lane_helper.lane_id)
-            left_boundaries.append(shapely.LineString(lane_helper.inner_polyline_3d))
-            right_boundaries.append(shapely.LineString(lane_helper.outer_polyline_3d))
-            outlines.append(shapely.LineString(lane_helper.outline_polyline_3d))
-            geometries.append(lane_helper.shapely_polygon)
-
-    data = pd.DataFrame(
-        {
-            "id": ids,
-            "left_boundary": left_boundaries,
-            "right_boundary": right_boundaries,
-            "outline": outlines,
-        }
-    )
-    return gpd.GeoDataFrame(data, geometry=geometries)
-
-
-def _extract_intersections_dataframe(
-    junction_dict: Dict[str, Junction],
-    lane_group_helper_dict: Dict[str, OpenDriveLaneGroupHelper],
-) -> gpd.GeoDataFrame:
-    def _find_lane_group_helpers_with_junction_id(junction_id: int) -> List[OpenDriveLaneGroupHelper]:
-        return [
-            lane_group_helper
-            for lane_group_helper in lane_group_helper_dict.values()
-            if lane_group_helper.junction_id == junction_id
-        ]
-
-    ids = []
-    lane_group_ids = []
-    geometries = []
-    for junction in junction_dict.values():
-        lane_group_helpers = _find_lane_group_helpers_with_junction_id(junction.id)
-        lane_group_ids_ = [lane_group_helper.lane_group_id for lane_group_helper in lane_group_helpers]
-        if len(lane_group_ids_) == 0:
-            logger.debug(f"Skipping Junction {junction.id} without lane groups!")
-            continue
-
-        polygon = extract_exteriors_polygon(lane_group_helpers)
-        ids.append(junction.id)
-        lane_group_ids.append(lane_group_ids_)
-        geometries.append(polygon)
-
-    data = pd.DataFrame({"id": ids, "lane_group_ids": lane_group_ids})
-    return gpd.GeoDataFrame(data, geometry=geometries)
-
-
-def _extract_crosswalk_dataframe(object_helper_dict: Dict[int, OpenDriveObjectHelper]) -> gpd.GeoDataFrame:
-    ids = []
-    outlines = []
-    geometries = []
-    for object_helper in object_helper_dict.values():
-        ids.append(object_helper.object_id)
-        outlines.append(shapely.LineString(object_helper.outline_3d))
-        geometries.append(object_helper.shapely_polygon)
-
-    data = pd.DataFrame({"id": ids, "outline": outlines})
-    return gpd.GeoDataFrame(data, geometry=geometries)
-
-
-def _convert_ids_to_int(
-    lane_df: gpd.GeoDataFrame,
-    walkways_df: gpd.GeoDataFrame,
-    carpark_df: gpd.GeoDataFrame,
-    generic_drivable_area_df: gpd.GeoDataFrame,
-    lane_group_df: gpd.GeoDataFrame,
-    intersections_df: gpd.GeoDataFrame,
-    crosswalk_df: gpd.GeoDataFrame,
-) -> None:
-
-    # NOTE: intersection and crosswalk ids are already integers
-
-    # initialize id mappings
-    lane_id_mapping = IntIDMapping.from_series(lane_df["id"])
-    walkway_id_mapping = IntIDMapping.from_series(walkways_df["id"])
-    carpark_id_mapping = IntIDMapping.from_series(carpark_df["id"])
-    generic_drivable_id_mapping = IntIDMapping.from_series(generic_drivable_area_df["id"])
-    lane_group_id_mapping = IntIDMapping.from_series(lane_group_df["id"])
-
-    # Adjust cross references in lane_df and lane_group_df
-    lane_df["lane_group_id"] = lane_df["lane_group_id"].map(lane_group_id_mapping.str_to_int)
-    lane_group_df["lane_ids"] = lane_group_df["lane_ids"].apply(lambda x: lane_id_mapping.map_list(x))
-
-    # Adjust predecessor/successor in lane_df and lane_group_df
-    for column in ["predecessor_ids", "successor_ids"]:
-        lane_df[column] = lane_df[column].apply(lambda x: lane_id_mapping.map_list(x))
-        lane_group_df[column] = lane_group_df[column].apply(lambda x: lane_group_id_mapping.map_list(x))
-
-    for column in ["left_lane_id", "right_lane_id"]:
-        lane_df[column] = lane_df[column].apply(
-            lambda x: str(lane_id_mapping.str_to_int[x]) if pd.notna(x) and x is not None else x
-        )
-
-    lane_df["id"] = lane_df["id"].map(lane_id_mapping.str_to_int)
-    walkways_df["id"] = walkways_df["id"].map(walkway_id_mapping.str_to_int)
-    carpark_df["id"] = carpark_df["id"].map(carpark_id_mapping.str_to_int)
-    generic_drivable_area_df["id"] = generic_drivable_area_df["id"].map(generic_drivable_id_mapping.str_to_int)
-    lane_group_df["id"] = lane_group_df["id"].map(lane_group_id_mapping.str_to_int)
-
-    intersections_df["lane_group_ids"] = intersections_df["lane_group_ids"].apply(
-        lambda x: lane_group_id_mapping.map_list(x)
-    )
-
-
-def _extract_road_line_df(
-    lane_df: gpd.GeoDataFrame,
-    lane_group_df: gpd.GeoDataFrame,
-) -> gpd.GeoDataFrame:
-
-    lane_group_on_intersection = {
-        lane_group_id: str(intersection_id) != "nan"
-        for lane_group_id, intersection_id in zip(lane_group_df.id.tolist(), lane_group_df.intersection_id.tolist())
-    }
-    ids = []
-    road_line_types = []
-    geometries = []
-
-    running_id = 0
-    for lane_row in lane_df.itertuples():
-        on_intersection = lane_group_on_intersection.get(lane_row.lane_group_id, False)
-        if on_intersection:
-            # Skip road lines on intersections
-            continue
-        if str(lane_row.right_lane_id) in ["nan", "None"]:
-            # This is a boundary lane, e.g. a border or sidewalk
-            ids.append(running_id)
-            road_line_types.append(int(RoadLineType.SOLID_WHITE))
-            geometries.append(lane_row.right_boundary)
-            running_id += 1
-        else:
-            # This is a regular lane
-            ids.append(running_id)
-            road_line_types.append(int(RoadLineType.DASHED_WHITE))
-            geometries.append(lane_row.right_boundary)
-            running_id += 1
-        if str(lane_row.left_lane_id) in ["nan", "None"]:
-            # This is the innermost lane, i.e. its left boundary faces the road center
-            ids.append(running_id)
-            road_line_types.append(int(RoadLineType.DASHED_WHITE))
-            geometries.append(lane_row.left_boundary)
-            running_id += 1
-
-    data = pd.DataFrame({"id": ids, "road_line_type": road_line_types})
-    return gpd.GeoDataFrame(data, geometry=geometries)
-
-
-def _extract_road_edge_df(
-    lane_df: gpd.GeoDataFrame,
-    carpark_df: gpd.GeoDataFrame,
-    generic_drivable_area_df: gpd.GeoDataFrame,
-    lane_group_df: gpd.GeoDataFrame,
-) -> gpd.GeoDataFrame:
-    road_edges = get_road_edges_3d_from_gdf(lane_df, carpark_df, generic_drivable_area_df, lane_group_df)
-    road_edges = split_line_geometry_by_max_length(road_edges, MAX_ROAD_EDGE_LENGTH)
-
-    ids = np.arange(len(road_edges), dtype=np.int64).tolist()
-    # TODO @DanielDauner: Figure out if other types should/could be assigned here.
- road_edge_types = [int(RoadEdgeType.ROAD_EDGE_BOUNDARY)] * len(road_edges) - geometries = road_edges - return gpd.GeoDataFrame(pd.DataFrame({"id": ids, "road_edge_type": road_edge_types}), geometry=geometries) - - -# TODO: move this somewhere else and improve -def extract_exteriors_polygon(lane_group_helpers: List[OpenDriveLaneGroupHelper]) -> shapely.Polygon: - - # Step 1: Extract all boundary line segments - all_polygons = [] - for lane_group_helper in lane_group_helpers: - all_polygons.append(lane_group_helper.shapely_polygon) - - # Step 2: Merge all boundaries and extract the enclosed polygons - # try: - merged_boundaries = unary_union(all_polygons) - # except Exception as e: - # warnings.warn(f"Topological error during polygon union: {e}") - # print([(helper.lane_group_id, poly.is_valid) for poly, helper in zip(all_polygons, lane_group_helpers)]) - # merged_boundaries = unary_union([poly for poly in all_polygons if poly.is_valid]) - - # Step 3: Generate polygons from the merged lines - polygons = list(polygonize(merged_boundaries)) - - # Step 4: Select the polygon that represents the intersection - # Usually it's the largest polygon - if len(polygons) == 1: - return polygons[0] - else: - # Take the largest polygon if there are multiple - return max(polygons, key=lambda p: p.area) diff --git a/d123/dataset/conversion/map/opendrive/utils/id_mapping.py b/d123/dataset/conversion/map/opendrive/utils/id_mapping.py deleted file mode 100644 index 78c0572f..00000000 --- a/d123/dataset/conversion/map/opendrive/utils/id_mapping.py +++ /dev/null @@ -1,26 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import Dict, List, Optional - -import pandas as pd - - -@dataclass -class IntIDMapping: - - str_to_int: Dict[str, int] - - def __post_init__(self): - self.int_to_str = {v: k for k, v in self.str_to_int.items()} - - @classmethod - def from_series(cls, series: pd.Series) -> IntIDMapping: - unique_ids = series.unique() - str_to_int = {str_id: idx for idx, str_id in enumerate(unique_ids)} - return IntIDMapping(str_to_int) - - def map_list(self, id_list: Optional[List[str]]) -> pd.Series: - if id_list is None: - return [] - return [self.str_to_int.get(id_str, -1) for id_str in id_list] diff --git a/d123/dataset/conversion/map/road_edge/road_edge_2d_utils.py b/d123/dataset/conversion/map/road_edge/road_edge_2d_utils.py deleted file mode 100644 index f0eb580d..00000000 --- a/d123/dataset/conversion/map/road_edge/road_edge_2d_utils.py +++ /dev/null @@ -1,60 +0,0 @@ -from typing import Final, List, Union - -import numpy as np -import shapely -from shapely import LinearRing, LineString, Polygon, union_all - -ROAD_EDGE_BUFFER: Final[float] = 0.05 - - -def get_road_edge_linear_rings(drivable_polygons: List[Polygon]) -> List[LinearRing]: - - def _polygon_to_linear_rings(polygon: Polygon) -> List[LinearRing]: - assert polygon.geom_type == "Polygon" - linear_ring_list = [] - linear_ring_list.append(polygon.exterior) - for interior in polygon.interiors: - linear_ring_list.append(interior) - return linear_ring_list - - union_polygon = union_all([polygon.buffer(ROAD_EDGE_BUFFER, join_style=2) for polygon in drivable_polygons]).buffer( - -ROAD_EDGE_BUFFER, join_style=2 - ) - - linear_ring_list = [] - if union_polygon.geom_type == "Polygon": - for polyline in _polygon_to_linear_rings(union_polygon): - linear_ring_list.append(LinearRing(polyline)) - elif union_polygon.geom_type == "MultiPolygon": - for polygon in union_polygon.geoms: - for polyline in 
_polygon_to_linear_rings(polygon): - linear_ring_list.append(LinearRing(polyline)) - - return linear_ring_list - - -def split_line_geometry_by_max_length( - geometries: List[Union[LineString, LinearRing]], - max_length_meters: float, -) -> List[LineString]: - # TODO: move somewhere more appropriate or implement in Polyline2D, PolylineSE2, etc. - - if not isinstance(geometries, list): - geometries = [geometries] - - all_segments = [] - for geom in geometries: - if geom.length <= max_length_meters: - all_segments.append(LineString(geom.coords)) - continue - - num_segments = int(np.ceil(geom.length / max_length_meters)) - segment_length = geom.length / num_segments - - for i in range(num_segments): - start_dist = i * segment_length - end_dist = min((i + 1) * segment_length, geom.length) - segment = shapely.ops.substring(geom, start_dist, end_dist) - all_segments.append(segment) - - return all_segments diff --git a/d123/dataset/conversion/map/road_edge/road_edge_3d_utils.py b/d123/dataset/conversion/map/road_edge/road_edge_3d_utils.py deleted file mode 100644 index 93a34526..00000000 --- a/d123/dataset/conversion/map/road_edge/road_edge_3d_utils.py +++ /dev/null @@ -1,298 +0,0 @@ -import logging -from collections import defaultdict -from typing import Dict, List, Set - -import geopandas as gpd -import networkx as nx -import numpy as np -import numpy.typing as npt -import shapely -from shapely.geometry import LineString - -from d123.common.geometry.base import Point3DIndex -from d123.common.geometry.occupancy_map import OccupancyMap2D -from d123.dataset.conversion.map.road_edge.road_edge_2d_utils import get_road_edge_linear_rings - -logger = logging.getLogger(__name__) - - -def get_road_edges_3d_from_generic_drivable_area_df(generic_drivable_area_df: gpd.GeoDataFrame) -> List[LineString]: - """ - Extracts 3D road edges from the generic drivable area GeoDataFrame. - """ - # NOTE: this is a simplified version that assumes the generic drivable area covers all areas. - # This is the case for argoverse 2. - road_edges_2d = get_road_edge_linear_rings(generic_drivable_area_df.geometry.tolist()) - outlines = generic_drivable_area_df.outline.tolist() - non_conflicting_road_edges = lift_road_edges_to_3d(road_edges_2d, outlines) - return non_conflicting_road_edges - - -def get_road_edges_3d_from_gdf( - lane_df: gpd.GeoDataFrame, - carpark_df: gpd.GeoDataFrame, - generic_drivable_area_df: gpd.GeoDataFrame, - lane_group_df: gpd.GeoDataFrame, -) -> List[LineString]: - - # 1. Find conflicting lane groups, e.g. groups of lanes that overlap in 2D but have different Z-values (bridges) - conflicting_lane_groups = _get_conflicting_lane_groups(lane_group_df, lane_df) - - # 2. Extract road edges in 2D (including conflicting lane groups) - drivable_polygons = ( - lane_group_df.geometry.tolist() + carpark_df.geometry.tolist() + generic_drivable_area_df.geometry.tolist() - ) - road_edges_2d = get_road_edge_linear_rings(drivable_polygons) - - # 3. Collect 3D boundaries of non-conflicting lane groups and other drivable areas - non_conflicting_boundaries: List[LineString] = [] - for lane_group_id, lane_group_helper in lane_group_df.iterrows(): - if lane_group_id not in conflicting_lane_groups.keys(): - non_conflicting_boundaries.append(lane_group_helper["left_boundary"]) - non_conflicting_boundaries.append(lane_group_helper["right_boundary"]) - for outline in carpark_df.outline.tolist() + generic_drivable_area_df.outline.tolist(): - non_conflicting_boundaries.append(outline) - - # 4. 
Lift road edges to 3D using the boundaries of non-conflicting elements
-    non_conflicting_road_edges = lift_road_edges_to_3d(road_edges_2d, non_conflicting_boundaries)
-
-    # 5. Add road edges from conflicting lane groups
-    resolved_road_edges = _resolve_conflicting_lane_groups(conflicting_lane_groups, lane_group_df)
-
-    all_road_edges = non_conflicting_road_edges + resolved_road_edges
-
-    return all_road_edges
-
-
-def _get_conflicting_lane_groups(lane_group_df: gpd.GeoDataFrame, lane_df: gpd.GeoDataFrame) -> Dict[int, List[int]]:
-    """
-    Finds pairs of lane groups whose polygons overlap in 2D but whose centerlines differ
-    in height at the overlap (e.g. bridges above underpasses), vectorized where possible.
-    """
-    Z_THRESHOLD = 5.0  # [m] Z-value threshold for conflict detection
-
-    # Convert to regular dictionaries for faster access
-    lane_group_dict = lane_group_df.set_index("id").to_dict("index")
-    lane_baseline_dict = dict(zip(lane_df.id.values, lane_df.baseline_path.values))
-
-    # Pre-compute all centerlines
-    centerlines_cache = {}
-    polygons = []
-    ids = []
-
-    for lane_group_id, data in lane_group_dict.items():
-        geometry = data["geometry"]
-        lane_ids = data["lane_ids"]
-
-        # Vectorized centerline computation
-        centerlines = [np.array(lane_baseline_dict[lane_id].coords, dtype=np.float64) for lane_id in lane_ids]
-        centerlines_3d = np.concatenate(centerlines, axis=0)
-
-        centerlines_cache[lane_group_id] = centerlines_3d
-        polygons.append(geometry)
-        ids.append(lane_group_id)
-
-    occupancy_map = OccupancyMap2D(polygons, ids)
-    conflicting_lane_groups: Dict[int, List[int]] = defaultdict(list)
-    processed_pairs = set()
-
-    for i, lane_group_id in enumerate(ids):
-        lane_group_polygon = polygons[i]
-        lane_group_centerlines = centerlines_cache[lane_group_id]
-
-        # Get all intersecting geometries at once
-        intersecting_ids = occupancy_map.intersects(lane_group_polygon)
-        intersecting_ids.remove(lane_group_id)
-
-        for intersecting_id in intersecting_ids:
-            pair_key = tuple(sorted([lane_group_id, intersecting_id]))
-            if pair_key in processed_pairs:
-                continue
-            processed_pairs.add(pair_key)
-
-            intersecting_geometry = occupancy_map[intersecting_id]
-            if intersecting_geometry.geom_type != "Polygon":
-                continue
-            try:
-                # Compute actual intersection for better centroid
-                intersection = lane_group_polygon.intersection(intersecting_geometry)
-            except shapely.errors.GEOSException as e:
-                logger.debug(f"Error computing intersection for {pair_key}: {e}")
-                continue
-
-            if intersection.is_empty:
-                continue
-
-            intersection_centroid = np.array(intersection.centroid.coords[0], dtype=np.float64)
-            intersecting_centerlines = centerlines_cache[intersecting_id]
-
-            z_at_intersecting = _get_nearest_z_from_points_3d(intersecting_centerlines, intersection_centroid)
-            z_at_lane_group = _get_nearest_z_from_points_3d(lane_group_centerlines, intersection_centroid)
-
-            if np.abs(z_at_lane_group - z_at_intersecting) >= Z_THRESHOLD:
-                conflicting_lane_groups[lane_group_id].append(intersecting_id)
-                conflicting_lane_groups[intersecting_id].append(lane_group_id)
-
-    return conflicting_lane_groups
-
-
-def lift_road_edges_to_3d(
-    road_edges_2d: List[shapely.LinearRing],
-    boundaries: List[LineString],
-    max_distance: float = 0.01,
-) -> List[LineString]:
-    """
-    Lifts 2D road edge rings into 3D by batch-matching their vertices
-    to nearby 3D boundary segments.
-    """
-    if not road_edges_2d or not boundaries:
-        return []
-
-    # 1. 
Build a spatial index over all boundary segments
-    boundary_segments = []
-
-    for boundary in boundaries:
-        coords = np.array(boundary.coords, dtype=np.float64).reshape(-1, 1, 3)
-        segment_coords_boundary = np.concatenate([coords[:-1], coords[1:]], axis=1)
-        boundary_segments.append(segment_coords_boundary)
-
-    boundary_segments = np.concatenate(boundary_segments, axis=0)
-    boundary_segment_linestrings = shapely.creation.linestrings(boundary_segments)
-
-    occupancy_map = OccupancyMap2D(boundary_segment_linestrings)
-
-    road_edges_3d = []
-    for linear_ring in road_edges_2d:
-        points_2d = np.array(linear_ring.coords, dtype=np.float64)
-        points_3d = np.zeros((len(points_2d), 3), dtype=np.float64)
-        points_3d[:, :2] = points_2d
-
-        # 2. Batch query all ring vertices against the segment index
-        query_points = shapely.creation.points(points_2d)
-        results = occupancy_map.query_nearest(query_points, max_distance=max_distance, exclusive=True)
-
-        for query_idx, geometry_idx in zip(*results):
-            query_point = query_points[query_idx]
-            segment_coords = boundary_segments[geometry_idx]
-            best_z = _interpolate_z_on_segment(query_point, segment_coords)
-            points_3d[query_idx, 2] = best_z
-
-        # 3. Keep only runs of consecutive vertices that found a 3D match
-        continuous_segments = _find_continuous_segments(np.array(results[0]))
-
-        for segment_indices in continuous_segments:
-            if len(segment_indices) >= 2:
-                segment_points = points_3d[segment_indices]
-                road_edges_3d.append(LineString(segment_points))
-
-    return road_edges_3d
-
-
-def _get_nearest_z_from_points_3d(points_3d: npt.NDArray[np.float64], query_point: npt.NDArray[np.float64]) -> float:
-    assert points_3d.ndim == 2 and points_3d.shape[1] == len(
-        Point3DIndex
-    ), "points_3d must be a 2D array with shape (N, 3)"
-    distances = np.linalg.norm(points_3d[..., Point3DIndex.XY] - query_point[..., Point3DIndex.XY], axis=1)
-    closest_point = points_3d[np.argmin(distances)]
-    return closest_point[2]
-
-
-def _interpolate_z_on_segment(point: shapely.Point, segment_coords: npt.NDArray[np.float64]) -> float:
-    """Interpolate Z coordinate along a 3D line segment."""
-    p1, p2 = segment_coords[0], segment_coords[1]
-
-    # Project point onto segment
-    segment_vec = p2[:2] - p1[:2]
-    point_vec = np.array([point.x, point.y]) - p1[:2]
-
-    # Handle degenerate case
-    segment_length_sq = np.dot(segment_vec, segment_vec)
-    if segment_length_sq == 0:
-        return p1[2]
-
-    # Calculate projection parameter
-    t = np.dot(point_vec, segment_vec) / segment_length_sq
-    t = np.clip(t, 0, 1)  # Clamp to segment bounds
-
-    # Interpolate Z
-    return p1[2] + t * (p2[2] - p1[2])
-
-
-def _find_continuous_segments(indices: np.ndarray) -> List[np.ndarray]:
-    """Splits a sorted index array into runs of consecutive indices (length >= 2)."""
-    if len(indices) == 0:
-        return []
-
-    # Find breaks in continuity
-    breaks = np.where(np.diff(indices) != 1)[0] + 1
-    segments = np.split(indices, breaks)
-
-    # Filter segments with at least 2 points
-    return [seg for seg in segments if len(seg) >= 2]
-
-
-def _resolve_conflicting_lane_groups(
-    conflicting_lane_groups: Dict[int, List[int]], lane_group_df: gpd.GeoDataFrame
-) -> List[LineString]:
-
-    # Split conflicting lane groups into non-conflicting sets for further merging
-    non_conflicting_sets = _create_non_conflicting_sets(conflicting_lane_groups)
-
-    road_edges_3d: List[LineString] = []
-    for non_conflicting_set in non_conflicting_sets:
-
-        # Collect 2D polygons of non-conflicting lane group set
-        set_lane_group_rows = lane_group_df[lane_group_df.id.isin(non_conflicting_set)]
-        connected_lane_group = []
-        for row in 
set_lane_group_rows.itertuples(): - connected_lane_group.extend(row.predecessor_ids) - connected_lane_group.extend(row.successor_ids) - connected_lane_group_rows = lane_group_df[lane_group_df.id.isin(connected_lane_group)] - - set_polygons = set_lane_group_rows.geometry.tolist() + connected_lane_group_rows.geometry.tolist() - - # Get 2D road edge linestrings for the non-conflicting set - set_road_edges_2d = get_road_edge_linear_rings(set_polygons) - - # Collect 3D boundaries of non-conflicting lane groups - set_boundaries_3d: List[LineString] = [] - for lane_group_id in non_conflicting_set: - lane_group_helper = lane_group_df[lane_group_df.id == lane_group_id] - set_boundaries_3d.append(lane_group_helper.left_boundary.values[0]) - set_boundaries_3d.append(lane_group_helper.right_boundary.values[0]) - - # Lift road edges to 3D using the boundaries of non-conflicting lane groups - lifted_road_edges_3d = lift_road_edges_to_3d(set_road_edges_2d, set_boundaries_3d) - road_edges_3d.extend(lifted_road_edges_3d) - - return road_edges_3d - - -def _create_non_conflicting_sets(conflicts: Dict[int, List[int]]) -> List[Set[int]]: - """ - Creates sets of non-conflicting indices using NetworkX. - """ - # Create graph from conflicts - G = nx.Graph() - for idx, conflict_list in conflicts.items(): - for conflict_idx in conflict_list: - G.add_edge(idx, conflict_idx) - - result = [] - - # Process each connected component - for component in nx.connected_components(G): - subgraph = G.subgraph(component) - - # Try bipartite coloring first (most common case) - if nx.is_bipartite(subgraph): - sets = nx.bipartite.sets(subgraph) - result.extend([set(s) for s in sets]) - else: - # Fall back to greedy coloring for non-bipartite graphs - coloring = nx.greedy_color(subgraph, strategy="largest_first") - color_groups = {} - for node, color in coloring.items(): - if color not in color_groups: - color_groups[color] = set() - color_groups[color].add(node) - result.extend(color_groups.values()) - - return result diff --git a/d123/dataset/dataset_specific/av2/av2_constants.py b/d123/dataset/dataset_specific/av2/av2_constants.py deleted file mode 100644 index 836bb12c..00000000 --- a/d123/dataset/dataset_specific/av2/av2_constants.py +++ /dev/null @@ -1,107 +0,0 @@ -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.sensor.camera import CameraType -from d123.common.utils.enums import SerialIntEnum -from d123.dataset.maps.map_datatypes import RoadLineType - - -class AV2SensorBoxDetectionType(SerialIntEnum): - """Sensor dataset annotation categories.""" - - ANIMAL = 1 - ARTICULATED_BUS = 2 - BICYCLE = 3 - BICYCLIST = 4 - BOLLARD = 5 - BOX_TRUCK = 6 - BUS = 7 - CONSTRUCTION_BARREL = 8 - CONSTRUCTION_CONE = 9 - DOG = 10 - LARGE_VEHICLE = 11 - MESSAGE_BOARD_TRAILER = 12 - MOBILE_PEDESTRIAN_CROSSING_SIGN = 13 - MOTORCYCLE = 14 - MOTORCYCLIST = 15 - OFFICIAL_SIGNALER = 16 - PEDESTRIAN = 17 - RAILED_VEHICLE = 18 - REGULAR_VEHICLE = 19 - SCHOOL_BUS = 20 - SIGN = 21 - STOP_SIGN = 22 - STROLLER = 23 - TRAFFIC_LIGHT_TRAILER = 24 - TRUCK = 25 - TRUCK_CAB = 26 - VEHICULAR_TRAILER = 27 - WHEELCHAIR = 28 - WHEELED_DEVICE = 29 - WHEELED_RIDER = 30 - - -# Mapping from AV2SensorBoxDetectionType to general DetectionType -# TODO: Change the detection types. Multiple mistakes, e.g. animals/dogs are not generic objects. 
-AV2_TO_DETECTION_TYPE = { - AV2SensorBoxDetectionType.ANIMAL: DetectionType.GENERIC_OBJECT, - AV2SensorBoxDetectionType.ARTICULATED_BUS: DetectionType.VEHICLE, - AV2SensorBoxDetectionType.BICYCLE: DetectionType.BICYCLE, - AV2SensorBoxDetectionType.BICYCLIST: DetectionType.PEDESTRIAN, - AV2SensorBoxDetectionType.BOLLARD: DetectionType.BARRIER, - AV2SensorBoxDetectionType.BOX_TRUCK: DetectionType.VEHICLE, - AV2SensorBoxDetectionType.BUS: DetectionType.VEHICLE, - AV2SensorBoxDetectionType.CONSTRUCTION_BARREL: DetectionType.BARRIER, - AV2SensorBoxDetectionType.CONSTRUCTION_CONE: DetectionType.TRAFFIC_CONE, - AV2SensorBoxDetectionType.DOG: DetectionType.GENERIC_OBJECT, - AV2SensorBoxDetectionType.LARGE_VEHICLE: DetectionType.VEHICLE, - AV2SensorBoxDetectionType.MESSAGE_BOARD_TRAILER: DetectionType.VEHICLE, - AV2SensorBoxDetectionType.MOBILE_PEDESTRIAN_CROSSING_SIGN: DetectionType.CZONE_SIGN, - AV2SensorBoxDetectionType.MOTORCYCLE: DetectionType.BICYCLE, - AV2SensorBoxDetectionType.MOTORCYCLIST: DetectionType.BICYCLE, - AV2SensorBoxDetectionType.OFFICIAL_SIGNALER: DetectionType.PEDESTRIAN, - AV2SensorBoxDetectionType.PEDESTRIAN: DetectionType.PEDESTRIAN, - AV2SensorBoxDetectionType.RAILED_VEHICLE: DetectionType.VEHICLE, - AV2SensorBoxDetectionType.REGULAR_VEHICLE: DetectionType.VEHICLE, - AV2SensorBoxDetectionType.SCHOOL_BUS: DetectionType.VEHICLE, - AV2SensorBoxDetectionType.SIGN: DetectionType.SIGN, - AV2SensorBoxDetectionType.STOP_SIGN: DetectionType.SIGN, - AV2SensorBoxDetectionType.STROLLER: DetectionType.PEDESTRIAN, - AV2SensorBoxDetectionType.TRAFFIC_LIGHT_TRAILER: DetectionType.VEHICLE, - AV2SensorBoxDetectionType.TRUCK: DetectionType.VEHICLE, - AV2SensorBoxDetectionType.TRUCK_CAB: DetectionType.VEHICLE, - AV2SensorBoxDetectionType.VEHICULAR_TRAILER: DetectionType.VEHICLE, - AV2SensorBoxDetectionType.WHEELCHAIR: DetectionType.PEDESTRIAN, - AV2SensorBoxDetectionType.WHEELED_DEVICE: DetectionType.GENERIC_OBJECT, - AV2SensorBoxDetectionType.WHEELED_RIDER: DetectionType.BICYCLE, -} - - -AV2_CAMERA_TYPE_MAPPING = { - "ring_front_center": CameraType.CAM_F0, - "ring_front_left": CameraType.CAM_L0, - "ring_front_right": CameraType.CAM_R0, - "ring_side_left": CameraType.CAM_L1, - "ring_side_right": CameraType.CAM_R1, - "ring_rear_left": CameraType.CAM_L2, - "ring_rear_right": CameraType.CAM_R2, - "stereo_front_left": CameraType.CAM_STEREO_L, - "stereo_front_right": CameraType.CAM_STEREO_R, -} - - -AV2_ROAD_LINE_TYPE_MAPPING = { - "NONE": RoadLineType.NONE, - "UNKNOWN": RoadLineType.UNKNOWN, - "DASH_SOLID_YELLOW": RoadLineType.DASH_SOLID_YELLOW, - "DASH_SOLID_WHITE": RoadLineType.DASH_SOLID_WHITE, - "DASHED_WHITE": RoadLineType.DASHED_WHITE, - "DASHED_YELLOW": RoadLineType.DASHED_YELLOW, - "DOUBLE_SOLID_YELLOW": RoadLineType.DOUBLE_SOLID_YELLOW, - "DOUBLE_SOLID_WHITE": RoadLineType.DOUBLE_SOLID_WHITE, - "DOUBLE_DASH_YELLOW": RoadLineType.DOUBLE_DASH_YELLOW, - "DOUBLE_DASH_WHITE": RoadLineType.DOUBLE_DASH_WHITE, - "SOLID_YELLOW": RoadLineType.SOLID_YELLOW, - "SOLID_WHITE": RoadLineType.SOLID_WHITE, - "SOLID_DASH_WHITE": RoadLineType.SOLID_DASH_WHITE, - "SOLID_DASH_YELLOW": RoadLineType.SOLID_DASH_YELLOW, - "SOLID_BLUE": RoadLineType.SOLID_BLUE, -} diff --git a/d123/dataset/dataset_specific/av2/av2_data_converter.py b/d123/dataset/dataset_specific/av2/av2_data_converter.py deleted file mode 100644 index 0ff84a7e..00000000 --- a/d123/dataset/dataset_specific/av2/av2_data_converter.py +++ /dev/null @@ -1,551 +0,0 @@ -import gc -import hashlib -import json -from dataclasses import asdict -from 
functools import partial -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple, Union - -import numpy as np -import pandas as pd -import pyarrow as pa -from pyquaternion import Quaternion - -from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json -from d123.common.datatypes.time.time_point import TimePoint -from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index -from d123.common.datatypes.vehicle_state.vehicle_parameters import ( - get_av2_ford_fusion_hybrid_parameters, - rear_axle_se3_to_center_se3, -) -from d123.common.geometry.base import StateSE3 -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3Index -from d123.common.geometry.constants import DEFAULT_PITCH, DEFAULT_ROLL -from d123.common.geometry.transform.se3 import convert_relative_to_absolute_se3_array, get_rotation_matrix -from d123.common.geometry.vector import Vector3D, Vector3DIndex -from d123.common.multithreading.worker_utils import WorkerPool, worker_map -from d123.dataset.dataset_specific.av2.av2_constants import ( - AV2_CAMERA_TYPE_MAPPING, - AV2_TO_DETECTION_TYPE, - AV2SensorBoxDetectionType, -) -from d123.dataset.dataset_specific.av2.av2_helper import ( - build_sensor_dataframe, - build_synchronization_dataframe, - find_closest_target_fpath, - get_slice_with_timestamp_ns, -) -from d123.dataset.dataset_specific.av2.av2_map_conversion import convert_av2_map -from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig, RawDataConverter -from d123.dataset.logs.log_metadata import LogMetadata - - -def create_token(input_data: str) -> str: - # TODO: Refactor this function. - # TODO: Add a general function to create tokens from arbitrary data. - if isinstance(input_data, str): - input_data = input_data.encode("utf-8") - - hash_obj = hashlib.sha256(input_data) - return hash_obj.hexdigest()[:16] - - -class AV2SensorDataConverter(RawDataConverter): - def __init__( - self, - splits: List[str], - log_path: Union[Path, str], - data_converter_config: DataConverterConfig, - ) -> None: - super().__init__(data_converter_config) - for split in splits: - assert ( - split in self.get_available_splits() - ), f"Split {split} is not available. 
Available splits: {self.get_available_splits()}"
-
-        self._splits: List[str] = splits
-        self._data_root: Path = Path(log_path)
-        self._log_paths_per_split: Dict[str, List[Path]] = self._collect_log_paths()
-        self._target_dt: float = 0.1
-
-    def _collect_log_paths(self) -> Dict[str, List[Path]]:
-        log_paths_per_split: Dict[str, List[Path]] = {}
-
-        for split in self._splits:
-            subsplit = split.split("_")[-1]
-            assert subsplit in ["train", "val", "test"]
-
-            # NOTE: check the mini variant first, since "av2-sensor" is a prefix of it.
-            if "av2-sensor-mini" in split:
-                log_folder = self._data_root / "sensor_mini" / subsplit
-            elif "av2-sensor" in split:
-                log_folder = self._data_root / "sensor" / subsplit
-            elif "av2_lidar" in split:
-                log_folder = self._data_root / "lidar" / subsplit
-            elif "av2_motion" in split:
-                log_folder = self._data_root / "motion-forecasting" / subsplit
-            else:
-                raise ValueError(f"Unrecognized split {split}.")
-
-            log_paths_per_split[split] = list(log_folder.iterdir())
-
-        return log_paths_per_split
-
-    def get_available_splits(self) -> List[str]:
-        return [
-            "av2-sensor_train",
-            "av2-sensor_val",
-            "av2-sensor_test",
-            "av2-sensor-mini_train",
-            "av2-sensor-mini_val",
-            "av2-sensor-mini_test",
-        ]
-
-    def convert_maps(self, worker: WorkerPool) -> None:
-        log_args = [
-            {
-                "log_path": log_path,
-                "split": split,
-            }
-            for split, log_paths in self._log_paths_per_split.items()
-            for log_path in log_paths
-        ]
-        worker_map(
-            worker,
-            partial(convert_av2_map_to_gpkg, data_converter_config=self.data_converter_config),
-            log_args,
-        )
-
-    def convert_logs(self, worker: WorkerPool) -> None:
-        log_args = [
-            {
-                "log_path": log_path,
-                "split": split,
-            }
-            for split, log_paths in self._log_paths_per_split.items()
-            for log_path in log_paths
-        ]
-
-        worker_map(
-            worker,
-            partial(
-                convert_av2_log_to_arrow,
-                data_converter_config=self.data_converter_config,
-            ),
-            log_args,
-        )
-
-
-def convert_av2_map_to_gpkg(
-    args: List[Dict[str, Union[str, Path]]],
-    data_converter_config: DataConverterConfig,
-) -> List[Any]:
-    for log_info in args:
-        source_log_path: Path = log_info["log_path"]
-        split: str = log_info["split"]
-
-        source_log_name = source_log_path.name
-
-        map_path = data_converter_config.output_path / "maps" / split / f"{source_log_name}.gpkg"
-        if data_converter_config.force_map_conversion or not map_path.exists():
-            map_path.unlink(missing_ok=True)
-            convert_av2_map(source_log_path, map_path)
-    return []
-
-
-def convert_av2_log_to_arrow(
-    args: List[Dict[str, Union[str, Path]]],
-    data_converter_config: DataConverterConfig,
-) -> List[Any]:
-    for log_info in args:
-        log_path: Path = log_info["log_path"]
-        split: str = log_info["split"]
-
-        if not log_path.exists():
-            raise FileNotFoundError(f"Log path {log_path} does not exist.")
-
-        log_file_path = data_converter_config.output_path / split / f"{log_path.stem}.arrow"
-
-        if data_converter_config.force_log_conversion or not log_file_path.exists():
-            log_file_path.unlink(missing_ok=True)
-            if not log_file_path.parent.exists():
-                log_file_path.parent.mkdir(parents=True, exist_ok=True)
-
-            sensor_df = build_sensor_dataframe(log_path)
-            synchronization_df = build_synchronization_dataframe(sensor_df)
-
-            metadata = LogMetadata(
-                dataset="av2-sensor",
-                log_name=log_path.name,
-                location=None,
-                timestep_seconds=0.1,  # TODO: verify this
-                map_has_z=True,
-            )
-            vehicle_parameters = get_av2_ford_fusion_hybrid_parameters()  # TODO: Add av2 vehicle parameters
-            camera_metadata = get_av2_camera_metadata(log_path)
-            lidar_metadata = get_av2_lidar_metadata(log_path)
-
-            schema_column_list = [
-                ("token", pa.string()),
-                ("timestamp", 
pa.int64()), - ("detections_state", pa.list_(pa.list_(pa.float64(), len(BoundingBoxSE3Index)))), - ("detections_velocity", pa.list_(pa.list_(pa.float64(), len(Vector3DIndex)))), - ("detections_token", pa.list_(pa.string())), - ("detections_type", pa.list_(pa.int16())), - ("ego_states", pa.list_(pa.float64(), len(EgoStateSE3Index))), - ("traffic_light_ids", pa.list_(pa.int64())), - ("traffic_light_types", pa.list_(pa.int16())), - ("scenario_tag", pa.list_(pa.string())), - ("route_lane_group_ids", pa.list_(pa.int64())), - ] - if data_converter_config.lidar_store_option is not None: - for lidar_type in lidar_metadata.keys(): - if data_converter_config.lidar_store_option == "path": - schema_column_list.append((lidar_type.serialize(), pa.string())) - elif data_converter_config.lidar_store_option == "binary": - raise NotImplementedError("Binary lidar storage is not implemented.") - - if data_converter_config.camera_store_option is not None: - for camera_type in camera_metadata.keys(): - if data_converter_config.camera_store_option == "path": - schema_column_list.append((camera_type.serialize(), pa.string())) - - elif data_converter_config.camera_store_option == "binary": - schema_column_list.append((camera_type.serialize(), pa.binary())) - - schema_column_list.append((f"{camera_type.serialize()}_extrinsic", pa.list_(pa.float64(), 4 * 4))) - - recording_schema = pa.schema(schema_column_list) - recording_schema = recording_schema.with_metadata( - { - "log_metadata": json.dumps(asdict(metadata)), - "vehicle_parameters": json.dumps(asdict(vehicle_parameters)), - "camera_metadata": camera_metadata_dict_to_json(camera_metadata), - "lidar_metadata": lidar_metadata_dict_to_json(lidar_metadata), - } - ) - - _write_recording_table( - sensor_df, - synchronization_df, - recording_schema, - log_file_path, - log_path, - data_converter_config, - ) - del recording_schema, vehicle_parameters - gc.collect() - return [] - - -def get_av2_camera_metadata(log_path: Path) -> Dict[CameraType, CameraMetadata]: - - intrinsics_file = log_path / "calibration" / "intrinsics.feather" - intrinsics_df = pd.read_feather(intrinsics_file) - - camera_metadata: Dict[CameraType, CameraMetadata] = {} - for _, row in intrinsics_df.iterrows(): - row = row.to_dict() - - camera_type = AV2_CAMERA_TYPE_MAPPING[row["sensor_name"]] - camera_metadata[camera_type] = CameraMetadata( - camera_type=camera_type, - width=row["width_px"], - height=row["height_px"], - intrinsic=np.array( - [[row["fx_px"], 0, row["cx_px"]], [0, row["fy_px"], row["cy_px"]], [0, 0, 1]], dtype=np.float64 - ), - distortion=np.array([row["k1"], row["k2"], row["k3"], 0, 0], dtype=np.float64), - ) - - return camera_metadata - - -def get_av2_lidar_metadata(log_path: Path) -> Dict[LiDARType, LiDARMetadata]: - # metadata: Dict[LiDARType, LiDARMetadata] = {} - # metadata[LiDARType.LIDAR_MERGED] = LiDARMetadata( - # lidar_type=LiDARType.LIDAR_MERGED, - # lidar_index=NuplanLidarIndex, - # extrinsic=None, # NOTE: LiDAR extrinsic are unknown - # ) - # return metadata - return {} - - -def _write_recording_table( - sensor_df: pd.DataFrame, - synchronization_df: pd.DataFrame, - recording_schema: pa.schema, - log_file_path: Path, - source_log_path: Path, - data_converter_config: DataConverterConfig, -) -> None: - - # NOTE: Similar to other datasets, we use the lidar timestamps as reference timestamps. 
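# To make the lidar-as-reference-clock convention above concrete: a minimal,
# hypothetical nearest-neighbour synchronization sketch (NumPy only; the
# function name and arguments are illustrative, not this repository's API).
import numpy as np

def nearest_indices(reference_ts_ns: np.ndarray, other_ts_ns: np.ndarray) -> np.ndarray:
    """For each reference timestamp, index of the closest entry in sorted other_ts_ns."""
    assert other_ts_ns.size >= 2, "need at least two timestamps to bracket a query"
    other_ts_ns = np.sort(other_ts_ns)
    idx = np.clip(np.searchsorted(other_ts_ns, reference_ts_ns), 1, other_ts_ns.size - 1)
    left, right = other_ts_ns[idx - 1], other_ts_ns[idx]
    # Pick whichever bracketing timestamp is closer to the reference.
    return np.where(reference_ts_ns - left <= right - reference_ts_ns, idx - 1, idx)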
- lidar_sensor = sensor_df.xs(key="lidar", level=2) - lidar_timestamps_ns = np.sort([int(idx_tuple[2]) for idx_tuple in lidar_sensor.index]) - - # NOTE: The annotation dataframe is not available for the test split. - annotations_df = ( - pd.read_feather(source_log_path / "annotations.feather") - if (source_log_path / "annotations.feather").exists() - else None - ) - - city_se3_egovehicle_df = pd.read_feather(source_log_path / "city_SE3_egovehicle.feather") - - egovehicle_se3_sensor_df = ( - pd.read_feather(source_log_path / "calibration" / "egovehicle_SE3_sensor.feather") - if data_converter_config.camera_store_option is not None - else None - ) - - # with pa.ipc.new_stream(str(log_file_path), recording_schema) as writer: - with pa.OSFile(str(log_file_path), "wb") as sink: - with pa.ipc.new_file(sink, recording_schema) as writer: - - for lidar_timestamp_ns in lidar_timestamps_ns: - - ego_state_se3 = _extract_ego_state(city_se3_egovehicle_df, lidar_timestamp_ns) - ( - detections_state, - detections_velocity, - detections_token, - detections_types, - ) = _extract_box_detections(annotations_df, lidar_timestamp_ns, ego_state_se3) - traffic_light_ids, traffic_light_types = _extract_traffic_lights() - route_lane_group_ids = None # TODO: Add route lane group ids extraction ? - row_data = { - "token": [create_token(str(lidar_timestamp_ns))], - "timestamp": [TimePoint.from_ns(int(lidar_timestamp_ns)).time_us], - "detections_state": [detections_state], - "detections_velocity": [detections_velocity], - "detections_token": [detections_token], - "detections_type": [detections_types], - "ego_states": [ego_state_se3.array.tolist()], - "traffic_light_ids": [traffic_light_ids], - "traffic_light_types": [traffic_light_types], - "scenario_tag": [_extract_scenario_tag()], - "route_lane_group_ids": [route_lane_group_ids], - } - - # TODO: add lidar data - - # if data_converter_config.lidar_store_option is not None: - # lidar_data_dict = _extract_lidar(lidar_pc, data_converter_config) - # for lidar_type, lidar_data in lidar_data_dict.items(): - # if lidar_data is not None: - # row_data[lidar_type.serialize()] = [lidar_data] - # else: - # row_data[lidar_type.serialize()] = [None] - - if data_converter_config.camera_store_option is not None: - camera_data_dict = _extract_camera( - lidar_timestamp_ns, - city_se3_egovehicle_df, - egovehicle_se3_sensor_df, - ego_state_se3, - synchronization_df, - source_log_path, - data_converter_config, - ) - for camera_type, camera_data in camera_data_dict.items(): - if camera_data is not None: - row_data[camera_type.serialize()] = [camera_data[0]] - row_data[f"{camera_type.serialize()}_extrinsic"] = [camera_data[1]] - else: - row_data[camera_type.serialize()] = [None] - row_data[f"{camera_type.serialize()}_extrinsic"] = [None] - - batch = pa.record_batch(row_data, schema=recording_schema) - writer.write_batch(batch) - del batch, row_data, detections_state, detections_velocity, detections_token, detections_types - - -def _extract_box_detections( - annotations_df: Optional[pd.DataFrame], - lidar_timestamp_ns: int, - ego_state_se3: EgoStateSE3, -) -> Tuple[List[List[float]], List[List[float]], List[str], List[int]]: - - # TODO: Extract velocity from annotations_df if available. 
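# A hedged sketch for the velocity TODO above: finite-difference each track's
# positions over time. Column names follow the annotations.feather schema used
# in this converter; note the stored boxes are in the per-frame ego frame, so
# metric velocities require transforming positions to the city frame first.
# The helper itself is illustrative, not part of the converter.
import numpy as np
import pandas as pd

def track_velocity(track_df: pd.DataFrame) -> np.ndarray:
    """Per-sample 3D velocity [m/s] of one track from (city-frame) positions."""
    track_df = track_df.sort_values("timestamp_ns")
    xyz = track_df[["tx_m", "ty_m", "tz_m"]].to_numpy(dtype=np.float64)
    if len(xyz) < 2:
        return np.zeros_like(xyz)
    t_s = track_df["timestamp_ns"].to_numpy(dtype=np.float64) * 1e-9
    return np.gradient(xyz, t_s, axis=0)  # central differences, one row per sample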
- - if annotations_df is None: - return [], [], [], [] - - annotations_slice = get_slice_with_timestamp_ns(annotations_df, lidar_timestamp_ns) - num_detections = len(annotations_slice) - - detections_state = np.zeros((num_detections, len(BoundingBoxSE3Index)), dtype=np.float64) - detections_velocity = np.zeros((num_detections, len(Vector3DIndex)), dtype=np.float64) - detections_token: List[str] = annotations_slice["track_uuid"].tolist() - detections_types: List[int] = [] - - for detection_idx, (_, row) in enumerate(annotations_slice.iterrows()): - row = row.to_dict() - yaw, pitch, roll = Quaternion( - w=row["qw"], - x=row["qx"], - y=row["qy"], - z=row["qz"], - ).yaw_pitch_roll - - detections_state[detection_idx, BoundingBoxSE3Index.X] = row["tx_m"] - detections_state[detection_idx, BoundingBoxSE3Index.Y] = row["ty_m"] - detections_state[detection_idx, BoundingBoxSE3Index.Z] = row["tz_m"] - detections_state[detection_idx, BoundingBoxSE3Index.ROLL] = roll - detections_state[detection_idx, BoundingBoxSE3Index.PITCH] = pitch - detections_state[detection_idx, BoundingBoxSE3Index.YAW] = yaw - detections_state[detection_idx, BoundingBoxSE3Index.LENGTH] = row["length_m"] - detections_state[detection_idx, BoundingBoxSE3Index.WIDTH] = row["width_m"] - detections_state[detection_idx, BoundingBoxSE3Index.HEIGHT] = row["height_m"] - - av2_detection_type = AV2SensorBoxDetectionType.deserialize(row["category"]) - detections_types.append(int(AV2_TO_DETECTION_TYPE[av2_detection_type])) - - detections_state[:, BoundingBoxSE3Index.STATE_SE3] = convert_relative_to_absolute_se3_array( - origin=ego_state_se3.rear_axle_se3, se3_array=detections_state[:, BoundingBoxSE3Index.STATE_SE3] - ) - - ZERO_BOX_ROLL_PITCH = False # TODO: Add config option or remove - if ZERO_BOX_ROLL_PITCH: - detections_state[:, BoundingBoxSE3Index.ROLL] = DEFAULT_ROLL - detections_state[:, BoundingBoxSE3Index.PITCH] = DEFAULT_PITCH - - return detections_state.tolist(), detections_velocity.tolist(), detections_token, detections_types - - -def _extract_ego_state(city_se3_egovehicle_df: pd.DataFrame, lidar_timestamp_ns: int) -> EgoStateSE3: - ego_state_slice = get_slice_with_timestamp_ns(city_se3_egovehicle_df, lidar_timestamp_ns) - assert ( - len(ego_state_slice) == 1 - ), f"Expected exactly one ego state for timestamp {lidar_timestamp_ns}, got {len(ego_state_slice)}." - - ego_pose_dict = ego_state_slice.iloc[0].to_dict() - - ego_pose_quat = Quaternion( - w=ego_pose_dict["qw"], - x=ego_pose_dict["qx"], - y=ego_pose_dict["qy"], - z=ego_pose_dict["qz"], - ) - - yaw, pitch, roll = ego_pose_quat.yaw_pitch_roll - - rear_axle_pose = StateSE3( - x=ego_pose_dict["tx_m"], - y=ego_pose_dict["ty_m"], - z=ego_pose_dict["tz_m"], - roll=roll, - pitch=pitch, - yaw=yaw, - ) - vehicle_parameters = get_av2_ford_fusion_hybrid_parameters() # TODO: Add av2 vehicle parameters - center = rear_axle_se3_to_center_se3(rear_axle_se3=rear_axle_pose, vehicle_parameters=vehicle_parameters) - # TODO: Add script to calculate the dynamic state from log sequence. 
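# A possible starting point for the dynamic-state TODO above, sketched for the
# angular part: differentiate consecutive ego quaternions (the qw/qx/qy/qz
# values read above). pyquaternion is already imported in this module; the
# helper itself is hypothetical.
import numpy as np
from pyquaternion import Quaternion

def ego_yaw_rate(q_prev: Quaternion, q_next: Quaternion, dt_s: float) -> float:
    """Approximate yaw rate [rad/s] between two consecutive ego poses."""
    assert dt_s > 0.0
    delta_yaw = (q_next * q_prev.inverse).yaw_pitch_roll[0]
    # Wrap to [-pi, pi] so a jump across the +/-pi boundary does not explode the rate.
    delta_yaw = (delta_yaw + np.pi) % (2.0 * np.pi) - np.pi
    return delta_yaw / dt_s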
- dynamic_state = DynamicStateSE3( - velocity=Vector3D( - x=0.0, - y=0.0, - z=0.0, - ), - acceleration=Vector3D( - x=0.0, - y=0.0, - z=0.0, - ), - angular_velocity=Vector3D( - x=0.0, - y=0.0, - z=0.0, - ), - ) - - return EgoStateSE3( - center_se3=center, - dynamic_state_se3=dynamic_state, - vehicle_parameters=vehicle_parameters, - timepoint=None, - ) - - -def _extract_traffic_lights() -> Tuple[List[int], List[int]]: - return [], [] - - -def _extract_scenario_tag() -> List[str]: - return ["unknown"] - - -def _extract_camera( - lidar_timestamp_ns: int, - city_se3_egovehicle_df: pd.DataFrame, - egovehicle_se3_sensor_df: pd.DataFrame, - ego_state_se3: EgoStateSE3, - synchronization_df: pd.DataFrame, - source_log_path: Path, - data_converter_config: DataConverterConfig, -) -> Dict[CameraType, Union[str, bytes]]: - - camera_dict: Dict[CameraType, Union[str, bytes]] = { - camera_type: None for camera_type in AV2_CAMERA_TYPE_MAPPING.values() - } - split = source_log_path.parent.name - log_id = source_log_path.name - - source_dataset_dir = source_log_path.parent.parent - - rear_axle_se3 = ego_state_se3.rear_axle_se3 - ego_transform = np.zeros((4, 4), dtype=np.float64) - ego_transform[:3, :3] = get_rotation_matrix(ego_state_se3.rear_axle_se3) - ego_transform[:3, 3] = rear_axle_se3.point_3d.array - - for _, row in egovehicle_se3_sensor_df.iterrows(): - row = row.to_dict() - if row["sensor_name"] not in AV2_CAMERA_TYPE_MAPPING: - continue - - camera_name = row["sensor_name"] - camera_type = AV2_CAMERA_TYPE_MAPPING[camera_name] - - relative_image_path = find_closest_target_fpath( - split=split, - log_id=log_id, - src_sensor_name="lidar", - src_timestamp_ns=lidar_timestamp_ns, - target_sensor_name=camera_name, - synchronization_df=synchronization_df, - ) - if relative_image_path is None: - camera_dict[camera_type] = None - else: - absolute_image_path = source_dataset_dir / relative_image_path - assert absolute_image_path.exists() - # TODO: Adjust for finer IMU timestamps to correct the camera extrinsic. 
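# A sketch for the IMU-timestamp TODO above: interpolate the ego pose at the
# camera timestamp between the two bracketing ego poses, SLERPing the rotation
# and lerping the translation. pyquaternion is already a dependency here; the
# helper name and signature are illustrative.
import numpy as np
from pyquaternion import Quaternion

def interpolate_ego_pose(t: float, t0: float, q0: Quaternion, p0: np.ndarray,
                         t1: float, q1: Quaternion, p1: np.ndarray) -> np.ndarray:
    """4x4 ego pose at time t, interpolated between poses at t0 <= t <= t1."""
    alpha = 0.0 if t1 == t0 else (t - t0) / (t1 - t0)
    pose = np.eye(4, dtype=np.float64)
    pose[:3, :3] = Quaternion.slerp(q0, q1, amount=alpha).rotation_matrix
    pose[:3, 3] = (1.0 - alpha) * p0 + alpha * p1
    return pose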
- camera_extrinsic = np.eye(4, dtype=np.float64) - camera_extrinsic[:3, :3] = Quaternion( - w=row["qw"], - x=row["qx"], - y=row["qy"], - z=row["qz"], - ).rotation_matrix - camera_extrinsic[:3, 3] = np.array([row["tx_m"], row["ty_m"], row["tz_m"]], dtype=np.float64) - # camera_extrinsic = camera_extrinsic @ ego_transform - camera_extrinsic = camera_extrinsic.flatten().tolist() - - if data_converter_config.camera_store_option == "path": - camera_dict[camera_type] = (str(relative_image_path), camera_extrinsic) - elif data_converter_config.camera_store_option == "binary": - with open(absolute_image_path, "rb") as f: - camera_dict[camera_type] = (f.read(), camera_extrinsic) - - return camera_dict - - -def _extract_lidar(lidar_pc, data_converter_config: DataConverterConfig) -> Dict[LiDARType, Optional[str]]: - - # lidar: Optional[str] = None - # lidar_full_path = NUPLAN_DATA_ROOT / "nuplan-v1.1" / "sensor_blobs" / lidar_pc.filename - # if lidar_full_path.exists(): - # lidar = lidar_pc.filename - - # return {LiDARType.LIDAR_MERGED: lidar} - return {} diff --git a/d123/dataset/dataset_specific/carla/carla_data_converter.py b/d123/dataset/dataset_specific/carla/carla_data_converter.py deleted file mode 100644 index f598bb1e..00000000 --- a/d123/dataset/dataset_specific/carla/carla_data_converter.py +++ /dev/null @@ -1,455 +0,0 @@ -import gc -import gzip -import hashlib -import json -import os -from dataclasses import asdict -from functools import partial -from pathlib import Path -from typing import Any, Dict, Final, List, Optional, Tuple, Union - -import numpy as np -import pyarrow as pa - -from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar_index import CarlaLidarIndex -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3Index -from d123.common.datatypes.vehicle_state.vehicle_parameters import get_carla_lincoln_mkz_2020_parameters -from d123.common.geometry.base import Point2D, Point3D -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3Index -from d123.common.geometry.vector import Vector3DIndex -from d123.common.multithreading.worker_utils import WorkerPool, worker_map -from d123.dataset.arrow.helper import open_arrow_table, write_arrow_table -from d123.dataset.conversion.map.opendrive.opendrive_map_conversion import convert_from_xodr -from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig, RawDataConverter -from d123.dataset.logs.log_metadata import LogMetadata -from d123.dataset.maps.abstract_map import AbstractMap, MapLayer -from d123.dataset.maps.abstract_map_objects import AbstractLane -from d123.dataset.scene.arrow_scene import get_map_api_from_names - -AVAILABLE_CARLA_MAP_LOCATIONS: Final[List[str]] = [ - "Town01", # A small, simple town with a river and several bridges. - "Town02", # A small simple town with a mixture of residential and commercial buildings. - "Town03", # A larger, urban map with a roundabout and large junctions. - "Town04", # A small town embedded in the mountains with a special "figure of 8" infinite highway. - "Town05", # Squared-grid town with cross junctions and a bridge. It has multiple lanes per direction. Useful to perform lane changes. - "Town06", # Long many lane highways with many highway entrances and exits. It also has a Michigan left. 
- "Town07", # A rural environment with narrow roads, corn, barns and hardly any traffic lights. - "Town10HD", # A downtown urban environment with skyscrapers, residential buildings and an ocean promenade. - "Town11", # A Large Map that is undecorated. Serves as a proof of concept for the Large Maps feature. - "Town12", # A Large Map with numerous different regions, including high-rise, residential and rural environments. - "Town13", # ??? - "Town15", # ??? -] - -CARLA_DT: Final[float] = 0.1 # [s] -TRAFFIC_LIGHT_ASSIGNMENT_DISTANCE: Final[float] = 1.0 # [m] -SORT_BY_TIMESTAMP: Final[bool] = True - -CARLA_CAMERA_TYPES = {CameraType.CAM_F0} - -CARLA_DATA_ROOT: Final[Path] = Path(os.environ["CARLA_DATA_ROOT"]) - - -# TODO: Refactor this files and convert coordinate systems more elegantly. -# NOTE: Currently some coordinate transforms from Unreal to ISO 8855 are done in the data agent of PDM-Lite. -# Ideally a general function to transform poses and points between coordinate systems would be nice - - -def _load_json_gz(path: Path) -> Dict: - """Helper function to load a gzipped JSON file.""" - with gzip.open(path, "rt") as f: - data = json.load(f) - return data - - -def create_token(input_data: str) -> str: - # TODO: Refactor this function. - # TODO: Add a general function to create tokens from arbitrary data. - if isinstance(input_data, str): - input_data = input_data.encode("utf-8") - - hash_obj = hashlib.sha256(input_data) - return hash_obj.hexdigest()[:16] - - -class CarlaDataConverter(RawDataConverter): - - def __init__( - self, - splits: List[str], - log_path: Union[Path, str], - data_converter_config: DataConverterConfig, - ) -> None: - super().__init__(data_converter_config) - for split in splits: - assert ( - split in self.get_available_splits() - ), f"Split {split} is not available. 
Available splits: {self.get_available_splits()}"
-
-        self._splits: List[str] = splits
-        self._log_path: Path = Path(log_path)
-        self._log_paths_per_split: Dict[str, List[Path]] = self._collect_log_paths()
-
-    def _collect_log_paths(self) -> Dict[str, List[Path]]:
-        # TODO: fix "carla" split placeholder and add support for other splits
-        log_paths_per_split: Dict[str, List[Path]] = {}
-        log_paths = [
-            log_path for log_path in self._log_path.iterdir() if log_path.is_dir() and log_path.stem != "sensor_blobs"
-        ]
-        log_paths_per_split["carla"] = log_paths
-        return log_paths_per_split
-
-    def get_available_splits(self) -> List[str]:
-        """Returns the list of available splits."""
-        return ["carla"]  # TODO: fix the placeholder
-
-    def convert_maps(self, worker: WorkerPool) -> None:
-        worker_map(
-            worker,
-            partial(
-                convert_carla_map_to_gpkg,
-                data_converter_config=self.data_converter_config,
-            ),
-            list(AVAILABLE_CARLA_MAP_LOCATIONS),
-        )
-
-    def convert_logs(self, worker: WorkerPool) -> None:
-
-        log_args = [
-            {"log_path": log_path, "split": split}
-            for split, log_paths in self._log_paths_per_split.items()
-            for log_path in log_paths
-        ]
-
-        worker_map(
-            worker, partial(convert_carla_log_to_arrow, data_converter_config=self.data_converter_config), log_args
-        )
-
-
-def convert_carla_map_to_gpkg(map_names: List[str], data_converter_config: DataConverterConfig) -> List[Any]:
-
-    # TODO: add to config
-    _interpolation_step_size = 0.5  # [m]
-    _connection_distance_threshold = 0.1  # [m]
-    for map_name in map_names:
-        map_path = data_converter_config.output_path / "maps" / f"carla_{map_name.lower()}.gpkg"
-        if data_converter_config.force_map_conversion or not map_path.exists():
-            map_path.unlink(missing_ok=True)
-            carla_root_env = os.environ.get("CARLA_ROOT")
-            assert carla_root_env is not None, "CARLA_ROOT environment variable must be set."
-            CARLA_ROOT = Path(carla_root_env)
-
-            if map_name not in ["Town11", "Town12", "Town13", "Town15"]:
-                carla_maps_root = CARLA_ROOT / "CarlaUE4" / "Content" / "Carla" / "Maps" / "OpenDrive"
-                carla_map_path = carla_maps_root / f"{map_name}.xodr"
-            else:
-                carla_map_path = (
-                    CARLA_ROOT / "CarlaUE4" / "Content" / "Carla" / "Maps" / map_name / "OpenDrive" / f"{map_name}.xodr"
-                )
-
-            convert_from_xodr(
-                carla_map_path,
-                f"carla_{map_name.lower()}",
-                _interpolation_step_size,
-                _connection_distance_threshold,
-            )
-    return []
-
-
-def convert_carla_log_to_arrow(
-    args: List[Dict[str, Union[str, Path]]], data_converter_config: DataConverterConfig
-) -> List[Any]:
-    def convert_log_internal(args: List[Dict[str, Union[str, Path]]]) -> None:
-        for log_info in args:
-            log_path: Path = log_info["log_path"]
-            split: str = log_info["split"]
-            output_path: Path = data_converter_config.output_path
-
-            log_file_path = output_path / split / f"{log_path.stem}.arrow"
-
-            if data_converter_config.force_log_conversion or not log_file_path.exists():
-                log_file_path.unlink(missing_ok=True)
-                if not log_file_path.parent.exists():
-                    log_file_path.parent.mkdir(parents=True, exist_ok=True)
-
-                bounding_box_paths = sorted([bb_path for bb_path in (log_path / "boxes").iterdir()])
-                first_log_dict = _load_json_gz(bounding_box_paths[0])
-                map_name = first_log_dict["location"]
-                map_api = get_map_api_from_names("carla", map_name)
-
-                metadata = _get_metadata(map_name, str(log_path.stem))
-                vehicle_parameters = get_carla_lincoln_mkz_2020_parameters()
-                camera_metadata = get_carla_camera_metadata(first_log_dict)
-                lidar_metadata = get_carla_lidar_metadata(first_log_dict)
-
-                schema_column_list = [
-                    ("token", pa.string()),
-                    ("timestamp", 
pa.int64()), - ("detections_state", pa.list_(pa.list_(pa.float64(), len(BoundingBoxSE3Index)))), - ("detections_velocity", pa.list_(pa.list_(pa.float64(), len(Vector3DIndex)))), - ("detections_token", pa.list_(pa.string())), - ("detections_type", pa.list_(pa.int16())), - ("ego_states", pa.list_(pa.float64(), len(EgoStateSE3Index))), - ("traffic_light_ids", pa.list_(pa.int64())), - ("traffic_light_types", pa.list_(pa.int16())), - ("scenario_tag", pa.list_(pa.string())), - ("route_lane_group_ids", pa.list_(pa.int64())), - ] - if data_converter_config.lidar_store_option is not None: - for lidar_type in lidar_metadata.keys(): - if data_converter_config.lidar_store_option == "path": - schema_column_list.append((lidar_type.serialize(), pa.string())) - elif data_converter_config.lidar_store_option == "binary": - raise NotImplementedError("Binary lidar storage is not implemented.") - - # TODO: Adjust how cameras are added - if data_converter_config.camera_store_option is not None: - for camera_type in camera_metadata.keys(): - if data_converter_config.camera_store_option == "path": - schema_column_list.append((camera_type.serialize(), pa.string())) - schema_column_list.append( - (f"{camera_type.serialize()}_extrinsic", pa.list_(pa.float64(), 16)) - ) - elif data_converter_config.camera_store_option == "binary": - raise NotImplementedError("Binary camera storage is not implemented.") - - recording_schema = pa.schema(schema_column_list) - recording_schema = recording_schema.with_metadata( - { - "log_metadata": json.dumps(asdict(metadata)), - "vehicle_parameters": json.dumps(asdict(vehicle_parameters)), - "camera_metadata": camera_metadata_dict_to_json(camera_metadata), - "lidar_metadata": lidar_metadata_dict_to_json(lidar_metadata), - } - ) - - _write_recording_table( - bounding_box_paths, - map_api, - recording_schema, - log_file_path, - data_converter_config, - ) - - gc.collect() - - convert_log_internal(args) - gc.collect() - return [] - - -def _get_metadata(location: str, log_name: str) -> LogMetadata: - return LogMetadata( - dataset="carla", - log_name=log_name, - location=location, - timestep_seconds=CARLA_DT, - map_has_z=True, - ) - - -def get_carla_camera_metadata(first_log_dict: Dict[str, Any]) -> Dict[CameraType, CameraMetadata]: - - # FIXME: This is a placeholder function to return camera metadata. 
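# One way to retire the hardcoded 1024x512 below: read the true frame size from
# a sample sensor blob. A minimal sketch; using Pillow is an assumption, and the
# blob layout mirrors _extract_cameras further down.
from pathlib import Path
from typing import Tuple
from PIL import Image

def camera_frame_size(sample_jpg: Path) -> Tuple[int, int]:
    """(width, height) of a sample camera frame; Pillow reads only the header."""
    with Image.open(sample_jpg) as img:
        return img.size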
- - intrinsic = np.array( - first_log_dict[f"{CameraType.CAM_F0.serialize()}_intrinsics"], - dtype=np.float64, - ) - camera_metadata = { - CameraType.CAM_F0: CameraMetadata( - camera_type=CameraType.CAM_F0, - width=1024, - height=512, - intrinsic=intrinsic, - distortion=np.zeros((5,), dtype=np.float64), - ) - } - return camera_metadata - - -def get_carla_lidar_metadata(first_log_dict: Dict[str, Any]) -> Dict[LiDARType, LiDARMetadata]: - - # TODO: add lidar extrinsic - lidar_metadata = { - LiDARType.LIDAR_TOP: LiDARMetadata( - lidar_type=LiDARType.LIDAR_TOP, - lidar_index=CarlaLidarIndex, - extrinsic=None, - ) - } - return lidar_metadata - - -def _write_recording_table( - bounding_box_paths: List[Path], - map_api: AbstractMap, - recording_schema: pa.Schema, - log_file_path: Path, - data_converter_config: DataConverterConfig, -) -> pa.Table: - # TODO: Refactor this function to be more readable - log_name = str(bounding_box_paths[0].parent.parent.stem) - - with pa.OSFile(str(log_file_path), "wb") as sink: - with pa.ipc.new_file(sink, recording_schema) as writer: - for box_path in bounding_box_paths: - sample_name = box_path.stem.split(".")[0] - - data = _load_json_gz(box_path) - traffic_light_ids, traffic_light_types = _extract_traffic_light_data( - data["traffic_light_states"], data["traffic_light_positions"], map_api - ) - route_lane_group_ids = _extract_route_lane_group_ids(data["route"], map_api) if "route" in data else [] - - row_data = { - "token": [create_token(f"{log_name}_{box_path.stem}")], - "timestamp": [data["timestamp"]], - "detections_state": [_extract_detection_states(data["detections_state"])], - "detections_velocity": [ - ( - data["detections_velocity"] - if "detections_velocity" in data - else np.zeros((len(data["detections_types"]), 3)).tolist() - ) - ], - "detections_token": [data["detections_token"]], - "detections_type": [data["detections_types"]], - "ego_states": [_extract_ego_vehicle_state(data["ego_state"])], - "traffic_light_ids": [traffic_light_ids], - "traffic_light_types": [traffic_light_types], - "scenario_tag": [data["scenario_tag"]], - "route_lane_group_ids": [route_lane_group_ids], - } - if data_converter_config.lidar_store_option is not None: - lidar_data_dict = _extract_lidar(log_name, sample_name, data_converter_config) - for lidar_type, lidar_data in lidar_data_dict.items(): - row_data[lidar_type.serialize()] = [lidar_data] - - if data_converter_config.camera_store_option is not None: - camera_data_dict = _extract_cameras(data, log_name, sample_name, data_converter_config) - for camera_type, camera_data in camera_data_dict.items(): - if camera_data is not None: - row_data[camera_type.serialize()] = [camera_data[0]] - row_data[f"{camera_type.serialize()}_extrinsic"] = [camera_data[1]] - else: - row_data[camera_type.serialize()] = [None] - - batch = pa.record_batch(row_data, schema=recording_schema) - writer.write_batch(batch) - del batch, row_data - - if SORT_BY_TIMESTAMP: - recording_table = open_arrow_table(log_file_path) - recording_table = recording_table.sort_by([("timestamp", "ascending")]) - write_arrow_table(recording_table, log_file_path) - - -def _extract_ego_vehicle_state(ego_state_list: List[float]) -> List[float]: - # NOTE: This function used to convert coordinate systems, but it is not needed anymore. - assert len(ego_state_list) == len(EgoStateSE3Index), "Ego state list has incorrect length." 
- return ego_state_list - - -def _extract_detection_states(detection_states: List[List[float]]) -> List[List[float]]: - # NOTE: This function used to convert coordinate systems, but it is not needed anymore. - detection_state_converted = [] - for detection_state in detection_states: - assert len(detection_state) == len(BoundingBoxSE3Index), "Detection state has incorrect length." - detection_state_converted.append(detection_state) - return detection_state_converted - - -def _extract_traffic_light_data( - traffic_light_states: List[int], traffic_light_positions: List[List[float]], map_api: AbstractMap -) -> Tuple[List[int], List[int]]: - traffic_light_types: List[int] = [] - traffic_light_ids: List[int] = [] - for traffic_light_state, traffic_light_waypoints in zip(traffic_light_states, traffic_light_positions): - for traffic_light_waypoint in traffic_light_waypoints: - point_3d = Point3D(*traffic_light_waypoint) - nearby_lanes = map_api.get_proximal_map_objects( - point_3d, TRAFFIC_LIGHT_ASSIGNMENT_DISTANCE, [MapLayer.LANE] - )[MapLayer.LANE] - - for lane in nearby_lanes: - lane: AbstractLane - lane_start_point = lane.centerline.array[0] - distance_to_lane_start = np.linalg.norm(lane_start_point - point_3d.array) - if distance_to_lane_start < TRAFFIC_LIGHT_ASSIGNMENT_DISTANCE: - traffic_light_ids.append(int(lane.id)) - traffic_light_types.append(traffic_light_state) - return traffic_light_ids, traffic_light_types - - -def _extract_route_lane_group_ids(route: List[List[float]], map_api: AbstractMap) -> List[int]: - - # FIXME: Carla route is very buggy. No check if lanes are connected. - route = np.array(route, dtype=np.float64) - route[..., 1] = -route[..., 1] # Unreal coordinate system to ISO 8855 - route = route[::2] - - route_lane_group_ids: List[int] = [] - - for point in route[:200]: - point_2d = Point2D(point[0], point[1]) - nearby_lane_groups = map_api.query(point_2d.shapely_point, [MapLayer.LANE_GROUP], predicate="intersects")[ - MapLayer.LANE_GROUP - ] - if len(nearby_lane_groups) == 0: - continue - elif len(nearby_lane_groups) > 1: - possible_lane_group_ids = [lane_group.id for lane_group in nearby_lane_groups] - if len(route_lane_group_ids) > 0: - prev_lane_group_id = route_lane_group_ids[-1] - if prev_lane_group_id in possible_lane_group_ids: - continue - else: - # TODO: Choose with least heading difference? - route_lane_group_ids.append(int(nearby_lane_groups[0].id)) - else: - # TODO: Choose with least heading difference? 
route_lane_group_ids.append(int(nearby_lane_groups[0].id))
-        elif len(nearby_lane_groups) == 1:
-            route_lane_group_ids.append(int(nearby_lane_groups[0].id))
-
-    return list(dict.fromkeys(route_lane_group_ids))  # Remove duplicates while preserving order
-
-
-def _extract_cameras(
-    data: Dict[str, Any], log_name: str, sample_name: str, data_converter_config: DataConverterConfig
-) -> Dict[CameraType, Optional[Tuple[str, Optional[np.ndarray]]]]:
-    camera_dict: Dict[CameraType, Optional[Tuple[str, Optional[np.ndarray]]]] = {}
-    for camera_type in CARLA_CAMERA_TYPES:
-        camera_full_path = CARLA_DATA_ROOT / "sensor_blobs" / log_name / camera_type.name / f"{sample_name}.jpg"
-        if camera_full_path.exists():
-            if data_converter_config.camera_store_option == "path":
-                path = f"{log_name}/{camera_type.name}/{sample_name}.jpg"
-                extrinsics = data.get(f"{camera_type.serialize()}_transform", None)
-                camera_dict[camera_type] = (
-                    path,
-                    np.array(extrinsics, dtype=np.float64).flatten() if extrinsics is not None else None,
-                )
-
-            elif data_converter_config.camera_store_option == "binary":
-                raise NotImplementedError("Binary camera storage is not implemented.")
-        else:
-            camera_dict[camera_type] = None
-    return camera_dict
-
-
-def _extract_lidar(
-    log_name: str, sample_name: str, data_converter_config: DataConverterConfig
-) -> Optional[Dict[LiDARType, str]]:
-
-    lidar: Optional[str] = None
-    lidar_full_path = CARLA_DATA_ROOT / "sensor_blobs" / log_name / "lidar" / f"{sample_name}.npy"
-    if lidar_full_path.exists():
-        if data_converter_config.lidar_store_option == "path":
-            lidar = f"{log_name}/lidar/{sample_name}.npy"
-        elif data_converter_config.lidar_store_option == "binary":
-            raise NotImplementedError("Binary lidar storage is not implemented.")
-    else:
-        raise FileNotFoundError(f"LiDAR file not found: {lidar_full_path}")
-
-    return {LiDARType.LIDAR_TOP: lidar} if lidar else None
diff --git a/d123/dataset/dataset_specific/carla/load_sensor.py b/d123/dataset/dataset_specific/carla/load_sensor.py
deleted file mode 100644
index 56ff68fa..00000000
--- a/d123/dataset/dataset_specific/carla/load_sensor.py
+++ /dev/null
@@ -1,10 +0,0 @@
-from pathlib import Path
-
-import numpy as np
-
-from d123.common.datatypes.sensor.lidar import LiDAR, LiDARMetadata
-
-
-def load_carla_lidar_from_path(filepath: Path, lidar_metadata: LiDARMetadata) -> LiDAR:
-    assert filepath.exists(), f"LiDAR file not found: {filepath}"
-    return LiDAR(metadata=lidar_metadata, point_cloud=np.load(filepath))
diff --git a/d123/dataset/dataset_specific/nuplan/load_sensor.py b/d123/dataset/dataset_specific/nuplan/load_sensor.py
deleted file mode 100644
index 0bbdc406..00000000
--- a/d123/dataset/dataset_specific/nuplan/load_sensor.py
+++ /dev/null
@@ -1,21 +0,0 @@
-import io
-from pathlib import Path
-
-from nuplan.database.utils.pointclouds.lidar import LidarPointCloud
-
-from d123.common.datatypes.sensor.lidar import LiDAR, LiDARMetadata
-
-
-def load_nuplan_lidar_from_path(filepath: Path, lidar_metadata: LiDARMetadata) -> LiDAR:
-    assert filepath.exists(), f"LiDAR file not found: {filepath}"
-    with open(filepath, "rb") as fp:
-        buffer = io.BytesIO(fp.read())
-    return LiDAR(metadata=lidar_metadata, point_cloud=LidarPointCloud.from_buffer(buffer, "pcd").points)
-
-
-# def load_camera_from_path(filename: str, metadata: CameraMetadata) -> Camera:
-#     camera_full_path = NUPLAN_DATA_ROOT / "nuplan-v1.1" / "sensor_blobs" / filename
-#     assert camera_full_path.exists(), f"Camera file not found: {camera_full_path}"
-#     img = Image.open(camera_full_path)
-#     img.load()
-#     return Camera(metadata=metadata, 
image=np.asarray(img, dtype=np.uint8)) diff --git a/d123/dataset/dataset_specific/nuplan/nuplan_data_converter.py b/d123/dataset/dataset_specific/nuplan/nuplan_data_converter.py deleted file mode 100644 index 23a4d702..00000000 --- a/d123/dataset/dataset_specific/nuplan/nuplan_data_converter.py +++ /dev/null @@ -1,500 +0,0 @@ -import gc -import json -import os -import pickle -from dataclasses import asdict -from functools import partial -from pathlib import Path -from typing import Any, Dict, Final, List, Optional, Tuple, Union - -import numpy as np -import pyarrow as pa -import yaml -from nuplan.database.nuplan_db.nuplan_scenario_queries import get_cameras, get_images_from_lidar_tokens -from nuplan.database.nuplan_db_orm.ego_pose import EgoPose -from nuplan.database.nuplan_db_orm.lidar_box import LidarBox -from nuplan.database.nuplan_db_orm.lidar_pc import LidarPc -from nuplan.database.nuplan_db_orm.nuplandb import NuPlanDB -from nuplan.planning.simulation.observation.observation_type import CameraChannel -from pyquaternion import Quaternion -from sqlalchemy import func - -import d123.dataset.dataset_specific.nuplan.utils as nuplan_utils -from d123.common.datatypes.detection.detection import TrafficLightStatus -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar_index import NuplanLidarIndex -from d123.common.datatypes.time.time_point import TimePoint -from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index -from d123.common.datatypes.vehicle_state.vehicle_parameters import ( - get_nuplan_chrysler_pacifica_parameters, - rear_axle_se3_to_center_se3, -) -from d123.common.geometry.base import StateSE3 -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3, BoundingBoxSE3Index -from d123.common.geometry.constants import DEFAULT_PITCH, DEFAULT_ROLL -from d123.common.geometry.vector import Vector3D, Vector3DIndex -from d123.common.multithreading.worker_utils import WorkerPool, worker_map -from d123.dataset.arrow.helper import open_arrow_table, write_arrow_table -from d123.dataset.dataset_specific.nuplan.nuplan_map_conversion import MAP_LOCATIONS, NuPlanMapConverter -from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig, RawDataConverter -from d123.dataset.logs.log_metadata import LogMetadata - -TARGET_DT: Final[float] = 0.1 -NUPLAN_DT: Final[float] = 0.05 -SORT_BY_TIMESTAMP: Final[bool] = True - -NUPLAN_TRAFFIC_STATUS_DICT: Final[Dict[str, TrafficLightStatus]] = { - "green": TrafficLightStatus.GREEN, - "red": TrafficLightStatus.RED, - "unknown": TrafficLightStatus.UNKNOWN, -} -NUPLAN_DETECTION_NAME_DICT = { - "vehicle": DetectionType.VEHICLE, - "bicycle": DetectionType.BICYCLE, - "pedestrian": DetectionType.PEDESTRIAN, - "traffic_cone": DetectionType.TRAFFIC_CONE, - "barrier": DetectionType.BARRIER, - "czone_sign": DetectionType.CZONE_SIGN, - "generic_object": DetectionType.GENERIC_OBJECT, -} - -NUPLAN_CAMERA_TYPES = { - CameraType.CAM_F0: CameraChannel.CAM_F0, - CameraType.CAM_B0: CameraChannel.CAM_B0, - CameraType.CAM_L0: CameraChannel.CAM_L0, - CameraType.CAM_L1: CameraChannel.CAM_L1, - CameraType.CAM_L2: CameraChannel.CAM_L2, - CameraType.CAM_R0: CameraChannel.CAM_R0, - CameraType.CAM_R1: CameraChannel.CAM_R1, - CameraType.CAM_R2: 
CameraChannel.CAM_R2, -} - -NUPLAN_DATA_ROOT = Path(os.environ["NUPLAN_DATA_ROOT"]) -NUPLAN_ROLLING_SHUTTER_S: Final[TimePoint] = TimePoint.from_s(1 / 60) - - -def create_splits_logs() -> Dict[str, List[str]]: - yaml_filepath = Path(nuplan_utils.__path__[0]) / "log_splits.yaml" - with open(yaml_filepath, "r") as stream: - splits = yaml.safe_load(stream) - - return splits["log_splits"] - - -class NuplanDataConverter(RawDataConverter): - def __init__( - self, - splits: List[str], - log_path: Union[Path, str], - data_converter_config: DataConverterConfig, - ) -> None: - super().__init__(data_converter_config) - for split in splits: - assert ( - split in self.get_available_splits() - ), f"Split {split} is not available. Available splits: {self.available_splits}" - - self._splits: List[str] = splits - self._log_path: Path = Path(log_path) - self._log_paths_per_split: Dict[str, List[Path]] = self._collect_log_paths() - self._target_dt: float = 0.1 - - def _collect_log_paths(self) -> Dict[str, List[Path]]: - # NOTE: the nuplan mini folder has an internal train, val, test structure, all stored in "mini". - # The complete dataset is saved in the "trainval" folder (train and val), or in the "test" folder (for test). - subsplit_log_names: Dict[str, List[str]] = create_splits_logs() - log_paths_per_split: Dict[str, List[Path]] = {} - - for split in self._splits: - subsplit = split.split("_")[-1] - assert subsplit in ["train", "val", "test"] - if split in ["nuplan_train", "nuplan_val"]: - log_path = NUPLAN_DATA_ROOT / "nuplan-v1.1" / "splits" / "trainval" - elif split in ["nuplan_test"]: - log_path = NUPLAN_DATA_ROOT / "nuplan-v1.1" / "splits" / "test" - elif split in ["nuplan_mini_train", "nuplan_mini_val", "nuplan_mini_test"]: - log_path = NUPLAN_DATA_ROOT / "nuplan-v1.1" / "splits" / "mini" - elif split == "nuplan_private_test": - log_path = NUPLAN_DATA_ROOT / "nuplan-v1.1" / "splits" / "private_test" - - all_log_files_in_path = [log_file for log_file in log_path.glob("*.db")] - all_log_names = set([str(log_file.stem) for log_file in all_log_files_in_path]) - set(subsplit_log_names[subsplit]) - # log_paths = [log_path / f"{log_name}.db" for log_name in list(all_log_names & split_log_names)] - log_paths = [log_path / f"{log_name}.db" for log_name in list(all_log_names)] - log_paths_per_split[split] = log_paths - - return log_paths_per_split - - def get_available_splits(self) -> List[str]: - return [ - "nuplan_train", - "nuplan_val", - "nuplan_test", - "nuplan_mini_train", - "nuplan_mini_val", - "nuplan_mini_test", - "nuplan_private_test", - ] - - def convert_maps(self, worker: WorkerPool) -> None: - worker_map( - worker, - partial(convert_nuplan_map_to_gpkg, data_converter_config=self.data_converter_config), - list(MAP_LOCATIONS), - ) - - def convert_logs(self, worker: WorkerPool) -> None: - log_args = [ - { - "log_path": log_path, - "split": split, - } - for split, log_paths in self._log_paths_per_split.items() - for log_path in log_paths - ] - - worker_map( - worker, - partial( - convert_nuplan_log_to_arrow, - data_converter_config=self.data_converter_config, - ), - log_args, - ) - - -def convert_nuplan_map_to_gpkg(map_names: List[str], data_converter_config: DataConverterConfig) -> List[Any]: - for map_name in map_names: - map_path = data_converter_config.output_path / "maps" / f"nuplan_{map_name}.gpkg" - if data_converter_config.force_map_conversion or not map_path.exists(): - map_path.unlink(missing_ok=True) - NuPlanMapConverter(data_converter_config.output_path / 
"maps").convert(map_name=map_name) - return [] - - -def convert_nuplan_log_to_arrow( - args: List[Dict[str, Union[List[str], List[Path]]]], data_converter_config: DataConverterConfig -) -> List[Any]: - for log_info in args: - log_path: Path = log_info["log_path"] - split: str = log_info["split"] - - if not log_path.exists(): - raise FileNotFoundError(f"Log path {log_path} does not exist.") - - log_db = NuPlanDB(NUPLAN_DATA_ROOT, str(log_path), None) - log_file_path = data_converter_config.output_path / split / f"{log_path.stem}.arrow" - - if data_converter_config.force_log_conversion or not log_file_path.exists(): - log_file_path.unlink(missing_ok=True) - if not log_file_path.parent.exists(): - log_file_path.parent.mkdir(parents=True, exist_ok=True) - - metadata = LogMetadata( - dataset="nuplan", - log_name=log_db.log_name, - location=log_db.log.map_version, - timestep_seconds=TARGET_DT, - map_has_z=False, - ) - vehicle_parameters = get_nuplan_chrysler_pacifica_parameters() - camera_metadata = get_nuplan_camera_metadata(log_path) - lidar_metadata = get_nuplan_lidar_metadata(log_db) - - schema_column_list = [ - ("token", pa.string()), - ("timestamp", pa.int64()), - ("detections_state", pa.list_(pa.list_(pa.float64(), len(BoundingBoxSE3Index)))), - ("detections_velocity", pa.list_(pa.list_(pa.float64(), len(Vector3DIndex)))), - ("detections_token", pa.list_(pa.string())), - ("detections_type", pa.list_(pa.int16())), - ("ego_states", pa.list_(pa.float64(), len(EgoStateSE3Index))), - ("traffic_light_ids", pa.list_(pa.int64())), - ("traffic_light_types", pa.list_(pa.int16())), - ("scenario_tag", pa.list_(pa.string())), - ("route_lane_group_ids", pa.list_(pa.int64())), - ] - if data_converter_config.lidar_store_option is not None: - for lidar_type in lidar_metadata.keys(): - if data_converter_config.lidar_store_option == "path": - schema_column_list.append((lidar_type.serialize(), pa.string())) - elif data_converter_config.lidar_store_option == "binary": - raise NotImplementedError("Binary lidar storage is not implemented.") - - if data_converter_config.camera_store_option is not None: - for camera_type in camera_metadata.keys(): - if data_converter_config.camera_store_option == "path": - schema_column_list.append((camera_type.serialize(), pa.string())) - schema_column_list.append( - (f"{camera_type.serialize()}_extrinsic", pa.list_(pa.float64(), 4 * 4)) - ) - - elif data_converter_config.camera_store_option == "binary": - raise NotImplementedError("Binary camera storage is not implemented.") - - recording_schema = pa.schema(schema_column_list) - recording_schema = recording_schema.with_metadata( - { - "log_metadata": json.dumps(asdict(metadata)), - "vehicle_parameters": json.dumps(asdict(vehicle_parameters)), - "camera_metadata": camera_metadata_dict_to_json(camera_metadata), - "lidar_metadata": lidar_metadata_dict_to_json(lidar_metadata), - } - ) - - _write_recording_table(log_db, recording_schema, log_file_path, log_path, data_converter_config) - - log_db.detach_tables() - log_db.remove_ref() - del recording_schema, vehicle_parameters, log_db - gc.collect() - return [] - - -def get_nuplan_camera_metadata(log_path: Path) -> Dict[CameraType, CameraMetadata]: - - def _get_camera_metadata(camera_type: CameraType) -> CameraMetadata: - cam = list(get_cameras(log_path, [str(NUPLAN_CAMERA_TYPES[camera_type].value)]))[0] - intrinsic = np.array(pickle.loads(cam.intrinsic)) - rotation = np.array(pickle.loads(cam.rotation)) - rotation = Quaternion(rotation).rotation_matrix - distortion = 
np.array(pickle.loads(cam.distortion)) - return CameraMetadata( - camera_type=camera_type, - width=cam.width, - height=cam.height, - intrinsic=intrinsic, - distortion=distortion, - ) - - log_cam_infos: Dict[str, CameraMetadata] = {} - for camera_type in NUPLAN_CAMERA_TYPES.keys(): - log_cam_infos[camera_type] = _get_camera_metadata(camera_type) - - return log_cam_infos - - -def get_nuplan_lidar_metadata(log_db: NuPlanDB) -> Dict[LiDARType, LiDARMetadata]: - metadata: Dict[LiDARType, LiDARMetadata] = {} - metadata[LiDARType.LIDAR_MERGED] = LiDARMetadata( - lidar_type=LiDARType.LIDAR_MERGED, - lidar_index=NuplanLidarIndex, - extrinsic=None, # NOTE: LiDAR extrinsic are unknown - ) - return metadata - - -def _write_recording_table( - log_db: NuPlanDB, - recording_schema: pa.schema, - log_file_path: Path, - source_log_path: Path, - data_converter_config: DataConverterConfig, -) -> None: - - # with pa.ipc.new_stream(str(log_file_path), recording_schema) as writer: - with pa.OSFile(str(log_file_path), "wb") as sink: - with pa.ipc.new_file(sink, recording_schema) as writer: - step_interval: float = int(TARGET_DT / NUPLAN_DT) - for lidar_pc in log_db.lidar_pc[::step_interval]: - lidar_pc_token: str = lidar_pc.token - ( - detections_state, - detections_velocity, - detections_token, - detections_types, - ) = _extract_detections(lidar_pc) - traffic_light_ids, traffic_light_types = _extract_traffic_lights(log_db, lidar_pc_token) - route_lane_group_ids = [ - int(roadblock_id) - for roadblock_id in str(lidar_pc.scene.roadblock_ids).split(" ") - if len(roadblock_id) > 0 - ] - - row_data = { - "token": [lidar_pc_token], - "timestamp": [lidar_pc.timestamp], - "detections_state": [detections_state], - "detections_velocity": [detections_velocity], - "detections_token": [detections_token], - "detections_type": [detections_types], - "ego_states": [_extract_ego_state(lidar_pc)], - "traffic_light_ids": [traffic_light_ids], - "traffic_light_types": [traffic_light_types], - "scenario_tag": [_extract_scenario_tag(log_db, lidar_pc_token)], - "route_lane_group_ids": [route_lane_group_ids], - } - - if data_converter_config.lidar_store_option is not None: - lidar_data_dict = _extract_lidar(lidar_pc, data_converter_config) - for lidar_type, lidar_data in lidar_data_dict.items(): - if lidar_data is not None: - row_data[lidar_type.serialize()] = [lidar_data] - else: - row_data[lidar_type.serialize()] = [None] - - if data_converter_config.camera_store_option is not None: - camera_data_dict = _extract_camera(log_db, lidar_pc, source_log_path, data_converter_config) - for camera_type, camera_data in camera_data_dict.items(): - if camera_data is not None: - row_data[camera_type.serialize()] = [camera_data[0]] - row_data[f"{camera_type.serialize()}_extrinsic"] = [camera_data[1]] - else: - row_data[camera_type.serialize()] = [None] - row_data[f"{camera_type.serialize()}_extrinsic"] = [None] - - batch = pa.record_batch(row_data, schema=recording_schema) - writer.write_batch(batch) - del batch, row_data, detections_state, detections_velocity, detections_token, detections_types - - if SORT_BY_TIMESTAMP: - recording_table = open_arrow_table(log_file_path) - recording_table = recording_table.sort_by([("timestamp", "ascending")]) - write_arrow_table(recording_table, log_file_path) - - -def _extract_detections(lidar_pc: LidarPc) -> Tuple[List[List[float]], List[List[float]], List[str], List[int]]: - detections_state: List[List[float]] = [] - detections_velocity: List[List[float]] = [] - detections_token: List[str] = [] - 
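Aside: `_write_recording_table` above streams one Arrow record batch per (subsampled) lidar_pc into an IPC file under a fixed schema. A minimal sketch of that write/read pattern, assuming only pyarrow; the file name and the two columns are illustrative:

```python
import pyarrow as pa

# Illustrative two-column schema; the converter's real schema also carries
# detections, ego states, traffic lights, and optional sensor columns.
schema = pa.schema([("token", pa.string()), ("timestamp", pa.int64())])

with pa.OSFile("example_log.arrow", "wb") as sink:
    with pa.ipc.new_file(sink, schema) as writer:
        # One batch per frame, mirroring the per-lidar_pc loop above.
        batch = pa.record_batch({"token": ["abc123"], "timestamp": [0]}, schema=schema)
        writer.write_batch(batch)

# Reading the file back as a single table:
with pa.memory_map("example_log.arrow", "r") as source:
    table = pa.ipc.open_file(source).read_all()
```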
detections_types: List[int] = [] - - for lidar_box in lidar_pc.lidar_boxes: - lidar_box: LidarBox - center = StateSE3( - x=lidar_box.x, - y=lidar_box.y, - z=lidar_box.z, - roll=DEFAULT_ROLL, - pitch=DEFAULT_PITCH, - yaw=lidar_box.yaw, - ) - bounding_box_se3 = BoundingBoxSE3(center, lidar_box.length, lidar_box.width, lidar_box.height) - - detections_state.append(bounding_box_se3.array) - detections_velocity.append(lidar_box.velocity) - detections_token.append(lidar_box.track_token) - detections_types.append(int(NUPLAN_DETECTION_NAME_DICT[lidar_box.category.name])) - - return detections_state, detections_velocity, detections_token, detections_types - - -def _extract_ego_state(lidar_pc: LidarPc) -> List[float]: - - yaw, pitch, roll = lidar_pc.ego_pose.quaternion.yaw_pitch_roll - vehicle_parameters = get_nuplan_chrysler_pacifica_parameters() - # vehicle_parameters = get_pacifica_parameters() - - rear_axle_pose = StateSE3( - x=lidar_pc.ego_pose.x, - y=lidar_pc.ego_pose.y, - z=lidar_pc.ego_pose.z, - roll=roll, - pitch=pitch, - yaw=yaw, - ) - # NOTE: The height to the rear axle is not provided in the dataset and is merely approximated. - center = rear_axle_se3_to_center_se3(rear_axle_se3=rear_axle_pose, vehicle_parameters=vehicle_parameters) - dynamic_state = DynamicStateSE3( - velocity=Vector3D( - x=lidar_pc.ego_pose.vx, - y=lidar_pc.ego_pose.vy, - z=lidar_pc.ego_pose.vz, - ), - acceleration=Vector3D( - x=lidar_pc.ego_pose.acceleration_x, - y=lidar_pc.ego_pose.acceleration_y, - z=lidar_pc.ego_pose.acceleration_z, - ), - angular_velocity=Vector3D( - x=lidar_pc.ego_pose.angular_rate_x, - y=lidar_pc.ego_pose.angular_rate_y, - z=lidar_pc.ego_pose.angular_rate_z, - ), - ) - - return EgoStateSE3( - center_se3=center, - dynamic_state_se3=dynamic_state, - vehicle_parameters=vehicle_parameters, - timepoint=None, - ).array.tolist() - - -def _extract_traffic_lights(log_db: NuPlanDB, lidar_pc_token: str) -> Tuple[List[int], List[int]]: - traffic_light_ids: List[int] = [] - traffic_light_types: List[int] = [] - traffic_lights = log_db.traffic_light_status.select_many(lidar_pc_token=lidar_pc_token) - for traffic_light in traffic_lights: - traffic_light_ids.append(int(traffic_light.lane_connector_id)) - traffic_light_types.append(int(NUPLAN_TRAFFIC_STATUS_DICT[traffic_light.status].value)) - return traffic_light_ids, traffic_light_types - - -def _extract_scenario_tag(log_db: NuPlanDB, lidar_pc_token: str) -> List[str]: - - scenario_tags = [ - scenario_tag.type for scenario_tag in log_db.scenario_tag.select_many(lidar_pc_token=lidar_pc_token) - ] - if len(scenario_tags) == 0: - scenario_tags = ["unknown"] - return scenario_tags - - -def _extract_camera( - log_db: NuPlanDB, - lidar_pc: LidarPc, - source_log_path: Path, - data_converter_config: DataConverterConfig, -) -> Dict[CameraType, Union[str, bytes]]: - - camera_dict: Dict[str, Union[str, bytes]] = {} - sensor_root = NUPLAN_DATA_ROOT / "nuplan-v1.1" / "sensor_blobs" - - log_cam_infos = {camera.token: camera for camera in log_db.log.cameras} - - for camera_type, camera_channel in NUPLAN_CAMERA_TYPES.items(): - camera_data: Optional[Union[str, bytes]] = None - c2e: Optional[List[float]] = None - image_class = list(get_images_from_lidar_tokens(source_log_path, [lidar_pc.token], [str(camera_channel.value)])) - if len(image_class) != 0: - image = image_class[0] - filename_jpg = sensor_root / image.filename_jpg - if filename_jpg.exists(): - - # Code taken from MTGS - # https://github.com/OpenDriveLab/MTGS/blob/main/nuplan_scripts/utils/nuplan_utils_custom.py#L117 - -
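The block that follows shifts the image timestamp by the rolling-shutter constant, queries the ego pose nearest to that time, and chains homogeneous transforms into a camera-to-ego extrinsic. A sketch of the chain with identity placeholders (variable names mirror the code; in the converter the poses come from the nuPlan DB):

```python
import numpy as np

c2img_e = np.eye(4)  # camera -> ego at the image timestamp (from calibration)
img_e2g = np.eye(4)  # ego at the image timestamp -> global (queried ego pose)
e2g = np.eye(4)      # ego at the lidar timestamp -> global

g2e = np.linalg.inv(e2g)  # global -> ego at the lidar timestamp
img_e2e = g2e @ img_e2g   # ego(t_image) -> ego(t_lidar)
c2e = img_e2e @ c2img_e   # camera -> ego(t_lidar); flattened and stored per row
```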
timestamp = image.timestamp + NUPLAN_ROLLING_SHUTTER_S.time_us - img_ego_pose: EgoPose = ( - log_db.log._session.query(EgoPose).order_by(func.abs(EgoPose.timestamp - timestamp)).first() - ) - img_e2g = img_ego_pose.trans_matrix - g2e = lidar_pc.ego_pose.trans_matrix_inv - img_e2e = g2e @ img_e2g - cam_info = log_cam_infos[image.camera_token] - c2img_e = cam_info.trans_matrix - c2e = img_e2e @ c2img_e - - if data_converter_config.camera_store_option == "path": - camera_data = str(filename_jpg), c2e.flatten().tolist() - elif data_converter_config.camera_store_option == "binary": - with open(filename_jpg, "rb") as f: - camera_data = f.read(), c2e - - camera_dict[camera_type] = camera_data - - return camera_dict - - -def _extract_lidar(lidar_pc: LidarPc, data_converter_config: DataConverterConfig) -> Dict[LiDARType, Optional[str]]: - - lidar: Optional[str] = None - lidar_full_path = NUPLAN_DATA_ROOT / "nuplan-v1.1" / "sensor_blobs" / lidar_pc.filename - if lidar_full_path.exists(): - lidar = lidar_pc.filename - - return {LiDARType.LIDAR_MERGED: lidar} diff --git a/d123/dataset/dataset_specific/nuplan/nuplan_map_conversion.py b/d123/dataset/dataset_specific/nuplan/nuplan_map_conversion.py deleted file mode 100644 index 78f5d3f6..00000000 --- a/d123/dataset/dataset_specific/nuplan/nuplan_map_conversion.py +++ /dev/null @@ -1,483 +0,0 @@ -# TODO: Refactor this mess. - -import os -import warnings -from pathlib import Path -from typing import Dict, List, Optional - -import geopandas as gpd -import numpy as np -import pandas as pd -import pyogrio -from shapely.geometry import LineString - -from d123.dataset.conversion.map.road_edge.road_edge_2d_utils import ( - get_road_edge_linear_rings, - split_line_geometry_by_max_length, -) -from d123.dataset.maps.gpkg.utils import get_all_rows_with_value, get_row_with_value -from d123.dataset.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType - -MAP_FILES = { - "sg-one-north": "sg-one-north/9.17.1964/map.gpkg", - "us-ma-boston": "us-ma-boston/9.12.1817/map.gpkg", - "us-nv-las-vegas-strip": "us-nv-las-vegas-strip/9.15.1915/map.gpkg", - "us-pa-pittsburgh-hazelwood": "us-pa-pittsburgh-hazelwood/9.17.1937/map.gpkg", -} - -NUPLAN_MAPS_ROOT = os.environ["NUPLAN_MAPS_ROOT"] -MAP_LOCATIONS = {"sg-one-north", "us-ma-boston", "us-nv-las-vegas-strip", "us-pa-pittsburgh-hazelwood"} -GPKG_LAYERS: List[str] = [ - "baseline_paths", - "carpark_areas", - "generic_drivable_areas", - "dubins_nodes", - "lane_connectors", - "intersections", - "boundaries", - "crosswalks", - "lanes_polygons", - "lane_group_connectors", - "lane_groups_polygons", - "road_segments", - "stop_polygons", - "traffic_lights", - "walkways", - "gen_lane_connectors_scaled_width_polygons", -] - -# 0: generic lane I guess. -# 1: ending? -# 3: bike lanes. - -MAX_ROAD_EDGE_LENGTH = 100.0 # meters, used to filter out very long road edges - -NUPLAN_ROAD_LINE_CONVERSION = { - 0: RoadLineType.DASHED_WHITE, - 2: RoadLineType.SOLID_WHITE, - 3: RoadLineType.UNKNOWN, -} - - -class NuPlanMapConverter: - def __init__(self, map_path: Path) -> None: - - self._map_path: Path = map_path - self._gdf: Optional[Dict[str, gpd.GeoDataFrame]] = None - - def convert(self, map_name: str = "us-pa-pittsburgh-hazelwood") -> None: - assert map_name in MAP_LOCATIONS, f"Map name {map_name} is not supported." 
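As `convert()` continues below, it builds one GeoDataFrame per semantic map layer and appends each under its own layer name into a single GeoPackage. A minimal sketch of that multi-layer write with geopandas (file name, layer names, and geometries are illustrative):

```python
import geopandas as gpd
from shapely.geometry import Point

lanes = gpd.GeoDataFrame({"id": ["1"]}, geometry=[Point(0.0, 0.0)])
crosswalks = gpd.GeoDataFrame({"id": ["7"]}, geometry=[Point(1.0, 2.0)])

# The first write creates the file; mode="a" appends further layers.
lanes.to_file("example_map.gpkg", layer="lane", driver="GPKG")
crosswalks.to_file("example_map.gpkg", layer="crosswalk", driver="GPKG", mode="a")
```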
- - map_file_path = Path(NUPLAN_MAPS_ROOT) / MAP_FILES[map_name] - self._load_dataframes(map_file_path) - - dataframes: Dict[MapLayer, gpd.GeoDataFrame] = {} - dataframes[MapLayer.LANE] = self._extract_lane_dataframe() - dataframes[MapLayer.LANE_GROUP] = self._extract_lane_group_dataframe() - dataframes[MapLayer.INTERSECTION] = self._extract_intersection_dataframe() - dataframes[MapLayer.CROSSWALK] = self._extract_crosswalk_dataframe() - dataframes[MapLayer.WALKWAY] = self._extract_walkway_dataframe() - dataframes[MapLayer.CARPARK] = self._extract_carpark_dataframe() - dataframes[MapLayer.GENERIC_DRIVABLE] = self._extract_generic_drivable_dataframe() - dataframes[MapLayer.ROAD_EDGE] = self._extract_road_edge_dataframe() - dataframes[MapLayer.ROAD_LINE] = self._extract_road_line_dataframe() - - if not self._map_path.exists(): - self._map_path.mkdir(parents=True, exist_ok=True) - - map_file_name = self._map_path / f"nuplan_{map_name}.gpkg" - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", message="'crs' was not provided") - for layer, gdf in dataframes.items(): - gdf.to_file(map_file_name, layer=layer.serialize(), driver="GPKG", mode="a") - - def _load_dataframes(self, map_file_path: Path) -> None: - - # The projected coordinate system depends on which UTM zone the mapped location is in. - map_meta = gpd.read_file(map_file_path, layer="meta", engine="pyogrio") - projection_system = map_meta[map_meta["key"] == "projectedCoordSystem"]["value"].iloc[0] - - self._gdf = {} - for layer_name in GPKG_LAYERS: - with warnings.catch_warnings(): - # Suppress the warnings from the GPKG operations below so that they don't spam the training logs. - warnings.filterwarnings("ignore") - - gdf_in_pixel_coords = pyogrio.read_dataframe(map_file_path, layer=layer_name, fid_as_index=True) - gdf_in_utm_coords = gdf_in_pixel_coords.to_crs(projection_system) - # gdf_in_utm_coords = gdf_in_pixel_coords - - # For backwards compatibility, cast the index to string datatype. - # and mirror it to the "fid" column. - gdf_in_utm_coords.index = gdf_in_utm_coords.index.map(str) - gdf_in_utm_coords["fid"] = gdf_in_utm_coords.index - - self._gdf[layer_name] = gdf_in_utm_coords - - def _extract_lane_dataframe(self) -> gpd.GeoDataFrame: - assert self._gdf is not None, "Call `.initialize()` before retrieving data!" - lane_df = self._extract_nuplan_lane_dataframe() - lane_connector_df = self._extract_nuplan_lane_connector_dataframe() - combined_df = pd.concat([lane_df, lane_connector_df], ignore_index=True) - return combined_df - - def _extract_nuplan_lane_dataframe(self) -> gpd.GeoDataFrame: - # NOTE: drops: lane_index (?), creator_id, name (?), road_type_fid (?), lane_type_fid (?), width (?), left_offset (?), right_offset (?), - # min_speed (?), max_speed (?), stops, left_has_reflectors (?), right_has_reflectors (?), from_edge_fid, to_edge_fid - - ids = self._gdf["lanes_polygons"].lane_fid.to_list() - lane_group_ids = self._gdf["lanes_polygons"].lane_group_fid.to_list() - speed_limits_mps = self._gdf["lanes_polygons"].speed_limit_mps.to_list() - predecessor_ids = [] - successor_ids = [] - left_boundaries = [] - right_boundaries = [] - left_lane_ids = [] - right_lane_ids = [] - baseline_paths = [] - geometries = self._gdf["lanes_polygons"].geometry.to_list() - - for lane_id in ids: - - # 1. 
predecessor_ids, successor_ids - _predecessor_ids = get_all_rows_with_value( - self._gdf["lane_connectors"], - "entry_lane_fid", - lane_id, - )["fid"].tolist() - _successor_ids = get_all_rows_with_value( - self._gdf["lane_connectors"], - "exit_lane_fid", - lane_id, - )["fid"].tolist() - predecessor_ids.append(_predecessor_ids) - successor_ids.append(_successor_ids) - - # 2. left_boundaries, right_boundaries - lane_series = get_row_with_value(self._gdf["lanes_polygons"], "fid", str(lane_id)) - left_boundary_fid = lane_series["left_boundary_fid"] - left_boundary = get_row_with_value(self._gdf["boundaries"], "fid", str(left_boundary_fid))["geometry"] - - right_boundary_fid = lane_series["right_boundary_fid"] - right_boundary = get_row_with_value(self._gdf["boundaries"], "fid", str(right_boundary_fid))["geometry"] - - # 3. left_lane_ids, right_lane_ids - lane_index = lane_series["lane_index"] - all_group_lanes = get_all_rows_with_value( - self._gdf["lanes_polygons"], "lane_group_fid", lane_series["lane_group_fid"] - ) - left_lane_id = all_group_lanes[all_group_lanes["lane_index"] == int(lane_index) - 1]["fid"] - right_lane_id = all_group_lanes[all_group_lanes["lane_index"] == int(lane_index) + 1]["fid"] - left_lane_ids.append(left_lane_id.item() if not left_lane_id.empty else None) - right_lane_ids.append(right_lane_id.item() if not right_lane_id.empty else None) - - # 4. baseline_paths - baseline_path = get_row_with_value(self._gdf["baseline_paths"], "lane_fid", float(lane_id))["geometry"] - - left_boundary = align_boundary_direction(baseline_path, left_boundary) - right_boundary = align_boundary_direction(baseline_path, right_boundary) - - left_boundaries.append(left_boundary) - right_boundaries.append(right_boundary) - baseline_paths.append(baseline_path) - - data = pd.DataFrame( - { - "id": ids, - "lane_group_id": lane_group_ids, - "speed_limit_mps": speed_limits_mps, - "predecessor_ids": predecessor_ids, - "successor_ids": successor_ids, - "left_boundary": left_boundaries, - "right_boundary": right_boundaries, - "left_lane_id": left_lane_ids, - "right_lane_id": right_lane_ids, - "baseline_path": baseline_paths, - } - ) - - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - def _extract_nuplan_lane_connector_dataframe(self) -> gpd.GeoDataFrame: - # NOTE: drops: exit_lane_group_fid, entry_lane_group_fid, to_edge_fid, - # turn_type_fid (?), bulb_fids (?), traffic_light_stop_line_fids (?), overlap (?), creator_id - # left_has_reflectors (?), right_has_reflectors (?) - ids = self._gdf["lane_connectors"].fid.to_list() - lane_group_ids = self._gdf["lane_connectors"].lane_group_connector_fid.to_list() - speed_limits_mps = self._gdf["lane_connectors"].speed_limit_mps.to_list() - predecessor_ids = [] - successor_ids = [] - left_boundaries = [] - right_boundaries = [] - baseline_paths = [] - geometries = [] - - for lane_id in ids: - # 1. predecessor_ids, successor_ids - lane_connector_row = get_row_with_value(self._gdf["lane_connectors"], "fid", str(lane_id)) - predecessor_ids.append([lane_connector_row["entry_lane_fid"]]) - successor_ids.append([lane_connector_row["exit_lane_fid"]]) - - # 2. 
left_boundaries, right_boundaries - lane_connector_polygons_row = get_row_with_value( - self._gdf["gen_lane_connectors_scaled_width_polygons"], "lane_connector_fid", str(lane_id) - ) - left_boundary_fid = lane_connector_polygons_row["left_boundary_fid"] - left_boundary = get_row_with_value(self._gdf["boundaries"], "fid", str(left_boundary_fid))["geometry"] - - right_boundary_fid = lane_connector_polygons_row["right_boundary_fid"] - right_boundary = get_row_with_value(self._gdf["boundaries"], "fid", str(right_boundary_fid))["geometry"] - - # 3. baseline_paths - baseline_path = get_row_with_value(self._gdf["baseline_paths"], "lane_connector_fid", float(lane_id))[ - "geometry" - ] - - left_boundary = align_boundary_direction(baseline_path, left_boundary) - right_boundary = align_boundary_direction(baseline_path, right_boundary) - - left_boundaries.append(left_boundary) - right_boundaries.append(right_boundary) - baseline_paths.append(baseline_path) - - # 4. geometries - geometries.append(lane_connector_polygons_row.geometry) - - data = pd.DataFrame( - { - "id": ids, - "lane_group_id": lane_group_ids, - "speed_limit_mps": speed_limits_mps, - "predecessor_ids": predecessor_ids, - "successor_ids": successor_ids, - "left_boundary": left_boundaries, - "right_boundary": right_boundaries, - "left_lane_id": [None] * len(ids), - "right_lane_id": [None] * len(ids), - "baseline_path": baseline_paths, - } - ) - - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - def _extract_lane_group_dataframe(self) -> gpd.GeoDataFrame: - lane_group_df = self._extract_nuplan_lane_group_dataframe() - lane_connector_group_df = self._extract_nuplan_lane_connector_group_dataframe() - combined_df = pd.concat([lane_group_df, lane_connector_group_df], ignore_index=True) - return combined_df - - def _extract_nuplan_lane_group_dataframe(self) -> gpd.GeoDataFrame: - # NOTE: drops: creator_id, from_edge_fid, to_edge_fid - ids = self._gdf["lane_groups_polygons"].fid.to_list() - lane_ids = [] - intersection_ids = [None] * len(ids) - predecessor_lane_group_ids = [] - successor_lane_group_ids = [] - left_boundaries = [] - right_boundaries = [] - geometries = self._gdf["lane_groups_polygons"].geometry.to_list() - - for lane_group_id in ids: - # 1. lane_ids - lane_ids_ = get_all_rows_with_value( - self._gdf["lanes_polygons"], - "lane_group_fid", - lane_group_id, - )["fid"].tolist() - lane_ids.append(lane_ids_) - - # 2. predecessor_lane_group_ids, successor_lane_group_ids - predecessor_lane_group_ids_ = get_all_rows_with_value( - self._gdf["lane_group_connectors"], - "to_lane_group_fid", - lane_group_id, - )["fid"].tolist() - successor_lane_group_ids_ = get_all_rows_with_value( - self._gdf["lane_group_connectors"], - "from_lane_group_fid", - lane_group_id, - )["fid"].tolist() - predecessor_lane_group_ids.append(predecessor_lane_group_ids_) - successor_lane_group_ids.append(successor_lane_group_ids_) - - # 3. 
left_boundaries, right_boundaries - lane_group_row = get_row_with_value(self._gdf["lane_groups_polygons"], "fid", str(lane_group_id)) - left_boundary_fid = lane_group_row["left_boundary_fid"] - left_boundary = get_row_with_value(self._gdf["boundaries"], "fid", str(left_boundary_fid))["geometry"] - - right_boundary_fid = lane_group_row["right_boundary_fid"] - right_boundary = get_row_with_value(self._gdf["boundaries"], "fid", str(right_boundary_fid))["geometry"] - - repr_baseline_path = get_row_with_value(self._gdf["baseline_paths"], "lane_fid", float(lane_ids_[0]))[ - "geometry" - ] - - left_boundary = align_boundary_direction(repr_baseline_path, left_boundary) - right_boundary = align_boundary_direction(repr_baseline_path, right_boundary) - - left_boundaries.append(left_boundary) - right_boundaries.append(right_boundary) - - data = pd.DataFrame( - { - "id": ids, - "lane_ids": lane_ids, - "intersection_id": intersection_ids, - "predecessor_lane_group_ids": predecessor_lane_group_ids, - "successor_lane_group_ids": successor_lane_group_ids, - "left_boundary": left_boundaries, - "right_boundary": right_boundaries, - } - ) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - def _extract_nuplan_lane_connector_group_dataframe(self) -> gpd.GeoDataFrame: - # NOTE: drops: creator_id, from_edge_fid, to_edge_fid, intersection_fid - ids = self._gdf["lane_group_connectors"].fid.to_list() - lane_ids = [] - intersection_ids = self._gdf["lane_group_connectors"].intersection_fid.to_list() - predecessor_lane_group_ids = [] - successor_lane_group_ids = [] - left_boundaries = [] - right_boundaries = [] - geometries = self._gdf["lane_group_connectors"].geometry.to_list() - - for lane_group_connector_id in ids: - # 1. lane_ids - lane_ids_ = get_all_rows_with_value( - self._gdf["lane_connectors"], "lane_group_connector_fid", lane_group_connector_id - )["fid"].tolist() - lane_ids.append(lane_ids_) - - # 2. predecessor_lane_group_ids, successor_lane_group_ids - lane_group_connector_row = get_row_with_value( - self._gdf["lane_group_connectors"], "fid", lane_group_connector_id - ) - predecessor_lane_group_ids.append([str(lane_group_connector_row["from_lane_group_fid"])]) - successor_lane_group_ids.append([str(lane_group_connector_row["to_lane_group_fid"])]) - - # 3. left_boundaries, right_boundaries - left_boundary_fid = lane_group_connector_row["left_boundary_fid"] - left_boundary = get_row_with_value(self._gdf["boundaries"], "fid", str(left_boundary_fid))["geometry"] - right_boundary_fid = lane_group_connector_row["right_boundary_fid"] - right_boundary = get_row_with_value(self._gdf["boundaries"], "fid", str(right_boundary_fid))["geometry"] - - left_boundaries.append(left_boundary) - right_boundaries.append(right_boundary) - - data = pd.DataFrame( - { - "id": ids, - "lane_ids": lane_ids, - "intersection_id": intersection_ids, - "predecessor_lane_group_ids": predecessor_lane_group_ids, - "successor_lane_group_ids": successor_lane_group_ids, - "left_boundary": left_boundaries, - "right_boundary": right_boundaries, - } - ) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - def _extract_intersection_dataframe(self) -> gpd.GeoDataFrame: - # NOTE: drops: creator_id, intersection_type_fid (?), is_mini (?) 
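The extraction methods in this class lean on two lookup helpers from d123.dataset.maps.gpkg.utils whose bodies are not part of this diff. Hypothetical stand-ins with the semantics implied by the call sites (exact-match filtering on a column) might look like:

```python
import geopandas as gpd
import pandas as pd

# Hypothetical stand-ins, not the real d123 implementations.
def get_all_rows_with_value(gdf: gpd.GeoDataFrame, column: str, value) -> gpd.GeoDataFrame:
    """All rows whose `column` equals `value`."""
    return gdf[gdf[column] == value]

def get_row_with_value(gdf: gpd.GeoDataFrame, column: str, value) -> pd.Series:
    """The single row whose `column` equals `value`."""
    matches = gdf[gdf[column] == value]
    assert len(matches) == 1, f"expected exactly one row with {column}={value!r}"
    return matches.iloc[0]
```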
- ids = self._gdf["intersections"].fid.to_list() - lane_group_ids = [] - for intersection_id in ids: - lane_group_connector_ids = get_all_rows_with_value( - self._gdf["lane_group_connectors"], "intersection_fid", str(intersection_id) - )["fid"].tolist() - lane_group_ids.append(lane_group_connector_ids) - data = pd.DataFrame({"id": ids, "lane_group_ids": lane_group_ids}) - return gpd.GeoDataFrame(data, geometry=self._gdf["intersections"].geometry.to_list()) - - def _extract_crosswalk_dataframe(self) -> gpd.GeoDataFrame: - # NOTE: drops: creator_id, intersection_fids, lane_fids, is_marked (?) - data = pd.DataFrame({"id": self._gdf["crosswalks"].fid.to_list()}) - return gpd.GeoDataFrame(data, geometry=self._gdf["crosswalks"].geometry.to_list()) - - def _extract_walkway_dataframe(self) -> gpd.GeoDataFrame: - # NOTE: drops: creator_id - data = pd.DataFrame({"id": self._gdf["walkways"].fid.to_list()}) - return gpd.GeoDataFrame(data, geometry=self._gdf["walkways"].geometry.to_list()) - - def _extract_carpark_dataframe(self) -> gpd.GeoDataFrame: - # NOTE: drops: heading, creator_id - data = pd.DataFrame({"id": self._gdf["carpark_areas"].fid.to_list()}) - return gpd.GeoDataFrame(data, geometry=self._gdf["carpark_areas"].geometry.to_list()) - - def _extract_generic_drivable_dataframe(self) -> gpd.GeoDataFrame: - # NOTE: drops: creator_id - data = pd.DataFrame({"id": self._gdf["generic_drivable_areas"].fid.to_list()}) - return gpd.GeoDataFrame(data, geometry=self._gdf["generic_drivable_areas"].geometry.to_list()) - - def _extract_road_edge_dataframe(self) -> gpd.GeoDataFrame: - drivable_polygons = ( - self._gdf["intersections"].geometry.to_list() - + self._gdf["lane_groups_polygons"].geometry.to_list() - + self._gdf["carpark_areas"].geometry.to_list() - + self._gdf["generic_drivable_areas"].geometry.to_list() - ) - road_edge_linear_rings = get_road_edge_linear_rings(drivable_polygons) - road_edges = split_line_geometry_by_max_length(road_edge_linear_rings, MAX_ROAD_EDGE_LENGTH) - - ids = [] - road_edge_types = [] - for idx in range(len(road_edges)): - ids.append(idx) - # TODO @DanielDauner: Figure out if other types should/could be assigned here. - road_edge_types.append(int(RoadEdgeType.ROAD_EDGE_BOUNDARY)) - - data = pd.DataFrame({"id": ids, "road_edge_type": road_edge_types}) - return gpd.GeoDataFrame(data, geometry=road_edges) - - def _extract_road_line_dataframe(self) -> gpd.GeoDataFrame: - boundaries = self._gdf["boundaries"].geometry.to_list() - fids = self._gdf["boundaries"].fid.to_list() - boundary_types = self._gdf["boundaries"].boundary_type_fid.to_list() - - ids = [] - road_line_types = [] - geometries = [] - - for idx in range(len(boundary_types)): - ids.append(fids[idx]) - road_line_types.append(int(NUPLAN_ROAD_LINE_CONVERSION[boundary_types[idx]])) - geometries.append(boundaries[idx]) - - data = pd.DataFrame( - { - "id": ids, - "road_line_type": road_line_types, - } - ) - return gpd.GeoDataFrame(data, geometry=geometries) - - -def flip_linestring(linestring: LineString) -> LineString: - # TODO: move somewhere more appropriate or implement in Polyline2D, PolylineSE2, etc. - return LineString(linestring.coords[::-1]) - - -def lines_same_direction(centerline: LineString, boundary: LineString) -> bool: - # TODO: refactor helper function. 
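`lines_same_direction` below compares the summed endpoint distances of the boundary against its reverse, and `align_boundary_direction` flips the boundary whenever the reversed pairing is closer. A small worked example of the test:

```python
import numpy as np
from shapely.geometry import LineString

centerline = LineString([(0, 0), (10, 0)])
boundary = LineString([(10, 1), (0, 1)])  # digitized against driving direction

c0, c1 = np.array(centerline.coords[0]), np.array(centerline.coords[-1])
b0, b1 = np.array(boundary.coords[0]), np.array(boundary.coords[-1])

same_dir = np.linalg.norm(c0 - b0) + np.linalg.norm(c1 - b1)  # ~20.1
opposite = np.linalg.norm(c0 - b1) + np.linalg.norm(c1 - b0)  # = 2.0
assert opposite < same_dir  # so the boundary gets flipped to match
```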
- center_start = np.array(centerline.coords[0]) - center_end = np.array(centerline.coords[-1]) - boundary_start = np.array(boundary.coords[0]) - boundary_end = np.array(boundary.coords[-1]) - - # Distance from centerline start to boundary start + centerline end to boundary end - same_dir_dist = np.linalg.norm(center_start - boundary_start) + np.linalg.norm(center_end - boundary_end) - opposite_dir_dist = np.linalg.norm(center_start - boundary_end) + np.linalg.norm(center_end - boundary_start) - - return same_dir_dist <= opposite_dir_dist - - -def align_boundary_direction(centerline: LineString, boundary: LineString) -> LineString: - # TODO: refactor helper function. - if not lines_same_direction(centerline, boundary): - return flip_linestring(boundary) - return boundary diff --git a/d123/dataset/dataset_specific/raw_data_converter.py b/d123/dataset/dataset_specific/raw_data_converter.py deleted file mode 100644 index 179f9ef5..00000000 --- a/d123/dataset/dataset_specific/raw_data_converter.py +++ /dev/null @@ -1,44 +0,0 @@ -import abc -from dataclasses import dataclass -from pathlib import Path -from typing import List, Literal, Optional, Union - -from d123.common.multithreading.worker_utils import WorkerPool - - -@dataclass -class DataConverterConfig: - - output_path: Union[str, Path] - force_log_conversion: bool = False - force_map_conversion: bool = False - camera_store_option: Optional[Literal["path", "binary"]] = None - lidar_store_option: Optional[Literal["path", "binary"]] = None - - def __post_init__(self): - if isinstance(self.output_path, str): - self.output_path = Path(self.output_path) - - -class RawDataConverter(abc.ABC): - - def __init__(self, data_converter_config: DataConverterConfig) -> None: - self.data_converter_config = data_converter_config - - @abc.abstractmethod - def get_available_splits(self) -> List[str]: - """Returns a list of available raw data types.""" - - @abc.abstractmethod - def convert_maps(self, worker: WorkerPool) -> None: - """ - Convert maps in raw data format to the uniform 123D format. - :param worker: The worker pool to use for parallel processing. - """ - - @abc.abstractmethod - def convert_logs(self, worker: WorkerPool) -> None: - """ - Convert logs in raw data format to the uniform 123D format. - :param worker: The worker pool to use for parallel processing. - """ diff --git a/d123/dataset/dataset_specific/wopd/waymo_map_utils/wopd_map_utils.py b/d123/dataset/dataset_specific/wopd/waymo_map_utils/wopd_map_utils.py deleted file mode 100644 index 6d3ef7ab..00000000 --- a/d123/dataset/dataset_specific/wopd/waymo_map_utils/wopd_map_utils.py +++ /dev/null @@ -1,388 +0,0 @@ -from collections import defaultdict -from pathlib import Path -from typing import Dict, List, Optional - -import geopandas as gpd -import numpy as np -import numpy.typing as npt -import pandas as pd -import shapely.geometry as geom -from waymo_open_dataset import dataset_pb2 - -from d123.common.geometry.base import Point3DIndex -from d123.common.geometry.line.polylines import Polyline3D -from d123.common.geometry.units import mph_to_mps -from d123.dataset.dataset_specific.wopd.waymo_map_utils.womp_boundary_utils import extract_lane_boundaries -from d123.dataset.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType - -# TODO: -# - Implement stop signs -# - Implement speed bumps -# - Implement driveways with a different semantic type if needed -# - Implement intersections and lane group logic - -WAYMO_ROAD_LINE_CONVERSION = { - 0: RoadLineType.UNKNOWN, # aka. 
UNKNOWN - 1: RoadLineType.DASHED_WHITE, # aka. BROKEN_SINGLE_WHITE - 2: RoadLineType.SOLID_WHITE, # aka. SOLID_SINGLE_WHITE - 3: RoadLineType.DOUBLE_SOLID_WHITE, # aka. SOLID_DOUBLE_WHITE - 4: RoadLineType.DASHED_YELLOW, # aka. BROKEN_SINGLE_YELLOW - 5: RoadLineType.DOUBLE_DASH_YELLOW, # aka. BROKEN_DOUBLE_YELLOW - 6: RoadLineType.SOLID_YELLOW, # aka. SOLID_SINGLE_YELLOW - 7: RoadLineType.DOUBLE_SOLID_YELLOW, # aka. SOLID_DOUBLE_YELLOW - 8: RoadLineType.DOUBLE_DASH_YELLOW, # aka. PASSING_DOUBLE_YELLOW -} - -WAYMO_ROAD_EDGE_CONVERSION = { - 0: RoadEdgeType.UNKNOWN, - 1: RoadEdgeType.ROAD_EDGE_BOUNDARY, - 2: RoadEdgeType.ROAD_EDGE_MEDIAN, -} - - -def convert_wopd_map(frame: dataset_pb2.Frame, map_file_path: Path) -> None: - - def _extract_polyline(data) -> npt.NDArray[np.float64]: - polyline = np.array([[p.x, p.y, p.z] for p in data.polyline], dtype=np.float64) - return polyline - - def _extract_polygon(data) -> npt.NDArray[np.float64]: - polygon = np.array([[p.x, p.y, p.z] for p in data.polygon], dtype=np.float64) - assert polygon.shape[0] >= 3, "Polygon must have at least 3 points" - assert polygon.shape[1] == 3, "Polygon must have 3 coordinates (x, y, z)" - return polygon - - def _extract_neighbors(data) -> List[Dict[str, int]]: - neighbors = [] - for neighbor in data: - neighbors.append( - { - "lane_id": neighbor.feature_id, - "self_start_index": neighbor.self_start_index, - "self_end_index": neighbor.self_end_index, - "neighbor_start_index": neighbor.neighbor_start_index, - "neighbor_end_index": neighbor.neighbor_end_index, - } - ) - return neighbors - - lanes: Dict[int, npt.NDArray[np.float64]] = {} - lanes_successors = defaultdict(list) - lanes_predecessors = defaultdict(list) - lanes_speed_limit_mps: Dict[int, float] = {} - lanes_type: Dict[int, int] = {} - lanes_left_neighbors: Dict[int, List[Dict[str, int]]] = {} - lanes_right_neighbors: Dict[int, List[Dict[str, int]]] = {} - - road_lines: Dict[int, npt.NDArray[np.float64]] = {} - road_lines_type: Dict[int, RoadLineType] = {} - - road_edges: Dict[int, npt.NDArray[np.float64]] = {} - road_edges_type: Dict[int, int] = {} - - crosswalks: Dict[int, npt.NDArray[np.float64]] = {} - carparks: Dict[int, npt.NDArray[np.float64]] = {} - - for map_feature in frame.map_features: - if map_feature.HasField("lane"): - polyline = _extract_polyline(map_feature.lane) - # Ignore lanes with fewer than 2 points or an unexpected array shape - if polyline.ndim != 2 or polyline.shape[0] < 2: - continue - lanes[map_feature.id] = polyline - for lane_id_ in map_feature.lane.exit_lanes: - lanes_successors[map_feature.id].append(lane_id_) - for lane_id_ in map_feature.lane.entry_lanes: - lanes_predecessors[map_feature.id].append(lane_id_) - lanes_speed_limit_mps[map_feature.id] = mph_to_mps(map_feature.lane.speed_limit_mph) - lanes_type[map_feature.id] = map_feature.lane.type - lanes_left_neighbors[map_feature.id] = _extract_neighbors(map_feature.lane.left_neighbors) - lanes_right_neighbors[map_feature.id] = _extract_neighbors(map_feature.lane.right_neighbors) - elif map_feature.HasField("road_line"): - polyline = _extract_polyline(map_feature.road_line) - if polyline.ndim != 2 or polyline.shape[0] < 2: - continue - road_lines[map_feature.id] = polyline - road_lines_type[map_feature.id] = WAYMO_ROAD_LINE_CONVERSION.get( - map_feature.road_line.type, RoadLineType.UNKNOWN - ) - elif map_feature.HasField("road_edge"): - polyline = _extract_polyline(map_feature.road_edge) - if polyline.ndim != 2 or polyline.shape[0] < 2: - continue - road_edges[map_feature.id] = polyline -
road_edges_type[map_feature.id] = WAYMO_ROAD_EDGE_CONVERSION.get( - map_feature.road_edge.type, RoadEdgeType.UNKNOWN - ) - elif map_feature.HasField("stop_sign"): - # TODO: implement stop signs - pass - elif map_feature.HasField("crosswalk"): - crosswalks[map_feature.id] = _extract_polygon(map_feature.crosswalk) - elif map_feature.HasField("speed_bump"): - # TODO: implement speed bumps - pass - elif map_feature.HasField("driveway"): - # NOTE: Determine whether to use a different semantic type for driveways. - carparks[map_feature.id] = _extract_polygon(map_feature.driveway) - - lane_left_boundaries_3d, lane_right_boundaries_3d = extract_lane_boundaries( - lanes, lanes_successors, lanes_predecessors, road_lines, road_edges - ) - - lane_df = get_lane_df( - lanes, - lanes_successors, - lanes_predecessors, - lanes_speed_limit_mps, - lane_left_boundaries_3d, - lane_right_boundaries_3d, - lanes_type, - lanes_left_neighbors, - lanes_right_neighbors, - ) - lane_group_df = get_lane_group_df( - lanes, - lanes_successors, - lanes_predecessors, - lane_left_boundaries_3d, - lane_right_boundaries_3d, - ) - intersection_df = get_intersections_df() - crosswalk_df = get_crosswalk_df(crosswalks) - walkway_df = get_walkway_df() - carpark_df = get_carpark_df(carparks) - generic_drivable_df = get_generic_drivable_df() - road_edge_df = get_road_edge_df(road_edges, road_edges_type) - road_line_df = get_road_line_df(road_lines, road_lines_type) - - map_file_path.unlink(missing_ok=True) - if not map_file_path.parent.exists(): - map_file_path.parent.mkdir(parents=True, exist_ok=True) - - lane_df.to_file(map_file_path, layer=MapLayer.LANE.serialize(), driver="GPKG") - lane_group_df.to_file(map_file_path, layer=MapLayer.LANE_GROUP.serialize(), driver="GPKG", mode="a") - intersection_df.to_file(map_file_path, layer=MapLayer.INTERSECTION.serialize(), driver="GPKG", mode="a") - crosswalk_df.to_file(map_file_path, layer=MapLayer.CROSSWALK.serialize(), driver="GPKG", mode="a") - walkway_df.to_file(map_file_path, layer=MapLayer.WALKWAY.serialize(), driver="GPKG", mode="a") - carpark_df.to_file(map_file_path, layer=MapLayer.CARPARK.serialize(), driver="GPKG", mode="a") - generic_drivable_df.to_file(map_file_path, layer=MapLayer.GENERIC_DRIVABLE.serialize(), driver="GPKG", mode="a") - road_edge_df.to_file(map_file_path, layer=MapLayer.ROAD_EDGE.serialize(), driver="GPKG", mode="a") - road_line_df.to_file(map_file_path, layer=MapLayer.ROAD_LINE.serialize(), driver="GPKG", mode="a") - - -def get_lane_df( - lanes: Dict[int, npt.NDArray[np.float64]], - lanes_successors: Dict[int, List[int]], - lanes_predecessors: Dict[int, List[int]], - lanes_speed_limit_mps: Dict[int, float], - lanes_left_boundaries_3d: Dict[int, Polyline3D], - lanes_right_boundaries_3d: Dict[int, Polyline3D], - lanes_type: Dict[int, int], - lanes_left_neighbors: Dict[int, List[Dict[str, int]]], - lanes_right_neighbors: Dict[int, List[Dict[str, int]]], -) -> gpd.GeoDataFrame: - - ids = [] - lane_types = [] - lane_group_ids = [] - speed_limits_mps = [] - predecessor_ids = [] - successor_ids = [] - left_boundaries = [] - right_boundaries = [] - left_lane_ids = [] - right_lane_ids = [] - baseline_paths = [] - geometries = [] - - def _get_majority_neighbor(neighbors: List[Dict[str, int]]) -> Optional[int]: - if len(neighbors) == 0: - return None - length = { - neighbor["lane_id"]: neighbor["self_end_index"] - neighbor["self_start_index"] for neighbor in neighbors - } - return str(max(length, key=length.get)) - - for lane_id, lane_centerline_array in lanes.items(): 
- if lane_id not in lanes_left_boundaries_3d or lane_id not in lanes_right_boundaries_3d: - continue - lane_centerline = Polyline3D.from_array(lane_centerline_array) - lane_speed_limit_mps = lanes_speed_limit_mps[lane_id] if lanes_speed_limit_mps[lane_id] > 0.0 else None - - ids.append(lane_id) - lane_types.append(lanes_type[lane_id]) - lane_group_ids.append([lane_id]) - speed_limits_mps.append(lane_speed_limit_mps) - predecessor_ids.append(lanes_predecessors[lane_id]) - successor_ids.append(lanes_successors[lane_id]) - left_boundaries.append(lanes_left_boundaries_3d[lane_id].linestring) - right_boundaries.append(lanes_right_boundaries_3d[lane_id].linestring) - left_lane_ids.append(_get_majority_neighbor(lanes_left_neighbors[lane_id])) - right_lane_ids.append(_get_majority_neighbor(lanes_right_neighbors[lane_id])) - baseline_paths.append(lane_centerline.linestring) - - geometry = geom.Polygon( - np.vstack( - [ - lanes_left_boundaries_3d[lane_id].array[:, :2], - lanes_right_boundaries_3d[lane_id].array[:, :2][::-1], - ] - ) - ) - geometries.append(geometry) - - data = pd.DataFrame( - { - "id": ids, - "lane_type": lane_types, - "lane_group_id": lane_group_ids, - "speed_limit_mps": speed_limits_mps, - "predecessor_ids": predecessor_ids, - "successor_ids": successor_ids, - "left_boundary": left_boundaries, - "right_boundary": right_boundaries, - "left_lane_id": left_lane_ids, - "right_lane_id": right_lane_ids, - "baseline_path": baseline_paths, - } - ) - - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - -def get_lane_group_df( - lanes: Dict[int, npt.NDArray[np.float64]], - lanes_successors: Dict[int, List[int]], - lanes_predecessors: Dict[int, List[int]], - lanes_left_boundaries_3d: Dict[int, Polyline3D], - lanes_right_boundaries_3d: Dict[int, Polyline3D], -) -> gpd.GeoDataFrame: - - ids = [] - lane_ids = [] - intersection_ids = [] - predecessor_lane_group_ids = [] - successor_lane_group_ids = [] - left_boundaries = [] - right_boundaries = [] - geometries = [] - - # NOTE: WOPD does not provide lane groups, so we create a lane group for each lane. - for lane_id in lanes.keys(): - if lane_id not in lanes_left_boundaries_3d or lane_id not in lanes_right_boundaries_3d: - continue - ids.append(lane_id) - lane_ids.append([lane_id]) - intersection_ids.append(None) # WOPD does not provide intersections - predecessor_lane_group_ids.append(lanes_predecessors[lane_id]) - successor_lane_group_ids.append(lanes_successors[lane_id]) - left_boundaries.append(lanes_left_boundaries_3d[lane_id].linestring) - right_boundaries.append(lanes_right_boundaries_3d[lane_id].linestring) - geometry = geom.Polygon( - np.vstack( - [ - lanes_left_boundaries_3d[lane_id].array[:, :2], - lanes_right_boundaries_3d[lane_id].array[:, :2][::-1], - ] - ) - ) - geometries.append(geometry) - - data = pd.DataFrame( - { - "id": ids, - "lane_ids": lane_ids, - "intersection_id": intersection_ids, - "predecessor_lane_group_ids": predecessor_lane_group_ids, - "successor_lane_group_ids": successor_lane_group_ids, - "left_boundary": left_boundaries, - "right_boundary": right_boundaries, - } - ) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - -def get_intersections_df() -> gpd.GeoDataFrame: - ids = [] - lane_group_ids = [] - geometries = [] - - # NOTE: WOPD does not provide intersections, so we create an empty DataFrame. 
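Both WOPD dataframe builders close each lane polygon the same way: stack the left boundary onto the reversed right boundary so the ring stays consistently oriented. A tiny worked sketch with made-up coordinates:

```python
import numpy as np
import shapely.geometry as geom

left = np.array([[0.0, 1.0], [10.0, 1.0]])     # left boundary, in driving order
right = np.array([[0.0, -1.0], [10.0, -1.0]])  # right boundary, in driving order

# Reverse the right boundary so the ring runs left-forward, right-backward.
lane_polygon = geom.Polygon(np.vstack([left, right[::-1]]))
assert lane_polygon.is_valid and lane_polygon.area == 20.0
```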
- data = pd.DataFrame({"id": ids, "lane_group_ids": lane_group_ids}) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - -def get_carpark_df(carparks) -> gpd.GeoDataFrame: - ids = list(carparks.keys()) - outlines = [geom.LineString(outline) for outline in carparks.values()] - geometries = [geom.Polygon(outline[..., Point3DIndex.XY]) for outline in carparks.values()] - - data = pd.DataFrame({"id": ids, "outline": outlines}) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - -def get_walkway_df() -> gpd.GeoDataFrame: - ids = [] - geometries = [] - - # NOTE: WOPD does not provide walkways, so we create an empty DataFrame. - data = pd.DataFrame({"id": ids}) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - -def get_crosswalk_df(crosswalks: Dict[int, npt.NDArray[np.float64]]) -> gpd.GeoDataFrame: - ids = list(crosswalks.keys()) - outlines = [geom.LineString(outline) for outline in crosswalks.values()] - geometries = [geom.Polygon(outline[..., Point3DIndex.XY]) for outline in crosswalks.values()] - - data = pd.DataFrame({"id": ids, "outline": outlines}) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - -def get_generic_drivable_df() -> gpd.GeoDataFrame: - ids = [] - geometries = [] - - # NOTE: WOPD does not provide generic drivable areas, so we create an empty DataFrame. - data = pd.DataFrame({"id": ids}) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - -def get_road_edge_df( - road_edges: Dict[int, npt.NDArray[np.float64]], road_edges_type: Dict[int, RoadEdgeType] -) -> gpd.GeoDataFrame: - ids = list(road_edges.keys()) - geometries = [Polyline3D.from_array(road_edge).linestring for road_edge in road_edges.values()] - - data = pd.DataFrame( - { - "id": ids, - "road_edge_type": [int(road_edge_type) for road_edge_type in road_edges_type.values()], - } - ) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - -def get_road_line_df( - road_lines: Dict[int, npt.NDArray[np.float64]], road_lines_type: Dict[int, RoadLineType] -) -> gpd.GeoDataFrame: - ids = list(road_lines.keys()) - geometries = [Polyline3D.from_array(road_edge).linestring for road_edge in road_lines.values()] - - data = pd.DataFrame( - { - "id": ids, - "road_line_type": [int(road_line_type) for road_line_type in road_lines_type.values()], - } - ) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf diff --git a/d123/dataset/dataset_specific/wopd/wopd_data_converter.py b/d123/dataset/dataset_specific/wopd/wopd_data_converter.py deleted file mode 100644 index f1145c8e..00000000 --- a/d123/dataset/dataset_specific/wopd/wopd_data_converter.py +++ /dev/null @@ -1,546 +0,0 @@ -import gc -import hashlib -import json -import os -from dataclasses import asdict -from functools import partial -from pathlib import Path -from typing import Any, Dict, Final, List, Literal, Tuple, Union - -import numpy as np -import numpy.typing as npt -import pyarrow as pa -import tensorflow as tf -from pyquaternion import Quaternion -from waymo_open_dataset import dataset_pb2 - -from d123.common.datatypes.detection.detection import TrafficLightStatus -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar_index import WopdLidarIndex -from d123.common.datatypes.vehicle_state.ego_state 
import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index -from d123.common.datatypes.vehicle_state.vehicle_parameters import get_wopd_chrysler_pacifica_parameters -from d123.common.geometry.base import Point3D, StateSE3 -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3Index -from d123.common.geometry.constants import DEFAULT_PITCH, DEFAULT_ROLL -from d123.common.geometry.transform.se3 import convert_relative_to_absolute_se3_array, get_rotation_matrix -from d123.common.geometry.vector import Vector3D, Vector3DIndex -from d123.common.multithreading.worker_utils import WorkerPool, worker_map -from d123.dataset.arrow.helper import open_arrow_table, write_arrow_table -from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig, RawDataConverter -from d123.dataset.dataset_specific.wopd.waymo_map_utils.wopd_map_utils import convert_wopd_map -from d123.dataset.dataset_specific.wopd.wopd_utils import parse_range_image_and_camera_projection -from d123.dataset.logs.log_metadata import LogMetadata - -os.environ["CUDA_VISIBLE_DEVICES"] = "-1" -D123_MAPS_ROOT = Path(os.environ.get("D123_MAPS_ROOT")) - -TARGET_DT: Final[float] = 0.1 -SORT_BY_TIMESTAMP: Final[bool] = False - -NUPLAN_TRAFFIC_STATUS_DICT: Final[Dict[str, TrafficLightStatus]] = { - "green": TrafficLightStatus.GREEN, - "red": TrafficLightStatus.RED, - "unknown": TrafficLightStatus.UNKNOWN, -} - -# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/label.proto#L63 -WOPD_DETECTION_NAME_DICT: Dict[int, DetectionType] = { - 0: DetectionType.GENERIC_OBJECT, # TYPE_UNKNOWN - 1: DetectionType.VEHICLE, # TYPE_VEHICLE - 2: DetectionType.PEDESTRIAN, # TYPE_PEDESTRIAN - 3: DetectionType.SIGN, # TYPE_SIGN - 4: DetectionType.BICYCLE, # TYPE_CYCLIST -} - -# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L50 -WOPD_CAMERA_TYPES: Dict[int, CameraType] = { - 1: CameraType.CAM_F0, # front_camera - 2: CameraType.CAM_L0, # front_left_camera - 3: CameraType.CAM_R0, # front_right_camera - 4: CameraType.CAM_L1, # left_camera - 5: CameraType.CAM_R1, # right_camera -} - -# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L66 -WOPD_LIDAR_TYPES: Dict[int, LiDARType] = { - 0: LiDARType.LIDAR_UNKNOWN, # UNKNOWN - 1: LiDARType.LIDAR_TOP, # TOP - 2: LiDARType.LIDAR_FRONT, # FRONT - 3: LiDARType.LIDAR_SIDE_LEFT, # SIDE_LEFT - 4: LiDARType.LIDAR_SIDE_RIGHT, # SIDE_RIGHT - 5: LiDARType.LIDAR_BACK, # REAR -} - -WOPD_DATA_ROOT = Path("/media/nvme1/waymo_perception") # TODO: set as environment variable - -# Whether to use ego or zero roll and pitch values for bounding box detections (after global conversion) -DETECTION_ROLL_PITCH: Final[Literal["ego", "zero"]] = "zero" - - -def create_token(input_data: str) -> str: - # TODO: Refactor this function. - # TODO: Add a general function to create tokens from arbitrary data. - if isinstance(input_data, str): - input_data = input_data.encode("utf-8") - - hash_obj = hashlib.sha256(input_data) - return hash_obj.hexdigest()[:16] - - -class WOPDDataConverter(RawDataConverter): - def __init__( - self, - splits: List[str], - log_path: Union[Path, str], - data_converter_config: DataConverterConfig, - ) -> None: - super().__init__(data_converter_config) - for split in splits: - assert ( - split in self.get_available_splits() - ), f"Split {split} is not available. 
Available splits: {self.available_splits}" - - self._splits: List[str] = splits - self._tf_records_per_split: Dict[str, List[Path]] = self._collect_tf_records() - self._target_dt: float = 0.1 - - def _collect_tf_records(self) -> Dict[str, List[Path]]: - tf_records_per_split: Dict[str, List[Path]] = {} - - for split in self._splits: - if split in ["wopd_train"]: - log_path = WOPD_DATA_ROOT / "training" - else: - raise ValueError(f"Split {split} is not supported.") - - log_paths = [log_file for log_file in log_path.glob("*.tfrecord")] - tf_records_per_split[split] = log_paths - - return tf_records_per_split - - def get_available_splits(self) -> List[str]: - # TODO: Add more splits if available - return [ - "wopd_train", - ] - - def convert_maps(self, worker: WorkerPool) -> None: - log_args = [ - { - "tf_record": tf_record, - "split": split, - } - for split, tf_record_paths in self._tf_records_per_split.items() - for tf_record in tf_record_paths - ] - - worker_map( - worker, - partial(convert_wopd_tfrecord_map_to_gpkg, data_converter_config=self.data_converter_config), - log_args, - ) - - def convert_logs(self, worker: WorkerPool) -> None: - log_args = [ - { - "tf_record": tf_record, - "split": split, - } - for split, tf_record_paths in self._tf_records_per_split.items() - for tf_record in tf_record_paths - ] - - worker_map( - worker, - partial(convert_wopd_tfrecord_log_to_arrow, data_converter_config=self.data_converter_config), - log_args, - ) - - -def convert_wopd_tfrecord_map_to_gpkg( - args: List[Dict[str, Union[List[str], List[Path]]]], data_converter_config: DataConverterConfig -) -> List[Any]: - - for log_info in args: - tf_record_path: Path = log_info["tf_record"] - split: str = log_info["split"] - - if not tf_record_path.exists(): - raise FileNotFoundError(f"TFRecord path {tf_record_path} does not exist.") - - dataset = tf.data.TFRecordDataset(tf_record_path, compression_type="") - for data in dataset: - initial_frame = dataset_pb2.Frame() - initial_frame.ParseFromString(data.numpy()) - break - log_name = str(initial_frame.context.name) - map_file_path = D123_MAPS_ROOT / split / f"{log_name}.gpkg" - - if data_converter_config.force_map_conversion or not map_file_path.exists(): - map_file_path.unlink(missing_ok=True) - convert_wopd_map(initial_frame, map_file_path) - return [] - - -def convert_wopd_tfrecord_log_to_arrow( - args: List[Dict[str, Union[List[str], List[Path]]]], data_converter_config: DataConverterConfig -) -> List[Any]: - for log_info in args: - try: - - tf_record_path: Path = log_info["tf_record"] - split: str = log_info["split"] - - if not tf_record_path.exists(): - raise FileNotFoundError(f"TFRecord path {tf_record_path} does not exist.") - - dataset = tf.data.TFRecordDataset(tf_record_path, compression_type="") - for data in dataset: - initial_frame = dataset_pb2.Frame() - initial_frame.ParseFromString(data.numpy()) - break - - log_name = str(initial_frame.context.name) - log_file_path = data_converter_config.output_path / split / f"{log_name}.arrow" - - if data_converter_config.force_log_conversion or not log_file_path.exists(): - log_file_path.unlink(missing_ok=True) - if not log_file_path.parent.exists(): - log_file_path.parent.mkdir(parents=True, exist_ok=True) - - metadata = LogMetadata( - dataset="wopd", - log_name=log_name, - location=None, # TODO: implement map name - timestep_seconds=TARGET_DT, # TODO: Check if correct. 
Maybe not hardcode - map_has_z=True, - ) - vehicle_parameters = get_wopd_chrysler_pacifica_parameters() - camera_metadata = get_wopd_camera_metadata(initial_frame, data_converter_config) - lidar_metadata = get_wopd_lidar_metadata(initial_frame, data_converter_config) - - schema_column_list = [ - ("token", pa.string()), - ("timestamp", pa.int64()), - ("detections_state", pa.list_(pa.list_(pa.float64(), len(BoundingBoxSE3Index)))), - ("detections_velocity", pa.list_(pa.list_(pa.float64(), len(Vector3DIndex)))), - ("detections_token", pa.list_(pa.string())), - ("detections_type", pa.list_(pa.int16())), - ("ego_states", pa.list_(pa.float64(), len(EgoStateSE3Index))), - ("traffic_light_ids", pa.list_(pa.int64())), - ("traffic_light_types", pa.list_(pa.int16())), - ("scenario_tag", pa.list_(pa.string())), - ("route_lane_group_ids", pa.list_(pa.int64())), - ] - # TODO: Adjust how cameras are added - if data_converter_config.camera_store_option is not None: - for camera_type in camera_metadata.keys(): - if data_converter_config.camera_store_option == "path": - raise NotImplementedError("Path camera storage is not implemented.") - elif data_converter_config.camera_store_option == "binary": - schema_column_list.append((camera_type.serialize(), pa.binary())) - schema_column_list.append( - (f"{camera_type.serialize()}_extrinsic", pa.list_(pa.float64(), 4 * 4)) - ) - - if data_converter_config.lidar_store_option is not None: - for lidar_type in lidar_metadata.keys(): - if data_converter_config.lidar_store_option == "path": - raise NotImplementedError("Filepath lidar storage is not implemented.") - elif data_converter_config.lidar_store_option == "binary": - schema_column_list.append( - (lidar_type.serialize(), pa.list_(pa.list_(pa.float32(), len(WopdLidarIndex)))) - ) - - recording_schema = pa.schema(schema_column_list) - recording_schema = recording_schema.with_metadata( - { - "log_metadata": json.dumps(asdict(metadata)), - "vehicle_parameters": json.dumps(asdict(vehicle_parameters)), - "camera_metadata": camera_metadata_dict_to_json(camera_metadata), - "lidar_metadata": lidar_metadata_dict_to_json(lidar_metadata), - } - ) - - _write_recording_table(dataset, recording_schema, log_file_path, tf_record_path, data_converter_config) - - del recording_schema, vehicle_parameters, dataset - except Exception as e: - import traceback - - print(f"Error processing log {str(tf_record_path)}: {e}") - traceback.print_exc() - gc.collect() - return [] - - -def get_wopd_camera_metadata( - initial_frame: dataset_pb2.Frame, data_converter_config: DataConverterConfig -) -> Dict[CameraType, CameraMetadata]: - - cam_metadatas: Dict[CameraType, CameraMetadata] = {} - if data_converter_config.camera_store_option is not None: - for calibration in initial_frame.context.camera_calibrations: - camera_type = WOPD_CAMERA_TYPES[calibration.name] - - # https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L96 - # https://github.com/waymo-research/waymo-open-dataset/issues/834#issuecomment-2134995440 - fx, fy, cx, cy, k1, k2, p1, p2, k3 = calibration.intrinsic - _intrinsics = np.array([[fx, 0, cx], [0, fy, cy], [0, 0, 1]]) - _distortions = np.array([k1, k2, p1, p2, k3]) - - if camera_type in WOPD_CAMERA_TYPES.values(): - cam_metadatas[camera_type] = CameraMetadata( - camera_type=camera_type, - width=calibration.width, - height=calibration.height, - intrinsic=_intrinsics, - distortion=_distortions, - ) - - return cam_metadatas - - -def get_wopd_lidar_metadata( - initial_frame: 
dataset_pb2.Frame, data_converter_config: DataConverterConfig -) -> Dict[LiDARType, LiDARMetadata]: - - laser_metadatas: Dict[LiDARType, LiDARMetadata] = {} - if data_converter_config.lidar_store_option is not None: - for laser_calibration in initial_frame.context.laser_calibrations: - lidar_type = WOPD_LIDAR_TYPES[laser_calibration.name] - extrinsic = ( - np.array(laser_calibration.extrinsic.transform, dtype=np.float64).reshape(4, 4) - if laser_calibration.extrinsic - else None - ) - laser_metadatas[lidar_type] = LiDARMetadata( - lidar_type=lidar_type, - lidar_index=WopdLidarIndex, - extrinsic=extrinsic, - ) - - return laser_metadatas - - -def _write_recording_table( - dataset: tf.data.TFRecordDataset, - recording_schema: pa.schema, - log_file_path: Path, - tf_record_path: Path, - data_converter_config: DataConverterConfig, -) -> None: - - # with pa.ipc.new_stream(str(log_file_path), recording_schema) as writer: - with pa.OSFile(str(log_file_path), "wb") as sink: - with pa.ipc.new_file(sink, recording_schema) as writer: - - dataset = tf.data.TFRecordDataset(tf_record_path, compression_type="") - for frame_idx, data in enumerate(dataset): - frame = dataset_pb2.Frame() - frame.ParseFromString(data.numpy()) - - (detections_state, detections_velocity, detections_token, detections_types) = _extract_detections(frame) - - # TODO: Implement traffic light extraction - traffic_light_ids = [] - traffic_light_types = [] - - # TODO: Implement detections - row_data = { - "token": [create_token(f"{frame.context.name}_{int(frame.timestamp_micros)}")], - "timestamp": [int(frame.timestamp_micros)], - "detections_state": [detections_state], - "detections_velocity": [detections_velocity], - "detections_token": [detections_token], - "detections_type": [detections_types], - "ego_states": [_extract_ego_state(frame)], - "traffic_light_ids": [traffic_light_ids], - "traffic_light_types": [traffic_light_types], - "scenario_tag": ["unknown"], - "route_lane_group_ids": [None], - } - - # TODO: Implement lidar extraction - if data_converter_config.lidar_store_option is not None: - lidar_data_dict = _extract_lidar(frame, data_converter_config) - for lidar_type, lidar_data in lidar_data_dict.items(): - if lidar_data is not None: - row_data[lidar_type.serialize()] = [lidar_data.tolist()] - else: - row_data[lidar_type.serialize()] = [None] - - if data_converter_config.camera_store_option is not None: - camera_data_dict = _extract_camera(frame, data_converter_config) - for camera_type, camera_data in camera_data_dict.items(): - if camera_data is not None: - row_data[camera_type.serialize()] = [camera_data[0]] - row_data[f"{camera_type.serialize()}_extrinsic"] = [camera_data[1]] - else: - row_data[camera_type.serialize()] = [None] - row_data[f"{camera_type.serialize()}_extrinsic"] = [None] - - batch = pa.record_batch(row_data, schema=recording_schema) - writer.write_batch(batch) - del batch, row_data, detections_state, detections_velocity, detections_token, detections_types - - if SORT_BY_TIMESTAMP: - recording_table = open_arrow_table(log_file_path) - recording_table = recording_table.sort_by([("timestamp", "ascending")]) - write_arrow_table(recording_table, log_file_path) - - -def _get_ego_pose_se3(frame: dataset_pb2.Frame) -> StateSE3: - ego_pose_matrix = np.array(frame.pose.transform).reshape(4, 4) - yaw, pitch, roll = Quaternion(matrix=ego_pose_matrix[:3, :3]).yaw_pitch_roll - ego_point_3d = Point3D.from_array(ego_pose_matrix[:3, 3]) - - return StateSE3(x=ego_point_3d.x, y=ego_point_3d.y, z=ego_point_3d.z, roll=roll, 
pitch=pitch, yaw=yaw)
-
-
-def _extract_detections(frame: dataset_pb2.Frame) -> Tuple[List[List[float]], List[List[float]], List[str], List[int]]:
-    ego_rear_axle = _get_ego_pose_se3(frame)
-
-    num_detections = len(frame.laser_labels)
-    detections_state = np.zeros((num_detections, len(BoundingBoxSE3Index)), dtype=np.float64)
-    detections_velocity = np.zeros((num_detections, len(Vector3DIndex)), dtype=np.float64)
-    detections_token: List[str] = []
-    detections_types: List[int] = []
-
-    # NOTE: Use a separate write index so that skipped labels do not leave zero-filled
-    # rows whose tokens/types are missing from the lists below.
-    valid_idx = 0
-    for detection in frame.laser_labels:
-        if detection.type not in WOPD_DETECTION_NAME_DICT:
-            continue
-
-        # 1. SE3 bounding box
-        detections_state[valid_idx, BoundingBoxSE3Index.X] = detection.box.center_x
-        detections_state[valid_idx, BoundingBoxSE3Index.Y] = detection.box.center_y
-        detections_state[valid_idx, BoundingBoxSE3Index.Z] = detection.box.center_z
-        detections_state[valid_idx, BoundingBoxSE3Index.ROLL] = DEFAULT_ROLL  # not provided in WOPD
-        detections_state[valid_idx, BoundingBoxSE3Index.PITCH] = DEFAULT_PITCH  # not provided in WOPD
-        detections_state[valid_idx, BoundingBoxSE3Index.YAW] = detection.box.heading
-        detections_state[valid_idx, BoundingBoxSE3Index.LENGTH] = detection.box.length
-        detections_state[valid_idx, BoundingBoxSE3Index.WIDTH] = detection.box.width
-        detections_state[valid_idx, BoundingBoxSE3Index.HEIGHT] = detection.box.height
-
-        # 2. Velocity (TODO: check whether the velocity needs to be rotated)
-        detections_velocity[valid_idx] = Vector3D(
-            x=detection.metadata.speed_x,
-            y=detection.metadata.speed_y,
-            z=detection.metadata.speed_z,
-        ).array
-
-        # 3. Type and track token
-        detections_token.append(str(detection.id))
-        detections_types.append(int(WOPD_DETECTION_NAME_DICT[detection.type]))
-        valid_idx += 1
-
-    detections_state = detections_state[:valid_idx]
-    detections_velocity = detections_velocity[:valid_idx]
-
-    detections_state[:, BoundingBoxSE3Index.STATE_SE3] = convert_relative_to_absolute_se3_array(
-        origin=ego_rear_axle, se3_array=detections_state[:, BoundingBoxSE3Index.STATE_SE3]
-    )
-    if DETECTION_ROLL_PITCH == "ego":
-        pass
-    elif DETECTION_ROLL_PITCH == "zero":
-        detections_state[:, BoundingBoxSE3Index.ROLL] = DEFAULT_ROLL
-        detections_state[:, BoundingBoxSE3Index.PITCH] = DEFAULT_PITCH
-    else:
-        raise ValueError(f"Invalid DETECTION_ROLL_PITCH value: {DETECTION_ROLL_PITCH}.
Must be 'ego' or 'zero'.") - - return detections_state.tolist(), detections_velocity.tolist(), detections_token, detections_types - - -def _extract_ego_state(frame: dataset_pb2.Frame) -> List[float]: - rear_axle_pose = _get_ego_pose_se3(frame) - - vehicle_parameters = get_wopd_chrysler_pacifica_parameters() - # FIXME: Find dynamic state in waymo open perception dataset - # https://github.com/waymo-research/waymo-open-dataset/issues/55#issuecomment-546152290 - dynamic_state = DynamicStateSE3( - velocity=Vector3D(*np.zeros(3)), - acceleration=Vector3D(*np.zeros(3)), - angular_velocity=Vector3D(*np.zeros(3)), - ) - - return EgoStateSE3.from_rear_axle( - rear_axle_se3=rear_axle_pose, - dynamic_state_se3=dynamic_state, - vehicle_parameters=vehicle_parameters, - time_point=None, - ).array.tolist() - - -def _extract_traffic_lights() -> Tuple[List[int], List[int]]: - pass - - -def _extract_camera( - frame: dataset_pb2.Frame, data_converter_config: DataConverterConfig -) -> Dict[CameraType, Union[str, bytes]]: - - camera_dict: Dict[str, Union[str, bytes]] = {} # TODO: Fix wrong type hint - np.array(frame.pose.transform).reshape(4, 4) - - # NOTE: The extrinsic matrix in frame.context.camera_calibration is fixed to model the ego to camera transformation. - # The poses in frame.images[idx] are the motion compensated ego poses when the camera triggers. - # - - context_extrinsic: Dict[str, npt.NDArray] = {} - for calibration in frame.context.camera_calibrations: - camera_type = WOPD_CAMERA_TYPES[calibration.name] - - transform = np.array(calibration.extrinsic.transform).reshape(4, 4) - - # FIXME: This is an ugly hack to convert to uniform camera convention. - flip_camera = get_rotation_matrix( - StateSE3( - x=0.0, - y=0.0, - z=0.0, - roll=np.deg2rad(0.0), - pitch=np.deg2rad(90.0), - yaw=np.deg2rad(-90.0), - ) - ) - transform[:3, :3] = transform[:3, :3] @ flip_camera - context_extrinsic[camera_type] = transform - - for image_proto in frame.images: - camera_type = WOPD_CAMERA_TYPES[image_proto.name] - - np.array(image_proto.pose.transform).reshape(4, 4) - camera_bytes = image_proto.image - - # # Compute the transform from ego_global_transform to ego_at_camera_transform - # # ego_global_transform * T = ego_at_camera_transform => T = ego_global_transform^-1 * ego_at_camera_transform - # np.linalg.inv(ego_global_transform) @ ego_at_trigger_transform - - # TODO: figure out the correct transform - camera_dict[camera_type] = camera_bytes, context_extrinsic[camera_type].flatten().tolist() - - return camera_dict - - -def _extract_lidar( - frame: dataset_pb2.Frame, data_converter_config: DataConverterConfig -) -> Dict[LiDARType, npt.NDArray[np.float32]]: - from waymo_open_dataset.utils import frame_utils - - assert data_converter_config.lidar_store_option == "binary", "Lidar store option must be 'binary' for WOPD." 
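# NOTE (assumption, based on the public waymo-open-dataset utilities, not verified
# against this revision): convert_range_image_to_point_cloud() with
# keep_polar_features=True returns one [N, 6] array per laser, laid out as
# (range, intensity, elongation, x, y, z); WopdLidarIndex is assumed to mirror that
# layout. Caution: the helper orders its output by sorted laser name, so indexing it
# via enumerate(frame.lasers) below only matches if frame.lasers follows the same order.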
- (range_images, camera_projections, _, range_image_top_pose) = parse_range_image_and_camera_projection(frame) - - points, cp_points = frame_utils.convert_range_image_to_point_cloud( - frame=frame, - range_images=range_images, - camera_projections=camera_projections, - range_image_top_pose=range_image_top_pose, - keep_polar_features=True, - ) - - lidar_data: Dict[LiDARType, npt.NDArray[np.float32]] = {} - for lidar_idx, frame_lidar in enumerate(frame.lasers): - lidar_type = WOPD_LIDAR_TYPES[frame_lidar.name] - lidar_data[lidar_type] = np.array(points[lidar_idx], dtype=np.float32) - - return lidar_data diff --git a/d123/dataset/logs/log_metadata.py b/d123/dataset/logs/log_metadata.py deleted file mode 100644 index 818bc495..00000000 --- a/d123/dataset/logs/log_metadata.py +++ /dev/null @@ -1,21 +0,0 @@ -from dataclasses import dataclass - -import d123 - -# TODO: move this files and dataclass to a more appropriate place. - - -@dataclass -class LogMetadata: - - # TODO: add - # - split - # - global/local map - - dataset: str - log_name: str - location: str - timestep_seconds: float - - map_has_z: bool - version: str = str(d123.__version__) diff --git a/d123/dataset/scene/abstract_scene.py b/d123/dataset/scene/abstract_scene.py deleted file mode 100644 index 4877fc83..00000000 --- a/d123/dataset/scene/abstract_scene.py +++ /dev/null @@ -1,117 +0,0 @@ -from __future__ import annotations - -import abc -from dataclasses import dataclass -from typing import List, Optional - -from d123.common.datatypes.detection.detection import BoxDetectionWrapper, TrafficLightDetectionWrapper -from d123.common.datatypes.recording.detection_recording import DetectionRecording -from d123.common.datatypes.sensor.camera import Camera, CameraType -from d123.common.datatypes.sensor.lidar import LiDAR, LiDARType -from d123.common.datatypes.time.time_point import TimePoint -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3 -from d123.dataset.logs.log_metadata import LogMetadata -from d123.dataset.maps.abstract_map import AbstractMap - -# TODO: Remove or improve open/close dynamic of Scene object. 
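The TODO above flags the manual open()/close() lifecycle. One possible improvement, sketched under the assumption that only the open()/close() contract defined below is available (the scene_session helper is hypothetical, not part of the library):

from contextlib import contextmanager
from typing import Iterator


@contextmanager
def scene_session(scene: "AbstractScene") -> Iterator["AbstractScene"]:
    # Open the scene's backing resources and guarantee they are released,
    # even if the caller raises mid-iteration.
    scene.open()
    try:
        yield scene
    finally:
        scene.close()

Callers could then write `with scene_session(scene) as s: ...` instead of pairing open() and close() by hand.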
-
-
-class AbstractScene(abc.ABC):
-
-    @property
-    @abc.abstractmethod
-    def log_name(self) -> str:
-        raise NotImplementedError
-
-    @property
-    @abc.abstractmethod
-    def token(self) -> str:
-        raise NotImplementedError
-
-    @property
-    @abc.abstractmethod
-    def log_metadata(self) -> LogMetadata:
-        raise NotImplementedError
-
-    @property
-    @abc.abstractmethod
-    def available_camera_types(self) -> List[CameraType]:
-        raise NotImplementedError
-
-    @property
-    @abc.abstractmethod
-    def available_lidar_types(self) -> List[LiDARType]:
-        raise NotImplementedError
-
-    @property
-    @abc.abstractmethod
-    def map_api(self) -> Optional[AbstractMap]:
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def get_number_of_iterations(self) -> int:
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def get_number_of_history_iterations(self) -> int:
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def get_timepoint_at_iteration(self, iteration: int) -> TimePoint:
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def get_ego_state_at_iteration(self, iteration: int) -> EgoStateSE3:
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def get_box_detections_at_iteration(self, iteration: int) -> BoxDetectionWrapper:
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def get_traffic_light_detections_at_iteration(self, iteration: int) -> TrafficLightDetectionWrapper:
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def get_detection_recording_at_iteration(self, iteration: int) -> DetectionRecording:
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def get_route_lane_group_ids(self, iteration: int) -> List[int]:
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def get_camera_at_iteration(self, iteration: int, camera_type: CameraType) -> Camera:
-        raise NotImplementedError
-
-    @abc.abstractmethod
-    def get_lidar_at_iteration(self, iteration: int, lidar_type: LiDARType) -> LiDAR:
-        raise NotImplementedError
-
-    def open(self) -> None:
-        pass
-
-    def close(self) -> None:
-        pass
-
-
-# TODO: Move to a more appropriate place.
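For orientation, a minimal usage sketch of the interface above; collect_ego_trajectory is hypothetical and assumes this module's imports plus any concrete implementation (e.g. the ArrowScene deleted later in this diff). The SceneExtractionInfo dataclass that the TODO refers to follows directly below.

def collect_ego_trajectory(scene: AbstractScene) -> List[EgoStateSE3]:
    # Walk the scene frame by frame; iteration 0 is the first non-history frame.
    return [scene.get_ego_state_at_iteration(i) for i in range(scene.get_number_of_iterations())]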
-@dataclass(frozen=True)
-class SceneExtractionInfo:
-
-    initial_idx: int
-    duration_s: float
-    history_s: float
-    iteration_duration_s: float
-
-    @property
-    def number_of_iterations(self) -> int:
-        return round(self.duration_s / self.iteration_duration_s)
-
-    @property
-    def number_of_history_iterations(self) -> int:
-        return round(self.history_s / self.iteration_duration_s)
-
-    @property
-    def end_idx(self) -> int:
-        return self.initial_idx + self.number_of_iterations
diff --git a/d123/dataset/scene/arrow_scene.py b/d123/dataset/scene/arrow_scene.py
deleted file mode 100644
index ecd68111..00000000
--- a/d123/dataset/scene/arrow_scene.py
+++ /dev/null
@@ -1,210 +0,0 @@
-import json
-from pathlib import Path
-from typing import Dict, List, Optional, Tuple, Union
-
-import pyarrow as pa
-
-from d123.common.datatypes.detection.detection import BoxDetectionWrapper, TrafficLightDetectionWrapper
-from d123.common.datatypes.recording.detection_recording import DetectionRecording
-from d123.common.datatypes.sensor.camera import Camera, CameraMetadata, CameraType, camera_metadata_dict_from_json
-from d123.common.datatypes.sensor.lidar import LiDAR, LiDARMetadata, LiDARType, lidar_metadata_dict_from_json
-from d123.common.datatypes.time.time_point import TimePoint
-from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3
-from d123.common.datatypes.vehicle_state.vehicle_parameters import VehicleParameters
-from d123.dataset.arrow.conversion import (
-    get_box_detections_from_arrow_table,
-    get_camera_from_arrow_table,
-    get_ego_vehicle_state_from_arrow_table,
-    get_lidar_from_arrow_table,
-    get_timepoint_from_arrow_table,
-    get_traffic_light_detections_from_arrow_table,
-)
-from d123.dataset.arrow.helper import open_arrow_table
-from d123.dataset.logs.log_metadata import LogMetadata
-from d123.dataset.maps.abstract_map import AbstractMap
-from d123.dataset.maps.gpkg.gpkg_map import get_local_map_api, get_map_api_from_names
-from d123.dataset.scene.abstract_scene import AbstractScene, SceneExtractionInfo
-
-# TODO: Remove or improve open/close dynamic of Scene object.
-
-
-def _get_scene_data(
-    arrow_file_path: Union[Path, str],
-) -> Tuple[LogMetadata, VehicleParameters, Dict[CameraType, CameraMetadata], Dict[LiDARType, LiDARMetadata]]:
-    """
-    Extracts the log metadata, vehicle parameters, and camera/LiDAR metadata from the arrow file.
-    """
-    # TODO: consider a better way to read metadata, instead of loading the entire table.
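# NOTE (untested sketch): the TODO above could be addressed by reading only the
# file's schema instead of materializing the full table, e.g.
#   with pa.memory_map(str(arrow_file_path), "r") as source:
#       schema = pa.ipc.open_file(source).schema
# The same schema.metadata keys used below would then be available without a table load.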
- table = open_arrow_table(arrow_file_path) - metadata = LogMetadata(**json.loads(table.schema.metadata[b"log_metadata"].decode())) - vehicle_parameters = VehicleParameters(**json.loads(table.schema.metadata[b"vehicle_parameters"].decode())) - - if b"camera_metadata" in table.schema.metadata: - camera_metadata = camera_metadata_dict_from_json(table.schema.metadata[b"camera_metadata"].decode()) - else: - camera_metadata = {} - - if b"lidar_metadata" in table.schema.metadata: - lidar_metadata = lidar_metadata_dict_from_json(table.schema.metadata[b"lidar_metadata"].decode()) - else: - lidar_metadata = {} - - del table - return metadata, vehicle_parameters, camera_metadata, lidar_metadata - - -class ArrowScene(AbstractScene): - def __init__( - self, - arrow_file_path: Union[Path, str], - scene_extraction_info: Optional[SceneExtractionInfo] = None, - ) -> None: - - self._recording_table: pa.Table = None - - ( - _metadata, - _vehicle_parameters, - _camera_metadata, - _lidar_metadata, - ) = _get_scene_data(arrow_file_path) - self._metadata: LogMetadata = _metadata - self._vehicle_parameters: VehicleParameters = _vehicle_parameters - self._camera_metadata: Dict[CameraType, CameraMetadata] = _camera_metadata - self._lidar_metadata: Dict[LiDARType, LiDARMetadata] = _lidar_metadata - - self._map_api: Optional[AbstractMap] = None - - self._arrow_log_path = arrow_file_path - self._scene_extraction_info: SceneExtractionInfo = scene_extraction_info - - def __reduce__(self): - return ( - self.__class__, - ( - self._arrow_log_path, - self._scene_extraction_info, - ), - ) - - @property - def map_api(self) -> AbstractMap: - self._lazy_initialize() - return self._map_api - - @property - def log_name(self) -> str: - return str(self._arrow_log_path.stem) - - @property - def token(self) -> str: - self._lazy_initialize() - return self._recording_table["token"][self._get_table_index(0)].as_py() - - @property - def log_metadata(self) -> LogMetadata: - return self._metadata - - @property - def available_camera_types(self) -> List[CameraType]: - return list(self._camera_metadata.keys()) - - @property - def available_lidar_types(self) -> List[LiDARType]: - return list(self._lidar_metadata.keys()) - - def _get_table_index(self, iteration: int) -> int: - self._lazy_initialize() - assert ( - -self.get_number_of_history_iterations() <= iteration < self.get_number_of_iterations() - ), "Iteration out of bounds" - table_index = self._scene_extraction_info.initial_idx + iteration - return table_index - - def get_number_of_iterations(self) -> int: - self._lazy_initialize() - return self._scene_extraction_info.number_of_iterations - - def get_number_of_history_iterations(self) -> int: - self._lazy_initialize() - return self._scene_extraction_info.number_of_history_iterations - - def get_timepoint_at_iteration(self, iteration: int) -> TimePoint: - self._lazy_initialize() - return get_timepoint_from_arrow_table(self._recording_table, self._get_table_index(iteration)) - - def get_ego_state_at_iteration(self, iteration: int) -> EgoStateSE3: - self._lazy_initialize() - return get_ego_vehicle_state_from_arrow_table( - self._recording_table, self._get_table_index(iteration), self._vehicle_parameters - ) - - def get_box_detections_at_iteration(self, iteration: int) -> BoxDetectionWrapper: - self._lazy_initialize() - return get_box_detections_from_arrow_table(self._recording_table, self._get_table_index(iteration)) - - def get_traffic_light_detections_at_iteration(self, iteration: int) -> TrafficLightDetectionWrapper: - 
self._lazy_initialize() - return get_traffic_light_detections_from_arrow_table(self._recording_table, self._get_table_index(iteration)) - - def get_detection_recording_at_iteration(self, iteration: int) -> DetectionRecording: - return DetectionRecording( - box_detections=self.get_box_detections_at_iteration(iteration), - traffic_light_detections=self.get_traffic_light_detections_at_iteration(iteration), - ) - - def get_route_lane_group_ids(self, iteration: int) -> List[int]: - self._lazy_initialize() - table_index = self._get_table_index(iteration) - return self._recording_table["route_lane_group_ids"][table_index].as_py() - - def get_camera_at_iteration(self, iteration: int, camera_type: CameraType) -> Camera: - self._lazy_initialize() - assert camera_type in self._camera_metadata, f"Camera type {camera_type} not found in metadata." - table_index = self._get_table_index(iteration) - return get_camera_from_arrow_table( - self._recording_table, - table_index, - self._camera_metadata[camera_type], - self.log_metadata, - ) - - def get_lidar_at_iteration(self, iteration: int, lidar_type: LiDARType) -> LiDAR: - self._lazy_initialize() - assert lidar_type in self._lidar_metadata, f"LiDAR type {lidar_type} not found in metadata." - table_index = self._get_table_index(iteration) - return get_lidar_from_arrow_table( - self._recording_table, - table_index, - self._lidar_metadata[lidar_type], - self.log_metadata, - ) - - def _lazy_initialize(self) -> None: - self.open() - - def open(self) -> None: - if self._map_api is None: - try: - if self._metadata.dataset in ["wopd", "av2-sensor"]: - # FIXME: - split = str(self._arrow_log_path.parent.name) - self._map_api = get_local_map_api(split, self._metadata.log_name) - else: - self._map_api = get_map_api_from_names(self._metadata.dataset, self._metadata.location) - self._map_api.initialize() - except Exception as e: - print(f"Error initializing map API: {e}") - if self._recording_table is None: - self._recording_table = open_arrow_table(self._arrow_log_path) - if self._scene_extraction_info is None: - self._scene_extraction_info = SceneExtractionInfo( - initial_idx=0, - duration_s=self._metadata.timestep_seconds * len(self._recording_table), - history_s=0.0, - iteration_duration_s=self._metadata.timestep_seconds, - ) - - def close(self) -> None: - del self._recording_table - self._recording_table = None diff --git a/d123/dataset/scene/scene_builder.py b/d123/dataset/scene/scene_builder.py deleted file mode 100644 index 228fcb13..00000000 --- a/d123/dataset/scene/scene_builder.py +++ /dev/null @@ -1,168 +0,0 @@ -import abc -import json -import random -from functools import partial -from pathlib import Path -from typing import Iterator, List, Optional, Set, Union - -from d123.common.multithreading.worker_utils import WorkerPool, worker_map -from d123.dataset.arrow.helper import open_arrow_table -from d123.dataset.logs.log_metadata import LogMetadata -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.dataset.scene.arrow_scene import ArrowScene, SceneExtractionInfo -from d123.dataset.scene.scene_filter import SceneFilter - -# TODO: Fix lazy abstraction implementation for scene builder. - - -class SceneBuilder(abc.ABC): - @abc.abstractmethod - def get_scenes(self, filter: SceneFilter, worker: WorkerPool) -> Iterator[AbstractScene]: - """ - Returns an iterator over scenes that match the given filter. - :param filter: SceneFilter object to filter the scenes. - :param worker: WorkerPool to parallelize the scene extraction. 
- :return: Iterator over AbstractScene objects. - """ - raise NotImplementedError - - -class ArrowSceneBuilder(SceneBuilder): - """ - A class to build a scene from a dataset. - """ - - def __init__(self, dataset_path: Union[str, Path]): - self._dataset_path = Path(dataset_path) - - def get_scenes(self, filter: SceneFilter, worker: WorkerPool) -> Iterator[AbstractScene]: - """See superclass.""" - - split_types = set(filter.split_types) if filter.split_types else {"train", "val", "test"} - split_names = ( - set(filter.split_names) if filter.split_names else _discover_split_names(self._dataset_path, split_types) - ) - filter_log_names = set(filter.log_names) if filter.log_names else None - log_paths = _discover_log_paths(self._dataset_path, split_names, filter_log_names) - if len(log_paths) == 0: - return [] - scenes = worker_map(worker, partial(_extract_scenes_from_logs, filter=filter), log_paths) - - if filter.shuffle: - random.shuffle(scenes) - - if filter.max_num_scenes is not None: - scenes = scenes[: filter.max_num_scenes] - - return scenes - - -def _discover_split_names(dataset_path: Path, split_types: Set[str]) -> Set[str]: - assert set(split_types).issubset( - {"train", "val", "test"} - ), f"Invalid split types: {split_types}. Valid split types are 'train', 'val', 'test'." - split_names: List[str] = [] - for split in dataset_path.iterdir(): - split_name = split.name - if split.is_dir() and split.name != "maps": - if any(split_type in split_name for split_type in split_types): - split_names.append(split_name) - - return split_names - - -def _discover_log_paths(dataset_path: Path, split_names: Set[str], log_names: Optional[List[str]]) -> List[Path]: - log_paths: List[Path] = [] - for split_name in split_names: - for log_path in (dataset_path / split_name).iterdir(): - if log_path.is_file() and log_path.name.endswith(".arrow"): - if log_names is None or log_path.stem in log_names: - log_paths.append(log_path) - return log_paths - - -def _extract_scenes_from_logs(log_paths: List[Path], filter: SceneFilter) -> List[AbstractScene]: - scenes: List[AbstractScene] = [] - for log_path in log_paths: - try: - scene_extraction_infos = _get_scene_extraction_info(log_path, filter) - except Exception as e: - print(f"Error extracting scenes from {log_path}: {e}") - continue - for scene_extraction_info in scene_extraction_infos: - scenes.append( - ArrowScene( - arrow_file_path=log_path, - scene_extraction_info=scene_extraction_info, - ) - ) - return scenes - - -def _get_scene_extraction_info(log_path: Union[str, Path], filter: SceneFilter) -> List[SceneExtractionInfo]: - scene_extraction_infos: List[SceneExtractionInfo] = [] - - recording_table = open_arrow_table(log_path) - log_metadata = LogMetadata(**json.loads(recording_table.schema.metadata[b"log_metadata"].decode())) - - # 1. 
Filter map name
-    # NOTE: LogMetadata stores the map identifier in its `location` field.
-    if filter.map_names is not None and log_metadata.location not in filter.map_names:
-        return scene_extraction_infos
-
-    start_idx = int(filter.history_s / log_metadata.timestep_seconds) if filter.history_s is not None else 0
-    end_idx = (
-        len(recording_table) - int(filter.duration_s / log_metadata.timestep_seconds)
-        if filter.duration_s is not None
-        else len(recording_table)
-    )
-    if filter.duration_s is None:
-        return [
-            SceneExtractionInfo(
-                initial_idx=start_idx,
-                duration_s=(end_idx - start_idx) * log_metadata.timestep_seconds,
-                history_s=filter.history_s if filter.history_s is not None else 0.0,
-                iteration_duration_s=log_metadata.timestep_seconds,
-            )
-        ]
-
-    scene_token_set = set(filter.scene_tokens) if filter.scene_tokens is not None else None
-
-    for idx in range(start_idx, end_idx):
-        scene_extraction_info: Optional[SceneExtractionInfo] = None
-
-        if scene_token_set is None:
-            scene_extraction_info = SceneExtractionInfo(
-                initial_idx=idx,
-                duration_s=filter.duration_s,
-                history_s=filter.history_s,
-                iteration_duration_s=log_metadata.timestep_seconds,
-            )
-        elif str(recording_table["token"][idx]) in scene_token_set:
-            scene_extraction_info = SceneExtractionInfo(
-                initial_idx=idx,
-                duration_s=filter.duration_s,
-                history_s=filter.history_s,
-                iteration_duration_s=log_metadata.timestep_seconds,
-            )
-
-        if scene_extraction_info is not None:
-            # Check if the timestamp threshold to the previous scene is exceeded, if specified in the filter.
-            if filter.timestamp_threshold_s is not None and len(scene_extraction_infos) > 0:
-                iteration_delta = idx - scene_extraction_infos[-1].initial_idx
-                if (iteration_delta * log_metadata.timestep_seconds) < filter.timestamp_threshold_s:
-                    continue
-
-            # Check if camera data is available for the scene, if specified in the filter.
-            # NOTE: We only check camera availability at the initial index (idx) of the scene.
-            if filter.camera_types is not None:
-                cameras_available = [
-                    recording_table[camera_type.serialize()][idx].as_py() is not None
-                    for camera_type in filter.camera_types
-                ]
-                if not all(cameras_available):
-                    continue
-
-            scene_extraction_infos.append(scene_extraction_info)
-
-    del recording_table, log_metadata
-    return scene_extraction_infos
diff --git a/d123/dataset/scene/scene_filter.py b/d123/dataset/scene/scene_filter.py
deleted file mode 100644
index 0a58eba6..00000000
--- a/d123/dataset/scene/scene_filter.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from dataclasses import dataclass
-from typing import List, Optional
-
-from d123.common.datatypes.sensor.camera import CameraType
-
-# TODO: Add more filter options (e.g. scene tags or ego movement).
-
-
-@dataclass
-class SceneFilter:
-
-    split_types: Optional[List[str]] = None
-    split_names: Optional[List[str]] = None
-    # scene_tags: List[str] = None
-    log_names: Optional[List[str]] = None
-
-    map_names: Optional[List[str]] = None  # TODO:
-    scene_tokens: Optional[List[str]] = None  # TODO:
-
-    timestamp_threshold_s: Optional[float] = None  # TODO:
-    ego_displacement_minimum_m: Optional[float] = None  # TODO:
-
-    duration_s: Optional[float] = 10.0
-    history_s: Optional[float] = 3.0
-
-    camera_types: Optional[List[CameraType]] = None
-
-    max_num_scenes: Optional[int] = None
-    shuffle: bool = False
-
-    def __post_init__(self):
-        if self.camera_types is not None:
-            assert isinstance(self.camera_types, list), "camera_types must be a list of CameraType"
-            camera_types = []
-            for camera_type in self.camera_types:
-                if isinstance(camera_type, str):
-                    # NOTE: deserialize is assumed to be a classmethod, so it must be called, not indexed.
-                    camera_type = CameraType.deserialize(camera_type)
-                    camera_types.append(camera_type)
-                elif isinstance(camera_type, int):
-                    camera_type = CameraType(camera_type)
-                    camera_types.append(camera_type)
-                else:
-                    raise ValueError(f"Invalid camera type: {camera_type}")
-            self.camera_types = camera_types
diff --git a/d123/script/builders/data_converter_builder.py b/d123/script/builders/data_converter_builder.py
deleted file mode 100644
index d9c54004..00000000
--- a/d123/script/builders/data_converter_builder.py
+++ /dev/null
@@ -1,22 +0,0 @@
-import logging
-from typing import List
-
-from hydra.utils import instantiate
-from omegaconf import DictConfig
-
-from d123.dataset.dataset_specific.raw_data_converter import RawDataConverter
-from d123.script.builders.utils.utils_type import validate_type
-
-logger = logging.getLogger(__name__)
-
-
-def build_data_converter(cfg: DictConfig) -> List[RawDataConverter]:
-    logger.info("Building RawDataConverter...")
-    instantiated_datasets: List[RawDataConverter] = []
-    for dataset_type in cfg.values():
-        processor: RawDataConverter = instantiate(dataset_type)
-        validate_type(processor, RawDataConverter)
-        instantiated_datasets.append(processor)
-
-    logger.info("Building RawDataConverter...DONE!")
-    return instantiated_datasets
diff --git a/d123/script/config/common/default_common.yaml b/d123/script/config/common/default_common.yaml
deleted file mode 100644
index f3f88913..00000000
--- a/d123/script/config/common/default_common.yaml
+++ /dev/null
@@ -1,26 +0,0 @@
-# Default common configs
-defaults:
-  - worker: ray_distributed
-  - scene_filter: all_scenes
-  - scene_builder: default_scene_builder
-  - override hydra/hydra_logging: colorlog
-  - override hydra/job_logging: colorlog
-  - _self_
-
-distributed_timeout_seconds: 7200  # Sets how long to wait while synchronizing across worker nodes in a distributed context.
-
-selected_simulation_metrics: null
-
-# Sets the verbosity level; in particular, determines whether progress bars are shown.
-verbose: false - -# Logger -logger_level: info # Level of logger -logger_format_string: null # Logger format string, set null to use the default format string - -# Execution -max_number_of_workers: null # Set null to disable threading for simulation execution -gpu: true # Whether to use available GPUs during training/simulation - - -seed: 42 diff --git a/d123/script/config/common/default_dataset_paths.yaml b/d123/script/config/common/default_dataset_paths.yaml deleted file mode 100644 index 28c5151b..00000000 --- a/d123/script/config/common/default_dataset_paths.yaml +++ /dev/null @@ -1,11 +0,0 @@ - -# D123 -d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} -d123_maps_root: ${oc.env:D123_MAPS_ROOT} -d123_data_root: ${oc.env:D123_DATA_ROOT} - - -# nuplan -nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} -nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} -nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} diff --git a/d123/script/config/common/default_experiment.yaml b/d123/script/config/common/default_experiment.yaml deleted file mode 100644 index 6ad3c3d5..00000000 --- a/d123/script/config/common/default_experiment.yaml +++ /dev/null @@ -1,9 +0,0 @@ -defaults: - - default_dataset_paths - - _self_ - -# Cache parameters -experiment_name: ??? -date_format: '%Y.%m.%d.%H.%M.%S' -experiment_uid: ${now:${date_format}} -output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} # path where output csv is saved diff --git a/d123/script/config/common/scene_builder/default_scene_builder.yaml b/d123/script/config/common/scene_builder/default_scene_builder.yaml deleted file mode 100644 index 2a8bfec7..00000000 --- a/d123/script/config/common/scene_builder/default_scene_builder.yaml +++ /dev/null @@ -1,4 +0,0 @@ -_target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder -_convert_: 'all' - -dataset_path: ${d123_data_root} diff --git a/d123/script/config/common/scene_filter/all_scenes.yaml b/d123/script/config/common/scene_filter/all_scenes.yaml deleted file mode 100644 index d5e1b505..00000000 --- a/d123/script/config/common/scene_filter/all_scenes.yaml +++ /dev/null @@ -1,15 +0,0 @@ -_target_: d123.dataset.scene.scene_filter.SceneFilter -_convert_: 'all' - -split_types: null -split_names: null -log_names: null - - -map_names: null -scene_tokens: null -timestamp_threshold_s: null -ego_displacement_minimum_m: null - -duration_s: 9.2 -history_s: 3.0 diff --git a/d123/script/config/common/scene_filter/nuplan_mini_train.yaml b/d123/script/config/common/scene_filter/nuplan_mini_train.yaml deleted file mode 100644 index f2f1c4bf..00000000 --- a/d123/script/config/common/scene_filter/nuplan_mini_train.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_target_: d123.dataset.scene.scene_filter.SceneFilter -_convert_: 'all' - -split_types: null -split_names: - - "nuplan_mini_train" -log_names: null - - -map_names: null -scene_tokens: null -timestamp_threshold_s: 1.0 -ego_displacement_minimum_m: null -max_num_scenes: null - -duration_s: 8.1 -history_s: 1.0 diff --git a/d123/script/config/common/scene_filter/nuplan_mini_val.yaml b/d123/script/config/common/scene_filter/nuplan_mini_val.yaml deleted file mode 100644 index 2e4eefee..00000000 --- a/d123/script/config/common/scene_filter/nuplan_mini_val.yaml +++ /dev/null @@ -1,17 +0,0 @@ -_target_: d123.dataset.scene.scene_filter.SceneFilter -_convert_: 'all' - -split_types: null -split_names: - - "nuplan_mini_val" -log_names: null - - -map_names: null -scene_tokens: null -timestamp_threshold_s: 1.0 -ego_displacement_minimum_m: null -max_num_scenes: null - -duration_s: 8.1 
-history_s: 1.0 diff --git a/d123/script/config/common/scene_filter/nuplan_sim_agent.yaml b/d123/script/config/common/scene_filter/nuplan_sim_agent.yaml deleted file mode 100644 index eea010cb..00000000 --- a/d123/script/config/common/scene_filter/nuplan_sim_agent.yaml +++ /dev/null @@ -1,117 +0,0 @@ -_target_: d123.dataset.scene.scene_filter.SceneFilter -_convert_: 'all' - -split_types: null -split_names: - - "nuplan_mini_test" -log_names: null - - -map_names: null -scene_tokens: -- "796266a84fd65c71" -- "1ef8b2f08cd65f9a" -- "8922138735b45195" -- "d851006613b85e7b" -- "38ef797d14445e44" -- "a317258b9849542f" -- "83ca370d430e59bc" -- "3159261edbf95519" -- "d3d80c3089905994" -- "8d3b06d2311955cb" -- "1d33208c85c95470" -- "e333523147ba5cc9" -- "94fa6640d9f25c43" -- "0eb46626e2345bae" -- "c20a47af332e58a0" -- "c4153e20ef9f5429" -- "51dc7988d78d545f" -- "7348ea10ac0d58fd" -- "32abded47532514d" -- "4c8bd4c5b1015b82" -- "24334adc21055f43" -- "c1afafec7c1f55c6" -- "61af5c7ad0a75a17" -- "4dc74226975f5060" -- "f3b9c29ceb205216" -- "6fd1090347e05103" -- "8564bae63ebf5e42" -- "4656fe861aed509e" -- "f4aa5f3bf2095608" -- "5dc7c5ab6ad450cb" -- "852e81b1ce2d5ce0" -- "9634e64f32b0584d" -- "765a3a60360950d8" -- "d726695b080d5662" -- "646d3f76770a5159" -- "cc0ba43b91395054" -- "5a30039504185834" -- "396ced8ecffe5d92" -- "2583343f0e095b76" -- "940480c560a459dd" -- "c106ea1d18fa5b46" -- "8a1f92741cf25e94" -- "1e1d33afe06a5dc9" -- "cd1295e8085d5f8a" -- "6c68b2a6d8cc5ea9" -- "be3c5bf59f675e00" -- "053be6c85efa5e17" -- "14756446a3a65d51" -- "f8b10bcaa98a53e8" -- "d72ab3f1878c51a0" -- "0ee073448a2657dc" -- "5726da45814a5649" -- "f8aaded37e735549" -- "c7f921589ed956f3" -- "3852b5feb1215add" -- "fc931d0c156057b5" -- "a8d21fc93a3755c6" -- "3ee7077fadab55aa" -- "5845b5db282858b9" -- "36908fc5872d538d" -- "a075179737ad5140" -- "754820401e085625" -- "469cb3e6d6055f87" -- "230150f0e8ff5d4d" -- "7a8c727978005f14" -- "50038631e81b58c8" -- "b7ca09eed06d5090" -- "780103da783e597b" -- "0c72e0f43f9e5dc5" -- "68ad7730c5885243" -- "2cdec17051d25bb4" -- "fe17bdf412f755e4" -- "53632970c56a5207" -- "6ba4abec7990550c" -- "023028c423715317" -- "963ff52c54685dd3" -- "fdef7ee7067859d3" -- "79d99553e8c15302" -- "e3eaafd948b15069" -- "123dfe0afe85550f" -- "956bfd38d4e75fea" -- "c979bfba27385ba1" -- "74e40b83c2e55f8d" -- "2f6a2c0802b75922" -- "a190fbdd675052e7" -- "4c59ff2f17cd5b12" -- "adb469e65ae256aa" -- "00273245ce175544" -- "49ce38623a9d56fc" -- "e00f8ae7c6fd548e" -- "c525191820d65766" -- "2e190d8d1d435577" -- "2e8e6e5238495570" -- "6629f76b367b5305" -- "de8590ad0fc15d29" -- "f520d009788d5346" -- "e50e1bb8fce354b6" -- "da55b0a04d465781" -- "652ae5b39e575205" -- "2d767bf67a655fb6" - -timestamp_threshold_s: null -ego_displacement_minimum_m: null - -duration_s: 15.1 -history_s: 1.0 diff --git a/d123/script/config/common/worker/sequential.yaml b/d123/script/config/common/worker/sequential.yaml deleted file mode 100644 index ea004415..00000000 --- a/d123/script/config/common/worker/sequential.yaml +++ /dev/null @@ -1,2 +0,0 @@ -_target_: d123.common.multithreading.worker_sequential.Sequential -_convert_: 'all' diff --git a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml deleted file mode 100644 index 0a4544da..00000000 --- a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml +++ /dev/null @@ -1,24 +0,0 @@ -hydra: - run: - dir: ${output_dir} - output_subdir: ${output_dir}/code/hydra # Store hydra's config breakdown here for 
debugging - searchpath: # Only in these paths are discoverable - - pkg://d123.script.config - - pkg://d123.script.config.common - - job: - chdir: False - -defaults: - - default_common - - default_experiment - - default_dataset_paths - - _self_ - - datasets: - # - nuplan_private_dataset - # - carla_dataset - # - wopd_dataset - - av2_sensor_dataset - -force_log_conversion: True -force_map_conversion: True diff --git a/d123/script/config/datasets/av2_sensor_dataset.yaml b/d123/script/config/datasets/av2_sensor_dataset.yaml deleted file mode 100644 index 4f9a95ec..00000000 --- a/d123/script/config/datasets/av2_sensor_dataset.yaml +++ /dev/null @@ -1,16 +0,0 @@ -av2_sensor_dataset: - _target_: d123.dataset.dataset_specific.av2.av2_data_converter.AV2SensorDataConverter - _convert_: 'all' - - splits: ["av2-sensor-mini_train"] - log_path: "/media/nvme1/argoverse" - - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: 'all' - - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: "binary" - lidar_store_option: null diff --git a/d123/script/config/datasets/carla_dataset.yaml b/d123/script/config/datasets/carla_dataset.yaml deleted file mode 100644 index 31184f78..00000000 --- a/d123/script/config/datasets/carla_dataset.yaml +++ /dev/null @@ -1,16 +0,0 @@ -carla_dataset: - _target_: d123.dataset.dataset_specific.carla.carla_data_converter.CarlaDataConverter - _convert_: 'all' - - splits: ["carla"] - log_path: "${oc.env:HOME}/carla_workspace/data" - - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: 'all' - - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: "path" - lidar_store_option: "path" diff --git a/d123/script/config/datasets/nuplan_dataset.yaml b/d123/script/config/datasets/nuplan_dataset.yaml deleted file mode 100644 index 8c104287..00000000 --- a/d123/script/config/datasets/nuplan_dataset.yaml +++ /dev/null @@ -1,16 +0,0 @@ -nuplan_dataset: - _target_: d123.dataset.dataset_specific.nuplan.nuplan_data_converter.NuplanDataConverter - _convert_: 'all' - - splits: ["nuplan_train", "nuplan_val", "nuplan_test"] - log_path: ${oc.env:NUPLAN_DATA_ROOT}/nuplan-v1.1/splits # NOTE: folder including [mini, trainval, test], sometimes not inside "splits" folder - - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: 'all' - - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: "path" - lidar_store_option: "path" diff --git a/d123/script/config/datasets/nuplan_mini_dataset.yaml b/d123/script/config/datasets/nuplan_mini_dataset.yaml deleted file mode 100644 index 1fdb2b54..00000000 --- a/d123/script/config/datasets/nuplan_mini_dataset.yaml +++ /dev/null @@ -1,17 +0,0 @@ -nuplan_mini_dataset: - _target_: d123.dataset.dataset_specific.nuplan.nuplan_data_converter.NuplanDataConverter - _convert_: 'all' - - - splits: ["nuplan_mini_train", "nuplan_mini_val", "nuplan_mini_test"] - log_path: ${oc.env:NUPLAN_DATA_ROOT}/nuplan-v1.1/splits # NOTE: folder including [mini, trainval, test], sometimes not inside "splits" folder - - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - 
_convert_: 'all' - - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: "path" - lidar_store_option: "path" diff --git a/d123/script/config/datasets/nuplan_private_dataset.yaml b/d123/script/config/datasets/nuplan_private_dataset.yaml deleted file mode 100644 index 399dcb7e..00000000 --- a/d123/script/config/datasets/nuplan_private_dataset.yaml +++ /dev/null @@ -1,16 +0,0 @@ -nuplan_private_dataset: - _target_: d123.dataset.dataset_specific.nuplan.nuplan_data_converter.NuplanDataConverter - _convert_: 'all' - - splits: ["nuplan_private_test"] - log_path: ${oc.env:NUPLAN_DATA_ROOT}/nuplan-v1.1/splits # NOTE: folder including [mini, trainval, test], sometimes not inside "splits" folder - - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: 'all' - - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: "path" - lidar_store_option: "path" diff --git a/d123/script/config/datasets/wopd_dataset.yaml b/d123/script/config/datasets/wopd_dataset.yaml deleted file mode 100644 index 1abb0381..00000000 --- a/d123/script/config/datasets/wopd_dataset.yaml +++ /dev/null @@ -1,16 +0,0 @@ -wopd_dataset: - _target_: d123.dataset.dataset_specific.wopd.wopd_data_converter.WOPDDataConverter - _convert_: 'all' - - splits: ["wopd_train"] - log_path: null # TODO: implement - - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: 'all' - - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: "binary" - lidar_store_option: "binary" diff --git a/d123/script/config/lightning_training/callbacks/default_callbacks.yaml b/d123/script/config/lightning_training/callbacks/default_callbacks.yaml deleted file mode 100644 index 5809cfa9..00000000 --- a/d123/script/config/lightning_training/callbacks/default_callbacks.yaml +++ /dev/null @@ -1,14 +0,0 @@ -defaults: - - model_checkpoint - - model_summary - - learning_rate_monitor - - _self_ - -model_checkpoint: - dirpath: ${output_dir}/checkpoints - filename: "epoch_{epoch:03d}" - save_last: link - auto_insert_metric_name: false - -model_summary: - max_depth: -1 diff --git a/d123/script/config/lightning_training/callbacks/learning_rate_monitor.yaml b/d123/script/config/lightning_training/callbacks/learning_rate_monitor.yaml deleted file mode 100644 index 8cf24c03..00000000 --- a/d123/script/config/lightning_training/callbacks/learning_rate_monitor.yaml +++ /dev/null @@ -1,4 +0,0 @@ -# https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.LearningRateMonitor.html -learning_rate_monitor: - _target_: lightning.pytorch.callbacks.LearningRateMonitor - logging_interval: epoch diff --git a/d123/script/config/lightning_training/callbacks/model_checkpoint.yaml b/d123/script/config/lightning_training/callbacks/model_checkpoint.yaml deleted file mode 100644 index b6858737..00000000 --- a/d123/script/config/lightning_training/callbacks/model_checkpoint.yaml +++ /dev/null @@ -1,17 +0,0 @@ -# https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.ModelCheckpoint.html - -model_checkpoint: - _target_: lightning.pytorch.callbacks.ModelCheckpoint - dirpath: null # directory to save the model file - filename: null # checkpoint filename - monitor: 
null # name of the logged metric which determines when model is improving - verbose: false # verbosity mode - save_last: null # additionally always save an exact copy of the last checkpoint to a file last.ckpt - save_top_k: 1 # save k best models (determined by above metric) - mode: "min" # "max" means higher metric value is better, can be also "min" - auto_insert_metric_name: true # when True, the checkpoints filenames will contain the metric name - save_weights_only: false # if True, then only the model’s weights will be saved - every_n_train_steps: null # number of training steps between checkpoints - train_time_interval: null # checkpoints are monitored at the specified time interval - every_n_epochs: 1 # number of epochs between checkpoints - save_on_train_epoch_end: null # whether to run checkpointing at the end of the training epoch or the end of validation diff --git a/d123/script/config/lightning_training/callbacks/model_summary.yaml b/d123/script/config/lightning_training/callbacks/model_summary.yaml deleted file mode 100644 index b75981d8..00000000 --- a/d123/script/config/lightning_training/callbacks/model_summary.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# https://lightning.ai/docs/pytorch/stable/api/lightning.pytorch.callbacks.RichModelSummary.html - -model_summary: - _target_: lightning.pytorch.callbacks.RichModelSummary - max_depth: 1 # the maximum depth of layer nesting that the summary will include diff --git a/d123/script/config/lightning_training/data/waymo.yaml b/d123/script/config/lightning_training/data/waymo.yaml deleted file mode 100644 index fae54e8a..00000000 --- a/d123/script/config/lightning_training/data/waymo.yaml +++ /dev/null @@ -1,15 +0,0 @@ -_target_: d123.training.models.sim_agent.smart.datamodules.scalable_datamodule.MultiDataModule -_convert_: 'all' - -train_batch_size: 4 -val_batch_size: 4 -test_batch_size: 4 -num_workers: 4 -shuffle: true -pin_memory: true -persistent_workers: true -train_raw_dir: ${paths.cache_root}/training -val_raw_dir: ${paths.cache_root}/validation -val_tfrecords_splitted: ${paths.cache_root}/validation_tfrecords_splitted -test_raw_dir: ${paths.cache_root}/testing -train_max_num: 32 diff --git a/d123/script/config/lightning_training/default_lightning_training.yaml b/d123/script/config/lightning_training/default_lightning_training.yaml deleted file mode 100644 index 3ff3b2ce..00000000 --- a/d123/script/config/lightning_training/default_lightning_training.yaml +++ /dev/null @@ -1,29 +0,0 @@ -hydra: - run: - dir: ${output_dir} - output_subdir: ${output_dir}/code/hydra # Store hydra's config breakdown here for debugging - searchpath: # Only in these paths are discoverable - - pkg://d123.script.config.common - - pkg://d123.script.config.lightning_training - job: - chdir: False - -defaults: - - default_common - - default_experiment - - data: waymo - - model: smart_nano_1M_forecasting - - callbacks: default_callbacks - - trainer: default_trainer - # - default_dataset_paths - - _self_ - # - datasets: - # - nuplan_mini_dataset - # - carla_dataset - -# force_feature_computation: True - -ckpt_path: null -action: fit -paths: - cache_root: /home/daniel/nuplan_cache diff --git a/d123/script/config/lightning_training/model/smart.yaml b/d123/script/config/lightning_training/model/smart.yaml deleted file mode 100644 index 4c05352b..00000000 --- a/d123/script/config/lightning_training/model/smart.yaml +++ /dev/null @@ -1,77 +0,0 @@ - -_target_: d123.training.models.sim_agent.smart.smart.SMART -_convert_: 'all' - -model_config: - _target_: 
d123.training.models.sim_agent.smart.smart_config.SMARTConfig - _convert_: 'all' - - lr: 0.0005 - lr_warmup_steps: 0 - lr_total_steps: 100000 - lr_min_ratio: 0.05 - - val_open_loop: True - val_closed_loop: True - - # Tokenizer - map_token_file: "map_traj_token5.pkl" - agent_token_file: "agent_vocab_555_s2.pkl" - - map_token_sampling: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTRolloutSampling - _convert_: 'all' - num_k: 1 - temp: 1.0 - criteria: null - - agent_token_sampling: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTRolloutSampling - _convert_: 'all' - num_k: 1 - temp: 1.0 - criteria: null - - # Rollout Sampling - validation_rollout_sampling: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTRolloutSampling - _convert_: 'all' - num_k: 5 - temp: 1.0 - criteria: "topk_prob" - - training_rollout_sampling: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTRolloutSampling - _convert_: 'all' - num_k: -1 - temp: 1.0 - criteria: "topk_prob" - - - # Decoder - hidden_dim: 128 - num_freq_bands: 64 - num_heads: 8 - head_dim: 16 - dropout: 0.1 - hist_drop_prob: 0.1 - num_map_layers: 3 - num_agent_layers: 6 - pl2pl_radius: 10 - pl2a_radius: 30 - a2a_radius: 60 - time_span: 30 - num_historical_steps: 11 - num_future_steps: 80 - - # train loss - use_gt_raw: True - gt_thresh_scale_length: -1.0 # {"veh": 4.8, "cyc": 2.0, "ped": 1.0} - label_smoothing: 0.1 - rollout_as_gt: False - - # else: - n_rollout_closed_val: 10 - n_vis_batch: 2 - n_vis_scenario: 2 - n_vis_rollout: 5 diff --git a/d123/script/config/lightning_training/model/smart_mini_3M.yaml b/d123/script/config/lightning_training/model/smart_mini_3M.yaml deleted file mode 100644 index e24ae979..00000000 --- a/d123/script/config/lightning_training/model/smart_mini_3M.yaml +++ /dev/null @@ -1,77 +0,0 @@ - -_target_: d123.training.models.sim_agent.smart.smart.SMART -_convert_: 'all' - -model_config: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTConfig - _convert_: 'all' - - lr: 0.0005 - lr_warmup_steps: 0 - lr_total_steps: 100000 - lr_min_ratio: 0.05 - - val_open_loop: True - val_closed_loop: True - - # Tokenizer - map_token_file: "map_traj_token5.pkl" - agent_token_file: "agent_vocab_555_s2.pkl" - - map_token_sampling: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTRolloutSampling - _convert_: 'all' - num_k: 1 - temp: 1.0 - criteria: null - - agent_token_sampling: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTRolloutSampling - _convert_: 'all' - num_k: 1 - temp: 1.0 - criteria: null - - # Rollout Sampling - validation_rollout_sampling: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTRolloutSampling - _convert_: 'all' - num_k: 5 - temp: 1.0 - criteria: "topk_prob" - - training_rollout_sampling: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTRolloutSampling - _convert_: 'all' - num_k: -1 - temp: 1.0 - criteria: "topk_prob" - - - # Decoder - hidden_dim: 128 - num_freq_bands: 64 - num_heads: 4 - head_dim: 8 - dropout: 0.1 - hist_drop_prob: 0.1 - num_map_layers: 2 - num_agent_layers: 4 - pl2pl_radius: 10 - pl2a_radius: 30 - a2a_radius: 60 - time_span: 30 - num_historical_steps: 11 - num_future_steps: 80 - - # train loss - use_gt_raw: True - gt_thresh_scale_length: -1.0 # {"veh": 4.8, "cyc": 2.0, "ped": 1.0} - label_smoothing: 0.1 - rollout_as_gt: False - - # else: - n_rollout_closed_val: 10 - n_vis_batch: 2 - n_vis_scenario: 2 - n_vis_rollout: 5 diff --git 
a/d123/script/config/lightning_training/model/smart_nano_1M.yaml b/d123/script/config/lightning_training/model/smart_nano_1M.yaml deleted file mode 100644 index 1fa717e0..00000000 --- a/d123/script/config/lightning_training/model/smart_nano_1M.yaml +++ /dev/null @@ -1,77 +0,0 @@ - -_target_: d123.training.models.sim_agent.smart.smart.SMART -_convert_: 'all' - -model_config: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTConfig - _convert_: 'all' - - lr: 0.0005 - lr_warmup_steps: 0 - lr_total_steps: 100000 - lr_min_ratio: 0.05 - - val_open_loop: True - val_closed_loop: True - - # Tokenizer - map_token_file: "map_traj_token5.pkl" - agent_token_file: "agent_vocab_555_s2.pkl" - - map_token_sampling: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTRolloutSampling - _convert_: 'all' - num_k: 1 - temp: 1.0 - criteria: null - - agent_token_sampling: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTRolloutSampling - _convert_: 'all' - num_k: 1 - temp: 1.0 - criteria: null - - # Rollout Sampling - validation_rollout_sampling: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTRolloutSampling - _convert_: 'all' - num_k: 5 - temp: 1.0 - criteria: "topk_prob" - - training_rollout_sampling: - _target_: d123.training.models.sim_agent.smart.smart_config.SMARTRolloutSampling - _convert_: 'all' - num_k: -1 - temp: 1.0 - criteria: "topk_prob" - - - # Decoder - hidden_dim: 64 - num_freq_bands: 64 - num_heads: 4 - head_dim: 8 - dropout: 0.1 - hist_drop_prob: 0.1 - num_map_layers: 2 - num_agent_layers: 4 - pl2pl_radius: 10 - pl2a_radius: 20 - a2a_radius: 20 - time_span: 20 - num_historical_steps: 11 - num_future_steps: 80 - - # train loss - use_gt_raw: True - gt_thresh_scale_length: -1.0 # {"veh": 4.8, "cyc": 2.0, "ped": 1.0} - label_smoothing: 0.1 - rollout_as_gt: False - - # else: - n_rollout_closed_val: 10 - n_vis_batch: 2 - n_vis_scenario: 2 - n_vis_rollout: 5 diff --git a/d123/script/config/lightning_training/trainer/ddp.yaml b/d123/script/config/lightning_training/trainer/ddp.yaml deleted file mode 100644 index 20fe3133..00000000 --- a/d123/script/config/lightning_training/trainer/ddp.yaml +++ /dev/null @@ -1,13 +0,0 @@ -defaults: - - default_trainer - -strategy: - _target_: lightning.pytorch.strategies.DDPStrategy - find_unused_parameters: false - gradient_as_bucket_view: true - -accelerator: gpu -devices: -1 -num_nodes: 1 -sync_batchnorm: true -log_every_n_steps: 20 diff --git a/d123/script/config/lightning_training/trainer/default_trainer.yaml b/d123/script/config/lightning_training/trainer/default_trainer.yaml deleted file mode 100644 index 7496cc30..00000000 --- a/d123/script/config/lightning_training/trainer/default_trainer.yaml +++ /dev/null @@ -1,27 +0,0 @@ -_target_: lightning.pytorch.trainer.Trainer - -default_root_dir: ${output_dir} - -limit_train_batches: null -limit_val_batches: null -limit_test_batches: 1.0 - -# max_steps: 25000 -# val_check_interval: 0.5 - -max_epochs: 100 - -accelerator: gpu -devices: -1 - -precision: 32-true -check_val_every_n_epoch: 1 - -# set True to to ensure deterministic results -# makes training slower but gives more reproducibility than just setting seeds -deterministic: false -gradient_clip_val: 0.5 -num_sanity_val_steps: 0 -accumulate_grad_batches: 1 -log_every_n_steps: 1 -strategy: auto diff --git a/d123/script/config/preprocessing/default_preprocessing.yaml b/d123/script/config/preprocessing/default_preprocessing.yaml deleted file mode 100644 index 7b9bc0c5..00000000 --- 
a/d123/script/config/preprocessing/default_preprocessing.yaml +++ /dev/null @@ -1,20 +0,0 @@ -hydra: - run: - dir: ${output_dir} - output_subdir: ${output_dir}/code/hydra # Store hydra's config breakdown here for debugging - searchpath: # Only in these paths are discoverable - - pkg://d123.script.config - - pkg://d123.script.config.common - - pkg://d123.script.config.preprocessing - job: - chdir: False - -defaults: - - default_common - - default_experiment - - default_dataset_paths - - _self_ - - - -cache_path: ??? diff --git a/d123/script/config/simulation/default_simulation.yaml b/d123/script/config/simulation/default_simulation.yaml deleted file mode 100644 index 0be7fd09..00000000 --- a/d123/script/config/simulation/default_simulation.yaml +++ /dev/null @@ -1,20 +0,0 @@ -hydra: - run: - dir: ${output_dir} - output_subdir: ${output_dir}/code/hydra # Store hydra's config breakdown here for debugging - searchpath: # Only in these paths are discoverable - - pkg://d123.script.config - - pkg://d123.script.config.common - - pkg://d123.script.config.preprocessing - job: - chdir: False - -defaults: - - default_common - - default_experiment - - default_dataset_paths - - _self_ - - -paths: - cache_root: /home/daniel/waymo_training_catk diff --git a/d123/script/config/viser/default_viser.yaml b/d123/script/config/viser/default_viser.yaml deleted file mode 100644 index c42164dc..00000000 --- a/d123/script/config/viser/default_viser.yaml +++ /dev/null @@ -1,16 +0,0 @@ -hydra: - run: - dir: . - output_subdir: null - searchpath: # Only in these paths are discoverable - - pkg://d123.script.config - - pkg://d123.script.config.common - - pkg://d123.script.config.preprocessing - job: - chdir: False -# -defaults: - - default_common - - default_dataset_paths - -port_number: 8080 diff --git a/d123/script/run_dataset_conversion.py b/d123/script/run_dataset_conversion.py deleted file mode 100644 index 62ad1e30..00000000 --- a/d123/script/run_dataset_conversion.py +++ /dev/null @@ -1,46 +0,0 @@ -import logging -from typing import List - -import hydra -from nuplan.planning.script.builders.logging_builder import build_logger -from omegaconf import DictConfig - -from d123.script.builders.data_converter_builder import RawDataConverter, build_data_converter -from d123.script.builders.worker_pool_builder import build_worker - -logger = logging.getLogger(__name__) - -CONFIG_PATH = "config/dataset_conversion" -CONFIG_NAME = "default_dataset_conversion" - - -@hydra.main(config_path=CONFIG_PATH, config_name=CONFIG_NAME, version_base=None) -def main(cfg: DictConfig) -> None: - """ - Main entrypoint for metric caching. 
- :param cfg: omegaconf dictionary - """ - # Configure logger - build_logger(cfg) - - # Build worker - worker = build_worker(cfg) - - # Convert maps and logs for all configured datasets - logger.info("Starting dataset conversion...") - data_processors: List[RawDataConverter] = build_data_converter(cfg.datasets) - for data_processor in data_processors: - - logger.info(f"Processing dataset: {data_processor.__class__.__name__}") - - data_processor.convert_maps(worker=worker) - logger.info(f"Finished maps: {data_processor.__class__.__name__}") - - data_processor.convert_logs(worker=worker) - logger.info(f"Finished logs: {data_processor.__class__.__name__}") - - logger.info(f"Finished processing dataset: {data_processor.__class__.__name__}") - - -if __name__ == "__main__": - main() diff --git a/d123/script/run_preprocessing.py b/d123/script/run_preprocessing.py deleted file mode 100644 index 4ed6cf97..00000000 --- a/d123/script/run_preprocessing.py +++ /dev/null @@ -1,58 +0,0 @@ -import logging -import pickle -from functools import partial -from pathlib import Path -from typing import List - -import hydra -import lightning as L -from omegaconf import DictConfig - -from d123.common.multithreading.worker_utils import worker_map -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.script.builders.scene_builder_builder import build_scene_builder -from d123.script.builders.scene_filter_builder import build_scene_filter -from d123.script.run_dataset_conversion import build_worker -from d123.training.feature_builder.smart_feature_builder import SMARTFeatureBuilder - -logger = logging.getLogger(__name__) - -CONFIG_PATH = "config/preprocessing" -CONFIG_NAME = "default_preprocessing" - - -@hydra.main(config_path=CONFIG_PATH, config_name=CONFIG_NAME, version_base=None) -def main(cfg: DictConfig) -> None: - """ - Main entrypoint for feature preprocessing. - :param cfg: omegaconf dictionary - """ - L.seed_everything(cfg.seed, workers=True) - - worker = build_worker(cfg) - scene_filter = build_scene_filter(cfg.scene_filter) - scene_builder = build_scene_builder(cfg.scene_builder) - - scenes = scene_builder.get_scenes(scene_filter, worker=worker) - logger.info(f"Found {len(scenes)} scenes.") - - cache_path = Path(cfg.cache_path) - cache_path.mkdir(parents=True, exist_ok=True) - - feature_builder = SMARTFeatureBuilder() - - worker_map(worker, partial(_apply_feature_builder, feature_builder=feature_builder, cache_path=cache_path), scenes) - - -def _apply_feature_builder(scenes: List[AbstractScene], feature_builder: SMARTFeatureBuilder, cache_path: Path): - """ - Build and pickle SMART features for each scene; returns an empty list, as expected by worker_map. - """ - for scene in scenes: - scene.open() - feature_dict = feature_builder.build_features(scene=scene) - output_file = cache_path / f"{feature_dict['scenario_id']}.pkl" - with open(output_file, "wb") as f: - pickle.dump(feature_dict, f) - scene.close() - - return [] - - -if __name__ == "__main__": - main() diff --git a/d123/script/run_simulation.py b/d123/script/run_simulation.py deleted file mode 100644 index 393e4367..00000000 --- a/d123/script/run_simulation.py +++ /dev/null @@ -1,83 +0,0 @@ -import logging -import traceback -from pathlib import Path -from typing import Dict, List - -import hydra -import lightning as L -import pandas as pd -from omegaconf import DictConfig -from tqdm import tqdm - -from d123.common.multithreading.worker_utils import worker_map -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.script.builders.scene_builder_builder import build_scene_builder -from d123.script.builders.scene_filter_builder import build_scene_filter -from d123.script.run_dataset_conversion import build_worker -from 
d123.simulation.gym.demo_gym_env import DemoGymEnv -from d123.simulation.metrics.sim_agents.sim_agents import get_sim_agents_metrics - -logger = logging.getLogger(__name__) - -CONFIG_PATH = "config/simulation" -CONFIG_NAME = "default_simulation" - - -@hydra.main(config_path=CONFIG_PATH, config_name=CONFIG_NAME, version_base=None) -def main(cfg: DictConfig) -> None: - """ - Main entrypoint for simulation and sim-agents evaluation. - :param cfg: omegaconf dictionary - """ - L.seed_everything(cfg.seed, workers=True) - - worker = build_worker(cfg) - scene_filter = build_scene_filter(cfg.scene_filter) - scene_builder = build_scene_builder(cfg.scene_builder) - - scenes = scene_builder.get_scenes(scene_filter, worker=worker) - logger.info(f"Found {len(scenes)} scenes.") - - results = worker_map(worker, _run_simulation, scenes) - - df = pd.DataFrame(results) - avg_row = df.drop(columns=["token"]).mean(numeric_only=True) - avg_row["token"] = "average" - df = pd.concat([df, pd.DataFrame([avg_row])], ignore_index=True) - - output_dir = Path(cfg.output_dir) - df.to_csv(output_dir / "sim_agent_results.csv") - - -def _run_simulation(scenes: List[AbstractScene]) -> List[Dict[str, float]]: - """ - Roll out each scene in the demo gym environment and compute sim-agents metrics. - """ - action = [1.0, 0.1] # Placeholder action, replace with actual action logic - env = DemoGymEnv(scenes) - - results = [] - - for scene in tqdm(scenes): - try: - - agent_rollouts = [] - - map_api, ego_state, detection_observation, current_scene = env.reset(scene) - agent_rollouts.append(detection_observation.box_detections) - - result = {} - result["token"] = scene.token - for _ in range(150): - ego_state, detection_observation, end = env.step(action) - agent_rollouts.append(detection_observation.box_detections) - if end: - break - result.update(get_sim_agents_metrics(current_scene, agent_rollouts)) - results.append(result) - except Exception: - logger.warning(f"Simulation failed for scene: {current_scene.token}") - traceback.print_exc() - continue - - scene.close() - return results - - -if __name__ == "__main__": - main() diff --git a/d123/script/run_training.py b/d123/script/run_training.py deleted file mode 100644 index c42a4b44..00000000 --- a/d123/script/run_training.py +++ /dev/null @@ -1,80 +0,0 @@ -import logging -from typing import List - -import hydra -import lightning as L -from lightning import Callback, LightningDataModule, LightningModule, Trainer -from omegaconf import DictConfig - -logger = logging.getLogger(__name__) - -CONFIG_PATH = "config/lightning_training" -CONFIG_NAME = "default_lightning_training" - - -def instantiate_callbacks(callbacks_cfg: DictConfig) -> List[Callback]: - """Instantiates callbacks from config. - - :param callbacks_cfg: A DictConfig object containing callback configurations. - :return: A list of instantiated callbacks. - """ - callbacks: List[Callback] = [] - - if not callbacks_cfg: - logger.warning("No callback configs found! 
Skipping..") - return callbacks - - if not isinstance(callbacks_cfg, DictConfig): - raise TypeError("Callbacks config must be a DictConfig!") - - for _, cb_conf in callbacks_cfg.items(): - if isinstance(cb_conf, DictConfig) and "_target_" in cb_conf: - logger.info(f"Instantiating callback <{cb_conf._target_}>") - callbacks.append(hydra.utils.instantiate(cb_conf)) - - return callbacks - - -@hydra.main(config_path=CONFIG_PATH, config_name=CONFIG_NAME, version_base=None) -def main(cfg: DictConfig) -> None: - L.seed_everything(cfg.seed, workers=True) - - logger.info(f"Instantiating datamodule <{cfg.data._target_}>") - datamodule: LightningDataModule = hydra.utils.instantiate(cfg.data) - - logger.info(f"Instantiating model <{cfg.model._target_}>") - model: LightningModule = hydra.utils.instantiate(cfg.model) - - logger.info("Instantiating callbacks...") - callbacks: List[Callback] = instantiate_callbacks(cfg.callbacks) - - # logger.info(f"Instantiating loggers...") - # logger: List[Logger] = instantiate_loggers(cfg.get("logger")) - # # setup model watching - # for _logger in logger: - # if isinstance(_logger, WandbLogger): - # _logger.watch(model, log="all") - - logger.info(f"Instantiating trainer <{cfg.trainer._target_}>") - trainer: Trainer = hydra.utils.instantiate(cfg.trainer, callbacks=callbacks) - - logger.info(f"Resuming from ckpt: cfg.ckpt_path={cfg.ckpt_path}") - if cfg.action == "fit": - logger.info("Starting training!") - trainer.fit(model=model, datamodule=datamodule, ckpt_path=cfg.get("ckpt_path")) - # elif cfg.action == "finetune": - # logger.info("Starting finetuning!") - # model.load_state_dict(torch.load(cfg.ckpt_path)["state_dict"], strict=False) - # trainer.fit(model=model, datamodule=datamodule) - # elif cfg.action == "validate": - # logger.info("Starting validating!") - # trainer.validate( - # model=model, datamodule=datamodule, ckpt_path=cfg.get("ckpt_path") - # ) - # elif cfg.action == "test": - # logger.info("Starting testing!") - # trainer.test(model=model, datamodule=datamodule, ckpt_path=cfg.get("ckpt_path")) - - -if __name__ == "__main__": - main() diff --git a/d123/script/run_viser.py b/d123/script/run_viser.py deleted file mode 100644 index e682a96e..00000000 --- a/d123/script/run_viser.py +++ /dev/null @@ -1,29 +0,0 @@ -import logging - -import hydra -from omegaconf import DictConfig - -from d123.common.visualization.viser.server import ViserVisualizationServer -from d123.script.builders.scene_builder_builder import build_scene_builder -from d123.script.builders.scene_filter_builder import build_scene_filter -from d123.script.run_dataset_conversion import build_worker - -logger = logging.getLogger(__name__) - -CONFIG_PATH = "config/viser" -CONFIG_NAME = "default_viser" - - -@hydra.main(config_path=CONFIG_PATH, config_name=CONFIG_NAME, version_base=None) -def main(cfg: DictConfig) -> None: - - worker = build_worker(cfg) - scene_filter = build_scene_filter(cfg.scene_filter) - scene_builder = build_scene_builder(cfg.scene_builder) - scenes = scene_builder.get_scenes(scene_filter, worker=worker) - - ViserVisualizationServer(scenes=scenes) - - -if __name__ == "__main__": - main() diff --git a/d123/simulation/agents/abstract_agents.py b/d123/simulation/agents/abstract_agents.py deleted file mode 100644 index 2d7f6cde..00000000 --- a/d123/simulation/agents/abstract_agents.py +++ /dev/null @@ -1,27 +0,0 @@ -from abc import abstractmethod -from typing import List, Optional - -from d123.common.datatypes.detection.detection import BoxDetection -from 
d123.dataset.maps.abstract_map import AbstractMap -from d123.dataset.scene.abstract_scene import AbstractScene - - -class AbstractAgents: - - # Whether the agent class requires the scenario object to be passed at construction time. - # This can be set to true only for oracle planners and cannot be used for submissions. - requires_scene: bool = True - - @abstractmethod - def reset( - self, - map_api: AbstractMap, - target_agents: List[BoxDetection], - non_target_agents: List[BoxDetection], - scene: Optional[AbstractScene] = None, - ) -> List[BoxDetection]: - raise NotImplementedError - - @abstractmethod - def step(self, non_target_agents: List[BoxDetection]) -> List[BoxDetection]: - raise NotImplementedError diff --git a/d123/simulation/agents/constant_velocity_agents.py b/d123/simulation/agents/constant_velocity_agents.py deleted file mode 100644 index 75f4e343..00000000 --- a/d123/simulation/agents/constant_velocity_agents.py +++ /dev/null @@ -1,67 +0,0 @@ -import copy -from abc import abstractmethod -from typing import List, Optional - -from d123.common.datatypes.detection.detection import BoxDetection, BoxDetectionSE2 -from d123.common.geometry.base import Point2D -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2 -from d123.common.geometry.transform.tranform_2d import translate_along_yaw -from d123.dataset.maps.abstract_map import AbstractMap -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.simulation.agents.abstract_agents import AbstractAgents - - -class ConstantVelocityAgents(AbstractAgents): - - # Whether the agent class requires the scenario object to be passed at construction time. - # This can be set to true only for oracle planners and cannot be used for submissions. - requires_scene: bool = False - - def __init__(self) -> None: - """ - Initialize the constant velocity agents. 
- """ - super().__init__() - self._timestep_s: float = 0.1 - self._current_iteration: int = 0 - self._map_api: AbstractMap = None - - self._initial_target_agents: List[BoxDetection] = [] - - @abstractmethod - def reset( - self, - map_api: AbstractMap, - target_agents: List[BoxDetection], - non_target_agents: List[BoxDetection], - scene: Optional[AbstractScene] = None, - ) -> List[BoxDetection]: - assert scene is None - - self._map_api = map_api - self._current_iteration = 0 - self._initial_target_agents = [copy.deepcopy(agent) for agent in target_agents] - return self._initial_target_agents - - def step(self, non_target_agents: List[BoxDetection]): - self._current_iteration += 1 - - time_delta_s = self._timestep_s * self._current_iteration - current_target_agents = [] - for initial_agent in self._initial_target_agents: - speed: float = float(initial_agent.velocity.vector_2d.magnitude()) - - propagated_center = translate_along_yaw(initial_agent.center, Point2D(speed * time_delta_s, 0.0)) - propagated_bounding_box = BoundingBoxSE2( - propagated_center, - initial_agent.bounding_box_se3.length, - initial_agent.bounding_box_se3.width, - ) - propagated_agent: BoxDetectionSE2 = BoxDetectionSE2( - metadata=initial_agent.metadata, - bounding_box_se2=propagated_bounding_box, - velocity=initial_agent.velocity, - ) - current_target_agents.append(propagated_agent) - - return current_target_agents diff --git a/d123/simulation/agents/idm_agents.py b/d123/simulation/agents/idm_agents.py deleted file mode 100644 index 4e9159f5..00000000 --- a/d123/simulation/agents/idm_agents.py +++ /dev/null @@ -1,195 +0,0 @@ -import copy -from abc import abstractmethod -from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple - -import numpy as np -from shapely.geometry import CAP_STYLE, Polygon - -from d123.common.datatypes.detection.detection import BoxDetection, BoxDetectionSE2 -from d123.common.geometry.base import Point2D, StateSE2 -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2 -from d123.common.geometry.line.polylines import PolylineSE2 -from d123.common.geometry.transform.tranform_2d import translate_along_yaw -from d123.common.geometry.vector import Vector2D -from d123.dataset.arrow.conversion import BoxDetectionWrapper -from d123.dataset.maps.abstract_map import AbstractMap -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.simulation.agents.abstract_agents import AbstractAgents - - -@dataclass -class IDMConfig: - target_velocity: float = 10.0 # [m/s] - min_gap_to_lead_agent: float = 1.0 # [m] - headway_time: float = 1.5 # [s] - accel_max: float = 1.0 # [m/s^2] - decel_max: float = 2.0 # [m/s^2] - acceleration_exponent: float = 4.0 # Usually set to 4 - - -class IDMAgents(AbstractAgents): - - # Whether the agent class requires the scenario object to be passed at construction time. - # This can be set to true only for oracle planners and cannot be used for submissions. - requires_scene: bool = True - - def __init__(self) -> None: - """ - Initialize the constant velocity agents. 
- """ - super().__init__() - self._timestep_s: float = 0.1 - self._current_iteration: int = 0 - self._map_api: AbstractMap = None - - self._idm_config: IDMConfig = IDMConfig(target_velocity=5.0, accel_max=1.0, decel_max=2.0) - - self._initial_target_agents: List[BoxDetection] = [] - self._past_target_agents: List[BoxDetection] = [] - self._agent_paths: Dict[str, PolylineSE2] = {} - self._agent_paths_buffer: Dict[str, Polygon] = {} - self._agent_initial_vel: Dict[str, float] = {} - self._extend_path_length: float = 100 - - @abstractmethod - def reset( - self, - map_api: AbstractMap, - target_agents: List[BoxDetection], - non_target_agents: List[BoxDetection], - scene: Optional[AbstractScene] = None, - ) -> List[BoxDetection]: - assert scene is not None - - self._map_api = map_api - self._current_iteration = 0 - self._initial_target_agents = [copy.deepcopy(agent) for agent in target_agents] - - future_box_detections = [ - scene.get_box_detections_at_iteration(iteration) for iteration in range(0, scene.get_number_of_iterations()) - ] - - # TODO: refactor or move for general use - for agent in self._initial_target_agents: - future_trajectory: List[StateSE2] = [] - for box_detections in future_box_detections: - agent_at_iteration = box_detections.get_detection_by_track_token(agent.metadata.track_token) - if agent_at_iteration is None: - break - - future_trajectory.append(agent_at_iteration.center.state_se2) - - if len(future_trajectory) < 2: - future_trajectory = [agent.center.state_se2, translate_along_yaw(agent.center, Point2D(0.1, 0.0))] - - future_trajectory.append(translate_along_yaw(future_trajectory[-1], Point2D(self._extend_path_length, 0.0))) - - polyline_se2 = PolylineSE2.from_discrete_se2(future_trajectory) - self._agent_paths[agent.metadata.track_token] = polyline_se2 - self._agent_paths_buffer[agent.metadata.track_token] = polyline_se2.linestring.buffer( - agent.bounding_box_se2.width / 2, cap_style=CAP_STYLE.square - ) - self._agent_initial_vel[agent.metadata.track_token] = float(agent.velocity.vector_2d.magnitude()) - - self._past_target_agents = self._initial_target_agents - return self._initial_target_agents - - def step(self, non_target_agents: List[BoxDetection]): - self._current_iteration += 1 - - box_detections = BoxDetectionWrapper(box_detections=non_target_agents + self._past_target_agents) - occupancy_map = box_detections.occupancy_map - - # time_delta_s = self._timestep_s * self._current_iteration - current_target_agents = [] - for past_agent in self._past_target_agents: - agent_velocity: float = float(past_agent.velocity.vector_2d.magnitude()) - - agent_path = self._agent_paths[past_agent.metadata.track_token] - agent_path_buffer = self._agent_paths_buffer[past_agent.metadata.track_token] - agent_distance_on_path = agent_path.project(past_agent.center.point_2d) - - track_token_in_path: List[str] = occupancy_map.intersects(agent_path_buffer) - - leading_agent: Optional[BoxDetection] = None - leading_agent_distance_on_path: float = float("inf") - for track_token in track_token_in_path: - if track_token == past_agent.metadata.track_token: - continue - - other_agent = box_detections.get_detection_by_track_token(track_token) - if other_agent is None: - continue - - other_agent_distance_on_path = agent_path.project(other_agent.center.point_2d) - if other_agent_distance_on_path < agent_distance_on_path: - continue - - if other_agent_distance_on_path < leading_agent_distance_on_path: - leading_agent = other_agent - leading_agent_distance_on_path = other_agent_distance_on_path 
- - if leading_agent is not None: - distance_to_lead_agent = past_agent.shapely_polygon.distance(leading_agent.shapely_polygon) - lead_agent_velocity = float(leading_agent.velocity.vector_2d.magnitude()) - else: - distance_to_lead_agent = float( - np.clip(agent_path.length - agent_distance_on_path, a_min=0.0, a_max=None) - ) - lead_agent_velocity = 0.0 - - # propagate the agent using IDM - self._idm_config.target_velocity = self._agent_initial_vel[past_agent.metadata.track_token] + 0.01 - x_dot, v_agent_dot = _propagate_idm( - agent_velocity, lead_agent_velocity, distance_to_lead_agent, self._idm_config - ) - - v_agent_dot = min(max(-self._idm_config.decel_max, v_agent_dot), self._idm_config.accel_max) - propagate_distance = agent_distance_on_path + x_dot * self._timestep_s - propagated_center = agent_path.interpolate(propagate_distance) - propagated_bounding_box = BoundingBoxSE2( - propagated_center, - past_agent.bounding_box_se2.length, - past_agent.bounding_box_se2.width, - ) - new_velocity = Vector2D(agent_velocity + v_agent_dot * self._timestep_s, 0.0) - propagated_agent: BoxDetectionSE2 = BoxDetectionSE2( - metadata=past_agent.metadata, - bounding_box_se2=propagated_bounding_box, - velocity=new_velocity, - ) - current_target_agents.append(propagated_agent) - - self._past_target_agents = current_target_agents - return current_target_agents - - -def _propagate_idm( - agent_velocity: float, lead_velocity: float, agent_lead_distance: float, idm_config: IDMConfig -) -> Tuple[float, float]: - - # convenience definitions - s_star = ( - idm_config.min_gap_to_lead_agent - + agent_velocity * idm_config.headway_time - + (agent_velocity * (agent_velocity - lead_velocity)) - / (2 * np.sqrt(idm_config.accel_max * idm_config.decel_max)) - ) - s_alpha = max(agent_lead_distance, idm_config.min_gap_to_lead_agent) # clamp to avoid zero division - - # differential equations - x_dot = agent_velocity - try: - v_agent_dot = idm_config.accel_max * ( - 1 - - (agent_velocity / idm_config.target_velocity) ** idm_config.acceleration_exponent - - (s_star / s_alpha) ** 2 - ) - except: # noqa: E722 - print("input", agent_velocity, lead_velocity, agent_lead_distance) - print("s_star", s_star) - print("s_alpha", s_alpha) - print("x_dot", x_dot) - v_agent_dot = 0.0 - return x_dot, v_agent_dot diff --git a/d123/simulation/agents/path_following.py b/d123/simulation/agents/path_following.py deleted file mode 100644 index bc4ec577..00000000 --- a/d123/simulation/agents/path_following.py +++ /dev/null @@ -1,90 +0,0 @@ -import copy -from abc import abstractmethod -from typing import Dict, List, Optional - -from d123.common.datatypes.detection.detection import BoxDetection, BoxDetectionSE2 -from d123.common.geometry.base import Point2D, StateSE2 -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2 -from d123.common.geometry.line.polylines import PolylineSE2 -from d123.common.geometry.transform.tranform_2d import translate_along_yaw -from d123.dataset.maps.abstract_map import AbstractMap -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.simulation.agents.abstract_agents import AbstractAgents - - -class PathFollowingAgents(AbstractAgents): - - # Whether the agent class requires the scenario object to be passed at construction time. - # This can be set to true only for oracle planners and cannot be used for submissions. - requires_scene: bool = True - - def __init__(self) -> None: - """ - Initialize the path-following agents. 
- """ - super().__init__() - self._timestep_s: float = 0.1 - self._current_iteration: int = 0 - self._map_api: AbstractMap = None - - self._initial_target_agents: List[BoxDetection] = [] - self._agent_paths: Dict[str, PolylineSE2] = {} - self._extend_path_length: float = 0.1 - - @abstractmethod - def reset( - self, - map_api: AbstractMap, - target_agents: List[BoxDetection], - non_target_agents: List[BoxDetection], - scene: Optional[AbstractScene] = None, - ) -> List[BoxDetection]: - assert scene is not None - - self._map_api = map_api - self._current_iteration = 0 - self._initial_target_agents = [copy.deepcopy(agent) for agent in target_agents] - - future_box_detections = [ - scene.get_box_detections_at_iteration(iteration) for iteration in range(0, scene.get_number_of_iterations()) - ] - - # TODO: refactor or move for general use - for agent in self._initial_target_agents: - future_trajectory: List[StateSE2] = [] - for box_detections in future_box_detections: - agent_at_iteration = box_detections.get_detection_by_track_token(agent.metadata.track_token) - if agent_at_iteration is None: - break - - future_trajectory.append(agent_at_iteration.center.state_se2) - - future_trajectory.append(translate_along_yaw(future_trajectory[-1], Point2D(self._extend_path_length, 0.0))) - - self._agent_paths[agent.metadata.track_token] = PolylineSE2.from_discrete_se2(future_trajectory) - - return self._initial_target_agents - - def step(self, non_target_agents: List[BoxDetection]): - self._current_iteration += 1 - - time_delta_s = self._timestep_s * self._current_iteration - current_target_agents = [] - for initial_agent in self._initial_target_agents: - speed: float = float(initial_agent.velocity.vector_2d.magnitude()) - - propagate_distance = speed * time_delta_s - propagated_center = self._agent_paths[initial_agent.metadata.track_token].interpolate(propagate_distance) - propagated_bounding_box = BoundingBoxSE2( - propagated_center, - initial_agent.bounding_box_se3.length, - initial_agent.bounding_box_se3.width, - ) - propagated_agent: BoxDetectionSE2 = BoxDetectionSE2( - metadata=initial_agent.metadata, - bounding_box_se2=propagated_bounding_box, - velocity=initial_agent.velocity, - ) - current_target_agents.append(propagated_agent) - - return current_target_agents diff --git a/d123/simulation/agents/smart_agents.py b/d123/simulation/agents/smart_agents.py deleted file mode 100644 index 18c1b48e..00000000 --- a/d123/simulation/agents/smart_agents.py +++ /dev/null @@ -1,146 +0,0 @@ -from abc import abstractmethod -from pathlib import Path -from typing import List, Optional - -import torch -from torch_geometric.data import HeteroData - -from d123.common.datatypes.detection.detection import BoxDetection, BoxDetectionSE2 -from d123.common.geometry.base import StateSE2 -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2 -from d123.common.geometry.transform.se2_array import convert_relative_to_absolute_point_2d_array -from d123.common.geometry.utils import normalize_angle -from d123.dataset.arrow.conversion import BoxDetectionWrapper, DetectionType -from d123.dataset.maps.abstract_map import AbstractMap -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.simulation.agents.abstract_agents import AbstractAgents -from d123.training.feature_builder.smart_feature_builder import SMARTFeatureBuilder -from d123.training.models.sim_agent.smart.datamodules.target_builder import _numpy_dict_to_torch -from d123.training.models.sim_agent.smart.smart import SMART -from 
d123.training.models.sim_agent.smart.smart_config import SMARTConfig - - -class SMARTAgents(AbstractAgents): - - # Whether the agent class requires the scenario object to be passed at construction time. - # This can be set to true only for oracle planners and cannot be used for submissions. - requires_scene: bool = True - - def __init__(self) -> None: - """ - Initialize the SMART agents. - """ - super().__init__() - self._timestep_s: float = 0.1 - self._current_iteration: int = 0 - self._map_api: AbstractMap = None - - checkpoint_path = Path( - "/home/daniel/d123_workspace/exp/smart_mini_run/2025.06.23.20.45.20/checkpoints/epoch_050.ckpt" - ) - # checkpoint_path = Path("/home/daniel/epoch_050.ckpt") - # checkpoint_path = Path("/home/daniel/epoch_027.ckpt") - # checkpoint_path = Path("/home/daniel/epoch_008.ckpt") - config = SMARTConfig( - hidden_dim=64, - num_freq_bands=64, - num_heads=4, - head_dim=8, - dropout=0.1, - hist_drop_prob=0.1, - num_map_layers=2, - num_agent_layers=4, - pl2pl_radius=10, - pl2a_radius=20, - a2a_radius=20, - time_span=20, - num_historical_steps=11, - num_future_steps=90, - ) - - self._device = "cuda:0" if torch.cuda.is_available() else "cpu" - self._smart_model = SMART.load_from_checkpoint( - checkpoint_path, config=config, strict=False, map_location=self._device - ) - self._smart_model.eval() - self._smart_model.to(self._device) - - self._smart_model.encoder.agent_encoder.num_future_steps = 150 - self._smart_model.validation_rollout_sampling.num_k = 1 - - self._initial_box_detections: Optional[BoxDetectionWrapper] = None - self._agent_indices: List[int] = [] - - @abstractmethod - def reset( - self, - map_api: AbstractMap, - target_agents: List[BoxDetection], - non_target_agents: List[BoxDetection], - scene: Optional[AbstractScene] = None, - ) -> List[BoxDetection]: - assert scene is not None - self._current_iteration = 0 - - feature_builder = SMARTFeatureBuilder() - features = feature_builder.build_features(scene) - self._agent_indices = features["agent"]["id"].tolist() - _numpy_dict_to_torch(features, device="cpu") - torch_features = HeteroData(features) - from torch_geometric.loader import DataLoader - - # Wrap the single scene sample in a loader to obtain a batched HeteroData object - dataset = [torch_features] - loader = DataLoader(dataset, batch_size=1, shuffle=False) - with torch.no_grad(): - for batch in loader: - batch.to(self._device) - pred_traj, pred_z, pred_head = self._smart_model.test_step(batch, 0) - break - - origin = scene.get_ego_state_at_iteration(0).bounding_box.center.state_se2 - - self._pred_traj = convert_relative_to_absolute_point_2d_array(origin, pred_traj.cpu().numpy()) - self._pred_z = pred_z.cpu().numpy() - self._pred_head = normalize_angle(pred_head.cpu().numpy() + origin.yaw) - - self._initial_box_detections = scene.get_box_detections_at_iteration(0) - - # self._initial_target_agents = [copy.deepcopy(agent) for agent in target_agents] - return target_agents - - def step(self, non_target_agents: List[BoxDetection]): - - # Predictions have shape (num_agents, num_rollouts, num_steps, ...), e.g. (16, 10, 80, 2); take the first rollout. - pred_traj = self._pred_traj[:, 0] - pred_head = self._pred_head[:, 0] - - current_target_agents = [] - for agent_idx, agent_id in enumerate(self._agent_indices): - if agent_id == -1: - continue - - initial_agent = self._initial_box_detections[agent_id] - if initial_agent.metadata.detection_type != DetectionType.VEHICLE: - continue - - new_center = StateSE2( - x=pred_traj[agent_idx, self._current_iteration, 0], - y=pred_traj[agent_idx, self._current_iteration, 1], - yaw=pred_head[agent_idx, self._current_iteration], - 
) - propagated_bounding_box = BoundingBoxSE2( - new_center, - initial_agent.bounding_box_se2.length, - initial_agent.bounding_box_se2.width, - ) - new_velocity = initial_agent.velocity - propagated_agent: BoxDetectionSE2 = BoxDetectionSE2( - metadata=initial_agent.metadata, - bounding_box_se2=propagated_bounding_box, - velocity=new_velocity, - ) - current_target_agents.append(propagated_agent) - - self._current_iteration += 1 - return current_target_agents diff --git a/d123/simulation/callback/abstract_callback.py b/d123/simulation/callback/abstract_callback.py deleted file mode 100644 index bd22c678..00000000 --- a/d123/simulation/callback/abstract_callback.py +++ /dev/null @@ -1,79 +0,0 @@ -from abc import ABC, abstractmethod - -from d123.simulation.history.simulation_history import Simulation2DHistory, Simulation2DHistorySample -from d123.simulation.planning.abstract_planner import AbstractPlanner -from d123.simulation.planning.planner_output.abstract_planner_output import AbstractPlannerOutput -from d123.simulation.simulation_2d_setup import Simulation2DSetup - - -class AbstractCallback(ABC): - """ - Base class for simulation callbacks. - """ - - @abstractmethod - def on_initialization_start(self, setup: Simulation2DSetup, planner: AbstractPlanner) -> None: - """ - Called when initialization of simulation starts. - :param setup: simulation setup - :param planner: planner before initialization - """ - - @abstractmethod - def on_initialization_end(self, setup: Simulation2DSetup, planner: AbstractPlanner) -> None: - """ - Called when initialization of simulation ends. - :param setup: simulation setup - :param planner: planner after initialization - """ - - @abstractmethod - def on_step_start(self, setup: Simulation2DSetup, planner: AbstractPlanner) -> None: - """ - Called when simulation step starts. - :param setup: simulation setup - :param planner: planner at start of a step - """ - - @abstractmethod - def on_step_end( - self, setup: Simulation2DSetup, planner: AbstractPlanner, sample: Simulation2DHistorySample - ) -> None: - """ - Called when simulation step ends. - :param setup: simulation setup - :param planner: planner at end of a step - :param sample: result of a step - """ - - @abstractmethod - def on_planner_start(self, setup: Simulation2DSetup, planner: AbstractPlanner) -> None: - """ - Called when planner starts to compute trajectory. - :param setup: simulation setup - :param planner: planner before planner.compute_trajectory() is called - """ - - @abstractmethod - def on_planner_end( - self, setup: Simulation2DSetup, planner: AbstractPlanner, planner_output: AbstractPlannerOutput - ) -> None: - pass - - @abstractmethod - def on_simulation_start(self, setup: Simulation2DSetup) -> None: - """ - Called when simulation starts. - :param setup: simulation setup - """ - - @abstractmethod - def on_simulation_end( - self, setup: Simulation2DSetup, planner: AbstractPlanner, history: Simulation2DHistory - ) -> None: - """ - Called when simulation ends. 
- :param setup: simulation setup - :param planner: planner when simulation ends - :param history: resulting from simulation - """ diff --git a/d123/simulation/callback/multi_callback.py b/d123/simulation/callback/multi_callback.py deleted file mode 100644 index f1dd1d58..00000000 --- a/d123/simulation/callback/multi_callback.py +++ /dev/null @@ -1,74 +0,0 @@ -from typing import List - -from d123.simulation.callback.abstract_callback import AbstractCallback -from d123.simulation.history.simulation_history import Simulation2DHistory, Simulation2DHistorySample -from d123.simulation.planning.abstract_planner import AbstractPlanner -from d123.simulation.planning.planner_output.abstract_planner_output import AbstractPlannerOutput -from d123.simulation.simulation_2d_setup import Simulation2DSetup - - -class MultiCallback(AbstractCallback): - """ - This class simply calls many callbacks for simplified code. - """ - - def __init__(self, callbacks: List[AbstractCallback]): - """ - Initialize with multiple callbacks. - :param callbacks: all callbacks that will be called sequentially. - """ - self._callbacks = callbacks - - @property - def callbacks(self) -> List[AbstractCallback]: - """ - Property to access callbacks. - :return: list of callbacks this MultiCallback runs. - """ - return self._callbacks - - def on_initialization_start(self, setup: Simulation2DSetup, planner: AbstractPlanner) -> None: - """Inherited, see superclass.""" - for callback in self._callbacks: - callback.on_initialization_start(setup, planner) - - def on_initialization_end(self, setup: Simulation2DSetup, planner: AbstractPlanner) -> None: - """Inherited, see superclass.""" - for callback in self._callbacks: - callback.on_initialization_end(setup, planner) - - def on_step_start(self, setup: Simulation2DSetup, planner: AbstractPlanner) -> None: - """Inherited, see superclass.""" - for callback in self._callbacks: - callback.on_step_start(setup, planner) - - def on_step_end( - self, setup: Simulation2DSetup, planner: AbstractPlanner, sample: Simulation2DHistorySample - ) -> None: - """Inherited, see superclass.""" - for callback in self._callbacks: - callback.on_step_end(setup, planner, sample) - - def on_planner_start(self, setup: Simulation2DSetup, planner: AbstractPlanner) -> None: - """Inherited, see superclass.""" - for callback in self._callbacks: - callback.on_planner_start(setup, planner) - - def on_planner_end( - self, setup: Simulation2DSetup, planner: AbstractPlanner, planner_output: AbstractPlannerOutput - ) -> None: - """Inherited, see superclass.""" - for callback in self._callbacks: - callback.on_planner_end(setup, planner, planner_output) - - def on_simulation_start(self, setup: Simulation2DSetup) -> None: - """Inherited, see superclass.""" - for callback in self._callbacks: - callback.on_simulation_start(setup) - - def on_simulation_end( - self, setup: Simulation2DSetup, planner: AbstractPlanner, history: Simulation2DHistory - ) -> None: - """Inherited, see superclass.""" - for callback in self._callbacks: - callback.on_simulation_end(setup, planner, history) diff --git a/d123/simulation/controller/abstract_controller.py b/d123/simulation/controller/abstract_controller.py deleted file mode 100644 index fc7f4a4f..00000000 --- a/d123/simulation/controller/abstract_controller.py +++ /dev/null @@ -1,42 +0,0 @@ -import abc - -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2 -from d123.dataset.scene.abstract_scene import AbstractScene -from 
d123.simulation.planning.planner_output.abstract_planner_output import AbstractPlannerOutput -from d123.simulation.time_controller.simulation_iteration import SimulationIteration - - -class AbstractEgoController(abc.ABC): - """ - Interface for generic ego controllers. - """ - - @abc.abstractmethod - def get_state(self) -> EgoStateSE2: - """ - Returns the current ego state. - :return: The current ego state. - """ - - @abc.abstractmethod - def reset(self, scene: AbstractScene) -> EgoStateSE2: - """ - Reset the controller (all internal states should be reset, if any). - :return: The initial ego state. - """ - - @abc.abstractmethod - def step( - self, - current_iteration: SimulationIteration, - next_iteration: SimulationIteration, - ego_state: EgoStateSE2, - planner_output: AbstractPlannerOutput, - ) -> EgoStateSE2: - """ - Update the ego state based on the planner output and the current state. - :param current_iteration: The current simulation iteration. - :param next_iteration: The next simulation iteration after propagation. - :param ego_state: The current ego state. - :param planner_output: The output of a planner, e.g. action or trajectory. - :return: The updated ego state. - """ diff --git a/d123/simulation/controller/action_controller.py b/d123/simulation/controller/action_controller.py deleted file mode 100644 index 65199590..00000000 --- a/d123/simulation/controller/action_controller.py +++ /dev/null @@ -1,55 +0,0 @@ -from typing import Optional - -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2 -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.simulation.controller.abstract_controller import AbstractEgoController -from d123.simulation.controller.motion_model.abstract_motion_model import AbstractMotionModel -from d123.simulation.planning.planner_output.abstract_planner_output import AbstractPlannerOutput -from d123.simulation.planning.planner_output.action_planner_output import ActionPlannerOutput -from d123.simulation.time_controller.simulation_iteration import SimulationIteration - - -class ActionController(AbstractEgoController): - - def __init__(self, motion_model: AbstractMotionModel): - - self._motion_model = motion_model - - # lazy loaded - self._scene: Optional[AbstractScene] = None - self._current_state: Optional[EgoStateSE2] = None - - def get_state(self) -> EgoStateSE2: - """Inherited, see superclass.""" - if self._current_state is None: - self._current_state = self._scene.get_ego_state_at_iteration(0).ego_state_se2 - return self._current_state - - def reset(self, scene: AbstractScene) -> EgoStateSE2: - """Inherited, see superclass.""" - self._current_state = None - self._scene = scene - return self.get_state() - - def step( - self, - current_iteration: SimulationIteration, - next_iteration: SimulationIteration, - ego_state: EgoStateSE2, - planner_output: AbstractPlannerOutput, - ) -> EgoStateSE2: - """Inherited, see superclass.""" - - assert isinstance(planner_output, ActionPlannerOutput) - action: ActionPlannerOutput = planner_output - - # Compute the dynamic state to propagate the model - dynamic_state = action.dynamic_state_se2 - - # Propagate ego state using the motion model - self._current_state = self._motion_model.step( - ego_state=ego_state, - ideal_dynamic_state=dynamic_state, - next_timepoint=next_iteration.time_point, - ) - return self._current_state diff --git a/d123/simulation/controller/motion_model/abstract_motion_model.py b/d123/simulation/controller/motion_model/abstract_motion_model.py deleted file mode 100644 index 22862c5a..00000000 --- 
a/d123/simulation/controller/motion_model/abstract_motion_model.py +++ /dev/null @@ -1,25 +0,0 @@ -import abc - -from d123.common.datatypes.time.time_point import TimePoint -from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE2, EgoStateSE2 - - -class AbstractMotionModel(abc.ABC): - """ - Interface for generic ego motion model. - """ - - @abc.abstractmethod - def step( - self, - ego_state: EgoStateSE2, - ideal_dynamic_state: DynamicStateSE2, - next_timepoint: TimePoint, - ) -> EgoStateSE2: - """ - Propagate the ego state using the ideal dynamic state and next timepoint. - :param ego_state: The current ego state. - :param ideal_dynamic_state: The ideal dynamic state to propagate. - :param next_timepoint: The next timepoint for propagation. - :return: The updated ego state after propagation. - """ diff --git a/d123/simulation/controller/motion_model/kinematic_bicycle_model.py b/d123/simulation/controller/motion_model/kinematic_bicycle_model.py deleted file mode 100644 index 8a4d9802..00000000 --- a/d123/simulation/controller/motion_model/kinematic_bicycle_model.py +++ /dev/null @@ -1,166 +0,0 @@ -import numpy as np -from nuplan.common.geometry.compute import principal_value - -from d123.common.datatypes.time.time_point import TimeDuration, TimePoint -from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE2, EgoStateSE2 -from d123.common.geometry.base import StateSE2 -from d123.common.geometry.vector import Vector2D -from d123.simulation.controller.motion_model.abstract_motion_model import AbstractMotionModel - - -def forward_integrate(init: float, delta: float, sampling_duration: TimeDuration) -> float: - return float(init + delta * sampling_duration.time_s) - - -class KinematicBicycleModel(AbstractMotionModel): - - def __init__( - self, - max_steering_angle: float = np.pi / 3, - acceleration_time_constant: float = 0.2, - steering_angle_time_constant: float = 0.05, - acceleration_low_pass_filter: bool = True, - steering_angle_low_pass_filter: bool = True, - ): - self._max_steering_angle = max_steering_angle - self._acceleration_time_constant = acceleration_time_constant - self._steering_angle_time_constant = steering_angle_time_constant - self._acceleration_low_pass_filter = acceleration_low_pass_filter - self._steering_angle_low_pass_filter = steering_angle_low_pass_filter - - def get_state_dot(self, state: EgoStateSE2) -> EgoStateSE2: - - long_speed = state.dynamic_state_se2.velocity.x - wheel_base = state.vehicle_parameters.wheel_base - x_dot = long_speed * np.cos(state.rear_axle.yaw) - y_dot = long_speed * np.sin(state.rear_axle.yaw) - yaw_dot = long_speed * np.tan(state.tire_steering_angle) / wheel_base - - return EgoStateSE2.from_rear_axle( - rear_axle_se2=StateSE2(x=x_dot, y=y_dot, yaw=yaw_dot), - dynamic_state_se2=DynamicStateSE2( - velocity=state.dynamic_state_se2.acceleration, - acceleration=Vector2D(0.0, 0.0), - angular_velocity=0.0, - tire_steering_rate=0.0, - ), - vehicle_parameters=state.vehicle_parameters, - time_point=state.timepoint, - tire_steering_angle=state.dynamic_state_se2.tire_steering_rate, - ) - - def _update_commands( - self, - ego_state: EgoStateSE2, - ideal_dynamic_state: DynamicStateSE2, - step_duration: TimeDuration, - ) -> EgoStateSE2: - - dt_control = step_duration.time_s - long_acceleration = ego_state.dynamic_state_se2.acceleration.x - tire_steering_angle = ego_state.tire_steering_angle - - ideal_long_acceleration = ideal_dynamic_state.acceleration.x - ideal_steering_angle = dt_control * 
ideal_dynamic_state.tire_steering_rate + tire_steering_angle - - if self._acceleration_low_pass_filter: - updated_long_acceleration = ( - dt_control - / (dt_control + self._acceleration_time_constant) - * (ideal_long_acceleration - long_acceleration) - + long_acceleration - ) - else: - updated_long_acceleration = ideal_long_acceleration - - if self._steering_angle_low_pass_filter: - updated_steering_angle = ( - dt_control - / (dt_control + self._steering_angle_time_constant) - * (ideal_steering_angle - tire_steering_angle) - + tire_steering_angle - ) - else: - updated_steering_angle = ideal_steering_angle - - updated_steering_rate = (updated_steering_angle - tire_steering_angle) / dt_control - dynamic_state = DynamicStateSE2( - velocity=ego_state.dynamic_state_se2.velocity, - acceleration=Vector2D(updated_long_acceleration, 0.0), - angular_velocity=0.0, - tire_steering_rate=updated_steering_rate, - angular_acceleration=0.0, - ) - propagating_state = EgoStateSE2( - center_se2=ego_state.center_se2, - dynamic_state_se2=dynamic_state, - vehicle_parameters=ego_state.vehicle_parameters, - timepoint=ego_state.timepoint, - tire_steering_angle=ego_state.tire_steering_angle, - ) - return propagating_state - - def step( - self, - ego_state: EgoStateSE2, - ideal_dynamic_state: DynamicStateSE2, - next_timepoint: TimePoint, - ) -> EgoStateSE2: - - vehicle_parameters = ego_state.vehicle_parameters - - # step_duration = ego_state.timepoint.diff(sampling_time) - step_duration = next_timepoint.diff(ego_state.timepoint) - propagating_state = self._update_commands(ego_state, ideal_dynamic_state, step_duration) - - # Compute state derivatives - state_dot = self.get_state_dot(propagating_state) - - # Integrate position and heading - next_x = forward_integrate(propagating_state.rear_axle.x, state_dot.rear_axle.x, step_duration) - next_y = forward_integrate(propagating_state.rear_axle.y, state_dot.rear_axle.y, step_duration) - next_yaw = forward_integrate(propagating_state.rear_axle.yaw, state_dot.rear_axle.yaw, step_duration) - # Wrap angle between [-pi, pi] - next_yaw = principal_value(next_yaw) - - # Compute rear axle velocity in car frame - next_point_velocity_x = forward_integrate( - propagating_state.dynamic_state_se2.velocity.x, - state_dot.dynamic_state_se2.velocity.x, - step_duration, - ) - next_point_velocity_y = 0.0 # Lateral velocity is always zero in kinematic bicycle model - - # Integrate steering angle and clip to bounds - next_point_tire_steering_angle = np.clip( - forward_integrate(propagating_state.tire_steering_angle, state_dot.tire_steering_angle, step_duration), - -self._max_steering_angle, - self._max_steering_angle, - ) - - # Compute angular velocity - next_point_angular_velocity = ( - next_point_velocity_x * np.tan(next_point_tire_steering_angle) / vehicle_parameters.wheel_base - ) - - rear_axle_accel = [ - state_dot.dynamic_state_se2.velocity.x, - state_dot.dynamic_state_se2.velocity.y, - ] - angular_accel = ( - next_point_angular_velocity - ego_state.dynamic_state_se2.angular_velocity - ) / step_duration.time_s - - return EgoStateSE2.from_rear_axle( - rear_axle_se2=StateSE2(next_x, next_y, next_yaw), - dynamic_state_se2=DynamicStateSE2( - velocity=Vector2D(next_point_velocity_x, next_point_velocity_y), - acceleration=Vector2D(rear_axle_accel[0], rear_axle_accel[1]), - tire_steering_rate=state_dot.tire_steering_angle, - angular_velocity=next_point_angular_velocity, - angular_acceleration=angular_accel, - ), - vehicle_parameters=vehicle_parameters, - time_point=next_timepoint, - 
tire_steering_angle=float(next_point_tire_steering_angle), - ) diff --git a/d123/simulation/gym/demo_gym_env.py b/d123/simulation/gym/demo_gym_env.py deleted file mode 100644 index 54051c31..00000000 --- a/d123/simulation/gym/demo_gym_env.py +++ /dev/null @@ -1,117 +0,0 @@ -from typing import List, Optional, Tuple - -import numpy as np -import numpy.typing as npt -from nuplan.common.actor_state.dynamic_car_state import DynamicCarState -from nuplan.common.actor_state.ego_state import EgoState -from nuplan.common.actor_state.state_representation import StateSE2, StateVector2D, TimePoint -from nuplan.common.actor_state.vehicle_parameters import get_pacifica_parameters -from nuplan.planning.simulation.controller.motion_model.kinematic_bicycle import KinematicBicycleModel - -from d123.common.datatypes.recording.detection_recording import DetectionRecording -from d123.dataset.arrow.conversion import EgoStateSE3 -from d123.dataset.maps.abstract_map import AbstractMap -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.simulation.observation.abstract_observation import AbstractObservation -from d123.simulation.observation.agents_observation import AgentsObservation - -# from d123.simulation.observation.log_replay_observation import LogReplayObservation - - -class DemoGymEnv: - """ - A simple demo environment for testing purposes. - It replays the ego vehicle (or propagates it with a kinematic bicycle model) and steps an agent observation. - """ - - def __init__(self, scenes: List[AbstractScene]) -> None: - - self._scenes = scenes - self._current_iteration = 0 - self._current_scene: Optional[AbstractScene] = None - self._current_ego_vehicle_state: Optional[EgoState] = None - - self._observation: AbstractObservation = AgentsObservation(None) - # self._observation: AbstractObservation = LogReplayObservation() - self._observation.initialize() - - self._ego_replay: bool = True - - def reset(self, scene: Optional[AbstractScene]) -> Tuple[AbstractMap, EgoState, DetectionRecording, AbstractScene]: - """ - Reset the environment to the initial state. - Returns a tuple containing the map API, ego vehicle state, detection observation, and current scene. 
- """ - if scene is not None: - self._current_scene = scene - else: - self._current_scene = np.random.choice(self._scenes) - - self._current_scene_index = 0 - self._current_ego_vehicle_state = to_nuplan_ego_vehicle_state( - self._current_scene.get_ego_vehicle_state_at_iteration(self._current_scene_index) - ) - # detection_observation = DetectionRecording( - # box_detections=self._current_scene.get_box_detections_at_iteration(self._current_scene_index), - # traffic_light_detections=self._current_scene.get_traffic_light_detections_at_iteration( - # self._current_scene_index - # ), - # ) - detection_observation = self._observation.reset(self._current_scene) - - return self._current_scene.map_api, self._current_ego_vehicle_state, detection_observation, self._current_scene - - def step(self, action: npt.NDArray[np.float64]) -> Tuple[EgoState, DetectionRecording, bool]: - self._current_scene_index += 1 - if self._ego_replay: - ego_vehicle_state = self._current_scene.get_ego_vehicle_state_at_iteration(self._current_scene_index) - self._current_ego_vehicle_state = to_nuplan_ego_vehicle_state(ego_vehicle_state) - else: - dynamic_car_state = get_dynamic_car_state(ego_state=self._current_ego_vehicle_state, action=action) - self._current_ego_vehicle_state = KinematicBicycleModel(get_pacifica_parameters()).propagate_state( - self._current_ego_vehicle_state, dynamic_car_state, TimePoint(int(0.1 * int(1e6))) - ) - - detection_observation = self._observation.step() - is_done = self._current_scene_index == self._current_scene.get_number_of_iterations() - 1 - - return self._current_ego_vehicle_state, detection_observation, is_done - - -def to_nuplan_ego_vehicle_state(ego_vehicle_state: EgoStateSE3) -> EgoState: - """ - Convert a custom EgoVehicleState to a NuPlan EgoVehicleState. - This is a placeholder function and should be implemented based on the actual structure of EgoVehicleState. - """ - - # Assuming EgoVehicleState has attributes like position, velocity, heading, etc. 
- return EgoState.build_from_rear_axle( - rear_axle_pose=StateSE2( - ego_vehicle_state.bounding_box.center.x, - ego_vehicle_state.bounding_box.center.y, - ego_vehicle_state.bounding_box.center.yaw, - ), - rear_axle_velocity_2d=StateVector2D( - ego_vehicle_state.dynamic_state.velocity.x, ego_vehicle_state.dynamic_state.velocity.y - ), - rear_axle_acceleration_2d=StateVector2D( - ego_vehicle_state.dynamic_state.acceleration.x, ego_vehicle_state.dynamic_state.acceleration.y - ), - tire_steering_angle=0.0, - time_point=TimePoint(0), - vehicle_parameters=get_pacifica_parameters(), - is_in_auto_mode=True, - angular_vel=ego_vehicle_state.dynamic_state.angular_velocity.z, - angular_accel=0.0, - tire_steering_rate=0.0, - ) - - -def get_dynamic_car_state(ego_state: EgoState, action: npt.NDArray[np.float64]) -> DynamicCarState: - acceleration, steering_rate = action[0], action[1] - return DynamicCarState.build_from_rear_axle( - rear_axle_to_center_dist=ego_state.car_footprint.rear_axle_to_center_dist, - rear_axle_velocity_2d=ego_state.dynamic_car_state.rear_axle_velocity_2d, - rear_axle_acceleration_2d=StateVector2D(acceleration, 0), - tire_steering_rate=steering_rate, - ) diff --git a/d123/simulation/gym/environment/environment_wrapper.py b/d123/simulation/gym/environment/environment_wrapper.py deleted file mode 100644 index fe3cd5b9..00000000 --- a/d123/simulation/gym/environment/environment_wrapper.py +++ /dev/null @@ -1,191 +0,0 @@ -import logging -import sys -import traceback -from threading import Timer -from typing import Any, Dict, Optional - -import gymnasium as gym -import numpy as np -import numpy.typing as npt - -from d123.simulation.gym.environment.gym_observation.abstract_gym_observation import ( - AbstractGymObservation, -) -from d123.simulation.gym.environment.output_converter.abstract_output_converter import ( - AbstractOutputConverter, -) -from d123.simulation.gym.environment.reward_builder.abstract_reward_builder import AbstractRewardBuilder -from d123.simulation.gym.environment.scenario_sampler.abstract_scenario_sampler import AbstractScenarioSampler -from d123.simulation.gym.environment.simulation_builder.abstract_simulation_builder import ( - AbstractSimulationBuilder, -) -from d123.simulation.gym.environment.simulation_wrapper import SimulationWrapper - -logger = logging.getLogger(__name__) - - -class EnvironmentWrapper(gym.Env): - """ - Gymnasium environment class interface. Wraps the simulation, trajectory builder, observation builder, and reward builder. - """ - - metadata = {"render_modes": ["rgb_array"]} # TODO: Figure out the purpose of this metadata. - - def __init__( - self, - scenario_sampler: AbstractScenarioSampler, - simulation_builder: AbstractSimulationBuilder, - output_converter: AbstractOutputConverter, - observation_builder: AbstractGymObservation, - reward_builder: AbstractRewardBuilder, - terminate_on_failure: bool = False, - ): - """ - Initializes the EnvironmentWrapper. - :param scenario_sampler: Scenario sampler to sample scenarios for the environment. - :param simulation_builder: Simulation builder to create the simulation from the sampled scenario. - :param trajectory_builder: Trajectory builder to create trajectories based on actions. - :param observation_builder: Observation builder to create observations from the simulation state. - :param reward_builder: Reward builder to create rewards based on the simulation state and actions. 
- :param terminate_on_failure: Whether to terminate during an error of the simulation, defaults to False - """ - - self._scenario_sampler = scenario_sampler - self._simulation_builder = simulation_builder - self._trajectory_builder = output_converter - self._observation_builder = observation_builder - self._reward_builder = reward_builder - - # lazy loaded - self._simulation_wrapper: Optional[SimulationWrapper] = None - - # timer - # TODO: Consider removing the timers. - self._reset_timer = Timer(end_key="reset_total") - self._step_timer = Timer(end_key="step_total") - - self._terminate_on_error = terminate_on_failure - - # Set for super class - self.observation_space = observation_builder.get_observation_space() - self.action_space = output_converter.get_action_space() - - def reset(self, seed: Optional[int] = None, options: Optional[dict] = None): - """Inherited, see superclass.""" - super().reset(seed=seed) - info: Dict[str, Any] = {} - - try: - - self._reward_builder.reset() - self._observation_builder.reset() - - self._reset_timer.flush() - self._reset_timer.start() - - scenario = self._scenario_sampler.sample(seed) - self._reset_timer.log("reset_1_sample_scenario") - - simulation = self._simulation_builder.build_simulation(scenario) - self._reset_timer.log("reset_2_build_simulation") - - self._simulation_wrapper = SimulationWrapper(simulation) - self._reset_timer.log("reset_3_wrap_simulation") - - ( - planner_input, - planner_initialization, - ) = self._simulation_wrapper.initialize() - self._reset_timer.log("reset_4_init_wrapper") - - observation = self._observation_builder.get_gym_observation(planner_input, planner_initialization, info) - self._reset_timer.log("reset_5_build_observation") - self._reset_timer.end() - - info["timing"] = self._reset_timer.info() - - except Exception: - logger.warning(f"{type(self).__name__} failed during .reset() with the following exception:") - traceback.print_exc() - - if self._terminate_on_error: - sys.exit(1) - else: - observation = create_zero_like_observation(self.observation_space) - - return observation, info - - def step(self, action): - """Inherited, see superclass.""" - info: Dict[str, Any] = {} - - try: - assert self._simulation_wrapper is not None - - self._step_timer.flush() - self._step_timer.start() - - trajectory = self._trajectory_builder.build_trajectory( - action, self._simulation_wrapper.current_ego_state, info - ) - self._step_timer.log("step_1_build_trajectory") - - planner_input, is_simulation_running = self._simulation_wrapper.step(trajectory) - self._step_timer.log("step_2_simulation_step") - - reward, termination, truncation = self._reward_builder.build_reward(self._simulation_wrapper, info) - termination = termination or not is_simulation_running - self._step_timer.log("step_3_build_reward") - - observation = self._observation_builder.get_gym_observation( - planner_input, self._simulation_wrapper.planner_initialization, info - ) - - self._step_timer.log("step_4_build_observation") - self._step_timer.end() - - info["timing"] = self._step_timer.info() - - except Exception: - logger.warning(f"{type(self).__name__} failed during .step() with the following exception:") - traceback.print_exc() - - if self._terminate_on_error: - sys.exit(1) # Exit with error code 1 - else: - observation = create_zero_like_observation(self.observation_space) - reward = 0.0 - termination = truncation = True - - return observation, reward, termination, truncation, info - - def close(self): - """Inherited, see superclass.""" - # TODO: Figure out the 
-    def close(self):
-        """Inherited, see superclass."""
-        # TODO: Figure out the purpose of this function. :D
-        logger.info("EnvironmentWrapper close!")
-
-
-def create_zero_like_observation(
-    observation_space: gym.spaces.Space,
-) -> Dict[str, npt.NDArray]:
-    """
-    TODO: Consider moving elsewhere.
-    Creates a zero-like gymnasium observation given the observation space.
-    :param observation_space: Gymnasium observation space.
-    :raises TypeError: Invalid observation space.
-    :return: zero-like gymnasium observation
-    """
-    if isinstance(observation_space, gym.spaces.Discrete):
-        return 0
-    elif isinstance(observation_space, gym.spaces.Box):
-        return np.zeros(observation_space.shape, dtype=observation_space.dtype)
-    elif isinstance(observation_space, gym.spaces.MultiBinary):
-        return np.zeros(observation_space.shape, dtype=np.int8)
-    elif isinstance(observation_space, gym.spaces.MultiDiscrete):
-        return np.zeros(observation_space.shape, dtype=np.int64)
-    elif isinstance(observation_space, gym.spaces.Dict):
-        return {key: create_zero_like_observation(subspace) for key, subspace in observation_space.spaces.items()}
-    elif isinstance(observation_space, gym.spaces.Tuple):
-        return tuple(create_zero_like_observation(subspace) for subspace in observation_space.spaces)
-    else:
-        raise TypeError(f"Unsupported space type: {type(observation_space)}")
diff --git a/d123/simulation/gym/environment/gym_observation/abstract_gym_observation.py b/d123/simulation/gym/environment/gym_observation/abstract_gym_observation.py
deleted file mode 100644
index 819e5052..00000000
--- a/d123/simulation/gym/environment/gym_observation/abstract_gym_observation.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any, Dict
-
-from gymnasium import spaces
-
-from d123.simulation.planning.abstract_planner import PlannerInitialization, PlannerInput
-
-
-class AbstractGymObservation(ABC):
-    """Abstract class for building observations in a gym environment."""
-
-    @abstractmethod
-    def reset(self) -> None:
-        """Reset the observation builder."""
-
-    @abstractmethod
-    def get_observation_space(self) -> spaces.Space:
-        """
-        Get the observation space of the environment.
-        :return: Observation space as a gymnasium Space.
-        """
-
-    @abstractmethod
-    def get_gym_observation(
-        self,
-        planner_input: PlannerInput,
-        planner_initialization: PlannerInitialization,
-        info: Dict[str, Any],
-    ) -> Dict[str, Any]:
-        """
-        Build an observation from the planner input and initialization.
-        :param planner_input: Planner input as defined in the d123 interface.
-        :param planner_initialization: Planner initialization as defined in the d123 interface.
-        :param info: Arbitrary information dictionary, for passing information between modules.
-        :return: Observation as a named dictionary.
-        """
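
# Editor's sketch (not part of the original diff): what create_zero_like_observation
# produces for a Dict space shaped like the raster observation below, assuming the
# function above is in scope; uses only gymnasium and numpy.
import gymnasium as gym
import numpy as np

space = gym.spaces.Dict(
    {
        "bev_semantics": gym.spaces.Box(0, 255, shape=(9, 256, 256), dtype=np.uint8),
        "measurements": gym.spaces.Box(-np.inf, np.inf, shape=(10,), dtype=np.float32),
    }
)
zero_observation = create_zero_like_observation(space)
assert zero_observation["bev_semantics"].dtype == np.uint8
assert zero_observation["measurements"].shape == (10,)
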
- """ diff --git a/d123/simulation/gym/environment/gym_observation/raster/raster_gym_observation.py b/d123/simulation/gym/environment/gym_observation/raster/raster_gym_observation.py deleted file mode 100644 index 51072e1f..00000000 --- a/d123/simulation/gym/environment/gym_observation/raster/raster_gym_observation.py +++ /dev/null @@ -1,276 +0,0 @@ -from __future__ import annotations - -import math -from enum import IntEnum -from typing import Any, Dict, List, Optional - -import numpy as np -import numpy.typing as npt -from gymnasium import spaces - -from d123.simulation.gym.environment.gym_observation.abstract_gym_observation import AbstractGymObservation -from d123.simulation.gym.environment.gym_observation.raster.raster_renderer import RasterRenderer -from d123.simulation.gym.environment.helper.environment_area import AbstractEnvironmentArea -from d123.simulation.gym.environment.helper.environment_cache import ( - BoxDetectionCache, - MapCache, - build_environment_caches, -) -from d123.simulation.planning.abstract_planner import PlannerInitialization, PlannerInput - - -def del_keys_in_dict(info: Dict[str, Any], keys: List[str]) -> None: - """ - Deletes specified keys from the info dictionary if they exist. - :param info: Dictionary from which keys will be deleted. - :param keys: List of keys to delete from the dictionary. - """ - for key in keys: - if key in info.keys(): - del info[key] - - -class RasterObservationType(IntEnum): - """Enum to represent behavior at different stages in the RasterGymObservation.""" - - INFERENCE = 0 - RESET = 1 - STEP = 2 - - -class RasterGymObservation(AbstractGymObservation): - """Default raster observation builder for the CaRL model.""" - - def __init__( - self, - environment_area: AbstractEnvironmentArea, - renderer: RasterRenderer, - obs_num_measurements: int = 10, - num_value_measurements: int = 4, - action_space_dim: int = 2, - inference: bool = False, - ) -> None: - """ - Initializes the RasterGymObservation. - :param environment_area: Environment area to be used for rendering. - :param renderer: Renderer class, see implementation of DefaultRenderer. 
-        :param obs_num_measurements: Number of observation measurements passed to the policy, defaults to 10
-        :param num_value_measurements: Number of value measurements passed to the value network, defaults to 4
-        :param action_space_dim: Dimension of the action space (steering and acceleration), defaults to 2
-        :param inference: Whether the observation builder is used during inference, defaults to False
-        """
-
-        self._environment_area = environment_area
-        self._renderer = renderer
-
-        self._obs_num_measurements = obs_num_measurements
-        self._num_value_measurements = num_value_measurements
-        self._action_space_dim = action_space_dim
-
-        self._inference = inference
-
-        # lazy loaded during inference
-        # NOTE: route lane groups of the current scenario are stored, as they may require correction during inference
-        self._route_lane_group_ids: Optional[List[str]] = None
-
-    def reset(self) -> None:
-        """Inherited, see superclass."""
-        self._route_lane_group_ids = None
-
-    def get_observation_space(self) -> spaces.Space:
-        """Inherited, see superclass."""
-        return spaces.Dict(
-            {
-                "bev_semantics": spaces.Box(
-                    0,
-                    255,
-                    shape=self._renderer.shape,
-                    dtype=np.uint8,
-                ),
-                "measurements": spaces.Box(
-                    -math.inf,
-                    math.inf,
-                    shape=(self._obs_num_measurements,),
-                    dtype=np.float32,
-                ),
-                "value_measurements": spaces.Box(
-                    -math.inf,
-                    math.inf,
-                    shape=(self._num_value_measurements,),
-                    dtype=np.float32,
-                ),
-            }
-        )
-
-    def get_gym_observation(
-        self,
-        planner_input: PlannerInput,
-        planner_initialization: PlannerInitialization,
-        info: Dict[str, Any],
-    ) -> Dict[str, Any]:
-        """Inherited, see superclass."""
-
-        if self._inference:
-            observation_type = RasterObservationType.INFERENCE
-        elif planner_input.iteration.index == 0:
-            observation_type = RasterObservationType.RESET
-        else:
-            observation_type = RasterObservationType.STEP
-
-        observation = {}
-        observation["bev_semantics"] = self._get_bev_semantics(
-            planner_input,
-            planner_initialization,
-            observation_type,
-            info,
-        )
-        observation["measurements"] = self._get_build_measurements(planner_input, observation_type, info)
-        observation["value_measurements"] = self._get_value_measurements(observation_type, info)
-
-        return observation
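
# Editor's summary (not part of the original diff): with the default renderer settings
# (9 channels, 256 x 256 pixels) the observation dict has the following array shapes,
# mirroring the spaces declared in get_observation_space above.
OBSERVATION_SHAPES = {
    "bev_semantics": (9, 256, 256),  # uint8 BEV raster, see RasterRenderer.shape
    "measurements": (10,),           # float32 last action + ego kinematics
    "value_measurements": (4,),      # float32 inputs for the value network only
}
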
- """ - # FIXME: - # if observation_type == RasterObservationType.INFERENCE: - - if observation_type in [RasterObservationType.INFERENCE, RasterObservationType.RESET]: - map_cache, detection_cache = build_environment_caches( - planner_input, planner_initialization, self._environment_area - ) - elif observation_type == RasterObservationType.STEP: - assert "map_cache" in info.keys() - assert "detection_cache" in info.keys() - assert isinstance(info["map_cache"], MapCache) - assert isinstance(info["detection_cache"], BoxDetectionCache) - map_cache, detection_cache = info["map_cache"], info["detection_cache"] - else: - raise ValueError("RasterObservationType is invalid") - - del_keys_in_dict(info, ["map_cache", "detection_cache"]) - return self._renderer.render(map_cache, detection_cache) - - def _get_build_measurements( - self, - planner_input: PlannerInput, - observation_type: RasterObservationType, - info: Dict[str, Any], - ) -> npt.NDArray[np.float32]: - """ - Helper function to build measurements of the current environment step. - :param planner_input: Planner input interface of d123. - :param observation_type: Enum whether to render for inference, reset or step. - :param info: Arbitrary information dictionary, for passing information between modules. - :return: Ego measurements as a numpy array. - """ - - if "last_action" in info.keys(): - assert observation_type in [ - RasterObservationType.INFERENCE, - RasterObservationType.STEP, - ] - assert len(info["last_action"]) == self._action_space_dim - last_action = info["last_action"] - else: - assert observation_type in [ - RasterObservationType.INFERENCE, - RasterObservationType.RESET, - ] - last_action = np.zeros(self._action_space_dim, dtype=np.float32) - - ego_state = planner_input.history.current_state[0] - last_acceleration, last_steering_rate = last_action[0], last_action[1] - - # FIXME: rear axle to center conversion of kinematic states - # state_array = ego_state_to_center_state_array(ego_state) - # observation_measurements = np.array( - # [ - # last_acceleration, - # last_steering_rate, - # state_array[StateIndex.VELOCITY_X], - # state_array[StateIndex.VELOCITY_Y], - # state_array[StateIndex.ACCELERATION_X], - # state_array[StateIndex.ACCELERATION_Y], - # state_array[StateIndex.STEERING_ANGLE], - # state_array[StateIndex.STEERING_RATE], - # state_array[StateIndex.ANGULAR_VELOCITY], - # state_array[StateIndex.ANGULAR_ACCELERATION], - # ], - # dtype=np.float32, - # ) - observation_measurements = np.array( - [ - last_acceleration, - last_steering_rate, - ego_state.dynamic_state_se2.velocity.x, - ego_state.dynamic_state_se2.velocity.y, - ego_state.dynamic_state_se2.acceleration.x, - ego_state.dynamic_state_se2.acceleration.y, - ego_state.tire_steering_angle, - ego_state.dynamic_state_se2.tire_steering_rate, - ego_state.dynamic_state_se2.angular_velocity, - ego_state.dynamic_state_se2.angular_acceleration, - ], - dtype=np.float32, - ) - del_keys_in_dict(info, ["last_action"]) - return observation_measurements - - def _get_value_measurements( - self, - observation_type: RasterObservationType, - info: Dict[str, Any], - ) -> npt.NDArray[np.float32]: - """ - Helper function to build value measurements of the current environment step. - :param observation_type: Enum whether to render for inference, reset or step. - :param info: Arbitrary information dictionary, for passing information between modules. - :raises ValueError: If the DefaultObservationType is invalid. - :return: Value measurements as a numpy array. 
- """ - - if observation_type in [ - RasterObservationType.INFERENCE, - RasterObservationType.RESET, - ]: - remaining_time = 1.0 - remaining_progress = 1.0 - comfort_score = 1.0 - ttc_score = 1.0 - elif observation_type == RasterObservationType.STEP: - assert "remaining_time" in info.keys() - assert "remaining_progress" in info.keys() - assert "comfort_score" in info.keys() - assert "ttc_score" in info.keys() - remaining_time = info["remaining_time"] - remaining_progress = info["remaining_progress"] - comfort_score = info["comfort_score"] - ttc_score = info["ttc_score"] - else: - raise ValueError("DefaultObservationType is invalid") - - value_measurements = np.array( - [ - remaining_time, - remaining_progress, - comfort_score, - ttc_score, - ], - dtype=np.float32, - ) - del_keys_in_dict(info, ["remaining_time", "remaining_progress", "comfort_score", "ttc_score"]) - return value_measurements diff --git a/d123/simulation/gym/environment/gym_observation/raster/raster_renderer.py b/d123/simulation/gym/environment/gym_observation/raster/raster_renderer.py deleted file mode 100644 index c5a4d57a..00000000 --- a/d123/simulation/gym/environment/gym_observation/raster/raster_renderer.py +++ /dev/null @@ -1,505 +0,0 @@ -from __future__ import annotations - -import itertools -from enum import IntEnum -from functools import cached_property -from typing import Callable, Dict, Final, List, Optional, Tuple - -import cv2 -import numpy as np -import numpy.typing as npt -from shapely import LineString, Polygon, union_all -from shapely.affinity import scale as shapely_scale - -from d123.common.datatypes.detection.detection import BoxDetectionSE2, TrafficLightStatus -from d123.common.geometry.base import StateSE2 -from d123.common.geometry.transform.se2_array import convert_absolute_to_relative_point_2d_array -from d123.common.geometry.transform.tranform_2d import translate_along_yaw -from d123.common.geometry.vector import Vector2D -from d123.simulation.gym.environment.helper.environment_area import AbstractEnvironmentArea, RectangleEnvironmentArea -from d123.simulation.gym.environment.helper.environment_cache import BoxDetectionCache, MapCache - -# TODO: add to config -MIN_VALUE: Final[int] = 0 # Lowest value for a pixel in the raster -MAX_VALUE: Final[int] = 255 # Highest value for a pixel in the raster -LINE_THICKNESS: int = 1 # Width of the lines in pixels -TRAFFIC_LIGHT_VALUE: Dict[TrafficLightStatus, int] = { - TrafficLightStatus.GREEN: 80, - TrafficLightStatus.YELLOW: 170, - TrafficLightStatus.RED: 255, -} -UNIONIZE: Final[bool] = False # Whether to unionize polygons before rendering - - -class PolygonRenderType(IntEnum): - CONVEX_SINGLE = 0 - NON_CONVEX_SINGLE = 1 - NON_CONVEX_BATCH = 2 - - -RenderFunc = Callable[[npt.NDArray[np.uint8], List[npt.NDArray[np.int32]], int], None] - - -def _render_polygon_convex_single( - raster: npt.NDArray[np.uint8], pixel_exteriors: List[npt.NDArray[np.int32]], color: int -) -> None: - """ - Renders a list of convex polygons on the raster. - :param raster: uint8 numpy array representing the raster to render on. - :param pixel_exteriors: List of pixel exteriors of the polygons to render. - :param color: Color to render the polygons in, as an integer value. - """ - for pixel_exterior in pixel_exteriors: - cv2.fillConvexPoly(raster, pixel_exterior, color=color) - - -def _render_polygon_non_convex_single( - raster: npt.NDArray[np.uint8], pixel_exteriors: List[npt.NDArray[np.int32]], color: int -) -> None: - """ - Renders a list of non-convex polygons on the raster. 
-def _render_polygon_non_convex_single(
-    raster: npt.NDArray[np.uint8], pixel_exteriors: List[npt.NDArray[np.int32]], color: int
-) -> None:
-    """
-    Renders a list of non-convex polygons on the raster.
-    :param raster: uint8 numpy array representing the raster to render on.
-    :param pixel_exteriors: List of pixel exteriors of the polygons to render.
-    :param color: Color to render the polygons in, as an integer value.
-    """
-    for pixel_exterior in pixel_exteriors:
-        cv2.fillPoly(raster, [pixel_exterior], color=color)
-
-
-def _render_polygon_non_convex_batch(
-    raster: npt.NDArray[np.uint8], pixel_exteriors: List[npt.NDArray[np.int32]], color: int
-) -> None:
-    """
-    Renders a list of non-convex polygons on the raster batch-wise.
-    :param raster: uint8 numpy array representing the raster to render on.
-    :param pixel_exteriors: List of pixel exteriors of the polygons to render.
-    :param color: Color to render the polygons in, as an integer value.
-    """
-    cv2.fillPoly(raster, pixel_exteriors, color=color)
-
-
-POLYGON_RENDER_FUNCTIONS: Dict[PolygonRenderType, RenderFunc] = {
-    PolygonRenderType.CONVEX_SINGLE: _render_polygon_convex_single,
-    PolygonRenderType.NON_CONVEX_SINGLE: _render_polygon_non_convex_single,
-    PolygonRenderType.NON_CONVEX_BATCH: _render_polygon_non_convex_batch,
-}
-
-
-def unionize_polygons(polygons: List[Polygon], grid_size: Optional[float] = None) -> List[Polygon]:
-    """
-    Unionizes a list of polygons into a single polygon or multiple polygons if they are disjoint.
-    :param polygons: List of polygons to unionize.
-    :param grid_size: Precision grid size for union call, defaults to None
-    :return: List of polygon(s) after unionization.
-    """
-    unionized_polygons: List[Polygon] = []
-    if len(polygons) == 1:
-        unionized_polygons.append(polygons[0])
-    elif len(polygons) > 1:
-        union_polygon = union_all(polygons, grid_size=grid_size)
-        if union_polygon.geom_type == "Polygon":
-            unionized_polygons.append(union_polygon)
-        elif union_polygon.geom_type == "MultiPolygon":
-            for polygon in union_polygon.geoms:
-                unionized_polygons.append(polygon)
-    return unionized_polygons
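
# Editor's sketch (not part of the original diff): two overlapping squares merge into
# one polygon, while a disjoint one survives as a separate entry; uses only shapely.
from shapely import Polygon, union_all

a = Polygon([(0, 0), (2, 0), (2, 2), (0, 2)])
b = Polygon([(1, 1), (3, 1), (3, 3), (1, 3)])        # overlaps a
c = Polygon([(10, 10), (11, 10), (11, 11), (10, 11)])  # disjoint

merged = union_all([a, b, c])
assert merged.geom_type == "MultiPolygon" and len(merged.geoms) == 2
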
-class RasterRenderer:
-    """Renderer class for observation used in CaRL."""
-
-    def __init__(
-        self,
-        environment_area: AbstractEnvironmentArea,
-        pixel_per_meter: float = 2.0,
-        max_vehicle_speed: float = 30.0,
-        max_pedestrian_speed: float = 4.0,
-        vehicle_scaling: float = 1.0,
-        pedestrian_scaling: float = 1.0,
-        static_scaling: float = 1.0,
-        include_speed_line: bool = False,
-    ) -> None:
-        """
-        Initializes the RasterRenderer object.
-        :param environment_area: Area to render the observation in (must be rectangular).
-        :param pixel_per_meter: Number of pixels that represent one meter in the raster, defaults to 2.0
-        :param max_vehicle_speed: Max vehicle speed after clipping for rendering the color, defaults to 30.0
-        :param max_pedestrian_speed: Max pedestrian speed after clipping for rendering the color, defaults to 4.0
-        :param vehicle_scaling: Factor to scale size of vehicle bounding boxes, defaults to 1.0
-        :param pedestrian_scaling: Factor to scale size of pedestrian bounding boxes, defaults to 1.0
-        :param static_scaling: Factor to scale size of static object bounding boxes, defaults to 1.0
-        :param include_speed_line: Whether to include the constant velocity speed line in the raster, defaults to False
-        """
-
-        assert isinstance(
-            environment_area, RectangleEnvironmentArea
-        ), "RasterRenderer requires a rectangular environment area!"
-
-        self._environment_area = environment_area
-        self._pixel_per_meter = pixel_per_meter  # [ppm]
-
-        self._max_vehicle_speed = max_vehicle_speed  # [m/s]
-        self._max_pedestrian_speed = max_pedestrian_speed  # [m/s]
-
-        self._vehicle_scaling = vehicle_scaling
-        self._pedestrian_scaling = pedestrian_scaling
-        self._static_scaling = static_scaling
-
-        self._include_speed_line = include_speed_line
-
-        # maybe remove:
-        self._polygon_render_type = PolygonRenderType.NON_CONVEX_SINGLE
-
-    @cached_property
-    def pixel_frame(self) -> Tuple[int, int]:
-        """
-        :return: Width and height of the pixel frame in pixels.
-        """
-        width, height = self._environment_area.frame
-        return int(width * self._pixel_per_meter), int(height * self._pixel_per_meter)
-
-    @property
-    def shape(self) -> Tuple[int, int, int]:
-        """
-        :return: Shape of the raster (channels, width, height).
-        """
-        width, height = self.pixel_frame
-        return (9, width, height)
-
-    @property
-    def _meter_per_pixel(self) -> float:
-        """
-        :return: Meters per pixel, i.e., the inverse of pixel_per_meter.
-        """
-        return 1 / self._pixel_per_meter
-
-    def _scale_to_color(self, value: Optional[float], max_value: float) -> int:
-        """
-        Scales a value to a color in the range [0, 255].
-        :param value: Value to scale, if None, max_value is used instead.
-        :param max_value: Maximum value to scale to color.
-        :return: Scaled color value in the range [0, 255].
-        """
-        _value = value
-        if value is None:
-            _value = max_value
-        normed = np.clip(_value / max_value, 0.0, 1.0)
-        normed_color = np.clip(int((MAX_VALUE / 2) * normed + (MAX_VALUE / 2)), MIN_VALUE, MAX_VALUE)
-        return int(normed_color)
-
-    def _scale_polygon(self, polygon: Polygon, factor: float) -> Polygon:
-        """
-        Scales a polygon in size by a factor.
-        :param polygon: Shapely polygon to scale.
-        :param factor: Scaling factor, e.g., 1.0 for no scaling, 0.5 for half size.
-        :return: Scaled polygon.
-        """
-        if factor != 1.0:
-            polygon = shapely_scale(polygon, xfact=factor, yfact=factor, origin="centroid")
-        return polygon
-
-    def _local_coords_to_pixel(self, coords: npt.NDArray[np.float32]) -> npt.NDArray[np.int32]:
-        """
-        Converts local coordinates to pixel coordinates.
-        :param coords: Local coordinates to convert, shape (N, 2).
-        :return: Integer pixel coordinates, shape (N, 2).
-        """
-        pixel_width, pixel_height = self.pixel_frame
-        pixel_center = np.array([[pixel_height, pixel_width]]) / 2.0
-        pixel_coords = (coords * self._pixel_per_meter) + pixel_center
-        return pixel_coords.astype(np.int32)
-
-    def _global_coords_to_pixel(self, origin: StateSE2, coords: npt.NDArray[np.float64]) -> npt.NDArray[np.int32]:
-        """
-        Converts global coordinates to pixel coordinates.
-        :param origin: SE2 of origin, i.e. the center of the raster in global coordinates.
-        :param coords: Global coordinates to convert, shape (N, 2).
-        :return: Integer pixel coordinates, shape (N, 2).
-        """
-        local_coords = convert_absolute_to_relative_point_2d_array(origin, coords)
-        return self._local_coords_to_pixel(local_coords)
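
# Editor's note (illustrative arithmetic, not part of the original diff): with the
# default RectangleEnvironmentArea (78 m front, 50 m back, 64 m left/right) the metric
# frame is 128 m x 128 m; at pixel_per_meter = 2.0 this yields a 256 x 256 pixel
# raster, so the observation tensor is (9, 256, 256) and one pixel covers 0.5 m.
width_m = 64.0 + 64.0   # left + right
height_m = 78.0 + 50.0  # front + back
pixels = (int(width_m * 2.0), int(height_m * 2.0))
assert pixels == (256, 256)
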
- """ - exterior = np.array(polygon.exterior.coords).reshape((-1, 1, 2)) - return self._global_coords_to_pixel(origin, exterior) - - def _global_linestring_to_pixel(self, origin: StateSE2, linestring: LineString) -> npt.NDArray[np.int32]: - """ - Converts a global linestring to pixel coordinates. - :param origin: SE2 of origin, i.e. the center of the raster in global coordinates. - :param linestring: Shapely linestring to convert. - :return: Integer pixel coordinates of the linestring, shape (N, 1, 2). - """ - coords = np.array(linestring.coords).reshape((-1, 1, 2)) - return self._global_coords_to_pixel(origin, coords) - - def _render_polygons( - self, - raster: npt.NDArray[np.uint8], - origin: StateSE2, - polygons: List[Polygon], - color: int = MAX_VALUE, - ) -> None: - """ - Renders a list of arbitrary polygons on the raster. - :param raster: uint8 numpy array representing the raster to render on. - :param origin: SE2 of origin, i.e. the center of the raster in global coordinates. - :param polygons: List of shapely polygons to render. - :param color: Integer value of color, defaults to MAX_VALUE - """ - if len(polygons) > 0: - pixel_exteriors: List[npt.NDArray[np.int32]] = [] - if UNIONIZE: - polygons = unionize_polygons(polygons, grid_size=None) - - for polygon in polygons: - pixel_exteriors.append(self._global_polygon_to_pixel(origin, polygon)) - POLYGON_RENDER_FUNCTIONS[PolygonRenderType.NON_CONVEX_SINGLE](raster, pixel_exteriors, color) - - def _render_convex_polygons( - self, - raster: npt.NDArray[np.uint8], - origin: StateSE2, - polygons: List[Polygon], - color: int = MAX_VALUE, - ) -> None: - """ - Renders a list of convex polygons on the raster. - :param raster: uint8 numpy array representing the raster to render on. - :param origin: SE2 of origin, i.e. the center of the raster in global coordinates. - :param polygons: List of shapely polygons to render. - :param color: Integer value of color, defaults to MAX_VALUE - """ - if len(polygons) > 0: - pixel_exteriors: List[npt.NDArray[np.int32]] = [] - for polygon in polygons: - pixel_exteriors.append(self._global_polygon_to_pixel(origin, polygon)) - POLYGON_RENDER_FUNCTIONS[PolygonRenderType.CONVEX_SINGLE](raster, pixel_exteriors, color) - - def _render_linestrings( - self, - raster: npt.NDArray[np.uint8], - origin: StateSE2, - linestrings: List[LineString], - color: int = MAX_VALUE, - ) -> None: - """ - Renders a list of linestrings on the raster. - :param raster: uint8 numpy array representing the raster to render on. - :param origin: SE2 of origin, i.e. the center of the raster in global coordinates. - :param linestrings: List of shapely linestrings to render. - :param color: Integer value of color, defaults to MAX_VALUE - """ - if len(linestrings) > 0: - pixel_linestrings: List[npt.NDArray[np.int32]] = [] - for linestring in linestrings: - pixel_linestrings.append(self._global_linestring_to_pixel(origin, linestring)) - cv2.polylines( - raster, - pixel_linestrings, - isClosed=False, - color=color, - thickness=LINE_THICKNESS, - ) - - def _render_speed_line( - self, raster: npt.NDArray[np.uint8], origin: StateSE2, box_detection: BoxDetectionSE2, color: int - ) -> None: - """ - Renders a speed line for the agent on the raster. - :param raster: uint8 numpy array representing the raster to render on. - :param origin: SE2 of origin, i.e. the center of the raster in global coordinates. - :param agent: Agent object containing the state and velocity. 
-        :param color: Integer value of color
-        """
-        if box_detection.velocity.magnitude() > self._meter_per_pixel:
-            future = translate_along_yaw(
-                pose=box_detection.center,
-                translation=Vector2D(
-                    x=box_detection.bounding_box_se2.half_length + box_detection.velocity.magnitude(),
-                    y=0.0,
-                ),
-            )
-            linestring = LineString(
-                [
-                    [box_detection.center.x, box_detection.center.y],
-                    [future.x, future.y],
-                ]
-            )
-            self._render_linestrings(raster, origin, [linestring], color=color)
-
-    def _get_empty_raster(self) -> npt.NDArray[np.uint8]:
-        """
-        Helper function to create an empty raster with the shape of the pixel frame.
-        :return: Empty raster with the shape of the pixel frame.
-        """
-        pixel_width, pixel_height = self.pixel_frame
-        return np.zeros((pixel_width, pixel_height), dtype=np.uint8)
-
-    def _render_map_from_cache(self, map_cache: MapCache) -> List[npt.NDArray[np.uint8]]:
-        """
-        Renders the map from the map cache into a list of rasters.
-        :param map_cache: MapCache object containing the map data.
-        :return: List of rasters representing the map data.
-        """
-
-        # 1. Drivable Area (Lane group, Intersection, Car-Park), Polygon
-        # 2. Route (Lane group), Polygon
-        # 3. Lane Boundaries (Lane), Polyline
-        # 4. Traffic Light (Lane), Polygon
-        # 5. Stop-Signs (Stop-Signs), Polygon FIXME
-        # 6. Speed-Signs (Lane), Polygon FIXME
-        drivable_area_raster = self._get_empty_raster()
-        route_raster = self._get_empty_raster()
-        lane_boundary_raster = self._get_empty_raster()
-        traffic_light_raster = self._get_empty_raster()
-        stop_sign_raster = self._get_empty_raster()
-        speed_raster = self._get_empty_raster()
-
-        mask = self._get_empty_raster()
-        drivable_area_polygons: List[Polygon] = []
-        stop_sign_polygons: List[Polygon] = []
-        lane_boundary_linestrings: List[LineString] = []
-
-        for lane_group_id, lane_group in map_cache.lane_groups.items():
-            # Lane group: (1) drivable_area_raster, (2) route_raster
-            not_intersection = lane_group.intersection is None
-            on_route = lane_group_id in map_cache.route_lane_group_ids
-
-            if not_intersection or on_route:
-                self._render_polygons(mask, map_cache.origin, [lane_group.shapely_polygon], color=MAX_VALUE)
-
-                if not_intersection:
-                    # Lane group without intersection: (1) drivable_area_raster
-                    drivable_area_raster[mask == MAX_VALUE] = MAX_VALUE
-
-                if on_route:
-                    route_raster[mask == MAX_VALUE] = MAX_VALUE
-                mask.fill(0)
-
-        for lane_id, lane in map_cache.lanes.items():
-            # Lane: (3) lane_boundary_raster, (4) traffic_light_raster, (6) speed_raster
-            # - (3) lane_boundary_raster
-            if lane.lane_group.intersection is None:
-                lane_boundary_linestrings.extend(
-                    [lane.left_boundary.polyline_2d.linestring, lane.right_boundary.polyline_2d.linestring]
-                )
-
-            # (4) traffic_light_raster
-            self._render_linestrings(mask, map_cache.origin, [lane.centerline.polyline_2d.linestring], color=MAX_VALUE)
-            if lane_id in map_cache.traffic_lights.keys():
-                traffic_light_status = map_cache.traffic_lights[lane_id]
-                traffic_light_raster[mask == MAX_VALUE] = TRAFFIC_LIGHT_VALUE[traffic_light_status]
-
-            # (6) speed_raster
-            speed_raster[mask == MAX_VALUE] = self._scale_to_color(lane.speed_limit_mps, self._max_vehicle_speed)
-            mask.fill(0)
-
-        for drivable_area_element in itertools.chain(map_cache.intersections.values(), map_cache.car_parks.values()):
-            # Intersections & Carparks: (1) drivable_area_raster
-            drivable_area_polygons.append(drivable_area_element.shapely_polygon)
-
-        for stop_sign in map_cache.stop_lines.values():
-            # Stop Signs: (5) stop_sign_raster
-            stop_sign_polygons.append(stop_sign.shapely_polygon)
-
-        self._render_polygons(drivable_area_raster, map_cache.origin, drivable_area_polygons, color=MAX_VALUE)
-        self._render_polygons(stop_sign_raster, map_cache.origin, stop_sign_polygons, color=MAX_VALUE)
-        self._render_linestrings(lane_boundary_raster, map_cache.origin, lane_boundary_linestrings, color=MAX_VALUE)
-
-        return [
-            drivable_area_raster,
-            route_raster,
-            lane_boundary_raster,
-            traffic_light_raster,
-            stop_sign_raster,
-            speed_raster,
-        ]
-
-    def _render_detections_from_cache(self, box_detection_cache: BoxDetectionCache) -> List[npt.NDArray[np.uint8]]:
-        """
-        Renders the detections from the detection cache into a list of rasters.
-        :param box_detection_cache: BoxDetectionCache object containing the detection data.
-        :return: List of rasters representing the detection data.
-        """
-
-        mask = self._get_empty_raster()
-
-        # 1. Vehicles (Vehicles, Bicycles), Polygon, LineString
-        # 2. Pedestrians+Static (Pedestrians, Static objects), Polygon
-        vehicles_raster = self._get_empty_raster()
-        pedestrians_raster = self._get_empty_raster()
-        ego_raster = self._get_empty_raster()
-
-        # 1. Vehicles
-        for vehicle in box_detection_cache.vehicles:
-            if self._include_speed_line:
-                self._render_speed_line(mask, box_detection_cache.origin, vehicle, color=MAX_VALUE)
-
-            polygon: Polygon = self._scale_polygon(vehicle.bounding_box_se2.shapely_polygon, self._vehicle_scaling)
-            self._render_convex_polygons(mask, box_detection_cache.origin, [polygon], color=MAX_VALUE)
-            vehicles_raster[mask > 0] = self._scale_to_color(
-                vehicle.velocity.magnitude(),
-                self._max_vehicle_speed,
-            )
-            mask.fill(0)
-
-        # 2. Pedestrians
-        for pedestrian in box_detection_cache.pedestrians:
-            if self._include_speed_line:
-                self._render_speed_line(mask, box_detection_cache.origin, pedestrian, color=MAX_VALUE)
-
-            polygon: Polygon = self._scale_polygon(
-                pedestrian.bounding_box_se2.shapely_polygon, self._pedestrian_scaling
-            )
-            self._render_convex_polygons(mask, box_detection_cache.origin, [polygon], color=MAX_VALUE)
-            pedestrians_raster[mask > 0] = self._scale_to_color(
-                pedestrian.velocity.magnitude(),
-                self._max_pedestrian_speed,
-            )
-            mask.fill(0)
-
-        # 3. Static Objects
-        static_polygons: List[Polygon] = []
-        for static_object in box_detection_cache.static_objects:
-            polygon: Polygon = self._scale_polygon(static_object.bounding_box_se2.shapely_polygon, self._static_scaling)
-            static_polygons.append(polygon)
-        self._render_convex_polygons(
-            pedestrians_raster,
-            box_detection_cache.origin,
-            static_polygons,
-            color=self._scale_to_color(0.0, self._max_vehicle_speed),
-        )
-
-        # 4. Ego Vehicle
-        ego_detection = box_detection_cache.ego_state.box_detection_se2
-        if self._include_speed_line:
-            self._render_speed_line(mask, box_detection_cache.origin, ego_detection, color=MAX_VALUE)
-
-        ego_polygon: Polygon = self._scale_polygon(ego_detection.shapely_polygon, self._vehicle_scaling)
-        self._render_convex_polygons(mask, box_detection_cache.origin, [ego_polygon], color=MAX_VALUE)
-        ego_raster[mask > 0] = self._scale_to_color(ego_detection.velocity.magnitude(), self._max_vehicle_speed)
-        mask.fill(0)
-
-        return [vehicles_raster, pedestrians_raster, ego_raster]
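
# Editor's summary (not part of the original diff): the final raster stacks the six
# map channels and three detection channels rendered above, giving the 9-channel
# layout assumed by RasterRenderer.shape.
CHANNEL_LAYOUT = [
    "drivable_area",            # 0
    "route",                    # 1
    "lane_boundaries",          # 2
    "traffic_lights",           # 3
    "stop_signs",               # 4
    "speed_limits",             # 5
    "vehicles",                 # 6
    "pedestrians_and_static",   # 7
    "ego",                      # 8
]
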
-    def render(self, map_cache: MapCache, detection_cache: BoxDetectionCache) -> npt.NDArray[np.uint8]:
-        """
-        Renders the map and detections from the caches into a single raster.
-        :param map_cache: MapCache object containing the map data.
-        :param detection_cache: BoxDetectionCache object containing the detection data.
-        :return: Raster representing the map and detections.
-        """
-        map_raster = self._render_map_from_cache(map_cache)
-        detection_raster = self._render_detections_from_cache(detection_cache)
-
-        raster: npt.NDArray[np.uint8] = np.concatenate(
-            [channel[None, ...] for channel in (map_raster + detection_raster)], axis=0
-        )
-        return raster
diff --git a/d123/simulation/gym/environment/helper/environment_area.py b/d123/simulation/gym/environment/helper/environment_area.py
deleted file mode 100644
index 03f87cd1..00000000
--- a/d123/simulation/gym/environment/helper/environment_area.py
+++ /dev/null
@@ -1,78 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Tuple
-
-from shapely import Polygon
-
-from d123.common.geometry.base import StateSE2
-from d123.common.geometry.transform.tranform_2d import translate_along_yaw
-from d123.common.geometry.vector import Vector2D
-
-
-class AbstractEnvironmentArea(ABC):
-    """
-    Abstract class for defining an environment area in a Gym simulation.
-    It defines the region used for the observation, reward, and other simulation components.
-    """
-
-    @abstractmethod
-    def get_global_origin(self, ego_pose: StateSE2) -> StateSE2:
-        """
-        Given the global ego pose, returns the global origin of the environment area.
-        :param ego_pose: Global ego pose in the environment.
-        :return: Global origin of the environment area.
-        """
-
-    @abstractmethod
-    def get_global_polygon(self, ego_pose: StateSE2) -> Polygon:
-        """
-        Given the global ego pose, returns the environment area as 2D polygon.
-        :param ego_pose: Global ego pose in the environment.
-        :return: 2D polygon representing the environment area.
-        """
-
-
-class RectangleEnvironmentArea(AbstractEnvironmentArea):
-    def __init__(
-        self,
-        front: float = 78.0,
-        back: float = 50.0,
-        left: float = 64.0,
-        right: float = 64.0,
-    ) -> None:
-        """
-        Initializes a rectangular environment area.
-        :param front: Extent of the area in front of the ego vehicle [m], defaults to 78.0
-        :param back: Extent of the area behind the ego vehicle [m], defaults to 50.0
-        :param left: Extent of the area to the left of the ego vehicle [m], defaults to 64.0
-        :param right: Extent of the area to the right of the ego vehicle [m], defaults to 64.0
-        """
-        self._front = front
-        self._back = back
-        self._left = left
-        self._right = right
-
-    @property
-    def frame(self) -> Tuple[float, float]:
-        """
-        Returns the dimensions of the rectangle as a tuple (width, height).
-        :return: Tuple of width and height of the rectangle.
- """ - return (self._left + self._right), (self._front + self._back) - - def get_global_origin(self, ego_pose: StateSE2) -> StateSE2: - """Inherited, see superclass.""" - width, height = self.frame - longitudinal_offset = (height / 2.0) - self._back - lateral_offset = (width / 2.0) - self._right - return translate_along_yaw(ego_pose, Vector2D(longitudinal_offset, lateral_offset)) - - def get_global_polygon(self, ego_pose: StateSE2) -> Polygon: - """Inherited, see superclass.""" - return Polygon( - [ - tuple(translate_along_yaw(ego_pose, Vector2D(self._front, self._left)).point_2d.array), # front left - tuple(translate_along_yaw(ego_pose, Vector2D(self._front, -self._right)).point_2d.array), # front right - tuple(translate_along_yaw(ego_pose, Vector2D(-self._back, -self._right)).point_2d.array), # rear right - tuple(translate_along_yaw(ego_pose, Vector2D(-self._back, self._left)).point_2d.array), # rear left - ] - ) diff --git a/d123/simulation/gym/environment/helper/environment_cache.py b/d123/simulation/gym/environment/helper/environment_cache.py deleted file mode 100644 index 55fbea92..00000000 --- a/d123/simulation/gym/environment/helper/environment_cache.py +++ /dev/null @@ -1,229 +0,0 @@ -from __future__ import annotations - -from functools import cached_property -from typing import Dict, List, Optional, Tuple - -from shapely import Polygon - -from d123.common.datatypes.detection.detection import ( - BoxDetectionSE2, - BoxDetectionWrapper, - TrafficLightDetectionWrapper, - TrafficLightStatus, -) -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.recording.detection_recording import DetectionRecording -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2 -from d123.common.geometry.base import StateSE2 -from d123.common.geometry.occupancy_map import OccupancyMap2D -from d123.dataset.maps.abstract_map import AbstractMap -from d123.dataset.maps.abstract_map_objects import ( - AbstractCarpark, - AbstractCrosswalk, - AbstractIntersection, - AbstractLane, - AbstractLaneGroup, - AbstractStopLine, -) -from d123.dataset.maps.map_datatypes import MapLayer -from d123.simulation.gym.environment.helper.environment_area import AbstractEnvironmentArea -from d123.simulation.planning.abstract_planner import PlannerInitialization, PlannerInput - - -class MapCache: - """ - Helper class to save and load map-related data for the current environment area. - NOTE: This class helps to avoid Map API calls during observation and reward computation. - """ - - def __init__( - self, - ego_state: EgoStateSE2, - map_api: AbstractMap, - environment_area: AbstractEnvironmentArea, - traffic_lights: TrafficLightDetectionWrapper, - route_lane_group_ids: List[str], - load_crosswalks: bool = False, - load_stop_lines: bool = False, - ) -> None: - """ - Initializes the MapCache object. - :param ego_state: Current ego state in the environment. - :param map_api: Map interface of nuPlan maps. - :param environment_area: Area to cache map data for. - :param traffic_light_status: Current traffic light status data. - :param route_lane_group_ids: List of lane group ids for the ego route. 
diff --git a/d123/simulation/gym/environment/helper/environment_cache.py b/d123/simulation/gym/environment/helper/environment_cache.py
deleted file mode 100644
index 55fbea92..00000000
--- a/d123/simulation/gym/environment/helper/environment_cache.py
+++ /dev/null
@@ -1,229 +0,0 @@
-from __future__ import annotations
-
-from functools import cached_property
-from typing import Dict, List, Optional, Tuple
-
-from shapely import Polygon
-
-from d123.common.datatypes.detection.detection import (
-    BoxDetectionSE2,
-    BoxDetectionWrapper,
-    TrafficLightDetectionWrapper,
-    TrafficLightStatus,
-)
-from d123.common.datatypes.detection.detection_types import DetectionType
-from d123.common.datatypes.recording.detection_recording import DetectionRecording
-from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2
-from d123.common.geometry.base import StateSE2
-from d123.common.geometry.occupancy_map import OccupancyMap2D
-from d123.dataset.maps.abstract_map import AbstractMap
-from d123.dataset.maps.abstract_map_objects import (
-    AbstractCarpark,
-    AbstractCrosswalk,
-    AbstractIntersection,
-    AbstractLane,
-    AbstractLaneGroup,
-    AbstractStopLine,
-)
-from d123.dataset.maps.map_datatypes import MapLayer
-from d123.simulation.gym.environment.helper.environment_area import AbstractEnvironmentArea
-from d123.simulation.planning.abstract_planner import PlannerInitialization, PlannerInput
-
-
-class MapCache:
-    """
-    Helper class to save and load map-related data for the current environment area.
-    NOTE: This class helps to avoid Map API calls during observation and reward computation.
-    """
-
-    def __init__(
-        self,
-        ego_state: EgoStateSE2,
-        map_api: AbstractMap,
-        environment_area: AbstractEnvironmentArea,
-        traffic_lights: TrafficLightDetectionWrapper,
-        route_lane_group_ids: List[str],
-        load_crosswalks: bool = False,
-        load_stop_lines: bool = False,
-    ) -> None:
-        """
-        Initializes the MapCache object.
-        :param ego_state: Current ego state in the environment.
-        :param map_api: Map interface of nuPlan maps.
-        :param environment_area: Area to cache map data for.
-        :param traffic_lights: Current traffic light status data.
-        :param route_lane_group_ids: List of lane group ids for the ego route.
-        :param load_crosswalks: Whether to load crosswalks, defaults to False
-        :param load_stop_lines: Whether to load stop lines, defaults to False
-        """
-
-        self.ego_state = ego_state
-        self.map_api = map_api
-        self.environment_area = environment_area
-        self.load_crosswalks = load_crosswalks
-        self.load_stop_lines = load_stop_lines
-
-        self.route_lane_group_ids = route_lane_group_ids
-        self.traffic_lights: Dict[str, TrafficLightStatus] = {str(data.lane_id): data.status for data in traffic_lights}
-
-        self.lane_groups: Dict[str, AbstractLaneGroup] = {}
-        self.lanes: Dict[str, AbstractLane] = {}
-
-        self.intersections: Dict[str, AbstractIntersection] = {}
-        self.stop_lines: Dict[str, AbstractStopLine] = {}
-        self.car_parks: Dict[str, AbstractCarpark] = {}
-        self.crosswalks: Dict[str, AbstractCrosswalk] = {}
-        self._load_cache()
-
-    def _load_cache(self) -> None:
-
-        query_map_layers = [MapLayer.LANE_GROUP, MapLayer.CARPARK]
-        # FIXME: Add stop lines and crosswalks to the map layers if needed
-        # if self.load_crosswalks:
-        #     query_map_layers.append(MapLayer.CROSSWALK)
-        # if self.load_stop_lines:
-        #     query_map_layers.append(MapLayer.STOP_LINE)
-
-        map_object_dict = self.map_api.query(
-            geometry=self.environment_area.get_global_polygon(self.ego_state.center),
-            layers=query_map_layers,
-            predicate="intersects",
-        )
-
-        # 1. load (1.1) lane groups, (1.2) lanes, (1.3) intersections
-        for lane_group in map_object_dict[MapLayer.LANE_GROUP]:
-            lane_group: AbstractLaneGroup
-            self.lane_groups[lane_group.id] = lane_group
-            for lane in lane_group.lanes:
-                self.lanes[lane.id] = lane
-            optional_intersection = lane_group.intersection
-            if optional_intersection is not None:
-                self.intersections[optional_intersection.id] = optional_intersection
-
-        # 2. load car parks
-        for car_park in map_object_dict[MapLayer.CARPARK]:
-            car_park: AbstractCarpark
-            self.car_parks[car_park.id] = car_park
-
-        # FIXME: Add stop lines and crosswalks to the map layers if needed
-        # if self.load_crosswalks:
-        #     for crosswalk in map_object_dict[MapLayer.CROSSWALK]:
-        #         crosswalk: AbstractCrosswalk
-        #         self.crosswalks[crosswalk.id] = crosswalk
-
-        # if self.load_stop_lines:
-        #     for stop_line in map_object_dict[MapLayer.STOP_LINE]:
-        #         stop_line: AbstractStopLine
-        #         self.stop_lines[stop_line.id] = stop_line
-
-    @property
-    def drivable_area_map(self) -> OccupancyMap2D:
-
-        tokens: List[str] = []
-        polygons: List[Polygon] = []
-
-        # FIXME: Remove lane groups on intersections
-        for element_dict in [self.intersections, self.lane_groups, self.car_parks]:
-            for token, element in element_dict.items():
-                tokens.append(token)
-                polygons.append(element.polygon)
-        return OccupancyMap2D(polygons, tokens)
-
-    @cached_property
-    def origin(self) -> StateSE2:
-        """
-        Returns the global origin of the environment area based on the ego state.
-        :return: Global origin of the environment area as StateSE2.
-        """
-        return self.environment_area.get_global_origin(self.ego_state.center)
-
-
-class BoxDetectionCache:
-    """Helper class to save and load detection-related data for the current environment area."""
-
-    def __init__(
-        self,
-        ego_state: EgoStateSE2,
-        box_detections: BoxDetectionWrapper,
-        environment_area: AbstractEnvironmentArea,
-    ) -> None:
-        """
-        Initializes the BoxDetectionCache object.
-        :param ego_state: Ego vehicle state in the environment.
-        :param box_detections: Box detection wrapper containing the tracked objects.
-        :param environment_area: Area to cache detection data for.
- """ - - self.ego_state = ego_state - self.environment_area = environment_area - self.tracked_objects = box_detections - - self.vehicles: List[BoxDetectionSE2] = [] - self.pedestrians: List[BoxDetectionSE2] = [] - self.static_objects: List[BoxDetectionSE2] = [] - self._load_cache(box_detections) - - def _load_cache(self, box_detections: BoxDetectionWrapper) -> None: - global_area_polygon = self.environment_area.get_global_polygon(self.ego_state.center) - - for box_detection in box_detections: - if global_area_polygon.contains(box_detection.center.shapely_point): - if box_detection.metadata.detection_type in [DetectionType.VEHICLE, DetectionType.BICYCLE]: - self.vehicles.append(box_detection) - elif box_detection.metadata.detection_type in [DetectionType.PEDESTRIAN]: - self.pedestrians.append(box_detection) - elif box_detection.metadata.detection_type in [ - DetectionType.CZONE_SIGN, - DetectionType.BARRIER, - DetectionType.TRAFFIC_CONE, - DetectionType.GENERIC_OBJECT, - ]: - self.static_objects.append(box_detection) - - @cached_property - def origin(self) -> StateSE2: - """ - Returns the global origin of the environment area based on the ego state. - :return: Global origin of the environment area as StateSE2. - """ - return self.environment_area.get_global_origin(self.ego_state.center) - - -def build_environment_caches( - planner_input: PlannerInput, - planner_initialization: PlannerInitialization, - environment_area: AbstractEnvironmentArea, - route_lane_group_ids: Optional[List[str]] = None, -) -> Tuple[MapCache, BoxDetectionCache]: - """ - Helper function to build the environment caches for the current planner input and initialization. - :param planner_input: Planner input interface of nuPlan, ego, detection, and traffic light data. - :param planner_initialization: Planner initialization interface of nuPlan, map API and route lane group ids. - :param environment_area: Area object used to cache the map and detection data. - :param route_lane_group_ids: Optional route lane group ids, to overwrite the planner initialization, defaults to None - :return: Tuple of MapCache and DetectionCache objects. - """ - - ego_state, detection_recording = planner_input.history.current_state - assert isinstance(detection_recording, DetectionRecording), "Recording must be of type DetectionRecording" - - # TODO: Add route correction? - route_lane_group_ids = planner_initialization.route_lane_group_ids - - # TODO: Add box detection filtering? 
-    box_detections = detection_recording.box_detections
-
-    map_cache = MapCache(
-        ego_state=ego_state,
-        map_api=planner_initialization.map_api,
-        environment_area=environment_area,
-        traffic_lights=detection_recording.traffic_light_detections,
-        route_lane_group_ids=route_lane_group_ids,
-    )
-    detection_cache = BoxDetectionCache(
-        ego_state=ego_state,
-        box_detections=box_detections,
-        environment_area=environment_area,
-    )
-
-    return map_cache, detection_cache
diff --git a/d123/simulation/gym/environment/output_converter/abstract_output_converter.py b/d123/simulation/gym/environment/output_converter/abstract_output_converter.py
deleted file mode 100644
index 94f3ef0c..00000000
--- a/d123/simulation/gym/environment/output_converter/abstract_output_converter.py
+++ /dev/null
@@ -1,32 +0,0 @@
-from abc import ABC, abstractmethod
-from typing import Any, Dict
-
-import numpy as np
-import numpy.typing as npt
-from gymnasium import spaces
-
-from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2
-from d123.simulation.planning.planner_output.abstract_planner_output import AbstractPlannerOutput
-
-
-class AbstractOutputConverter(ABC):
-    """Abstract class for building trajectories (nuPlan interface) in a Gym simulation environment."""
-
-    @abstractmethod
-    def get_action_space(self) -> spaces.Space:
-        """
-        Returns the action space of the gym environment.
-        :return: gymnasium action space.
-        """
-
-    @abstractmethod
-    def get_planner_output(
-        self, action: npt.NDArray[np.float32], ego_state: EgoStateSE2, info: Dict[str, Any]
-    ) -> AbstractPlannerOutput:
-        """
-        Builds a planner output based on the action and the current ego state.
-        :param action: Action taken by the agent, typically a numpy array.
-        :param ego_state: Current state of the ego vehicle.
-        :param info: Arbitrary information dictionary, for passing information between modules.
-        :return: Planner output object.
-        """
diff --git a/d123/simulation/gym/environment/output_converter/action_output_converter.py b/d123/simulation/gym/environment/output_converter/action_output_converter.py
deleted file mode 100644
index ed8aff94..00000000
--- a/d123/simulation/gym/environment/output_converter/action_output_converter.py
+++ /dev/null
@@ -1,209 +0,0 @@
-from typing import Any, Dict, Optional
-
-import numpy as np
-import numpy.typing as npt
-from gymnasium import spaces
-
-from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2
-from d123.simulation.gym.environment.output_converter.abstract_output_converter import AbstractOutputConverter
-from d123.simulation.gym.policy.ppo.ppo_config import GlobalConfig
-from d123.simulation.planning.planner_output.action_planner_output import ActionPlannerOutput
-
-
-class ActionOutputConverter(AbstractOutputConverter):
-    """
-    Default action output converter for training CaRL.
-    TODO: Refactor this class
-    NOTE @DanielDauner:
-    We use an unclean hack here: the action (acceleration, steering) is packaged into a Trajectory interface according to nuPlan.
-    The nuPlan simulation strictly requires a trajectory. We use a OneStageController to skip the controller and directly propagate the bicycle model.
-    You can create a new TrajectoryBuilder and SimulationBuilder to use a TwoStageController if you want to use a trajectory action.
- """ - - def __init__( - self, - scale_max_acceleration: float = 2.4, - scale_max_deceleration: float = 3.2, - scale_max_steering_angle: float = 0.83775804096, - clip_max_abs_steering_rate: Optional[float] = None, - clip_max_abs_lon_jerk: Optional[float] = None, - clip_max_abs_yaw_accel: Optional[float] = None, - clip_angular_adjustment: bool = False, - convert_low_pass_acceleration: bool = False, - convert_low_pass_steering: bool = False, - disable_reverse_driving: bool = True, - ): - """ - Initializes the ActionTrajectoryBuilder. - :param scale_max_acceleration: max acceleration used for scaling the normed action [m/s^2], defaults to 2.4 - :param scale_max_deceleration: max deceleration (positive) used for scaling the normed action [m/s^2], defaults to 3.2 - :param scale_max_steering_angle: max absolute steering angle used for scaling the normed action [rad], defaults to 0.83775804096 - :param clip_max_abs_steering_rate: optional value to clip the steering rate [rad/s], defaults to None - :param clip_max_abs_lon_jerk: optional value to clip the longitudinal jerk [m/s^3], defaults to None - :param clip_max_abs_yaw_accel: optional value to clip the yaw acceleration [rad/s^2], defaults to None - :param clip_angular_adjustment: Whether to adjust the longitudinal acceleration for lower rotation, defaults to False - :param convert_low_pass_acceleration: Undo the low pass filtering of nuPlan's bicycle model, defaults to False - :param convert_low_pass_steering: Undo the low pass filtering of nuPlan's bicycle model, defaults to False - :param disable_reverse_driving: Whether to disable reverse driving with a controller, defaults to True - """ - self._scale_max_acceleration = scale_max_acceleration # [m/s^2] - self._scale_max_deceleration = scale_max_deceleration # [m/s^2] - self._scale_max_steering_angle = scale_max_steering_angle # [rad] - - self._clip_max_abs_steering_rate = clip_max_abs_steering_rate # [rad/s] - self._clip_max_abs_lon_jerk = clip_max_abs_lon_jerk # [m/s^3] - self._clip_max_abs_yaw_accel = clip_max_abs_yaw_accel # [rad/s^2] - self._clip_angular_adjustment = clip_angular_adjustment - - self._convert_low_pass_acceleration = convert_low_pass_acceleration - self._convert_low_pass_steering = convert_low_pass_steering - self._disable_reverse_driving = disable_reverse_driving - - self._config = GlobalConfig() - self._dt_control = 0.1 # [s] - self._accel_time_constant = 0.2 # [s] - self._steering_angle_time_constant = 0.05 # [s] - - def get_action_space(self) -> spaces.Space: - """Inherited, see superclass.""" - return spaces.Box( - self._config.action_space_min, - self._config.action_space_max, - shape=(self._config.action_space_dim,), - dtype=np.float32, - ) - - def get_planner_output( - self, action: npt.NDArray[np.float32], ego_state: EgoStateSE2, info: Dict[str, Any] - ) -> ActionPlannerOutput: - """Inherited, see superclass.""" - assert len(action) == self._config.action_space_dim - info["last_action"] = action - - action_acceleration_normed, action_steering_angle_normed = action - - target_steering_rate = self._scale_steering(action_steering_angle_normed, ego_state.tire_steering_angle) - target_acceleration = self._scale_acceleration( - action_acceleration_normed, ego_state.dynamic_state_se2.acceleration.x - ) - clipped_steering_rate = self._clip_steering(target_acceleration, target_steering_rate, ego_state) - clipped_acceleration = self._clip_acceleration(target_acceleration, clipped_steering_rate, ego_state) - return ActionPlannerOutput(clipped_acceleration, 
-        return ActionPlannerOutput(clipped_acceleration, clipped_steering_rate, ego_state)
-
-    def _scale_steering(self, action_steering_angle_normed: float, current_steering_angle: float) -> float:
-        """
-        Scales the steering angle based on the action and current steering angle.
-        :param action_steering_angle_normed: Normalized steering angle action, typically in [-1, 1].
-        :param current_steering_angle: Current steering angle of the ego vehicle.
-        :return: Scaled steering rate based on the action and current steering angle.
-        """
-
-        target_steering_angle = action_steering_angle_normed * self._scale_max_steering_angle
-        if self._convert_low_pass_steering:
-            factor = (self._dt_control + self._steering_angle_time_constant) / self._dt_control
-            target_steering_angle = (target_steering_angle - current_steering_angle) * factor + current_steering_angle
-        target_steering_rate = (target_steering_angle - current_steering_angle) / self._dt_control
-        return target_steering_rate
-
-    def _scale_acceleration(self, action_acceleration_normed: float, current_acceleration: float) -> float:
-        """
-        Scales the acceleration based on the action.
-        :param action_acceleration_normed: Normalized acceleration action, typically in [-1, 1].
-        :param current_acceleration: Current acceleration of the ego vehicle.
-        :return: Scaled acceleration based on the action.
-        """
-        if action_acceleration_normed >= 0:
-            target_acceleration = self._scale_max_acceleration * action_acceleration_normed
-        else:
-            target_acceleration = self._scale_max_deceleration * action_acceleration_normed
-        if self._convert_low_pass_acceleration:
-            factor = self._dt_control / (self._dt_control + self._accel_time_constant)
-            target_acceleration = (target_acceleration - current_acceleration) / factor + current_acceleration
-        return target_acceleration
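
# Editor's note (illustrative arithmetic, not part of the original diff): with
# dt = 0.1 s and an acceleration time constant of 0.2 s, the low-pass factor is
# 0.1 / (0.1 + 0.2) = 1/3, so undoing the filter amplifies the commanded change by 3x.
# E.g., a normed action of 0.5 targets 0.5 * 2.4 = 1.2 m/s^2; starting from rest, the
# converted command becomes (1.2 - 0.0) / (1/3) + 0.0 = 3.6 m/s^2.
dt, tau = 0.1, 0.2
factor = dt / (dt + tau)
target, current = 0.5 * 2.4, 0.0
converted = (target - current) / factor + current
assert abs(converted - 3.6) < 1e-6
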
- """ - - current_acceleration = ego_state.dynamic_state_se2.acceleration.x - - if self._disable_reverse_driving: - speed = ego_state.dynamic_state_se2.velocity.x - updated_speed = speed + target_acceleration * self._dt_control - # * self._dt_control - if updated_speed < 0: - k_p, k_d = 1.0, 0.75 - error = -speed - dt_error = -current_acceleration - target_acceleration = k_p * error + k_d * dt_error - - if self._clip_max_abs_lon_jerk is not None: - max_acceleration_change = self._clip_max_abs_lon_jerk * self._dt_control - target_acceleration = np.clip( - target_acceleration, - current_acceleration - max_acceleration_change, - current_acceleration + max_acceleration_change, - ) - - _max_acceleration = self._scale_max_acceleration - if self._clip_angular_adjustment: - rear_axle_to_center_dist = ego_state.vehicle_parameters.rear_axle_to_center_longitudinal - - next_point_velocity_x = ego_state.dynamic_state_se2.velocity.x + target_acceleration * self._dt_control - next_point_tire_steering_angle = ego_state.tire_steering_angle + target_steering_rate * self._dt_control - next_point_angular_velocity = ( - next_point_velocity_x * np.tan(next_point_tire_steering_angle) / ego_state.vehicle_parameters.wheel_base - ) - next_point_angular_acceleration = ( - next_point_angular_velocity - ego_state.dynamic_state_se2.angular_velocity - ) / self._dt_control - - centripetal_acceleration_term = rear_axle_to_center_dist * (next_point_angular_velocity) ** 2 - angular_acceleration_term = rear_axle_to_center_dist * (next_point_angular_acceleration) - _max_acceleration -= centripetal_acceleration_term + angular_acceleration_term - - target_acceleration = np.clip(target_acceleration, -self._scale_max_deceleration, _max_acceleration) - return target_acceleration - - def _clip_steering(self, target_acceleration: float, target_steering_rate: float, ego_state: EgoStateSE2) -> float: - """ - Clips the steering rate based on the target acceleration and current ego state. - :param target_acceleration: Acceleration as targeted by the agent. - :param target_steering_rate: Steering rate as targeted by the agent. - :param ego_state: Current state of the ego vehicle. - :return: Clipped steering rate based on the target acceleration and steering rate. 
- """ - - current_steering_angle = ego_state.tire_steering_angle - target_steering_angle = current_steering_angle + target_steering_rate * self._dt_control - - if self._clip_max_abs_yaw_accel is not None: - wheel_base = ego_state.vehicle_parameters.wheel_base - target_velocity = ego_state.dynamic_state_se2.acceleration.x + target_acceleration * self._dt_control - - current_angular_velocity = ego_state.dynamic_state_se2.angular_velocity - max_abs_yaw_velocity = self._clip_max_abs_yaw_accel * self._dt_control - - min_angular_velocity = current_angular_velocity - max_abs_yaw_velocity - max_angular_velocity = current_angular_velocity + max_abs_yaw_velocity - - min_tire_steering_angle = np.arctan((min_angular_velocity * wheel_base) / target_velocity) - max_tire_steering_angle = np.arctan((max_angular_velocity * wheel_base) / target_velocity) - target_steering_angle = np.clip(target_steering_angle, min_tire_steering_angle, max_tire_steering_angle) - target_steering_rate = (target_steering_angle - current_steering_angle) / self._dt_control - - if self._clip_max_abs_steering_rate is not None: - target_steering_rate = np.clip( - target_steering_rate, - -self._clip_max_abs_steering_rate, - self._clip_max_abs_steering_rate, - ) - - return target_steering_rate diff --git a/d123/simulation/gym/environment/reward_builder/abstract_reward_builder.py b/d123/simulation/gym/environment/reward_builder/abstract_reward_builder.py deleted file mode 100644 index bbf01920..00000000 --- a/d123/simulation/gym/environment/reward_builder/abstract_reward_builder.py +++ /dev/null @@ -1,24 +0,0 @@ -from abc import ABC, abstractmethod -from typing import Any, Dict, Tuple - -from d123.simulation.gym.environment.simulation_wrapper import SimulationWrapper - - -class AbstractRewardBuilder(ABC): - """Abstract class for building rewards in a Gym simulation environment.""" - - @abstractmethod - def reset(self) -> None: - """Reset the reward builder to its initial state.""" - - @abstractmethod - def build_reward(self, simulation_wrapper: SimulationWrapper, info: Dict[str, Any]) -> Tuple[float, bool, bool]: - """ - Build the reward based on the current simulation state and additional information. - :param simulation_wrapper: Wrapper object containing complete nuPlan simulation. - :param info: Arbitrary information dictionary, for passing information between modules. - :return: A tuple containing: - - reward: The calculated reward value. - - termination: Whether the simulation terminates in the current step. - - truncation: Whether the simulation is truncated in the current step. 
- """ diff --git a/d123/simulation/gym/environment/reward_builder/components/collision.py b/d123/simulation/gym/environment/reward_builder/components/collision.py deleted file mode 100644 index 7bd00e50..00000000 --- a/d123/simulation/gym/environment/reward_builder/components/collision.py +++ /dev/null @@ -1,103 +0,0 @@ -from typing import Dict, Final, List, Tuple - -from nuplan.common.actor_state.ego_state import EgoState -from nuplan.common.actor_state.oriented_box import in_collision -from nuplan.common.actor_state.tracked_objects import TrackedObject, TrackedObjects -from nuplan.planning.metrics.evaluation_metrics.common.no_ego_at_fault_collisions import ( - _get_collision_type, -) -from nuplan.planning.metrics.utils.collision_utils import CollisionType - -STOPPED_SPEED_THRESHOLD: Final[float] = 5e-02 # Threshold of ego considered stationary [mps] - - -def _get_collisions( - ego_state: EgoState, - tracked_objects: TrackedObjects, - ignore_track_tokens: List[str] = [], -) -> Dict[str, TrackedObject]: - """ - Helper function to get all tracked objects that ego collides with. - Ignores agents with tokens in ignore_track_tokens (similar to the nuPlan collision function) - :param ego_state: Ego state object of nuPlan. - :param tracked_objects: Tracked object wrapper of nuPlan. - :param ignore_track_tokens: list of tokens (str) ego collided before, defaults to [] - :return: dictionary of tokens and tracked objects. - """ - collided_track_dict: Dict[str, TrackedObject] = {} - for tracked_object in tracked_objects: - tracked_object: TrackedObject - if (tracked_object.track_token not in ignore_track_tokens) and in_collision( - ego_state.car_footprint.oriented_box, tracked_object.box - ): - collided_track_dict[tracked_object.track_token] = tracked_object - return collided_track_dict - - -def calculate_all_collisions( - ego_state: EgoState, - tracked_objects: TrackedObjects, - prev_collided_track_tokens: List[str] = [], -) -> Tuple[bool, List[str]]: - """ - Reward term for ego collision. Considers all collision types. - :param ego_state: Ego state object of nuPlan. - :param tracked_objects: Tracked object wrapper of nuPlan. - :param prev_collided_track_tokens: list of tokens (str) ego collided before, defaults to [] - :return: whether ego collides and corresponding detection tokens. - """ - collided_track_dict = _get_collisions(ego_state, tracked_objects, prev_collided_track_tokens) - collided_track_tokens = list(collided_track_dict.keys()) - return len(collided_track_tokens) > 0, collided_track_tokens - - -def calculate_non_stationary_collisions( - ego_state: EgoState, - tracked_objects: TrackedObjects, - prev_collided_track_tokens: List[str], -) -> Tuple[bool, List[str]]: - """ - Reward term for ego collision. Ignores collision when ego stationary. - :param ego_state: Ego state object of nuPlan. - :param tracked_objects: Tracked object wrapper of nuPlan. - :param prev_collided_track_tokens: list of tokens (str) ego collided before, defaults to [] - :return: whether ego collides and corresponding detection tokens. 
- """ - collided_track_dict = _get_collisions(ego_state, tracked_objects, prev_collided_track_tokens) - collided_track_tokens = list(collided_track_dict.keys()) - ego_stationary = ego_state.dynamic_car_state.speed < STOPPED_SPEED_THRESHOLD - return (len(collided_track_tokens) > 0 and not ego_stationary), collided_track_tokens - - -def calculate_at_fault_collision( - ego_state: EgoState, - tracked_objects: TrackedObjects, - prev_collided_track_tokens: List[str], - in_multiple_lanes_or_offroad: bool = False, -) -> Tuple[bool, List[str]]: - """ - Reward term for ego collision. Ignores non-at-fault collisions. - https://github.com/motional/nuplan-devkit/blob/master/nuplan/planning/metrics/evaluation_metrics/common/no_ego_at_fault_collisions.py - :param ego_state: Ego state object of nuPlan. - :param tracked_objects: Tracked object wrapper of nuPlan. - :param prev_collided_track_tokens: list of tokens (str) ego collided before, defaults to [] - :param in_multiple_lanes_or_offroad: whether ego is in multiple roads of off road. - :return: whether ego collides and corresponding detection tokens. - """ - - collided_track_dict = _get_collisions(ego_state, tracked_objects, prev_collided_track_tokens) - collided_track_tokens = list(collided_track_dict.keys()) - - at_fault_collision: bool = False - for tracked_object in collided_track_dict.values(): - collision_type = _get_collision_type(ego_state, tracked_object) - collisions_at_stopped_track_or_active_front: bool = collision_type in [ - CollisionType.ACTIVE_FRONT_COLLISION, - CollisionType.STOPPED_TRACK_COLLISION, - ] - collision_at_lateral: bool = collision_type == CollisionType.ACTIVE_LATERAL_COLLISION - if collisions_at_stopped_track_or_active_front or (in_multiple_lanes_or_offroad and collision_at_lateral): - at_fault_collision = True - break - - return at_fault_collision, collided_track_tokens diff --git a/d123/simulation/gym/environment/reward_builder/components/comfort.py b/d123/simulation/gym/environment/reward_builder/components/comfort.py deleted file mode 100644 index 04ea40a7..00000000 --- a/d123/simulation/gym/environment/reward_builder/components/comfort.py +++ /dev/null @@ -1,145 +0,0 @@ -""" -NOTE @DanielDauner: - -Learning comfortable driving behavior—according to nuPlan's comfort metrics proved to be challenging for a PPO policy operating directly in action space. -In contrast, the default nuPlan controllers (e.g., LQR, iLQR) explicitly optimize for comfort, so trajectory-based policies tend to achieve high comfort scores. -For example, Gigaflow generates full agent rollouts as trajectories and then uses the nuPlan controller to execute them, which likely mitigates comfort issues. - -Designing effective comfort reward terms (see below) was one of the key challenges in adapting CaRL to nuPlan. A few more comments: -- nuPlan's comfort metrics apply Savitzky-Golay filtering, which can be noisy with short history windows. This also requires a min history length of 4. -- The metrics compute accelerations and velocities using the agent's center, not the rear axle. This mismatch previously introduced bugs in our PDM planners. -- The PPO policy requires a very large number of environment steps to appropriately balance the comfort term against others (e.g., collision avoidance). -- We spent a lot of time trying to make the comfort term work and expected bugs in the reward. In hindsight, we did not train for enough steps required for the comfort terms. 
- -All comfort-related reward terms remain in the codebase but may be refactored in the future. We used `calculate_kinematics_comfort`. -""" - -from typing import Tuple - -import numpy as np -import numpy.typing as npt -from carl_nuplan.planning.simulation.planner.pdm_planner.scoring.pdm_comfort_metrics import ego_is_comfortable -from carl_nuplan.planning.simulation.planner.pdm_planner.scoring.pdm_comfort_metrics_debug import ( - ego_is_comfortable_debug, -) -from carl_nuplan.planning.simulation.planner.pdm_planner.utils.pdm_array_representation import ( - ego_states_to_state_array, -) - -from d123.simulation.gym.environment.simulation_wrapper import SimulationWrapper - - -def calculate_action_delta_comfort(simulation_wrapper: SimulationWrapper, max_change: float = 0.25) -> float: - """ - Calculate the comfort score based on the change in actions between the last two time steps. - :param simulation_wrapper: Wrapper object containing complete nuPlan simulation. - :param max_change: max change considered comfortable in action space, defaults to 0.25 - :return: float values between 0.0 and 1.0, where 1.0 is most comfortable and 0.0 is least comfortable. - """ - history_trajectories = simulation_wrapper.history_trajectories - if len(history_trajectories) >= 2: - - current_action = simulation_wrapper.history_trajectories[-1]._raw_action - previous_action = simulation_wrapper.history_trajectories[-2]._raw_action - comfort = np.abs(np.array(current_action) - np.array(previous_action)) > max_change - - if np.any(comfort): - return 0.5 - - return 1.0 - - -def calculate_kinematics_comfort(simulation_wrapper: SimulationWrapper) -> Tuple[float, npt.NDArray[np.bool_]]: - """ - Calculate the comfort score based on the six kinematic metrics of nuPlan. - NOTE: uses the debugged version of the comfort metrics using the center coordinate of the ego vehicle. - :param simulation_wrapper: Complete simulation wrapper object used for gym simulation. - :return: Whether the ego vehicle is comfortable and the comfort scores. - """ - history_ego_states = simulation_wrapper.simulation_ego_states - is_comfortable: npt.NDArray[np.bool_] = np.zeros((6), dtype=np.bool_) - comfort_score: float = 1.0 - - if len(history_ego_states) >= 4: - history_states_array = ego_states_to_state_array(history_ego_states)[None, ...] - time_points = np.array( - [ego_state.time_point.time_s for ego_state in history_ego_states], - dtype=np.float64, - ) - is_comfortable = ego_is_comfortable_debug(history_states_array, time_points)[0] - comfort_score = is_comfortable.sum() / len(is_comfortable) - - return comfort_score, is_comfortable - - -def calculate_kinematics_history_comfort(simulation_wrapper: SimulationWrapper) -> Tuple[float, npt.NDArray[np.bool_]]: - """ - Calculate the comfort score based on the six kinematic metrics of nuPlan. Includes the ego history for calculation. - NOTE: Adds the ego history of the logs to the comfort metrics. Was not relevant. - :param simulation_wrapper: Complete simulation wrapper object used for gym simulation. - :return: Whether the ego vehicle is comfortable and the comfort scores. - """ - history_ego_states = simulation_wrapper.current_planner_input.history.ego_states - assert len(history_ego_states) >= 4 - is_comfortable: npt.NDArray[np.bool_] = np.zeros((6), dtype=np.bool_) - - history_states_array = ego_states_to_state_array(history_ego_states)[None, ...] 
- time_points = np.array( - [ego_state.time_point.time_s for ego_state in history_ego_states], - dtype=np.float64, - ) - is_comfortable = ego_is_comfortable_debug(history_states_array, time_points)[0] - comfort_score = is_comfortable.sum() / len(is_comfortable) - - return comfort_score, is_comfortable - - -def calculate_kinematics_comfort_legacy(simulation_wrapper: SimulationWrapper) -> Tuple[float, npt.NDArray[np.bool_]]: - """ - Calculate the comfort score based on the six kinematic metrics of nuPlan. - NOTE: Uses the rear-axle instead of center coordinate of the ego vehicle. Leads to slight mismatch to nuPlan metric. - :param simulation_wrapper: Complete simulation wrapper object used for gym simulation. - :return: Whether the ego vehicle is comfortable and the comfort scores. - """ - history_ego_states = simulation_wrapper.simulation_ego_states - is_comfortable: npt.NDArray[np.bool_] = np.zeros((6), dtype=np.bool_) - - if len(history_ego_states) >= 4: - history_states_array = ego_states_to_state_array(history_ego_states)[None, ...] - time_points = np.array( - [ego_state.time_point.time_s for ego_state in history_ego_states], - dtype=np.float64, - ) - is_comfortable = ego_is_comfortable(history_states_array, time_points)[0] - if not is_comfortable.all(): - return 0.5, is_comfortable - - return 1.0, is_comfortable - - -def calculate_kinematics_comfort_fixed(simulation_wrapper: SimulationWrapper) -> Tuple[float, npt.NDArray[np.bool_]]: - """ - Calculate the comfort score based on the six kinematic metrics of nuPlan. - NOTE: Ignores certain jerk metrics that are noisy initially. - :param simulation_wrapper: Complete simulation wrapper object used for gym simulation. - :return: Whether the ego vehicle is comfortable and the comfort scores. - """ - history_ego_states = simulation_wrapper.simulation_ego_states - is_comfortable: npt.NDArray[np.bool_] = np.zeros((6), dtype=np.bool_) - - if len(history_ego_states) >= 4: - history_states_array = ego_states_to_state_array(history_ego_states)[None, ...] - time_points = np.array( - [ego_state.time_point.time_s for ego_state in history_ego_states], - dtype=np.float64, - ) - is_comfortable = ego_is_comfortable_debug(history_states_array, time_points)[0] - - if len(history_ego_states) < 15: - is_comfortable[2] = True # NOTE: jerk metric is trash in first few frames - is_comfortable[3] = True # NOTE: lon jerk metric is trash in first few frames - - if not is_comfortable.all(): - return 0.5, is_comfortable - - return 1.0, is_comfortable diff --git a/d123/simulation/gym/environment/reward_builder/components/off_route.py b/d123/simulation/gym/environment/reward_builder/components/off_route.py deleted file mode 100644 index 1615bd7a..00000000 --- a/d123/simulation/gym/environment/reward_builder/components/off_route.py +++ /dev/null @@ -1,98 +0,0 @@ -from typing import Dict - -import numpy as np -from carl_nuplan.planning.simulation.planner.pdm_planner.observation.pdm_occupancy_map import ( - PDMOccupancyMap, -) -from nuplan.common.maps.abstract_map_objects import LaneGraphEdgeMapObject -from shapely import Polygon - -from d123.simulation.gym.environment.helper.environment_cache import MapCache -from d123.simulation.gym.environment.simulation_wrapper import SimulationWrapper - - -def calculate_off_route_v1(simulation_wrapper: SimulationWrapper, map_cache: MapCache) -> float: - """ - Calculate the off-route based on the polygons of the route roadblocks and roadblock connectors polygons. 
-    NOTE: The route roadblock connector polygons often have a strange shape. We suspect the ego learned to exploit this
-    during odd overtaking maneuvers. We fixed this in the v2 version below.
-    TODO: Refactor or remove this implementation.
-    :param simulation_wrapper: Complete simulation wrapper object used for gym simulation.
-    :param map_cache: Map cache object storing relevant nearby map objects.
-    :return: 1.0 if the ego is on route, 0.0 if the ego is off route.
-    """
-    iteration = simulation_wrapper.current_planner_input.iteration.index
-
-    ego_state = simulation_wrapper.current_ego_state
-    expert_ego_state = simulation_wrapper.scenario.get_ego_state_at_iteration(iteration)
-
-    route_roadblocks: Dict[str, Polygon] = {}
-    for route_roadblock_id in map_cache.route_lane_group_ids:
-        if route_roadblock_id in map_cache.lane_groups:
-            route_roadblocks[route_roadblock_id] = map_cache.lane_groups[route_roadblock_id].polygon
-        if route_roadblock_id in map_cache.roadblock_connectors:
-            route_roadblocks[route_roadblock_id] = map_cache.roadblock_connectors[route_roadblock_id].polygon
-
-    route_map = PDMOccupancyMap(list(route_roadblocks.keys()), list(route_roadblocks.values()))
-
-    points = np.array(
-        [
-            [point.x, point.y]
-            for point in [
-                ego_state.center.point,
-                expert_ego_state.center.point,
-            ]
-        ],
-        dtype=np.float64,
-    )
-    points_in_polygon = route_map.points_in_polygons(points)
-    on_route = points_in_polygon.sum(axis=0) > 0
-    ego_on_route, expert_on_route = on_route[0], on_route[1]
-
-    if not ego_on_route and expert_on_route:
-        return 0.0
-
-    return 1.0
-
-
-def calculate_off_route_v2(simulation_wrapper: SimulationWrapper, map_cache: MapCache) -> float:
-    """
-    Calculate the off-route reward based on the lane and lane-connector polygons along the route.
-    NOTE: This implementation uses the lane/lane-connector polygons instead of the roadblock/roadblock-connector polygons.
-    :param simulation_wrapper: Complete simulation wrapper object used for gym simulation.
-    :param map_cache: Map cache object storing relevant nearby map objects.
-    :return: 1.0 if the ego is on route, 0.0 if the ego is off route.
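
    Both variants reduce to a point-in-polygon test; a self-contained sketch with plain
    shapely (illustrative polygon, not map data):

        from shapely.geometry import Point, Polygon

        lane = Polygon([(0, 0), (10, 0), (10, 3), (0, 3)])   # stand-in for a route lane polygon
        ego_on_route = lane.contains(Point(5.0, 1.5))
        expert_on_route = lane.contains(Point(5.0, 1.0))
        # Penalize only if ego left the route while the expert stayed on it.
        reward = 0.0 if (not ego_on_route and expert_on_route) else 1.0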
- """ - iteration = simulation_wrapper.current_planner_input.iteration.index - - ego_state = simulation_wrapper.current_ego_state - expert_ego_state = simulation_wrapper.scenario.get_ego_state_at_iteration(iteration) - - route_lane_polygons: Dict[str, Polygon] = {} - - for lane_dict in [map_cache.lanes, map_cache.lane_connectors]: - lane_dict: Dict[str, LaneGraphEdgeMapObject] - for lane_id, lane in lane_dict.items(): - if lane.get_roadblock_id() in map_cache.route_lane_group_ids: - route_lane_polygons[lane_id] = lane.polygon - - route_map = PDMOccupancyMap(list(route_lane_polygons.keys()), list(route_lane_polygons.values())) - - points = np.array( - [ - [point.x, point.y] - for point in [ - ego_state.center.point, - expert_ego_state.center.point, - ] - ], - dtype=np.float64, - ) - points_in_polygon = route_map.points_in_polygons(points) - on_route = points_in_polygon.sum(axis=0) > 0 - ego_on_route, expert_on_route = on_route[0], on_route[1] - - if not ego_on_route and expert_on_route: - return 0.0 - - return 1.0 diff --git a/d123/simulation/gym/environment/reward_builder/components/progress.py b/d123/simulation/gym/environment/reward_builder/components/progress.py deleted file mode 100644 index c8bc6b55..00000000 --- a/d123/simulation/gym/environment/reward_builder/components/progress.py +++ /dev/null @@ -1,120 +0,0 @@ -from dataclasses import dataclass -from typing import List, Optional - -import numpy as np -from carl_nuplan.planning.simulation.planner.pdm_planner.utils.pdm_path import PDMPath -from nuplan.common.actor_state.ego_state import EgoState -from nuplan.planning.metrics.evaluation_metrics.common.ego_progress_along_expert_route import ( - PerFrameProgressAlongRouteComputer, -) -from nuplan.planning.metrics.utils.route_extractor import ( - RouteRoadBlockLinkedList, - get_route, - get_route_baseline_roadblock_linkedlist, - get_route_simplified, -) -from nuplan.planning.metrics.utils.state_extractors import extract_ego_center -from shapely import Point - -from d123.simulation.gym.environment.simulation_wrapper import SimulationWrapper - - -@dataclass -class ProgressCache: - - expert_route_roadblocks: Optional[RouteRoadBlockLinkedList] - expert_progress: float # [m] - progress_computer: Optional[PerFrameProgressAlongRouteComputer] = None - - -def calculate_route_completion_human(ego_states: List[EgoState], scenario_simulation: SimulationWrapper) -> float: - """ - Calculates route completion relative to the human trajectory from the logs. Used as default. - :param ego_states: List of ego states from the simulation. - :param scenario_simulation: Simulation wrapper object containing the scenario simulation. - :return: Route completion delta, which is the difference in route completion from the last time step to the current one (normalized). - """ - assert len(ego_states) > 2 - current_ego_state = ego_states[-1] - ego_linestring = scenario_simulation.ego_linestring - current_route_completion = ego_linestring.project(Point(*current_ego_state.center.array), normalized=True) - past_route_completion = scenario_simulation.route_completion - route_completion_delta = np.maximum(0.0, current_route_completion - past_route_completion) - scenario_simulation.update_route_completion(current_route_completion) - return route_completion_delta - - -def calculate_route_completion_mean(ego_states: List[EgoState], scenario_simulation: SimulationWrapper) -> float: - """ - Calculates route completion relative to the overall average in the logs (i.e. 62 meter) - NOTE: This lead to aggressive ego behavior. 
Function likely removed in the future. - :param ego_states: List of ego states from the simulation. - :param scenario_simulation: Simulation wrapper object containing the scenario simulation. - :return: Route completion delta, which is the difference in route completion from the last time step to the current one (normalized). - """ - MEAN_ROUTE_COMPLETION: float = 62.0 # [m] - ego_linestring = PDMPath([ego_state.center for ego_state in scenario_simulation.simulation_ego_states]).linestring - current_route_completion = np.clip(ego_linestring.length / MEAN_ROUTE_COMPLETION, 0.0, 1.0) - past_route_completion = scenario_simulation.route_completion - route_completion_delta = np.clip(current_route_completion - past_route_completion, 0.0, 1.0) - scenario_simulation.update_route_completion(current_route_completion) - return route_completion_delta - - -def calculate_route_completion_nuplan( - ego_states: List[EgoState], - scenario_simulation: SimulationWrapper, - progress_cache: Optional[ProgressCache] = None, - score_progress_threshold: float = 0.001, -) -> float: - """ - Calculates route completion based on the nuPlan progress metric. - NOTE: This function worked okay, but did not lead to better results compared to the human trajectory. - The implementation is also more complex. We might remove it in the future. - :param ego_states: List of ego states from the simulation. - :param scenario_simulation: Simulation wrapper object containing the scenario simulation. - :return: Route completion delta, which is the difference in route completion from the last time step to the current one (normalized). - """ - - first_iteration = progress_cache is None - - # 1. Calculate expert route and progress - if first_iteration: - scenario = scenario_simulation.scenario - expert_states = scenario.get_expert_ego_trajectory() - expert_poses = extract_ego_center(expert_states) - - expert_route = get_route(map_api=scenario.map_api, poses=expert_poses) - expert_route_simplified = get_route_simplified(expert_route) - expert_route_roadblocks = get_route_baseline_roadblock_linkedlist(scenario.map_api, expert_route_simplified) - - if expert_route_roadblocks.head is None: - progress_cache = ProgressCache(expert_route_roadblocks, 0.0) - else: - expert_progress_computer = PerFrameProgressAlongRouteComputer(expert_route_roadblocks) - expert_progress = np.sum(expert_progress_computer(ego_poses=expert_poses)) - ego_progress_computer = PerFrameProgressAlongRouteComputer(expert_route_roadblocks) - progress_cache = ProgressCache(expert_route_roadblocks, expert_progress, ego_progress_computer) - - # 2. Whether or not valid route was found: - # - Return standard values for no progress. - # - Calculate new route completion. 
- if progress_cache.expert_route_roadblocks.head is None: - scenario_simulation.update_route_completion(1.0) - route_completion_delta = 0.0 - - else: - ego_poses = extract_ego_center(ego_states[-2:]) - ego_progress = np.sum(progress_cache.progress_computer(ego_poses=ego_poses)) - - current_route_completion = np.clip( - max(ego_progress, score_progress_threshold) / max(progress_cache.expert_progress, score_progress_threshold), - 0.0, - 1.0, - ) - past_route_completion = scenario_simulation.route_completion - - route_completion_delta = np.clip(current_route_completion - past_route_completion, 0.0, 1.0) - scenario_simulation.update_route_completion(current_route_completion) - - return route_completion_delta, progress_cache diff --git a/d123/simulation/gym/environment/reward_builder/components/time_to_collision.py b/d123/simulation/gym/environment/reward_builder/components/time_to_collision.py deleted file mode 100644 index 9a5fd2d1..00000000 --- a/d123/simulation/gym/environment/reward_builder/components/time_to_collision.py +++ /dev/null @@ -1,289 +0,0 @@ -from typing import Dict, Final, List, Optional, Tuple - -import numpy as np -import numpy.typing as npt -from carl_nuplan.planning.simulation.planner.pdm_planner.observation.pdm_occupancy_map import ( - PDMOccupancyMap, -) -from carl_nuplan.planning.simulation.planner.pdm_planner.utils.pdm_enums import ( - BBCoordsIndex, -) -from nuplan.common.actor_state.agent import Agent -from nuplan.common.actor_state.ego_state import EgoState, StateSE2 -from nuplan.common.actor_state.oriented_box import OrientedBox, in_collision -from nuplan.common.actor_state.tracked_objects import TrackedObject, TrackedObjects -from nuplan.common.maps.abstract_map import AbstractMap, SemanticMapLayer -from nuplan.planning.metrics.evaluation_metrics.common.time_to_collision_within_bound import ( - _get_ego_tracks_displacement_info, - _get_relevant_tracks, -) -from nuplan.planning.simulation.observation.idm.utils import ( - is_agent_ahead, - is_agent_behind, -) -from nuplan.planning.simulation.observation.observation_type import DetectionsTracks -from shapely import creation - -from d123.simulation.gym.environment.simulation_wrapper import SimulationWrapper - -# TODO: Add to config. -STOPPED_SPEED_THRESHOLD: Final[float] = 5e-03 # [m/s] (ttc) -SUCCESS_TTC: Final[float] = 1.0 -FAIL_TTC: Final[float] = 0.5 - - -def _get_coords_array(oriented_box: OrientedBox) -> npt.NDArray[np.float64]: - """ - Helper function to get corner coordinates of an oriented box. - :param oriented_box: OrientedBox object from nuPlan. - :return: numpy array with shape (5, 2) containing the closed corner coordinates of the oriented box. - """ - coords_array = np.zeros((len(BBCoordsIndex), 2), dtype=np.float64) - corners = oriented_box.all_corners() - coords_array[BBCoordsIndex.FRONT_LEFT] = corners[0].array - coords_array[BBCoordsIndex.REAR_LEFT] = corners[1].array - coords_array[BBCoordsIndex.REAR_RIGHT] = corners[2].array - coords_array[BBCoordsIndex.FRONT_RIGHT] = corners[3].array - coords_array[BBCoordsIndex.CENTER] = corners[0].array # close polygon - return coords_array - - -def _get_dxy(heading: float, velocity: float) -> npt.NDArray[np.float64]: - """ - Get the displacement vector (global x,y) to propagate a bounding box along its heading with a given velocity. - :param heading: Heading angle of the bounding box [rad]. - :param velocity: Velocity of the bounding box [m/s]. - :return: Displacement vector (x, y) to propagate the bounding box. 
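
    A quick numeric check (illustrative values):

        import numpy as np

        heading, velocity = 0.0, 5.0   # facing +x at 5 m/s
        dxy = np.stack([np.cos(heading) * velocity, np.sin(heading) * velocity], axis=-1)
        # dxy == array([5., 0.]); scaled by a time delta t, this shifts the box by v * t along its heading.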
- """ - dxy = np.stack( - [ - np.cos(heading) * velocity, - np.sin(heading) * velocity, - ], - axis=-1, - ) - return dxy - - -def calculate_ttc_v1(simulation_wrapper: SimulationWrapper, resolution: int = 2) -> float: - """ - Calculate the time to collision (TTC) of the ego vehicle, based on the constant velocity forecast. - NOTE: Uses less complex logic than the v2 version. TTC required many steps to converge. We are unsure if v2 was required. - TODO: Refactor or remove this implementation. - :param simulation_wrapper: Simulation wrapper object containing the scenario simulation. - :param resolution: The temporal resolution to check collisions (i.e. every two steps), defaults to 2 - :return: 1.0 if no collision is expected, 0.5 if a collision is expected. - """ - - future_time_indices = np.arange(1, 10, resolution, dtype=int) - future_time_deltas = future_time_indices * simulation_wrapper.scenario.database_interval - - ( - ego_state, - observation, - ) = simulation_wrapper.current_planner_input.history.current_state - assert isinstance(observation, DetectionsTracks) - tracked_objects = observation.tracked_objects - ego_speed = ego_state.dynamic_car_state.center_velocity_2d.magnitude() - if len(tracked_objects) == 0 or ego_speed < STOPPED_SPEED_THRESHOLD: - return SUCCESS_TTC - - unique_tracked_objects: Dict[str, TrackedObject] = { - tracked_object.track_token: tracked_object for tracked_object in tracked_objects - } - map_api = simulation_wrapper.scenario.map_api - ego_in_intersection = map_api.is_in_layer(ego_state.rear_axle, layer=SemanticMapLayer.INTERSECTION) - - def _add_object(tracked_object: TrackedObject) -> bool: - if is_agent_ahead(ego_state.rear_axle, tracked_object.center) or ( - ego_in_intersection and not is_agent_behind(ego_state.rear_axle, tracked_object.center) - ): - return True - return False - - # extract static object polygons - static_object_tokens, static_object_coords_list = [], [] - for static_object in tracked_objects.get_static_objects(): - if _add_object(static_object): - static_object_tokens.append(static_object.track_token) - static_object_coords_list.append(_get_coords_array(static_object.box)) - static_object_coords_array = np.array(static_object_coords_list, dtype=np.float64) # (num_agents, 5, 2) - if len(static_object_tokens) == 0: - static_object_polygons = np.array([], dtype=np.object_) - else: - static_object_polygons = creation.polygons(static_object_coords_array) - - # extract agents - agent_tokens, agent_coords_list, agent_dxy = [], [], [] - for agent in tracked_objects.get_agents(): - if _add_object(agent): - agent_tokens.append(agent.track_token) - agent_coords_list.append(_get_coords_array(agent.box)) - agent_dxy.append(_get_dxy(agent.box.center.heading, agent.velocity.magnitude())) - agent_coords_array = np.array(agent_coords_list, dtype=np.float64) # (num_agents, 5, 2) - agent_dxy = np.array(agent_dxy, dtype=np.float64) # (num_agents, 2) - if len(agent_tokens) == 0: - projected_agent_polygons = np.array([], dtype=np.object_) - - # extract ego - ego_coords_array = _get_coords_array(ego_state.car_footprint.oriented_box) # (5, 2) - ego_dxy = _get_dxy(ego_state.center.heading, ego_speed) - ego_displacements = future_time_deltas[:, None, None] * ego_dxy # (num_steps, 1, 2) - projected_ego_coords = ego_coords_array[None, ...] 
+ ego_displacements # (num_steps, 5, 2) - projected_ego_polygons = creation.polygons(projected_ego_coords) - - for time_delta, ego_polygon in zip(future_time_deltas, projected_ego_polygons): - - # project agents - if len(agent_tokens) > 0: - agent_displacements = agent_dxy * time_delta - projected_agent_coords = agent_coords_array + agent_displacements[:, None, :] - projected_agent_polygons = creation.polygons(projected_agent_coords) - - polygons = np.concatenate([static_object_polygons, projected_agent_polygons], axis=0) - occupancy_map = PDMOccupancyMap(tokens=static_object_tokens + agent_tokens, geometries=polygons) - - # check for collisions - ego_collision = occupancy_map.intersects(ego_polygon) - if len(ego_collision) > 0: - for ego_collision_token in ego_collision: - track_state = unique_tracked_objects[ego_collision_token].center - if is_agent_ahead(ego_state.rear_axle, track_state) or ( - (map_api.is_in_layer(ego_state.rear_axle, layer=SemanticMapLayer.INTERSECTION)) - and not is_agent_behind(ego_state.rear_axle, track_state) - ): - return FAIL_TTC - - return SUCCESS_TTC - - -def calculate_ttc_v2( - simulation_wrapper: SimulationWrapper, - collided_track_tokens: List[str], - in_multiple_lanes_or_offroad: bool = False, - resolution: int = 2, -) -> float: - """ - Calculate the time to collision (TTC) of the ego vehicle, based on the constant velocity forecast. - NOTE: Uses TTC logic closely aligned to nuPlan's implementation, i.e. first extract relevant tracks, then compute TTC. - :param simulation_wrapper: Simulation wrapper object containing the scenario simulation. - :param collided_track_tokens: Detection track tokens that are already collided (ignored). - :param in_multiple_lanes_or_offroad: Whether the ego agent is in multiple lanes or offroad, defaults to False - :param resolution: The temporal resolution to check collisions (TODO: remove or implement). - :return: 1.0 if no collision is expected, 0.5 if a collision is expected. 
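
    The core of both TTC variants is a constant-velocity forward projection; a minimal
    sketch (pure numpy, invented states, a crude circular footprint instead of oriented boxes):

        import numpy as np

        dt, horizon = 0.2, 1.0
        ego_xy, ego_v = np.array([0.0, 0.0]), np.array([5.0, 0.0])       # ego at origin, 5 m/s along +x
        agent_xy, agent_v = np.array([4.0, 0.0]), np.array([0.0, 0.0])   # stopped agent ahead

        ttc = None   # no projected collision within the horizon -> SUCCESS_TTC
        for t in np.arange(dt, horizon + dt, dt):
            if np.linalg.norm((ego_xy + ego_v * t) - (agent_xy + agent_v * t)) < 2.0:
                ttc = t   # first projected step in collision -> FAIL_TTC
                break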
- """ - ( - ego_state, - observation, - ) = simulation_wrapper.current_planner_input.history.current_state - assert isinstance(observation, DetectionsTracks) - tracked_objects = observation.tracked_objects - - # Early non-violation conditions - if len(tracked_objects) == 0 or ego_state.dynamic_car_state.speed <= STOPPED_SPEED_THRESHOLD: - return SUCCESS_TTC - - ( - tracks_poses, - tracks_speed, - tracks_boxes, - ) = _extract_tracks_info_excluding_collided_tracks( - ego_state, - simulation_wrapper.scenario.map_api, - tracked_objects, - collided_track_tokens, - in_multiple_lanes_or_offroad, - ) - tracks_poses = np.array(tracks_poses, dtype=np.float64) - tracks_speed = np.array(tracks_speed, dtype=np.float64) - tracks_boxes = np.array(tracks_boxes) - - ttc_at_index = _compute_time_to_collision_at_timestamp(ego_state, tracks_poses, tracks_speed, tracks_boxes) - if ttc_at_index is None: - return SUCCESS_TTC - - return FAIL_TTC - - -def _extract_tracks_info_excluding_collided_tracks( - ego_state: EgoState, - map_api: AbstractMap, - tracked_objects: TrackedObjects, - collided_track_tokens: List[str], - in_multiple_lanes_or_offroad: bool = False, -) -> Tuple[List[List[float]], List[float], List[OrientedBox]]: - - ego_in_intersection = map_api.is_in_layer(ego_state.rear_axle, layer=SemanticMapLayer.INTERSECTION) - - relevant_tracked_objects: List[TrackedObject] = [] - for tracked_object in tracked_objects: - tracked_object: TrackedObject - if tracked_object.track_token not in collided_track_tokens: - if is_agent_ahead(ego_state.rear_axle, tracked_object.center) or ( - (in_multiple_lanes_or_offroad or ego_in_intersection) - and not is_agent_behind(ego_state.rear_axle, tracked_object.center) - ): - relevant_tracked_objects.append(tracked_object) - - tracks_poses: List[List[float]] = [[*tracked_object.center] for tracked_object in tracked_objects] - tracks_speed: List[float] = [ - tracked_object.velocity.magnitude() if isinstance(tracked_object, Agent) else 0 - for tracked_object in tracked_objects - ] - tracks_boxes: List[OrientedBox] = [tracked_object.box for tracked_object in tracked_objects] - return tracks_poses, tracks_speed, tracks_boxes - - -def _compute_time_to_collision_at_timestamp( - ego_state: EgoState, - tracks_poses: npt.NDArray[np.float64], - tracks_speed: npt.NDArray[np.float64], - tracks_boxes: List[OrientedBox], - time_step_start: float = 0.1, - time_step_size: float = 0.2, - time_horizon: float = 1.0, -) -> Optional[float]: - - ego_speed = ego_state.dynamic_car_state.speed - - # Remain default if we don't have any agents or ego is stopped - if len(tracks_poses) == 0 or ego_speed <= STOPPED_SPEED_THRESHOLD: - return None - - displacement_info = _get_ego_tracks_displacement_info( - ego_state, ego_speed, tracks_poses, tracks_speed, time_step_size - ) - relevant_tracks_mask = _get_relevant_tracks( - displacement_info.ego_pose, - displacement_info.ego_box, - displacement_info.ego_dx, - displacement_info.ego_dy, - tracks_poses, - tracks_boxes, - displacement_info.tracks_dxy, - time_step_size, - time_horizon, - ) - - # If there is no relevant track affecting TTC, remain default - if not len(relevant_tracks_mask): - return None - - # Find TTC for relevant tracks by projecting ego and tracks boxes with time_step_size - for time_to_collision in np.arange(time_step_start, time_horizon, time_step_size): - # project ego's center pose and footprint with a fixed speed - displacement_info.ego_pose[:2] += ( - displacement_info.ego_dx, - displacement_info.ego_dy, - ) - projected_ego_box = 
OrientedBox.from_new_pose( - displacement_info.ego_box, StateSE2(*(displacement_info.ego_pose)) - ) - # project tracks's center pose and footprint with a fixed speed - tracks_poses[:, :2] += displacement_info.tracks_dxy - for track_box, track_pose in zip(tracks_boxes[relevant_tracks_mask], tracks_poses[relevant_tracks_mask]): - projected_track_box = OrientedBox.from_new_pose(track_box, StateSE2(*track_pose)) - if in_collision(projected_ego_box, projected_track_box): - return float(time_to_collision) - - return None diff --git a/d123/simulation/gym/environment/reward_builder/default_reward_builder.py b/d123/simulation/gym/environment/reward_builder/default_reward_builder.py deleted file mode 100644 index 527d4e71..00000000 --- a/d123/simulation/gym/environment/reward_builder/default_reward_builder.py +++ /dev/null @@ -1,753 +0,0 @@ -""" -NOTE @DanielDauner: - -This file may be cleaned up in the future. - -""" - -from dataclasses import dataclass -from typing import Any, Dict, Final, List, Optional, Tuple, Union - -import numpy as np -import numpy.typing as npt -from carl_nuplan.planning.simulation.planner.pdm_planner.observation.pdm_occupancy_map import PDMOccupancyMap -from carl_nuplan.planning.simulation.planner.pdm_planner.utils.pdm_array_representation import states_se2_to_array -from carl_nuplan.planning.simulation.planner.pdm_planner.utils.pdm_geometry_utils import normalize_angle -from nuplan.common.actor_state.ego_state import EgoState, StateSE2 -from nuplan.common.maps.abstract_map import Lane -from nuplan.common.maps.abstract_map_objects import LaneConnector, LaneGraphEdgeMapObject -from nuplan.common.maps.maps_datatypes import TrafficLightStatusType -from shapely import Point, Polygon - -from d123.simulation.gym.environment.helper.environment_area import AbstractEnvironmentArea -from d123.simulation.gym.environment.helper.environment_cache import ( - BoxDetectionCache, - MapCache, - environment_cache_manager, -) -from d123.simulation.gym.environment.reward_builder.abstract_reward_builder import AbstractRewardBuilder -from d123.simulation.gym.environment.reward_builder.components.collision import ( - calculate_all_collisions, - calculate_at_fault_collision, - calculate_non_stationary_collisions, -) -from d123.simulation.gym.environment.reward_builder.components.comfort import ( - calculate_action_delta_comfort, - calculate_kinematics_comfort, - calculate_kinematics_comfort_fixed, - calculate_kinematics_comfort_legacy, - calculate_kinematics_history_comfort, -) -from d123.simulation.gym.environment.reward_builder.components.off_route import ( - calculate_off_route_v1, - calculate_off_route_v2, -) -from d123.simulation.gym.environment.reward_builder.components.progress import ( - ProgressCache, - calculate_route_completion_human, - calculate_route_completion_mean, - calculate_route_completion_nuplan, -) -from d123.simulation.gym.environment.reward_builder.components.time_to_collision import ( - FAIL_TTC, - SUCCESS_TTC, - calculate_ttc_v1, - calculate_ttc_v2, -) -from d123.simulation.gym.environment.simulation_wrapper import SimulationWrapper - -NUM_SCENARIO_ITERATIONS: Final[int] = 150 # TODO: Remove this constant. 
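
For orientation before the class definitions below, a condensed, illustrative version of the
"regular" (CaRL-style) accumulation implemented further down (all values invented):

    import numpy as np

    route_completion = 0.02                     # per-step progress delta
    soft = [1.0, 1.0, 0.9, 1.0, 0.5]            # lane_distance, too_fast, off_route, comfort, ttc
    hard = [False, False, False, False, False]  # blocked, red_light, collision, stop_sign, off_road

    termination = any(hard)
    reward = 0.0 if termination else route_completion * np.prod(soft)
    reward *= 100.0                             # default reward_factor
    # Here: reward = 0.02 * 0.45 * 100 = 0.9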
- - -@dataclass -class DefaultRewardComponents: - """Dataclass to store the components of the default reward builder.""" - - route_completion: float = 0.0 - - # hard constraints - blocked: bool = False # not implemented - red_light: bool = False - collision: bool = False - stop_sign: bool = False # not implemented - off_road: bool = False - - # soft constraints - lane_distance: float = 1.0 - too_fast: float = 1.0 - off_route: float = 1.0 - comfort: float = 1.0 - ttc: float = 1.0 - - @property - def hard_constraints(self) -> List[bool]: - """ - :return: boolean values of the hard constraints, i.e. collision, that lead to termination. - """ - return [ - self.blocked, - self.red_light, - self.collision, - self.stop_sign, - self.off_road, - ] - - @property - def soft_constraints(self) -> List[float]: - """ - :return: float values of the soft constraints. - """ - return [ - self.lane_distance, - self.too_fast, - self.off_route, - self.comfort, - self.ttc, - ] - - -@dataclass -class DefaultRewardConfig: - """Configuration for the default reward builder.""" - - route_completion_type: Optional[str] = "human" - collision_type: Optional[str] = "non_stationary" - ttc_type: Optional[str] = "v2" - red_light_type: Optional[str] = None - lane_distance_type: Optional[str] = "v1" - off_route_type: Optional[str] = "v1" - comfort_type: Optional[str] = "kinematics" - - comfort_accumulation: str = "value" - ttc_accumulation: str = "value" - reward_accumulation: str = "regular" - - terminal_penalty: float = 0.0 - collision_terminal_penalty: float = 0.0 - off_road_violation_threshold: float = 0.0 - lane_distance_violation_threshold: float = 0.5 - survival_ratio: float = 0.6 - - reward_factor: float = 100.0 - - def __post_init__(self): - assert self.route_completion_type is None or self.route_completion_type in [ - "human", - "mean", - "nuplan", - ] - assert self.collision_type is None or self.collision_type in [ - "all", - "non_stationary", - "at_fault", - ] - assert self.ttc_type is None or self.ttc_type in ["v1", "v2"] - assert self.red_light_type is None or self.red_light_type in ["v1"] - assert self.lane_distance_type is None or self.lane_distance_type in ["v1"] - assert self.off_route_type is None or self.off_route_type in ["v1", "v2"] - assert self.comfort_type is None or self.comfort_type in [ - "action_delta", - "kinematics", - "kinematics_legacy", - "kinematics_history", - "kinematics_fixed", - ] - assert self.comfort_accumulation in ["terminal", "value"] - assert self.ttc_accumulation in ["terminal", "value"] - assert self.reward_accumulation in ["regular", "nuplan", "survival"] - - -class DefaultRewardBuilder(AbstractRewardBuilder): - """Default reward builder for the Gym simulation environment.""" - - def __init__(self, environment_area: AbstractEnvironmentArea, config: DefaultRewardConfig) -> None: - """ - Initializes the default reward builder. - :param environment_area: Environment area class that defines the map patch to calculate the reward. - :param config: Configuration for the default reward builder. 
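
        A minimal construction sketch (the environment-area instance `my_area` is a placeholder):

            config = DefaultRewardConfig(collision_type="at_fault", reward_accumulation="regular")
            builder = DefaultRewardBuilder(environment_area=my_area, config=config)
            builder.reset()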
- """ - - self._environment_area = environment_area - self._config = config - - # lazy loaded - self._reward_history: List[DefaultRewardComponents] = [] - self._prev_collided_track_tokens: List[str] = [] - self._expert_red_light_infractions: List[str] = [] - self._comfort_values: List[npt.NDArray[np.bool_]] = [] - self._progress_cache: Optional[ProgressCache] = None - - def reset(self) -> None: - """Inherited, see superclass.""" - self._reward_history: List[DefaultRewardComponents] = [] - self._prev_collided_track_tokens: List[str] = [] - self._expert_red_light_infractions: List[str] = [] - self._comfort_values: List[npt.NDArray[np.bool_]] = [] - self._progress_cache: Optional[ProgressCache] = None - - def build_reward(self, simulation_wrapper: SimulationWrapper, info: Dict[str, Any]) -> Tuple[float, bool, bool]: - """Inherited, see superclass.""" - - map_cache, detection_cache = environment_cache_manager.build_environment_caches( - planner_input=simulation_wrapper.current_planner_input, - planner_initialization=simulation_wrapper.planner_initialization, - environment_area=self._environment_area, - ) - info["map_cache"] = map_cache - info["detection_cache"] = detection_cache - - reward_components = self._calculate_reward_components(simulation_wrapper, map_cache, detection_cache) - - if self._config.reward_accumulation == "nuplan": - reward, termination, truncation = self._nuplan_accumulate(reward_components) - elif self._config.reward_accumulation == "survival": - reward, termination, truncation = self._survival_accumulate(reward_components) - else: - reward, termination, truncation = self._regular_accumulate(reward_components) - self._reward_history.append(reward_components) - - if termination or truncation or not simulation_wrapper.is_simulation_running(): - info["reward"] = self._accumulate_info() - info["comfort"] = self._accumulate_info_comfort() - - self._add_value_measurements(simulation_wrapper, info) - return reward, termination, truncation - - def _regular_accumulate(self, reward_components: DefaultRewardComponents) -> Tuple[float, bool, bool]: - """ - Accumulate the reward components into a single reward value, as described in CaRL paper. - TODO: Refactor this method. - :param reward_components: Dataclass storing reward components. - :return: - - reward: The accumulated reward value. - - termination: Whether the simulation terminates in the current step. - - truncation: Whether the simulation is truncated in the current step. - """ - termination = any(reward_components.hard_constraints) - if self._config.comfort_accumulation == "terminal": - termination = termination or reward_components.comfort < 1.0 - if self._config.ttc_accumulation == "terminal": - termination = termination or reward_components.ttc < 1.0 - truncation = termination - terminal_penalty = ( - self._config.collision_terminal_penalty if reward_components.collision else self._config.terminal_penalty - ) - terminate_factor = 0.0 if termination else 1.0 - reward = ( - reward_components.route_completion * np.prod(reward_components.soft_constraints) * terminate_factor - + terminal_penalty - ) - return reward * self._config.reward_factor, termination, truncation - - def _survival_accumulate(self, reward_components: DefaultRewardComponents) -> Tuple[float, bool, bool]: - """ - Accumulate the reward components into a single reward value, and adding a survival bonus. - TODO: Refactor this method. - :param reward_components: Dataclass storing reward components. - :return: - - reward: The accumulated reward value. 
- - termination: Whether the simulation terminates in the current step. - - truncation: Whether the simulation is truncated in the current step. - """ - termination = any(reward_components.hard_constraints) - truncation = termination - terminal_penalty = ( - self._config.collision_terminal_penalty if reward_components.collision else self._config.terminal_penalty - ) - terminate_factor = 0.0 if termination else 1.0 - raw_reward = (1 - self._config.survival_ratio) * reward_components.route_completion * np.prod( - reward_components.soft_constraints - ) + (self._config.survival_ratio / NUM_SCENARIO_ITERATIONS) - - reward = raw_reward * terminate_factor + terminal_penalty - return reward * self._config.reward_factor, termination, truncation - - def _nuplan_accumulate(self, reward_components: DefaultRewardComponents) -> Tuple[float, bool, bool]: - """ - Accumulate the reward components into a single reward value, using a weighted combination similar to nuPlan. - TODO: Refactor this method. - :param reward_components: Dataclass storing reward components. - :return: - - reward: The accumulated reward value. - - termination: Whether the simulation terminates in the current step. - - truncation: Whether the simulation is truncated in the current step. - """ - - termination = any(reward_components.hard_constraints) - truncation = termination - reward = 0.0 - - if not termination: - - progress = reward_components.route_completion - - ttc = 1.0 if reward_components.ttc == 1.0 else 0.0 - speed = reward_components.too_fast - comfort = 1.0 if reward_components.comfort == 1.0 else 0.0 - - ttc /= NUM_SCENARIO_ITERATIONS - speed /= NUM_SCENARIO_ITERATIONS - comfort /= NUM_SCENARIO_ITERATIONS - - reward = (5 * progress + 5 * ttc + 4 * speed + 2 * comfort) / 16 - reward = reward * reward_components.off_route * reward_components.lane_distance * self._config.reward_factor - - return reward, termination, truncation - - def _accumulate_info(self) -> Dict[str, float]: - """ - Helper function to log the accumulated reward information. - TODO: Remove this method. - """ - reward_info: Dict[str, float] = {} - reward_info["reward_progress"] = np.sum([reward.route_completion for reward in self._reward_history]) - - # reward_info["reward_blocked"] = not np.any([reward.blocked for reward in self._reward_history]) - reward_info["reward_red_light"] = not np.any([reward.red_light for reward in self._reward_history]) - reward_info["reward_collision"] = not np.any([reward.collision for reward in self._reward_history]) - # reward_info["reward_stop_sign"] = not np.any([reward.stop_sign for reward in self._reward_history]) - reward_info["reward_off_road"] = not np.any([reward.off_road for reward in self._reward_history]) - - reward_info["reward_lane_distance"] = np.mean([reward.lane_distance for reward in self._reward_history]) - reward_info["reward_too_fast"] = np.mean([reward.too_fast for reward in self._reward_history]) - reward_info["reward_off_route"] = np.mean([reward.off_route for reward in self._reward_history]) - reward_info["reward_comfort"] = not np.any([self._reward_history[-1].comfort < 1.0]) - reward_info["reward_ttc"] = not np.any([reward.ttc < 1.0 for reward in self._reward_history]) - - for key, value in reward_info.items(): - reward_info[key] = float(value) - - return reward_info - - def _accumulate_info_comfort(self) -> Dict[str, float]: - """ - Helper function to log the accumulated comfort information. - TODO: Remove this method. 
- """ - comfort_info: Dict[str, float] = {} - comfort = np.array(self._comfort_values, dtype=np.bool_) - - comfort_info["comfort_lon_acceleration"] = comfort[-1, 0] - comfort_info["comfort_lat_acceleration"] = comfort[-1, 1] - comfort_info["comfort_jerk_metric"] = comfort[-1, 2] - comfort_info["comfort_lon_jerk_metric"] = comfort[-1, 3] - comfort_info["comfort_yaw_accel"] = comfort[-1, 4] - comfort_info["comfort_yaw_rate"] = comfort[-1, 5] - - for key, value in comfort_info.items(): - comfort_info[key] = float(value) - - return comfort_info - - def _add_value_measurements(self, simulation_wrapper: SimulationWrapper, info: Dict[str, Any]) -> None: - """ - Pass some information for the value observation. - TODO: DEBUG/REMOVE. - :param simulation_wrapper: Complete simulation wrapper object used for gym simulation. - :param info: Arbitrary information dictionary, for passing information between modules. - """ - assert len(self._reward_history) > 0 - - current_iteration = simulation_wrapper.current_planner_input.iteration.index - num_simulation_iterations = simulation_wrapper.scenario.get_number_of_iterations() - - remaining_time = 1 - (current_iteration / num_simulation_iterations) - remaining_progress = 1 - simulation_wrapper.route_completion - comfort_score = self._reward_history[-1].comfort - ttc_score = self._reward_history[-1].ttc - - info["remaining_time"] = remaining_time - info["remaining_progress"] = remaining_progress - info["comfort_score"] = comfort_score - info["ttc_score"] = ttc_score - - def _calculate_reward_components( - self, simulation_wrapper: SimulationWrapper, map_cache: MapCache, detection_cache: BoxDetectionCache - ) -> DefaultRewardComponents: - """ - Internal method to calculate the reward components based on the current simulation state. - :param simulation_wrapper: Complete simulation wrapper object used for gym simulation. - :param map_cache: Cache map elements in the environment area. - :param detection_cache: Cached objects for detection tracks in the current simulation step. - :return: dataclass containing the reward components. - """ - ego_states = simulation_wrapper.current_planner_input.history.ego_states - current_ego_state = ego_states[-1] - - component_dict: Dict[str, Union[bool, float]] = {} - - current_lane, intersecting_lanes = _find_current_and_intersecting_lanes(current_ego_state, map_cache) - - # ---------------- Route Completion ---------------- - if self._config.route_completion_type is not None: - if self._config.route_completion_type == "human": - component_dict["route_completion"] = calculate_route_completion_human(ego_states, simulation_wrapper) - elif self._config.route_completion_type == "mean": - component_dict["route_completion"] = calculate_route_completion_mean(ego_states, simulation_wrapper) - elif self._config.route_completion_type == "nuplan": - ( - component_dict["route_completion"], - progress_cache, - ) = calculate_route_completion_nuplan(ego_states, simulation_wrapper, self._progress_cache) - self._progress_cache = progress_cache - else: - raise ValueError(f"Invalid route completion type: {self._config.route_completion_type}") - - # ---------------- Hard Constraints ---------------- - - # 5. Off road - component_dict["off_road"], in_multiple_lanes = _calculate_off_road( - current_ego_state, - map_cache, - intersecting_lanes, - self._config.off_road_violation_threshold, - ) - in_multiple_lanes_or_offroad = in_multiple_lanes or component_dict["off_road"] - - # 1. 
Is ego blocked for 90s - # component_dict["blocked"] = _calculate_blocked() # Not implemented - - # 2. Red light infraction - if self._config.red_light_type is not None: - if self._config.red_light_type == "v1": - ( - component_dict["red_light"], - self._expert_red_light_infractions, - ) = _calculate_red_light( - simulation_wrapper, - map_cache, - current_lane, - self._expert_red_light_infractions, - ) - else: - raise ValueError(f"Invalid red light type: {self._config.red_light_type}") - - # 3. Collision - if self._config.collision_type is not None: - if self._config.collision_type == "all": - collision, collided_track_tokens = calculate_all_collisions( - current_ego_state, - detection_cache.tracked_objects, - self._prev_collided_track_tokens, - ) - elif self._config.collision_type == "non_stationary": - collision, collided_track_tokens = calculate_non_stationary_collisions( - current_ego_state, - detection_cache.tracked_objects, - self._prev_collided_track_tokens, - ) - elif self._config.collision_type == "at_fault": - collision, collided_track_tokens = calculate_at_fault_collision( - current_ego_state, - detection_cache.tracked_objects, - self._prev_collided_track_tokens, - in_multiple_lanes_or_offroad, - ) - else: - raise ValueError(f"Invalid collision type: {self._config.collision_type}") - - component_dict["collision"] = collision - self._prev_collided_track_tokens.extend(collided_track_tokens) - - # 4. Stop signs - # component_dict["stop_sign"] = _calculate_stop_sign() # Not implemented - - # ---------------- Soft Constraints ---------------- - - # 1. Lane Distance - if self._config.lane_distance_type is not None: - if self._config.lane_distance_type == "v1": - component_dict["lane_distance"] = _calculate_lane_distance( - current_ego_state, - current_lane, - self._config.lane_distance_violation_threshold, - ) - else: - raise ValueError(f"Invalid lane distance type: {self._config.lane_distance_type}") - - # 2. Driving too fast - component_dict["too_fast"] = _calculate_too_fast(current_ego_state, current_lane) - - # 3. Driving off route - if self._config.off_route_type is not None: - if self._config.off_route_type == "v1": - component_dict["off_route"] = calculate_off_route_v1(simulation_wrapper, map_cache) - elif self._config.off_route_type == "v2": - component_dict["off_route"] = calculate_off_route_v2(simulation_wrapper, map_cache) - else: - raise ValueError(f"Invalid off route type: {self._config.off_route_type}") - - # 4. comfort - if self._config.comfort_type is not None: - comfort_results = None - if self._config.comfort_type == "action_delta": - component_dict["comfort"] = calculate_action_delta_comfort(simulation_wrapper) - elif self._config.comfort_type == "kinematics": - ( - component_dict["comfort"], - comfort_results, - ) = calculate_kinematics_comfort(simulation_wrapper) - elif self._config.comfort_type == "kinematics_legacy": - ( - component_dict["comfort"], - comfort_results, - ) = calculate_kinematics_comfort_legacy(simulation_wrapper) - elif self._config.comfort_type == "kinematics_history": - ( - component_dict["comfort"], - comfort_results, - ) = calculate_kinematics_history_comfort(simulation_wrapper) - elif self._config.comfort_type == "kinematics_fixed": - ( - component_dict["comfort"], - comfort_results, - ) = calculate_kinematics_comfort_fixed(simulation_wrapper) - else: - raise ValueError(f"Invalid comfort type: {self._config.comfort_type}") - - if comfort_results is not None: - self._comfort_values.append(comfort_results) - - # 5. 
Time to collision - if self._config.ttc_type is not None: - ttc_failed_previously = any([reward.ttc < SUCCESS_TTC for reward in self._reward_history]) - if ttc_failed_previously: - component_dict["ttc"] = FAIL_TTC - elif self._config.ttc_type == "v1": - component_dict["ttc"] = calculate_ttc_v1(simulation_wrapper) - elif self._config.ttc_type == "v2": - component_dict["ttc"] = calculate_ttc_v2( - simulation_wrapper, - self._prev_collided_track_tokens, - in_multiple_lanes_or_offroad, - ) - else: - raise ValueError(f"Invalid ttc type: {self._config.ttc_type}") - - return DefaultRewardComponents(**component_dict) - - -def _calculate_blocked() -> bool: - """Placeholder for blocked calculation. TODO: remove.""" - return False - - -def _calculate_red_light( - simulation_wrapper: SimulationWrapper, - map_cache: MapCache, - current_lane: Optional[LaneGraphEdgeMapObject], - expert_red_light_infractions: List[str], -) -> Tuple[bool, List[str]]: - """ - Calculates the red light infraction based in the current iteration. - TODO: Refactor this method. - :param simulation_wrapper: Wrapper object containing complete nuPlan simulation. - :param map_cache: Cache map elements in the environment area. - :param current_lane: Lane object aligned to the ego vehicle in the current iteration. - :param expert_red_light_infractions: List of traffic light infractions of the human expert. - :return: Whether the ego vehicle is violating a red light and the updated list of expert red light infractions. - """ - - STOPPED_SPEED_THRESHOLD: float = 5e-02 - - iteration = simulation_wrapper.current_planner_input.iteration.index - ego_state = simulation_wrapper.current_ego_state - expert_ego_state = simulation_wrapper.scenario.get_ego_state_at_iteration(iteration) - - ego_on_lane = current_lane is None or isinstance(current_lane, Lane) - ego_stopped = ego_state.dynamic_car_state.speed < STOPPED_SPEED_THRESHOLD - - if ego_on_lane or ego_stopped: - return False, expert_red_light_infractions - - # add on route checking - red_connectors: Dict[str, LaneConnector] = {} - for connector_id, connector in map_cache.lane_connectors.items(): - if ( - (connector.get_roadblock_id() in map_cache.route_lane_group_ids) - and (connector_id in map_cache.traffic_lights.keys()) - and (map_cache.traffic_lights[connector_id] == TrafficLightStatusType.RED) - ): - red_connectors[connector_id] = connector - - red_connector_map = PDMOccupancyMap( - list(red_connectors.keys()), - [connector.polygon for connector in red_connectors.values()], - ) - ego_center_point = Point(*ego_state.center.array) - expert_center_point = Point(*expert_ego_state.center.array) - - ego_intersecting_connectors = red_connector_map.intersects(ego_center_point) - expert_intersecting_connectors = red_connector_map.intersects(expert_center_point) - expert_red_light_infractions.extend(expert_intersecting_connectors) - - non_covered_infractions = list(set(ego_intersecting_connectors) - set(expert_red_light_infractions)) - if len(non_covered_infractions) > 0: - return True, expert_red_light_infractions - - return False, expert_red_light_infractions - - -def _calculate_stop_sign() -> bool: - """Placeholder for stop sign infraction. TODO: remove.""" - return False - - -def _calculate_off_road( - ego_state: EgoState, - map_cache: MapCache, - intersecting_lanes: List[LaneGraphEdgeMapObject], - violation_threshold: float, -) -> Tuple[bool, bool]: - """ - Calculates whether the ego vehicle is off-road based on its corners and the drivable area map. 
- :param ego_state: Ego vehicle state of the current iteration.
- :param map_cache: Cached map elements in the environment area.
- :param intersecting_lanes: List of lanes that intersect with the ego vehicle's position.
- :param violation_threshold: Threshold distance to consider a corner as off-road.
- :return: Whether the ego vehicle is off-road and whether it is in multiple lanes.
- """
-
- drivable_area_map = map_cache.drivable_area_map
- ego_corners = np.array(
- [[point.x, point.y] for point in ego_state.agent.box.all_corners()],
- dtype=np.float64,
- )
- corner_in_polygons = drivable_area_map.points_in_polygons(ego_corners) # (geom, 4)
-
- polygon_indices = np.where(corner_in_polygons.sum(axis=-1) > 0)[0]
- corners_dwithin_polygons = corner_in_polygons.sum(axis=0) > 0
-
- if violation_threshold > 0.0 and not np.all(corners_dwithin_polygons):
- ego_polygons = [drivable_area_map.geometries[idx] for idx in polygon_indices]
- ego_polygons.extend([lane.polygon for lane in intersecting_lanes])
-
- for corner_idx in np.where(~corners_dwithin_polygons)[0]:
- distances = [polygon.distance(Point(*ego_corners[corner_idx])) for polygon in ego_polygons]
- if len(distances) > 0 and min(distances) < violation_threshold:
- corners_dwithin_polygons[corner_idx] = True
-
- off_road = not np.all(corners_dwithin_polygons)
- in_multiple_lanes = len(polygon_indices) > 1
-
- return off_road, in_multiple_lanes
-
-
-def _find_current_and_intersecting_lanes(
- ego_state: EgoState,
- map_cache: MapCache,
-) -> Tuple[Optional[LaneGraphEdgeMapObject], List[LaneGraphEdgeMapObject]]:
- """
- Helper function to find the current lane and intersecting lanes based on the ego vehicle's state.
- TODO: Refactor this method.
- :param ego_state: Ego vehicle state of the current iteration.
- :param map_cache: Cached map elements in the environment area.
- :return: Tuple of
- - current_lane: The lane that the ego vehicle is currently on, or None if not found.
- - intersecting_lanes: List of lanes that intersect with the ego vehicle's position.
- """
-
- current_lane: Optional[LaneGraphEdgeMapObject] = None
-
- # store lanes and lane connectors in a common dict
- lanes_dict: Dict[str, LaneGraphEdgeMapObject] = {
- lane.id: lane for lane in list(map_cache.lanes.values()) + list(map_cache.lane_connectors.values())
- }
-
- # find intersecting lanes
- lane_polygons: List[Polygon] = [lane.polygon for lane in lanes_dict.values()]
- lane_map = PDMOccupancyMap(list(lanes_dict.keys()), lane_polygons)
- ego_center_point = Point(ego_state.center.x, ego_state.center.y)
- intersecting_lanes_ids = lane_map.intersects(ego_center_point)
- intersecting_lanes = [lanes_dict[lane_id] for lane_id in intersecting_lanes_ids]
-
- def _calculate_heading_error(ego_pose: StateSE2, lane: LaneGraphEdgeMapObject) -> float:
- """Calculate the heading error of the ego vehicle with respect to the lane."""
-
- # calculate nearest state on baseline
- lane_se2_array = states_se2_to_array(lane.baseline_path.discrete_path)
- lane_distances = np.linalg.norm(ego_pose.point.array[None, ...]
- lane_se2_array[..., :2], axis=-1)
-
- # calculate heading error
- heading_error = lane.baseline_path.discrete_path[np.argmin(lane_distances)].heading - ego_pose.heading
- heading_error = np.abs(normalize_angle(heading_error))
-
- return heading_error
-
- if len(intersecting_lanes_ids) > 0:
- lane_route_errors: Dict[str, float] = {}
- lane_errors: Dict[str, float] = {}
-
- for lane_id in intersecting_lanes_ids:
- lane = lanes_dict[lane_id]
- heading_error = _calculate_heading_error(ego_state.center, lane)
- if lane.get_roadblock_id() in map_cache.route_lane_group_ids:
- lane_route_errors[lane_id] = heading_error
- lane_errors[lane_id] = heading_error
-
- # Search for lanes on route first
- if len(lane_route_errors) > 0:
- current_lane = lanes_dict[min(lane_route_errors, key=lane_route_errors.get)]
-
- else: # Fallback to all intersecting lanes
- current_lane = lanes_dict[min(lane_errors, key=lane_errors.get)]
-
- return current_lane, intersecting_lanes
-
-
-def _calculate_lane_distance(
- ego_state: EgoState,
- current_lane: Optional[LaneGraphEdgeMapObject],
- lane_distance_violation_threshold: float = 0.5,
-) -> float:
- """
- Calculates the distance of the ego vehicle to the center of the current lane.
- Normalizes the distance to a value between 0 and 1.
- :param ego_state: Ego vehicle state of the current iteration.
- :param current_lane: Lane object aligned to the ego vehicle in the current iteration.
- :param lane_distance_violation_threshold: Normed distance for which the ego is not penalized, defaults to 0.5
- :return: Normed reward between 0.5 and 1.0, where 1.0 is the best value.
- """
-
- if current_lane is not None and isinstance(current_lane, Lane):
- ego_center_point = Point(ego_state.center.x, ego_state.center.y)
- center_distance, left_distance, right_distance = (
- ego_center_point.distance(current_lane.baseline_path.linestring),
- ego_center_point.distance(current_lane.left_boundary.linestring),
- ego_center_point.distance(current_lane.right_boundary.linestring),
- )
-
- # assumes that the ego center is in the lane polygon
- center_distance_norm = (center_distance - lane_distance_violation_threshold) / (
- (center_distance + np.minimum(left_distance, right_distance) - lane_distance_violation_threshold) + 1e-6
- )
- center_distance_norm = np.clip(center_distance_norm, 0, 1)
- return 1.0 - (center_distance_norm * 0.5)
-
- return 1.0
-
-
-def _calculate_too_fast(
- ego_state: EgoState,
- current_lane: Optional[LaneGraphEdgeMapObject],
- max_overspeed_value_threshold: float = 2.23,
-) -> float:
- """
- Calculates the speed of the ego vehicle in relation to the speed limit of the current lane.
- :param ego_state: Ego vehicle state of the current iteration.
- :param current_lane: Lane object aligned to the ego vehicle in the current iteration.
- :param max_overspeed_value_threshold: max exceeding value for linear penalty, defaults to 2.23
- :return: Reward value between 0 and 1, where 1 is the best value.
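- Example for illustration: with a speed limit of 13.41 m/s (~30 mph) and an ego speed of
- 15.0 m/s, the excess of 1.59 m/s gives a violation loss of 1.59 / 2.23 ~= 0.71 and a
- reward of ~= 0.29.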
- """ - - # Adding a small tolerance to handle cases where max_overspeed_value_threshold is specified as 0 - max_overspeed_value_threshold_ = max(max_overspeed_value_threshold, 1e-3) - - if current_lane is not None: - speed_limit = current_lane.speed_limit_mps - if speed_limit is not None: - exceeding_speed = ego_state.dynamic_car_state.speed - speed_limit - if exceeding_speed > 0.0: - violation_loss = exceeding_speed / max_overspeed_value_threshold_ - return float(max(0.0, 1.0 - violation_loss)) - return 1.0 diff --git a/d123/simulation/gym/environment/scenario_sampler/abstract_scenario_sampler.py b/d123/simulation/gym/environment/scenario_sampler/abstract_scenario_sampler.py deleted file mode 100644 index 849163ae..00000000 --- a/d123/simulation/gym/environment/scenario_sampler/abstract_scenario_sampler.py +++ /dev/null @@ -1,25 +0,0 @@ -from abc import ABC, abstractmethod -from typing import List, Optional - -from nuplan.planning.scenario_builder.abstract_scenario import AbstractScenario - - -class AbstractScenarioSampler(ABC): - """Abstract class for sampling scenarios in a Gym simulation environment.""" - - @abstractmethod - def sample(self, seed: Optional[int] = None) -> AbstractScenario: - """ - Samples a single scenario. - :param seed: Optional seed used for sampling, defaults to None - :return: Scenario interface of nuPlan. - """ - - @abstractmethod - def sample_batch(self, batch_size: int, seed: Optional[int] = None) -> List[AbstractScenario]: - """ - Samples a batch of scenarios. - :param batch_size: number of scenarios to sample. - :param seed: Optional seed used for sampling, defaults to None - :return: List of scenario interfaces of nuPlan. - """ diff --git a/d123/simulation/gym/environment/scenario_sampler/cache_scenario_sampler.py b/d123/simulation/gym/environment/scenario_sampler/cache_scenario_sampler.py deleted file mode 100644 index b1febeb5..00000000 --- a/d123/simulation/gym/environment/scenario_sampler/cache_scenario_sampler.py +++ /dev/null @@ -1,61 +0,0 @@ -from logging import getLogger -from typing import List, Optional - -import numpy as np -from nuplan.planning.scenario_builder.abstract_scenario import AbstractScenario - -from d123.simulation.gym.cache.gym_scenario_cache import GymScenarioCache -from d123.simulation.gym.environment.scenario_sampler.abstract_scenario_sampler import AbstractScenarioSampler - -logger = getLogger(__name__) - - -class CacheScenarioSampler(AbstractScenarioSampler): - """ - Scenario sampler that loads scenarios from the Gym cache structure. - NOTE: It is possible to implement a scenario sampler from nuPlan SQL database, but not included in the code. - We tried that. It was too slow. - """ - - def __init__( - self, log_names: List[str], cache_path: str, format: str = "gz", ignore_log_names: bool = False - ) -> None: - """ - Initializes the CacheScenarioSampler. - :param log_names: Log names to include during training. - :param cache_path: Path to the cache directory where scenarios are saved. - :param format: Format of the scenario cache (i.e. gzip), defaults to "gz" - """ - - self._log_names = log_names - self._scenario_cache = GymScenarioCache(cache_path, format) - - # NOTE: Additional conditions (e.g. depending on scenario type) could be added heres - if ignore_log_names: - logger.warning( - "Ignoring provided log names: all scenarios from the cache will be loaded. " - "This may lead to training on validation/test splits if the cache was not properly filtered. " - "Ensure your cache only contains appropriate training scenarios." 
- ) - self._file_paths = self._scenario_cache.file_paths - else: - self._file_paths = [ - file_path - for file_path, log_name in zip(self._scenario_cache.file_paths, self._scenario_cache.log_names) - if log_name in self._log_names - ] - - def sample(self, seed: Optional[int] = None) -> AbstractScenario: - """Inherited, see super class.""" - return self.sample_batch(1, seed=seed)[0] - - def sample_batch(self, batch_size: int, seed: Optional[int] = None) -> List[AbstractScenario]: - """Inherited, see super class.""" - rng = np.random.default_rng(seed=seed) - indices = rng.choice(len(self._file_paths), size=batch_size) - - scenarios: List[AbstractScenario] = [] - for idx in indices: - file_path = self._file_paths[idx] - scenarios.append(self._scenario_cache.load_scenario(file_path)) - return scenarios diff --git a/d123/simulation/gym/environment/simulation_builder/abstract_simulation_builder.py b/d123/simulation/gym/environment/simulation_builder/abstract_simulation_builder.py deleted file mode 100644 index 9aed2f8f..00000000 --- a/d123/simulation/gym/environment/simulation_builder/abstract_simulation_builder.py +++ /dev/null @@ -1,16 +0,0 @@ -from abc import ABC, abstractmethod - -from nuplan.planning.scenario_builder.abstract_scenario import AbstractScenario -from nuplan.planning.simulation.simulation import Simulation - - -class AbstractSimulationBuilder(ABC): - """Abstract class for building a nuPlan simulation object, which includes background traffic, etc.""" - - @abstractmethod - def build_simulation(self, scenario: AbstractScenario) -> Simulation: - """ - Builds a nuPlan simulation object. - :param scenario: Scenario interface of nuPlan. - :return: Simulation object of nuPlan. - """ diff --git a/d123/simulation/gym/environment/simulation_builder/default_simulation_builder.py b/d123/simulation/gym/environment/simulation_builder/default_simulation_builder.py deleted file mode 100644 index 8c8818b8..00000000 --- a/d123/simulation/gym/environment/simulation_builder/default_simulation_builder.py +++ /dev/null @@ -1,91 +0,0 @@ -import random - -from carl_nuplan.planning.simulation.controller.one_stage_controller import OneStageController - -# TODO: refactor for general motion models, observations, etc. -from nuplan.common.actor_state.vehicle_parameters import get_pacifica_parameters -from nuplan.planning.scenario_builder.abstract_scenario import AbstractScenario -from nuplan.planning.simulation.controller.motion_model.kinematic_bicycle import KinematicBicycleModel -from nuplan.planning.simulation.observation.idm_agents import IDMAgents -from nuplan.planning.simulation.observation.tracks_observation import TracksObservation -from nuplan.planning.simulation.simulation import Simulation -from nuplan.planning.simulation.simulation_setup import SimulationSetup -from nuplan.planning.simulation.simulation_time_controller.step_simulation_time_controller import ( - StepSimulationTimeController, -) - -from d123.simulation.gym.environment.simulation_builder.abstract_simulation_builder import ( - AbstractSimulationBuilder, -) - - -class DefaultSimulationBuilder(AbstractSimulationBuilder): - """Default simulation builder for CaRL.""" - - def __init__(self, agent_type: str = "tracks") -> None: - """ - Initializes the DefaultSimulationBuilder. - NOTE: Using "tracks" is by far the fastest option and recommended for testing and experimentation. - The IDM implementation of nuPlan is very slow but could be improved if required. 
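- With "mixed", each scenario independently uses IDM agents with probability 0.5 and
- log-replay tracks otherwise (see _idm_agents_probability below).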
- :param agent_type: Whether to use tracks (log-replay), IDM agents, a mixture, or no background traffic, defaults to "tracks"
- """
- # TODO: use Literal typing.
- assert agent_type in ["tracks", "idm_agents", "mixed", "no_tracks"]
-
- self._agent_type = agent_type
- self._callback = None
-
- self._idm_agents_probability = 0.5
- self._history_buffer_duration = 1.0 # [s]
-
- def build_simulation(self, scenario: AbstractScenario) -> Simulation:
- """Inherited, see superclass."""
- simulation_setup = self._build_simulation_setup(scenario)
- return Simulation(
- simulation_setup=simulation_setup,
- callback=self._callback,
- simulation_history_buffer_duration=self._history_buffer_duration,
- )
-
- def _build_simulation_setup(self, scenario: AbstractScenario) -> SimulationSetup:
- """
- Helper function to build the simulation setup from a scenario.
- :param scenario: Scenario interface of nuPlan.
- :return: SimulationSetup object of nuPlan.
- """
-
- time_controller = StepSimulationTimeController(scenario)
-
- if self._agent_type == "mixed":
- use_idm_agents = random.random() < self._idm_agents_probability
- agent_type = "idm_agents" if use_idm_agents else "tracks"
- else:
- agent_type = self._agent_type
-
- if agent_type == "tracks":
- observations = TracksObservation(scenario)
- elif agent_type == "idm_agents":
- observations = IDMAgents(
- scenario=scenario,
- target_velocity=10,
- min_gap_to_lead_agent=1.0,
- headway_time=1.5,
- accel_max=1.0,
- decel_max=2.0,
- open_loop_detections_types=[
- "PEDESTRIAN",
- "BARRIER",
- "CZONE_SIGN",
- "TRAFFIC_CONE",
- "GENERIC_OBJECT",
- ],
- minimum_path_length=20,
- planned_trajectory_samples=None,
- planned_trajectory_sample_interval=None,
- radius=100,
- )
- else: # "no_tracks" is accepted above but has no observation implementation yet
- raise NotImplementedError(f"Agent type '{agent_type}' is not implemented.")
-
- motion_model = KinematicBicycleModel(get_pacifica_parameters())
- ego_controller = OneStageController(scenario, motion_model)
-
- return SimulationSetup(time_controller, observations, ego_controller, scenario)
diff --git a/d123/simulation/gym/environment/simulation_wrapper.py b/d123/simulation/gym/environment/simulation_wrapper.py
deleted file mode 100644
index 4ffa8de6..00000000
--- a/d123/simulation/gym/environment/simulation_wrapper.py
+++ /dev/null
@@ -1,160 +0,0 @@
-# TODO: refactor and maybe move into environment wrapper
-from functools import cached_property
-from typing import List, Optional, Tuple
-
-import numpy as np
-from nuplan.common.actor_state.ego_state import EgoState
-from nuplan.planning.scenario_builder.abstract_scenario import AbstractScenario
-from nuplan.planning.simulation.history.simulation_history import AbstractTrajectory
-from nuplan.planning.simulation.planner.abstract_planner import (
- PlannerInitialization,
- PlannerInput,
-)
-from nuplan.planning.simulation.simulation import Simulation
-from shapely.creation import linestrings
-from shapely.geometry import LineString
-
-
-class SimulationWrapper:
- """
- Helper object to wrap the nuPlan simulation and provide additional functionality.
- TODO:
- - Refactor this class.
- - Move route completion logic into the reward builder.
- """
-
- def __init__(self, simulation: Simulation):
- """
- Initializes the SimulationWrapper.
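- Typical usage (a sketch): call initialize() once to obtain the first planner input, then
- call step(trajectory) repeatedly until the returned running flag is False.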
- :param simulation: Simulation object of nuPlan to wrap.
- """
-
- self._simulation: Simulation = simulation
- self._route_completion: float = 0.0
- self._history_trajectories: List[AbstractTrajectory] = []
-
- # lazy loaded
- self._planner_initialization: Optional[PlannerInitialization] = None
- self._planner_input: Optional[PlannerInput] = None
- self._simulation_ego_states: List[EgoState] = []
-
- def initialize(self) -> Tuple[PlannerInput, PlannerInitialization]:
- """
- Initializes the simulation and returns the planner input and initialization.
- :return: Tuple containing the planner input and initialization according to the nuPlan interface.
- """
- self._planner_initialization = self._simulation.initialize()
- self._planner_input = self._simulation.get_planner_input()
- self._simulation_ego_states.append(self._planner_input.history.current_state[0])
- return self._planner_input, self._planner_initialization
-
- def step(self, trajectory: AbstractTrajectory) -> Tuple[PlannerInput, bool]:
- """
- Propagates the simulation and returns the new planner input.
- :return: Tuple containing the planner input and whether the simulation is running.
- """
- assert self._planner_initialization is not None, "SimulationWrapper: Call .initialize() first!"
- self._history_trajectories.append(trajectory)
- self._simulation.propagate(trajectory)
- self._planner_input = self._simulation.get_planner_input()
- self._simulation_ego_states.append(self._planner_input.history.current_state[0])
- output = self._planner_input, self.is_simulation_running()
- return output
-
- def is_simulation_running(self) -> bool:
- """
- Checks if the simulation is still running.
- :return: boolean.
- """
- iteration = self._simulation._time_controller.get_iteration().index
- num_iterations = self._simulation._time_controller.number_of_iterations()
- return iteration != num_iterations - 2
-
- @property
- def simulation(self) -> Simulation:
- """
- :return: Simulation used by the SimulationRunner
- """
- assert self._simulation is not None
- return self._simulation
-
- @property
- def history_trajectories(self) -> List[AbstractTrajectory]:
- """
- :return: List of trajectories passed to the simulation so far.
- """
- assert len(self._history_trajectories) > 0
- return self._history_trajectories
-
- @property
- def scenario(self) -> AbstractScenario:
- """
- :return: Get the scenario relative to the simulation.
- """
- assert self._simulation is not None
- return self._simulation.scenario
-
- @property
- def planner_initialization(self) -> PlannerInitialization:
- """
- :return: Get the current planner initialization object.
- """
- assert self._planner_initialization is not None
- return self._planner_initialization
-
- @property
- def simulation_ego_states(self) -> List[EgoState]:
- """
- :return: list of ego states from the simulation
- """
- return self._simulation_ego_states
-
- @property
- def current_planner_input(self) -> PlannerInput:
- """
- :return: Get the current planner input object.
- """
- assert self._planner_input is not None
- return self._planner_input
-
- @property
- def current_ego_state(self) -> EgoState:
- """
- :return: Current ego state from the simulation.
- """
- assert self._planner_input is not None
- ego_state, _ = self._planner_input.history.current_state
- return ego_state
-
- @property
- def initial_ego_state(self) -> EgoState:
- """
- :return: Initial ego state from the simulation.
- """ - return self.scenario.initial_ego_state - - @cached_property - def ego_linestring(self) -> LineString: - """ - Creates a linestring from the human ego states of the simulation. - TODO: remove this function. - :return: Shapely linestring of the human ego states. - """ - ego_states = list(self.scenario.get_expert_ego_trajectory()) - ego_centers = np.array([ego_state.center.array for ego_state in ego_states]) - return linestrings(ego_centers) - - @property - def route_completion(self): - """ - TODO: remove this function. Move to reward builder. - :return: The current route completion of the simulation [m]. - """ - return self._route_completion - - def update_route_completion(self, new_route_completion: float) -> None: - """ - TODO: remove this function. Move to reward builder. - """ - assert 0 <= new_route_completion <= 1 - self._route_completion = max(self._route_completion, new_route_completion) diff --git a/d123/simulation/gym/gym_env.py b/d123/simulation/gym/gym_env.py deleted file mode 100644 index b1ce0746..00000000 --- a/d123/simulation/gym/gym_env.py +++ /dev/null @@ -1,86 +0,0 @@ -from typing import List, Optional, Tuple - -import numpy as np -import numpy.typing as npt - -from d123.common.datatypes.recording.detection_recording import DetectionRecording -from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE2, EgoStateSE2 -from d123.common.geometry.vector import Vector2D -from d123.dataset.maps.abstract_map import AbstractMap -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.simulation.controller.motion_model.kinematic_bicycle_model import KinematicBicycleModel -from d123.simulation.observation.abstract_observation import AbstractObservation -from d123.simulation.observation.log_replay_observation import LogReplayObservation - - -class GymEnvironment: - """ - A simple demo environment for testing purposes. - This class is a placeholder and does not implement any specific functionality. - """ - - def __init__(self, scenes: List[AbstractScene]) -> None: - - self._scenes = scenes - self._current_iteration = 0 - self._current_scene: Optional[AbstractScene] = None - self._current_ego_state_se2: Optional[EgoStateSE2] = None - - # self._observation: AbstractObservation = AgentsObservation(None) - self._observation: AbstractObservation = LogReplayObservation() - self._observation.initialize() - - self._ego_replay: bool = False - - def reset(self, scene: Optional[AbstractScene]) -> Tuple[AbstractMap, EgoStateSE2, DetectionRecording]: - """ - Reset the environment to the initial state. - Returns a tuple containing the map, ego vehicle state, and detection observation. 
- """ - if scene is not None: - self._current_scene = scene - else: - self._current_scene = np.random.choice(self._scenes) - - self._current_scene_index = 0 - self._current_ego_state_se2 = self._current_scene.get_ego_state_at_iteration( - self._current_scene_index - ).ego_state_se2 - detection_observation = self._observation.reset(self._current_scene) - - return self._current_scene.map_api, self._current_ego_state_se2, detection_observation, self._current_scene - - def step(self, action: npt.NDArray[np.float64]) -> Tuple[EgoStateSE2, DetectionRecording, bool]: - self._current_scene_index += 1 - if self._ego_replay: - self._current_ego_state_se2 = self._current_scene.get_ego_state_at_iteration( - self._current_scene_index - ).ego_state_se2 - else: - dynamic_car_state = dynamic_state_from_action(ego_state=self._current_ego_state_se2, action=action) - next_timepoint = self._current_scene.get_timepoint_at_iteration(self._current_scene_index) - self._current_ego_state_se2 = KinematicBicycleModel().step( - self._current_ego_state_se2, dynamic_car_state, next_timepoint - ) - - detection_observation = self._observation.step() - is_done = self._current_scene_index == self._current_scene.get_number_of_iterations() - 1 - - return self._current_ego_state_se2, detection_observation, is_done - - -def dynamic_state_from_action(ego_state: EgoStateSE2, action: npt.NDArray[np.float64]) -> DynamicStateSE2: - """ - Convert the action to a dynamic car state. - """ - # Assuming action is in the form [acceleration, steering_angle] - long_acceleration = action[0] - tire_steering_rate = action[1] - - return DynamicStateSE2( - velocity=ego_state.dynamic_state_se2.velocity, - acceleration=Vector2D(long_acceleration, 0.0), - angular_velocity=ego_state.dynamic_state_se2.angular_velocity, - tire_steering_rate=tire_steering_rate, - angular_acceleration=0.0, - ) diff --git a/d123/simulation/gym/policy/ppo/ppo_config.py b/d123/simulation/gym/policy/ppo/ppo_config.py deleted file mode 100644 index 9606031c..00000000 --- a/d123/simulation/gym/policy/ppo/ppo_config.py +++ /dev/null @@ -1,282 +0,0 @@ -""" -Config class that contains all the hyperparameters needed to build any model. -""" - -import numpy as np - - -class GlobalConfig: - """ - Config class that contains all the hyperparameters needed to build any model. - """ - - def __init__(self): - self.frame_rate = 10.0 # Frames per second of the CARLA simulator - self.time_interval = 1.0 / self.frame_rate # ms per step in CARLA time. - - self.pixels_per_meter = 5.0 # 1 / pixels_per_meter = size of pixel in meters - self.bev_semantics_width = 192 # Numer of pixels the bev_semantics is wide - self.pixels_ev_to_bottom = 40 - self.bev_semantics_height = 192 # Numer of pixels the bev_semantics is high - # Distance of traffic lights considered relevant (in meters) - self.light_radius = 15.0 - self.debug = False # Whether to turn on debugging functions, like visualizations. - self.logging_freq = 10 # Log every 10 th frame - self.logger_region_of_interest = 30.0 # Meters around the car that will be logged. - self.route_points = 10 # Number of route points to render in logger - - half_second = int(self.frame_rate * 0.5) - # Roach ObsManager config. 
- self.bev_semantics_obs_config = {
- "width_in_pixels": self.bev_semantics_width,
- "pixels_ev_to_bottom": self.pixels_ev_to_bottom,
- "pixels_per_meter": self.pixels_per_meter,
- "history_idx": [
- -3 * half_second - 1,
- -2 * half_second - 1,
- -1 * half_second - 1,
- -0 * half_second - 1,
- ],
- "scale_bbox": True,
- "scale_mask_col": 1.0,
- "map_folder": "maps_low_res",
- }
- self.num_route_points_rendered = 80 # Number of route points rendered into the BEV seg observation.
-
- # Color format BGR
- self.bev_classes_list = (
- (0, 0, 0), # unlabeled
- (150, 150, 150), # road
- (255, 255, 255), # route
- (255, 255, 0), # lane marking
- (0, 0, 255), # vehicle
- (0, 255, 255), # pedestrian
- (255, 255, 0), # traffic light
- (160, 160, 0), # stop sign
- (0, 255, 0), # speed sign
- )
-
- # New bev observation parameters
- self.use_new_bev_obs = False # Whether to use bev_observation.py instead of chauffeurnet.py
- self.bev_semantics_width2 = 192 # Width and height of bev_semantic image
- self.pixels_ev_to_bottom2 = 40 # Number of pixels from the ego vehicle to the bottom of the input.
- self.pixels_per_meter2 = 5.0 # 1 / pixels_per_meter = size of pixel in meters
- self.route_width = 16 # Width of the rendered route in pixels.
- self.red_light_thickness = 3 # Width of the red light line
- self.use_extra_control_inputs = False # Whether to use extra control inputs such as integral of past steering.
- # Rough avg steering angle in degrees that the wheel can be set to
- # The steering angles of the individual wheels are +-70° and +-48°, respectively
- self.max_avg_steer_angle = 60.0
- self.condition_outside_junction = True # Whether to render the route outside junctions.
- self.use_target_point = False # Whether to input a target point in the measurements.
-
- self.scale_bbox2 = True # Whether to scale up the bounding box extents: 1.0 for vehicles, 2.0 for pedestrians, minimum extent 0.8
- self.scale_factor_vehicle = 1.0
- self.scale_factor_walker = 2.0
- self.min_ext_bounding_box = 0.8
- self.scale_mask_col2 = 1.0 # Scaling factor for ego vehicle bounding box.
- self.map_folder2 = "maps_low_res" # Map folder for the preprocessed map data
- self.max_speed_actor = 33.33 # In m/s maximum speed we expect from other actors. = 120 km/h
- self.min_speed_actor = -2.67 # In m/s minimum speed we expect from other actors. = -10 km/h
-
- # Extent of the ego vehicle's bounding box
- self.ego_extent_x = 2.44619083404541
- self.ego_extent_y = 0.9183566570281982
- self.ego_extent_z = 0.7451388239860535
-
- # Roach reward hyperparameters. rr stands for roach reward
- self.reward_type = "roach" # Reward function to be used during training. Options: roach, simple_reward
- self.use_exploration_suggest = False # Whether to use the exploration loss from roach.
- self.rr_maximum_speed = 6.0 # Maximum speed in m/s encouraged by the roach reward function.
- self.vehicle_distance_threshold = 15 # Distance in meters within which vehicles are considered for the reward.
- self.max_vehicle_detection_number = 10 # Maximum number of vehicles considered for the roach reward.
- self.rr_vehicle_proximity_threshold = (
- 9.5 # Threshold within which vehicles are considered a hazard in the reward.
- )
- # Distance in meters within which pedestrians are considered for the reward.
- self.pedestrian_distance_threshold = 15
- self.max_pedestrian_detection_number = 10 # Maximum number of pedestrians considered for the roach reward.
- # Threshold within which pedestrians are considered a hazard in the reward.
- self.rr_pedestrian_proximity_threshold = 9.5
- self.rr_tl_offset = -0.8 * self.ego_extent_x # Probably offset to be kept to the entrance of the intersection.
- self.rr_tl_dist_threshold = 18.0 # Distance at which traffic lights are considered for the speed reward.
- # Meters. If the agent is farther away from the centerline (laterally), it counts as a route deviation in the reward
- self.min_thresh_lat_dist = 3.5
- self.eval_time = (
- 1200.0 # Seconds. After this time a timeout is triggered in the reward which counts as truncation.
- )
- # Number of frames before the end of the episode where the exploration loss is applied.
- self.n_step_exploration = 100
- # If True, rr_maximum_speed will be overwritten with the current speed limit affecting the ego vehicle.
- self.use_speed_limit_as_max_speed = False
-
- # Simple reward hyperparameters
- self.consider_tl = True # If set to False, traffic light infractions are turned off. Used in the simple reward
- self.terminal_reward = 0.0 # Reward at the end of the episode
- self.terminal_hint = 10.0 # Penalty subtracted from the reward at the end of the episode when colliding.
- self.normalize_rewards = False # Whether to use gymnasium's reward normalization.
- self.speeding_infraction = False # Whether to terminate the route if the agent drives too fast.
- self.use_comfort_infraction = False # Whether to apply a soft penalty if comfort limits are exceeded
- # These values are tuned for the nuPlan dataset
- self.max_abs_lon_jerk = 4.13 # m/s^3 Comfort limit for longitudinal jerk
- self.max_abs_mag_jerk = 8.37 # m/s^3 Comfort limit for jerk magnitude
- self.min_lon_accel = -4.05 # m/s^2 Comfort limit for longitudinal acceleration
- self.max_lon_accel = 2.40 # m/s^2 Comfort limit for longitudinal acceleration
- self.max_abs_lat_accel = 4.89 # m/s^2 Comfort limit for lateral acceleration
- self.max_abs_yaw_rate = 0.95 # rad/s Comfort limit for angular velocity
- self.max_abs_yaw_accel = 1.93 # rad/s^2 Comfort limit for angular yaw acceleration
- self.use_vehicle_close_penalty = False # Whether to use a penalty for being too close to the front vehicle.
- # Whether to give a penalty depending on vehicle speed when crashing or running a red light
- self.use_termination_hint = False
- self.ego_forecast_time = 1.0 # Number of seconds that the ego agent is forecasted.
- self.ego_forecast_min_speed = 2.5 # In m/s. Minimum speed in the ego forecast.
- self.use_perc_progress = False # Whether to multiply RC reward by percentage away from lane center.
- self.use_min_speed_infraction = (
- False # Whether to penalize the agent for driving slower than other agents on avg.
- )
- self.use_leave_route_done = True # Whether to terminate the route when leaving the precomputed path.
- self.use_outside_route_lanes = (
- False # Whether to terminate the route when invading opposing lanes or sidewalks.
- )
- self.use_max_change_penalty = False # Whether to apply a soft penalty when the action changes too fast.
- self.max_change = 0.25 # Maximum change in action allowed compared to the last frame before a penalty is applied
- self.penalize_yellow_light = True # Whether to penalize running a yellow light.
-
- # How often an action is repeated.
- self.action_repeat = 1
-
- self.num_value_measurements = 4 # Number of measurements exclusive to the value head.
-
- # Action and observation space
- self.obs_num_measurements = 10 # Number of scalar measurements in observation.
- self.obs_num_channels = 9 # Number of channels in the bev observation.
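- # Taken together, the observation per step is therefore a (9, H, W) BEV semantic tensor
- # plus a 10-dimensional measurement vector.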
-
- # ###### Distribution parameters ############
- self.distribution = "beta" # Distribution used for the action space. Options: beta, normal, beta_uni_mix
- # Minimum value for a, b of the beta distribution that the model can predict. Gets added to the softplus output.
- self.beta_min_a_b_value = 1.0
-
- self.normal_dist_init = (
- (0, -2),
- (0, -2),
- ) # Initial bias parameters of the normal distribution
- self.normal_dist_action_dep_std = True # Whether the std of the normal distribution is dependent on the input
-
- self.uniform_percentage_z = 0.03 # Mixing percentage of uniform distribution in beta_uni_mix
-
- # We have 2 actions, corresponding to left/right steering and negative-to-positive acceleration.
- self.action_space_dim = 2
- self.action_space_min = -1.0 # Minimum value of the action space
- self.action_space_max = 1.0 # Maximum value of the action space
- # Number of frames at the beginning before learning starts, return brake
- self.start_delay_frames = int(2.0 / self.time_interval + 0.5)
-
- # PPO training hyperparameters
- self.experiment_name = "PPO_000" # the name of this experiment
- self.gym_id = "NuPlanEnv-v0" # the id of the gym environment
- self.learning_rate = 1.0e-5 # the learning rate of the optimizer
- self.seed = 1 # seed of the experiment
- self.total_timesteps = 10_000_000 # total time steps of the experiment
- self.torch_deterministic = True # if toggled, sets `torch.backends.cudnn.deterministic=True`
- self.cuda = True # if toggled, cuda will be enabled by default
- self.track = False # if toggled, this experiment will be tracked with Weights and Biases
- self.wandb_project_name = "ppo-roach" # the wandb project name
- self.wandb_entity = None # the entity (team) of wandb project
- self.capture_video = False # whether to capture videos of the agent performances (check out `videos` folder)
- self.num_envs = 5 # the number of parallel game environments
- self.lr_schedule = "kl" # Which lr schedule to use. Options: (linear, kl, none, step, cosine, cosine_restart)
- self.gae = True # Use GAE for advantage computation
- self.gamma = 0.99 # the discount factor gamma
- self.gae_lambda = 0.95 # the lambda for the general advantage estimation
- self.update_epochs = 4 # the K epochs to update the policy
- self.norm_adv = False # Toggles advantages normalization
- self.clip_coef = 0.1 # the surrogate clipping coefficient
- self.clip_vloss = False # Toggles whether to use a clipped loss for the value function, as per the paper.
- self.ent_coef = 0.01 # coefficient of the entropy
- self.vf_coef = 0.5 # coefficient of the value function
- self.max_grad_norm = 0.5 # the maximum norm for the gradient clipping
- self.target_kl = 0.015 # the target KL divergence threshold
- self.visualize = False # if toggled, the game will render on screen
- self.logdir = "" # The directory to log the data into.
- self.load_file = None # model weights for initialization
- # Ports of the carla_gym wrapper to connect to. One port must be provided per environment (len(ports) == num_envs)
- self.ports = (1111, 1112, 1113, 1114, 1115)
- self.train_gpu_ids = (0,) # Which GPUs to train on. Index 0 indicates GPU for rank 0 etc.
- self.compile_model = False # Whether to use torch compile on the model.
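- # For orientation: each iteration collects total_batch_size samples across all environments
- # and splits them into 512 / 256 = 2 minibatches; with update_epochs = 4 above, that is
- # 8 gradient steps per data collection.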
- self.total_batch_size = 512 # The total amount of data collected at every step across all environments
- self.total_minibatch_size = 256 # The total minibatch size used for training (across all environments)
- self.expl_coef = 0.05 # Weight / coefficient of the exploration loss
- self.lr_schedule_step = 8 # Number of times the KL divergence can be triggered before the lr reduces.
- self.current_learning_rate = self.learning_rate # Learning rate at the latest iteration.
- self.kl_early_stop = 0 # Counter that reduces lr once it reaches lr_schedule_step
- self.schedule_free = False
- self.adam_eps = 1e-5 # Adam optimizer epsilon parameter. Standard PPO value is 1e-5
- # We did not observe a significant speedup with these, so we turn them off for better numerical precision.
- self.allow_tf32 = False # Whether to use tf32 format, which has better speed but lower numeric precision.
- self.benchmark = False # Whether to use cudnn benchmarking
- self.matmul_precision = "highest" # Options: highest (float32), high (tf32), medium (bfloat16)
- # Whether to collect data on cpu. This can be a bit faster, since it avoids CPU-GPU ping-pong,
- # at the cost of running the model on the CPU during data collection.
- self.cpu_collect = False
- # Robust policy optimization https://arxiv.org/abs/2212.07536
- self.use_rpo = False
- self.rpo_alpha = 0.5 # Size of the uniform random value that gets added to a, b
- self.use_green_wave = False # If True, in some routes all traffic lights the agent encounters are set to green.
- self.green_wave_prob = 0.05 # Probability of a route using green wave (if use_green_wave=True)
- # You should pick tiny networks for efficiency e.g. convnext_atto.d2_in1k
- self.image_encoder = "roach_ln2" # Which image cnn encoder to use. Either roach, or timm model name
- self.use_layer_norm = True # Whether to use LayerNorm before ReLU in MLPs.
- # Applicable if use_layer_norm=True, whether to also apply layernorm to the policy head.
- # Can be useful to remove to allow the policy to predict large values (for a, b of Beta).
- self.use_layer_norm_policy_head = True
- self.features_dim = 256 # Dimension of features produced by the state encoder
- self.use_lstm = False # Whether to use an LSTM after the feature encoder.
- self.num_lstm_layers = 1 # How many LSTM layers to use.
-
- # Whether to let the model predict the next frame as an auxiliary task during training
- self.use_world_model_loss = False
- # Number of frames to predict ahead with the world model loss.
- self.num_future_prediction = int(0.5 * self.frame_rate)
- self.world_model_loss_weight = 1.0 # Weight of the world model loss.
- self.render_green_tl = True # Whether to render green traffic lights into the observation.
- self.lr_schedule_step_factor = 0.1 # Multiplier when doing a step decrease in learning rate
- self.lr_schedule_step_perc = (
- 0.5,
- 0.75,
- ) # Percentage of training run after which the lr is decayed
- self.weight_decay = 0.0 # Weight decay applied to optimizer. AdamW is used when > 0.0
- self.lr_schedule_cosine_restarts = (
- 0.0,
- 0.25,
- 0.50,
- 0.75,
- 1.0,
- ) # Percentage of training to do a restart
- # https://arxiv.org/abs/1911.00357
- self.use_dd_ppo_preempt = False # Whether to use the dd-ppo preemption technique to early stop stragglers
- self.dd_ppo_preempt_threshold = 0.6 # Percentage of nodes that need to be finished before the rest is stopped.
- self.dd_ppo_min_perc = 0.25 # Minimum percentage of data points that need to be collected before preemption.
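- # Example reading of the two thresholds above: once 60% of the workers have finished their
- # rollout, the remaining stragglers are stopped early, but never before they have collected
- # at least 25% of their share of data points.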
- self.num_envs_per_gpu = 5 # Number of environments to put on one GPU. Only considered for dd_ppo.py
- # Percentage of training at which the model is evaluated
- self.eval_intervals = (0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
- self.current_eval_interval_idx = 0 # Helper variable to remember which model to save next.
- self.use_temperature = False # Whether the output distribution parameters are divided by a learned temperature
- self.min_temperature = 0.1 # Minimum value of the learned temperature
-
- # Whether to use the Gaussian histogram loss (HL-Gauss) to train the value head via classification (instead of regression + L2)
- self.use_hl_gauss_value_loss = False
- self.hl_gauss_std = 0.75 # Standard deviation used for the Gaussian histogram loss
- self.hl_gauss_vmin = -10.0 # Min value of the histogram in HL_Gauss. Tune to be in return range
- self.hl_gauss_vmax = 30.0 # Max value of the histogram in HL_Gauss. Tune to be in return range
- self.hl_gauss_bucket_size = 1.0 # Size of each bucket in the HL_Gauss histogram.
- self.hl_gauss_num_classes = int((self.hl_gauss_vmax - self.hl_gauss_vmin) / self.hl_gauss_bucket_size) + 1
-
- self.global_step = 0 # Current iteration of the training
- self.max_training_score = -np.inf # Highest training score achieved so far
- self.best_iteration = 0 # Iteration of the best model
- self.latest_iteration = 0 # Iteration of the latest model
-
- def initialize(self, **kwargs):
- for k, v in kwargs.items():
- if hasattr(self, k):
- setattr(self, k, v)
diff --git a/d123/simulation/gym/policy/ppo/ppo_distributions.py b/d123/simulation/gym/policy/ppo/ppo_distributions.py
deleted file mode 100644
index 5b8321e6..00000000
--- a/d123/simulation/gym/policy/ppo/ppo_distributions.py
+++ /dev/null
@@ -1,371 +0,0 @@
-"""
-Contains various classes for different probability distributions that sample the actions.
-E.g. Gaussian, Beta, Uniform+Beta
-"""
-
-from typing import Tuple
-
-import torch
-from torch import nn
-from torch.distributions import Beta, Normal
-
-
-def sum_independent_dims(tensor: torch.Tensor) -> torch.Tensor:
- if len(tensor.shape) > 1:
- tensor = tensor.sum(dim=1)
- else:
- tensor = tensor.sum()
- return tensor
-
-
-class DiagGaussianDistribution(nn.Module):
- """
- Wrapper around the torch Normal distribution with some additional functionality.
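- Actions are 2-dimensional (acceleration, steering). The log standard deviation is either
- predicted per state or kept as a single learned parameter (see action_dependent_std).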
- """ - - def __init__(self, action_dim: int, dist_init=None, action_dependent_std=False): - super().__init__() - assert action_dim == 2 - - self.distribution = None - self.action_dim = action_dim - self.dist_init = dist_init - self.action_dependent_std = action_dependent_std - - self.low = None - self.high = None - self.log_std_max = 2 - self.log_std_min = -20 - - self.suggest_go = nn.Parameter(torch.FloatTensor([0.66, -3]), requires_grad=False) - self.suggest_stop = nn.Parameter(torch.FloatTensor([-0.66, -3]), requires_grad=False) - self.suggest_turn = nn.Parameter(torch.FloatTensor([0.0, -1]), requires_grad=False) - self.suggest_straight = nn.Parameter(torch.FloatTensor([3.0, 3.0]), requires_grad=False) - - def proba_distribution_net(self, latent_dim: int) -> Tuple[nn.Module, nn.Parameter]: - mean_actions = nn.Linear(latent_dim, self.action_dim) - if self.action_dependent_std: - log_std = nn.Linear(latent_dim, self.action_dim) - else: - log_std = nn.Parameter(-2.0 * torch.ones(self.action_dim), requires_grad=True) - - if self.dist_init is not None: - # log_std.weight.data.fill_(0.01) - # mean_actions.weight.data.fill_(0.01) - # acc/steer - mean_actions.bias.data[0] = self.dist_init[0][0] - mean_actions.bias.data[1] = self.dist_init[1][0] - if self.action_dependent_std: - log_std.bias.data[0] = self.dist_init[0][1] - log_std.bias.data[1] = self.dist_init[1][1] - else: - init_tensor = torch.FloatTensor([self.dist_init[0][1], self.dist_init[1][1]]) - log_std = nn.Parameter(init_tensor, requires_grad=True) - - return mean_actions, log_std - - def proba_distribution(self, mean_actions: torch.Tensor, log_std: torch.Tensor) -> "DiagGaussianDistribution": - if self.action_dependent_std: - log_std = torch.clamp(log_std, self.log_std_min, self.log_std_max) - action_std = torch.ones_like(mean_actions) * log_std.exp() - self.distribution = Normal(mean_actions, action_std) - return self - - def log_prob(self, actions: torch.Tensor) -> torch.Tensor: - log_prob = self.distribution.log_prob(actions) - return sum_independent_dims(log_prob) - - def entropy(self) -> torch.Tensor: - return self.distribution.entropy() - - def exploration_loss(self, exploration_suggests) -> torch.Tensor: - # [('stop'/'go'/None, 'turn'/'straight'/None)] - # (batch_size, action_dim) - mu = self.distribution.loc.detach().clone() - sigma = self.distribution.scale.detach().clone() - - # 0: '', '' - # 1: 'go', '' - # 2: 'go', 'turn' - # 3: 'stop', '' - # TODO see if this can be vectorized - for i, suggest_indx in enumerate(exploration_suggests): - # Index 0 means original - if suggest_indx == 1: # Blocked - # No steer suggest - mu[i, 1] = self.suggest_go[0] - sigma[i, 1] = self.suggest_go[1] - elif suggest_indx == 2: # Route deviation - mu[i, 0] = self.suggest_turn[0] - sigma[i, 0] = self.suggest_turn[1] - - mu[i, 1] = self.suggest_go[0] - sigma[i, 1] = self.suggest_go[1] - elif suggest_indx == 3: # Collision, red light, stop sign - mu[i, 1] = self.suggest_stop[0] - sigma[i, 1] = self.suggest_stop[1] - - dist_ent = Normal(mu, sigma) - - exploration_loss = torch.distributions.kl_divergence(dist_ent, self.distribution) - return torch.mean(exploration_loss) - - def sample(self) -> torch.Tensor: - return self.distribution.rsample() - - def mode(self) -> torch.Tensor: - return self.distribution.mean - - def get_actions(self, deterministic: bool = False) -> torch.Tensor: - if deterministic: - return self.mode() - return self.sample() - - -class BetaDistribution(nn.Module): - """ - Wrapper around the torch Beta distribution with some 
additional functionality. - """ - - def __init__(self, action_dim=2, dist_init=None): - super().__init__() - assert action_dim == 2 - - self.distribution = None - self.action_dim = action_dim - self.dist_init = dist_init - self.low = 0.0 - self.high = 1.0 - - # [alpha, beta], [0, 1] # Changed order from original repo to have alpha first. - self.suggest_go = nn.Parameter(torch.FloatTensor([2.5, 1.0]), requires_grad=False) - self.suggest_stop = nn.Parameter(torch.FloatTensor([1.0, 1.5]), requires_grad=False) - self.suggest_turn = nn.Parameter(torch.FloatTensor([1.0, 1.0]), requires_grad=False) - - def proba_distribution_net(self, latent_dim: int) -> Tuple[nn.Module, nn.Module]: - - linear_alpha = nn.Linear(latent_dim, self.action_dim) - linear_beta = nn.Linear(latent_dim, self.action_dim) - - if self.dist_init is not None: - # acc - linear_alpha.bias.data[0] = self.dist_init[0][1] - linear_beta.bias.data[0] = self.dist_init[0][0] - # steer - linear_alpha.bias.data[1] = self.dist_init[1][1] - linear_beta.bias.data[1] = self.dist_init[1][0] - - alpha = nn.Sequential(linear_alpha) - beta = nn.Sequential(linear_beta) - return alpha, beta - - def proba_distribution(self, alpha, beta): - self.distribution = Beta(alpha, beta) - return self - - def log_prob(self, actions: torch.Tensor) -> torch.Tensor: - log_prob = self.distribution.log_prob(actions) - return sum_independent_dims(log_prob) - - def entropy(self): - return self.distribution.entropy() - - def exploration_loss(self, exploration_suggests) -> torch.Tensor: - alpha = self.distribution.concentration1.detach().clone() - beta = self.distribution.concentration0.detach().clone() - - # 0: '', '' - # 1: 'go', '' - # 2: 'go', 'turn' - # 3: 'stop', '' - # TODO see if this can be vectorized - for i, suggest_indx in enumerate(exploration_suggests): - # Index 0 means original - if suggest_indx == 1: # Blocked - # No steer suggest - alpha[i, 1] = self.suggest_go[0] - beta[i, 1] = self.suggest_go[1] - elif suggest_indx == 2: # Route deviation - alpha[i, 0] = self.suggest_turn[0] - beta[i, 0] = self.suggest_turn[1] - - alpha[i, 1] = self.suggest_go[0] - beta[i, 1] = self.suggest_go[1] - elif suggest_indx == 3: # Collision, red light, stop sign - alpha[i, 1] = self.suggest_stop[0] - beta[i, 1] = self.suggest_stop[1] - - dist_ent = Beta(alpha, beta) - - exploration_loss = torch.distributions.kl_divergence(self.distribution, dist_ent) - return torch.mean(exploration_loss) - - def sample(self) -> torch.Tensor: - # Reparametrization trick to pass gradients - return self.distribution.rsample() - - def mode(self) -> torch.Tensor: - alpha = self.distribution.concentration1 - beta = self.distribution.concentration0 - x = torch.zeros_like(alpha) - x[:, 1] += 0.5 - mask1 = (alpha > 1) & (beta > 1) - x[mask1] = (alpha[mask1] - 1) / (alpha[mask1] + beta[mask1] - 2) - - mask2 = (alpha <= 1) & (beta > 1) - x[mask2] = 0.0 - - mask3 = (alpha > 1) & (beta <= 1) - x[mask3] = 1.0 - - # mean - mask4 = (alpha <= 1) & (beta <= 1) - x[mask4] = self.distribution.mean[mask4] - - return x - - def evaluate_mean(self) -> torch.Tensor: - return self.distribution.mean - - def get_actions(self, deterministic: bool = False) -> torch.Tensor: - if deterministic: - # return self.mode() # TODO cleanup - return self.evaluate_mean() - return self.sample() - - -class BetaUniformMixtureDistribution(nn.Module): - """ - A Mixture of the Beta and Uniform Distribution. Meant to ease exploration because the distribution has heavier tails - than the beta distribution. 
In deterministic mode / at inference, the action depends only on the Beta distribution.
- The KL divergence between this distribution and past ones is computed based only on the Beta part.
- PDF = (1-z) * Beta(a,b) + z * Uniform(0,1)
- """
-
- def __init__(self, action_dim=2, dist_init=None, uniform_percentage_z=0.1):
- super().__init__()
- assert action_dim == 2
-
- self.distribution = None # The current beta distribution
- self.uniform_distribution = None
- self.action_dim = action_dim
- self.dist_init = dist_init
- self.low = 0.0
- self.high = 1.0
- self.beta_perc = 1.0 - uniform_percentage_z
- self.uniform_perc = uniform_percentage_z
-
- # [alpha, beta], [0, 1] # Changed order from original repo to have alpha first.
- self.suggest_go = nn.Parameter(torch.FloatTensor([2.5, 1.0]), requires_grad=False)
- self.suggest_stop = nn.Parameter(torch.FloatTensor([1.0, 1.5]), requires_grad=False)
- self.suggest_turn = nn.Parameter(torch.FloatTensor([1.0, 1.0]), requires_grad=False)
-
- def proba_distribution_net(self, latent_dim: int) -> Tuple[nn.Module, nn.Module]:
-
- linear_alpha = nn.Linear(latent_dim, self.action_dim)
- linear_beta = nn.Linear(latent_dim, self.action_dim)
-
- if self.dist_init is not None:
- # acc
- linear_alpha.bias.data[0] = self.dist_init[0][1]
- linear_beta.bias.data[0] = self.dist_init[0][0]
- # steer
- linear_alpha.bias.data[1] = self.dist_init[1][1]
- linear_beta.bias.data[1] = self.dist_init[1][0]
-
- alpha = nn.Sequential(linear_alpha)
- beta = nn.Sequential(linear_beta)
- return alpha, beta
-
- def proba_distribution(self, alpha, beta):
- self.action_shape = alpha.shape
- lower_bound = torch.zeros_like(alpha, requires_grad=False, device=alpha.device)
- upper_bound = torch.ones_like(alpha, requires_grad=False, device=alpha.device)
- self.uniform_distribution = torch.distributions.uniform.Uniform(lower_bound, upper_bound)
- self.distribution = Beta(alpha, beta)
- return self
-
- def log_prob(self, actions: torch.Tensor) -> torch.Tensor:
- uniform_pdf = torch.ones_like(actions, device=actions.device, requires_grad=False)
- pdf = self.beta_perc * self.distribution.log_prob(actions).exp() + self.uniform_perc * uniform_pdf
- log_prob = torch.log(pdf)
- return sum_independent_dims(log_prob)
-
- def entropy(self):
- # TODO: Since the uniform is constant, maximizing the entropy of the beta is the same as maximizing the entropy of
- # the BetaUniMix. But compute the real entropy later.
- return self.distribution.entropy()
-
- def exploration_loss(self, exploration_suggests) -> torch.Tensor:
- """
- We ignore the uniform in this computation since it is constant.
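- Since the uniform mixing weight z is a fixed constant with no learnable parameters, only
- the Beta components differ between distributions, so the KL divergence is taken between
- the Beta parts alone.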
- """ - alpha = self.distribution.concentration1.detach().clone() - beta = self.distribution.concentration0.detach().clone() - - # 0: '', '' - # 1: 'go', '' - # 2: 'go', 'turn' - # 3: 'stop', '' - # TODO see if this can be vectorized - for i, suggest_indx in enumerate(exploration_suggests): - # Index 0 means original - if suggest_indx == 1: # Blocked - # No steer suggest - alpha[i, 1] = self.suggest_go[0] - beta[i, 1] = self.suggest_go[1] - elif suggest_indx == 2: # Route deviation - alpha[i, 0] = self.suggest_turn[0] - beta[i, 0] = self.suggest_turn[1] - - alpha[i, 1] = self.suggest_go[0] - beta[i, 1] = self.suggest_go[1] - elif suggest_indx == 3: # Collision, red light, stop sign - alpha[i, 1] = self.suggest_stop[0] - beta[i, 1] = self.suggest_stop[1] - - dist_ent = Beta(alpha, beta) - - exploration_loss = torch.distributions.kl_divergence(self.distribution, dist_ent) - return torch.mean(exploration_loss) - - def sample(self) -> torch.Tensor: - """ - We sample from the mixture distribution by first drawing a uniform random variable in range [0,1]. - If it is > uniform_perc we draw from the beta + otherwise we draw from the uniform distribution. - This is not differentiable, but it doesn't have to be for PPO and other stochastic policy gradient methods. - """ - prob = torch.rand(1) - if prob < self.uniform_perc: - return self.uniform_distribution.rsample() # Uniform - else: - return self.distribution.rsample() # Beta - - def mode(self) -> torch.Tensor: - """ - Uniform is ignored when computing the mode of the distribution. We effectively want to remove the uniform during - inference, since it is only meant to help exploration during training. - """ - alpha = self.distribution.concentration1 - beta = self.distribution.concentration0 - x = torch.zeros_like(alpha) - x[:, 1] += 0.5 - mask1 = (alpha > 1) & (beta > 1) - x[mask1] = (alpha[mask1] - 1) / (alpha[mask1] + beta[mask1] - 2) - - mask2 = (alpha <= 1) & (beta > 1) - x[mask2] = 0.0 - - mask3 = (alpha > 1) & (beta <= 1) - x[mask3] = 1.0 - - # mean - mask4 = (alpha <= 1) & (beta <= 1) - x[mask4] = self.distribution.mean[mask4] - - return x - - def get_actions(self, deterministic: bool = False) -> torch.Tensor: - if deterministic: - return self.mode() - return self.sample() diff --git a/d123/simulation/gym/policy/ppo/ppo_model.py b/d123/simulation/gym/policy/ppo/ppo_model.py deleted file mode 100644 index 911e78f4..00000000 --- a/d123/simulation/gym/policy/ppo/ppo_model.py +++ /dev/null @@ -1,916 +0,0 @@ -""" -Agent architecture from https://github.com/zhejz/carla-roach -""" - -from copy import deepcopy -from typing import Dict, Optional - -import cv2 -import gym -import numpy as np - -# import timm -import torch -from torch import nn - -from d123.simulation.gym.policy.ppo.ppo_config import GlobalConfig -from d123.simulation.gym.policy.ppo.ppo_distributions import ( - BetaDistribution, - BetaUniformMixtureDistribution, - DiagGaussianDistribution, -) - -# class CustomCnn(nn.Module): -# """ -# A custom CNN with timm backbone extractors. 
-# """ - -# def __init__(self, config, n_input_channels): -# super().__init__() -# self.config = config -# self.image_encoder = timm.create_model( -# config.image_encoder, -# in_chans=n_input_channels, -# pretrained=False, -# features_only=True, -# ) -# final_width = int(self.config.bev_semantics_width / self.image_encoder.feature_info.info[-1]["reduction"]) -# final_height = int(self.config.bev_semantics_height / self.image_encoder.feature_info.info[-1]["reduction"]) -# final_total_pxiels = final_height * final_width -# # We want to output roughly the same amount of features as the roach encoder. -# self.out_channels = int(1024 / final_total_pxiels) -# self.change_channel = nn.Conv2d( -# self.image_encoder.feature_info.info[-1]["num_chs"], -# self.out_channels, -# kernel_size=1, -# ) - -# def forward(self, x): -# x = self.image_encoder(x) -# x = x[-1] -# x = self.change_channel(x) -# x = torch.flatten(x, start_dim=1) -# return x - - -# Input image feature extractor class -class XtMaCNN(nn.Module): - """ - Inspired by https://github.com/xtma/pytorch_car_caring - """ - - def __init__(self, observation_space, states_neurons, config): - super().__init__() - self.features_dim = config.features_dim - self.config = config - - n_input_channels = observation_space["bev_semantics"].shape[0] - - if self.config.image_encoder == "roach": - self.cnn = nn.Sequential( # in [B, 15, 192, 192] - nn.Conv2d(n_input_channels, 8, kernel_size=5, stride=2), # -> [B, 8, 94, 94] - nn.ReLU(), - nn.Conv2d(8, 16, kernel_size=5, stride=2), # -> [B, 16, 45, 45] - nn.ReLU(), - nn.Conv2d(16, 32, kernel_size=5, stride=2), # -> [B, 32, 21, 21] - nn.ReLU(), - nn.Conv2d(32, 64, kernel_size=3, stride=2), # -> [B, 64, 10, 10] - nn.ReLU(), - nn.Conv2d(64, 128, kernel_size=3, stride=2), # -> [B, 128, 4, 4] - nn.ReLU(), - nn.Conv2d(128, 256, kernel_size=3, stride=1), # -> [B, 256, 2, 2] - nn.ReLU(), - ) - elif self.config.image_encoder == "roach_ln": - self.cnn = nn.Sequential( # in [B, 15, 192, 192] - nn.Conv2d(n_input_channels, 8, kernel_size=5, stride=2), # -> [B, 8, 94, 94] - nn.LayerNorm((8, 94, 94)), - nn.ReLU(), - nn.Conv2d(8, 16, kernel_size=5, stride=2), # -> [B, 16, 45, 45] - nn.LayerNorm((16, 45, 45)), - nn.ReLU(), - nn.Conv2d(16, 32, kernel_size=5, stride=2), # -> [B, 32, 21, 21] - nn.LayerNorm((32, 21, 21)), - nn.ReLU(), - nn.Conv2d(32, 64, kernel_size=3, stride=2), # -> [B, 64, 10, 10] - nn.LayerNorm((64, 10, 10)), - nn.ReLU(), - nn.Conv2d(64, 128, kernel_size=3, stride=2), # -> [B, 128, 4, 4] - nn.LayerNorm((128, 4, 4)), - nn.ReLU(), - nn.Conv2d(128, 256, kernel_size=3, stride=1), # -> [B, 256, 2, 2] - nn.LayerNorm((256, 2, 2)), - nn.ReLU(), - ) - elif self.config.image_encoder == "roach_ln2": # input is expected to be [B, C, 256, 256] - self.cnn = nn.Sequential( - nn.Conv2d(n_input_channels, 8, kernel_size=5, stride=2), # -> [B, 8, 126, 126] - nn.LayerNorm((8, 126, 126)), - nn.ReLU(), - nn.Conv2d(8, 16, kernel_size=5, stride=2), # -> [B, 16, 61, 61] - nn.LayerNorm((16, 61, 61)), - nn.ReLU(), - nn.Conv2d(16, 24, kernel_size=5, stride=2), # -> [B, 16, 29, 29] - nn.LayerNorm((24, 29, 29)), - nn.ReLU(), - nn.Conv2d(24, 32, kernel_size=5, stride=2), # -> [B, 32, 13, 13] - nn.LayerNorm((32, 13, 13)), - nn.ReLU(), - nn.Conv2d(32, 64, kernel_size=3, stride=2), # -> [B, 64, 6, 6] - nn.LayerNorm((64, 6, 6)), - nn.ReLU(), - nn.Conv2d(64, 128, kernel_size=3, stride=1), # -> [B, 128, 4, 4] - nn.LayerNorm((128, 4, 4)), - nn.ReLU(), - nn.Conv2d(128, 256, kernel_size=3, stride=1), # -> [B, 256, 2, 2] - nn.LayerNorm((256, 2, 2)), - 
nn.ReLU(), - ) - # else: - # self.cnn = CustomCnn(config, n_input_channels) - - # Compute shape by doing one forward pass - with torch.no_grad(): - self.cnn_out_shape = self.cnn( - torch.as_tensor(observation_space["bev_semantics"].sample()[None]).float() - ).shape - self.n_flatten = self.cnn_out_shape[1] * self.cnn_out_shape[2] * self.cnn_out_shape[3] - - self.states_neurons = states_neurons[-1] - - if self.config.use_layer_norm: - self.linear = nn.Sequential( - nn.Linear(self.n_flatten + states_neurons[-1], 512), - nn.LayerNorm(512), - nn.ReLU(), - nn.Linear(512, config.features_dim), - nn.LayerNorm(config.features_dim), - nn.ReLU(), - ) - else: - self.linear = nn.Sequential( - nn.Linear(self.n_flatten + states_neurons[-1], 512), - nn.ReLU(), - nn.Linear(512, config.features_dim), - nn.ReLU(), - ) - - states_neurons = [observation_space["measurements"].shape[0]] + list(states_neurons) - self.state_linear = [] - for i in range(len(states_neurons) - 1): - self.state_linear.append(nn.Linear(states_neurons[i], states_neurons[i + 1])) - if self.config.use_layer_norm: - self.state_linear.append(nn.LayerNorm(states_neurons[i + 1])) - self.state_linear.append(nn.ReLU()) - self.state_linear = nn.Sequential(*self.state_linear) - - if self.config.image_encoder == "roach": - self.apply(self._weights_init) - - @staticmethod - def _weights_init(m): - if isinstance(m, nn.Conv2d): - nn.init.xavier_uniform_(m.weight, gain=nn.init.calculate_gain("relu")) - nn.init.constant_(m.bias, 0.1) - - def forward(self, bev_semantics, measurements): - x = self.cnn(bev_semantics) - x = torch.flatten(x, start_dim=1) - latent_state = self.state_linear(measurements) - - x = torch.cat((x, latent_state), dim=1) - x = self.linear(x) - return x - - -class WorldModelDecoder(nn.Module): - """ - Decoder that predicts a next state given features - """ - - def __init__(self, cnn_out_shape, cnn_n_flatten, states_neurons, features_dim, config): - super().__init__() - self.cnn_out_shape = cnn_out_shape - self.cnn_n_flatten = cnn_n_flatten - self.states_neurons = states_neurons - self.features_dim = features_dim - self.config = config - - if self.config.use_layer_norm: - self.linear_decoder = nn.Sequential( - nn.Linear(features_dim, 512), - nn.LayerNorm(512), - nn.ReLU(), - nn.Linear(512, cnn_n_flatten + states_neurons), - nn.LayerNorm(cnn_n_flatten + states_neurons), - nn.ReLU(), - ) - else: - self.linear_decoder = nn.Sequential( - nn.Linear(features_dim, 512), - nn.ReLU(), - nn.Linear(512, cnn_n_flatten + states_neurons), - nn.ReLU(), - ) - - self.bev_semantic_decoder = nn.Sequential( - nn.Conv2d(self.cnn_out_shape[1], 128, (1, 1)), - nn.ReLU(inplace=True), - nn.Upsample(scale_factor=2, mode="bilinear", align_corners=False), - nn.Conv2d(128, 64, (3, 3), padding=1), - nn.ReLU(inplace=True), - nn.Upsample( - size=( - self.config.bev_semantics_height // 4, - self.config.bev_semantics_width // 4, - ), - mode="bilinear", - align_corners=False, - ), - nn.Conv2d(64, 32, (3, 3), padding=1), - nn.ReLU(inplace=True), - nn.Conv2d(32, 16, (3, 3), padding=1), - nn.ReLU(inplace=True), - nn.Conv2d(16, self.config.obs_num_channels + 1, kernel_size=(1, 1)), # + 1 = Background class - nn.Upsample( - size=( - self.config.bev_semantics_height, - self.config.bev_semantics_width, - ), - mode="bilinear", - align_corners=False, - ), - ) - - self.measurement_decoder = nn.Linear(states_neurons, self.config.obs_num_measurements) - - def forward(self, features): - features = self.linear_decoder(features) - features_cnn = features[:, : self.cnn_n_flatten] - 
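-        # NOTE: Rough shape walk-through, assuming cnn_out_shape == (B, 256, 2, 2) as produced
-        # by the roach encoder: cnn_n_flatten = 256 * 2 * 2 = 1024, so the first 1024 feature
-        # dims are the flattened CNN features and the remaining states_neurons dims are the
-        # measurement features; view() below restores the (B, 256, 2, 2) layout for the decoder.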
features_measurements = features[:, self.cnn_n_flatten :] - features_cnn = features_cnn.view(-1, self.cnn_out_shape[1], self.cnn_out_shape[2], self.cnn_out_shape[3]) - - pred_semantic = self.bev_semantic_decoder(features_cnn) - pred_measurement = self.measurement_decoder(features_measurements) - - return pred_semantic, pred_measurement - - -class PPOPolicy(nn.Module): - """ - Neural network policy designed for driving and training with the PPO algorithm. - """ - - def __init__( - self, - observation_space: gym.spaces.Space, - action_space: gym.spaces.Space, - policy_head_arch=(256, 256), - value_head_arch=(256, 256), - states_neurons=(256, 256), - config: Optional[GlobalConfig] = None, - ): - - super().__init__() - self.observation_space = observation_space - self.action_space = action_space - self.config: GlobalConfig = config - - self.features_extractor = XtMaCNN(observation_space, config=config, states_neurons=states_neurons) - - if self.config.use_lstm: - self.lstm = nn.LSTM( - config.features_dim, - config.features_dim, - num_layers=config.num_lstm_layers, - ) - for name, param in self.lstm.named_parameters(): - if "bias" in name: - nn.init.constant_(param, 0) - elif "weight" in name: - nn.init.orthogonal_(param, 1.0) - - if self.config.use_world_model_loss: - self.feature_decoder = WorldModelDecoder( - self.features_extractor.cnn_out_shape, - self.features_extractor.n_flatten, - self.features_extractor.states_neurons, - self.features_extractor.features_dim, - self.config, - ) - - if self.config.distribution == "beta": - self.action_dist = BetaDistribution(int(np.prod(action_space.shape))) - elif self.config.distribution == "normal": - # Hyperparameters are from roach - self.action_dist = DiagGaussianDistribution( - int(np.prod(action_space.shape)), - dist_init=self.config.normal_dist_init, - action_dependent_std=self.config.normal_dist_action_dep_std, - ) - elif self.config.distribution == "beta_uni_mix": - self.action_dist = BetaUniformMixtureDistribution( - int(np.prod(action_space.shape)), - uniform_percentage_z=self.config.uniform_percentage_z, - ) - else: - raise ValueError("Distribution selected that is not implemented. 
Options: beta, normal, beta_uni_mix")
-
-        self.policy_head_arch = list(policy_head_arch)
-        self.value_head_arch = list(value_head_arch)
-        self.activation_fn = nn.ReLU
-
-        self.action_space_low = nn.Parameter(torch.from_numpy(self.action_space.low), requires_grad=False)
-        self.action_space_high = nn.Parameter(torch.from_numpy(self.action_space.high), requires_grad=False)
-
-        self.build()
-
-    def build(self) -> None:
-        last_layer_dim_pi = self.features_extractor.features_dim
-        policy_net = []
-        for layer_size in self.policy_head_arch:
-            policy_net.append(nn.Linear(last_layer_dim_pi, layer_size))
-            if self.config.use_layer_norm and self.config.use_layer_norm_policy_head:
-                policy_net.append(nn.LayerNorm(layer_size))
-            policy_net.append(self.activation_fn())
-            last_layer_dim_pi = layer_size
-
-        self.policy_head = nn.Sequential(*policy_net)
-        # mu->alpha/mean, sigma->beta/log_std (nn.Module, nn.Parameter)
-        self.dist_mu, self.dist_sigma = self.action_dist.proba_distribution_net(last_layer_dim_pi)
-
-        if self.config.use_temperature:
-            # * 2 for a and b assuming beta distribution
-            self.temperature_layer = nn.Sequential(
-                nn.Linear(last_layer_dim_pi, self.action_dist.action_dim * 2),
-                nn.Sigmoid(),
-            )
-
-        last_layer_dim_vf = self.features_extractor.features_dim + self.config.num_value_measurements
-        value_net = []
-        for layer_size in self.value_head_arch:
-            value_net.append(nn.Linear(last_layer_dim_vf, layer_size))
-            if self.config.use_layer_norm:
-                value_net.append(nn.LayerNorm(layer_size))
-            value_net.append(self.activation_fn())
-            last_layer_dim_vf = layer_size
-
-        if self.config.use_hl_gauss_value_loss:
-            value_net.append(nn.Linear(last_layer_dim_vf, self.config.hl_gauss_num_classes))
-        else:
-            value_net.append(nn.Linear(last_layer_dim_vf, 1))
-        self.value_head = nn.Sequential(*value_net)
-
-    def get_features(self, observations) -> torch.Tensor:
-        """
-        :param observations: dict with "bev_semantics" torch.Tensor (num_envs, frame_stack*channel, height, width)
-            and "measurements" torch.Tensor (num_envs, state_dim)
-        """
-        bev_semantics = observations["bev_semantics"]
-        measurements = observations["measurements"]
-        birdview = bev_semantics / 255.0
-        features = self.features_extractor(birdview, measurements)
-        return features
-
-    def get_action_dist_from_features(self, features: torch.Tensor, actions=None):
-        latent_pi = self.policy_head(features)
-        mu = self.dist_mu(latent_pi)
-        sigma = self.dist_sigma(latent_pi)
-
-        if actions is not None and self.config.use_rpo:
-            # Sample again to add stochasticity to the policy (Robust Policy Optimization, https://arxiv.org/abs/2212.07536).
-            # Because the Beta distribution requires its parameters to be > 0, we add the random number before the
-            # activation function. We add the random number only to alpha, which should shift the mean in a similar
-            # way as for the originally proposed Gaussian distribution.
-            z = torch.zeros(mu.shape, dtype=torch.float32, device=mu.device).uniform_(
-                -self.config.rpo_alpha, self.config.rpo_alpha
-            )
-            mu = mu + z
-
-        # We don't need an activation function for the normal distribution because std is predicted in log space.
-        if self.config.distribution in ("beta", "beta_uni_mix"):
-            mu = nn.functional.softplus(mu)
-            sigma = nn.functional.softplus(sigma)
-            # NOTE: adding the nugget to mu/sigma only makes sense with the beta distribution.
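-            # NOTE: Illustrative numbers: softplus(0) ~= 0.693 and softplus(-5) ~= 0.0067, so
-            # without the additive floor below, alpha/beta could collapse towards 0 and the Beta
-            # pdf would blow up at the action bounds; beta_min_a_b_value (whatever value the
-            # config sets) keeps both concentrations bounded away from 0.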
-            mu = mu + self.config.beta_min_a_b_value
-            sigma = sigma + self.config.beta_min_a_b_value
-
-        if self.config.use_temperature:
-            temperature = self.temperature_layer(latent_pi)
-            mu_temperature = temperature[:, : self.action_dist.action_dim]
-            sigma_temperature = temperature[:, self.action_dist.action_dim : self.action_dist.action_dim * 2]
-            # Map them from [0, 1] into the range [min_temperature, 1]
-            mu_temperature = (1.0 - self.config.min_temperature) * mu_temperature + self.config.min_temperature
-            sigma_temperature = (1.0 - self.config.min_temperature) * sigma_temperature + self.config.min_temperature
-
-            mu = mu / mu_temperature
-            sigma = sigma / sigma_temperature
-
-        return (
-            self.action_dist.proba_distribution(mu, sigma),
-            mu.detach(),
-            sigma.detach(),
-        )
-
-    def lstm_forward(self, features, lstm_state, done):
-        # LSTM logic
-        batch_size = lstm_state[0].shape[1]
-        hidden = features.reshape((-1, batch_size, self.lstm.input_size))
-        done = done.reshape((-1, batch_size))
-        new_hidden = []
-        for h, d in zip(hidden, done):
-            h, lstm_state = self.lstm(
-                h.unsqueeze(0),
-                (
-                    (1.0 - d).view(1, -1, 1) * lstm_state[0],
-                    (1.0 - d).view(1, -1, 1) * lstm_state[1],
-                ),
-            )
-            new_hidden += [h]
-        new_hidden = torch.flatten(torch.cat(new_hidden), 0, 1)
-        return new_hidden, lstm_state
-
-    def get_value(self, obs_dict: Dict[str, torch.Tensor], lstm_state=None, done=None):
-        features = self.get_features(obs_dict)
-
-        if self.config.use_lstm:
-            features, _ = self.lstm_forward(features, lstm_state, done)
-
-        value_features = torch.cat((features, obs_dict["value_measurements"]), dim=1)
-        values = self.value_head(value_features)
-        return values
-
-    def forward(
-        self,
-        obs_dict: Dict[str, np.ndarray],
-        actions=None,
-        deterministic: bool = False,
-        exploration_suggests=None,
-        lstm_state=None,
-        done=None,
-    ):
-        """
-        actions are expected to be unscaled actions!
-        """
-        features = self.get_features(obs_dict)
-
-        if self.config.use_lstm:
-            features, lstm_state = self.lstm_forward(features, lstm_state, done)
-
-        pred_sem = pred_measure = None
-        # The additional condition turns off the world-model prediction during data collection, where it is not used.
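-        # NOTE: Spelled out: rollouts call forward() with actions=None and deterministic=False,
-        # so the decoder is skipped during data collection; PPO updates pass actions, and
-        # deterministic evaluation sets deterministic=True, so both of those produce predictions.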
-        if self.config.use_world_model_loss and (actions is not None or deterministic):
-            pred_sem, pred_measure = self.feature_decoder(features)
-
-        value_features = torch.cat((features, obs_dict["value_measurements"]), dim=1)
-        values = self.value_head(value_features)
-        distribution, mu, sigma = self.get_action_dist_from_features(features, actions)
-
-        if actions is None:
-            actions = distribution.get_actions(deterministic=deterministic)
-        else:
-            actions = self.scale_action(actions)
-
-        log_prob = distribution.log_prob(actions)
-
-        actions = self.unscale_action(actions)
-
-        entropy = distribution.entropy().sum(1)
-        exp_loss = None
-
-        if exploration_suggests is not None:
-            exp_loss = distribution.exploration_loss(exploration_suggests)
-
-        return (
-            actions,
-            log_prob,
-            entropy,
-            values,
-            exp_loss,
-            mu,
-            sigma,
-            distribution.distribution,
-            pred_sem,
-            pred_measure,
-            lstm_state,
-        )
-
-    def scale_action(self, action: torch.Tensor, eps=1e-7) -> torch.Tensor:
-        # input action \in [a_low, a_high]
-        # output action \in [d_low+eps, d_high-eps]
-        d_low, d_high = self.action_dist.low, self.action_dist.high  # scalar
-
-        if d_low is not None and d_high is not None:
-            a_low, a_high = self.action_space_low, self.action_space_high
-            action = (action - a_low) / (a_high - a_low) * (d_high - d_low) + d_low
-            action = torch.clamp(action, d_low + eps, d_high - eps)
-        return action
-
-    def unscale_action(self, action: torch.Tensor) -> torch.Tensor:
-        # input action \in [d_low, d_high]
-        # output action \in [a_low, a_high]
-        d_low, d_high = self.action_dist.low, self.action_dist.high  # scalar
-
-        if d_low is not None and d_high is not None:
-            a_low, a_high = self.action_space_low, self.action_space_high
-            action = (action - d_low) / (d_high - d_low) * (a_high - a_low) + a_low
-        return action
-
-    @staticmethod
-    def init_weights(module: nn.Module, gain: float = 1) -> None:
-        """
-        Orthogonal initialization (used in PPO and A2C)
-        """
-        if isinstance(module, (nn.Linear, nn.Conv2d)):
-            nn.init.orthogonal_(module.weight, gain=gain)
-            if module.bias is not None:
-                module.bias.data.fill_(0.0)
-
-    def visualize_model(
-        self,
-        distribution,
-        obs_rendered,
-        measurements,
-        control,
-        value,
-        value_measurements,
-        pred_sem,
-        pred_measure,
-    ):
-
-        if self.config.distribution in ("beta", "beta_uni_mix"):
-            device = distribution.concentration1.device
-            granularity = torch.arange(start=0.0, end=1.0, step=0.001).unsqueeze(1)
-            granularity = torch.ones((granularity.shape[0], self.action_space.shape[0])) * granularity
-            granularity = granularity.to(device)
-            granularity_cpu = deepcopy(granularity).cpu()
-        elif self.config.distribution == "normal":
-            device = distribution.mean.device
-            granularity_cpu = torch.arange(start=0.0, end=1.0, step=0.001).unsqueeze(1)
-            granularity = torch.arange(start=-1.0, end=1.0, step=0.002).unsqueeze(1)
-            granularity = torch.ones((granularity.shape[0], self.action_space.shape[0])) * granularity
-            granularity = granularity.to(device)
-
-        if self.config.distribution == "beta_uni_mix":
-            uniform_pdf = torch.ones_like(granularity, device=device, requires_grad=False)
-            distribution = (
-                self.action_dist.beta_perc * distribution.log_prob(granularity).exp()
-                + self.action_dist.uniform_perc * uniform_pdf
-            )
-            distribution = distribution.cpu().numpy()
-        else:
-            distribution = distribution.log_prob(granularity)
-            distribution = torch.exp(distribution).cpu().numpy()
-        # Plot the action distribution(s) next to the rendered observation.
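-        # NOTE: granularity is a (1000, action_dim) grid over the distribution support
-        # ([0, 1] for Beta with step 0.001, [-1, 1] for Normal with step 0.002); the pdf values
-        # computed above are drawn over this grid below, one plot row per action dimension.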
-        width, height, _ = obs_rendered.shape
-
-        action_type = ["acceleration", "steering"]
-        action_plots = []
-        plot_height = height // (self.action_space.shape[0] + 1)
-        actions = [control[0], control[1]]
-
-        for i in range(self.action_space.shape[0]):
-            action_plot = np.zeros((plot_height, width, 3), dtype=np.uint8)
-            cv2.line(
-                action_plot,
-                (width // 2, 0),
-                (width // 2, (plot_height - 1)),
-                (0, 255, 0),
-                thickness=2,
-            )
-            cv2.line(action_plot, (0, 0), (0, (plot_height - 1)), (0, 255, 0), thickness=2)
-            cv2.line(
-                action_plot,
-                (width - 1, 0),
-                (width - 1, (plot_height - 1)),
-                (0, 255, 0),
-                thickness=2,
-            )
-
-            # Plot actions:
-            control_pixel = int(((actions[i] + 1.0) / 2.0) * (width - 1))
-            cv2.line(
-                action_plot,
-                (control_pixel, 0),
-                (control_pixel, (plot_height - 1)),
-                (255, 255, 0),
-                thickness=2,
-            )
-
-            for idx, x_value in enumerate(granularity_cpu.numpy()):
-                x = int(x_value[0] * width)
-                y_max = 25.0  # Continuous PDFs can be arbitrarily high. We clip at 25.
-                y_pixel = int(distribution[idx, i] / y_max * (plot_height - 1))
-                clipped_pixel = min(int(plot_height - 1), y_pixel)
-                y = (plot_height - 1) - clipped_pixel  # Mirror
-                action_plot = cv2.circle(
-                    action_plot,
-                    (x, y),
-                    radius=1,
-                    color=(255, 255, 0),
-                    lineType=cv2.LINE_AA,
-                    thickness=-1,
-                )
-
-            cv2.putText(
-                action_plot,
-                action_type[i],
-                (0, 10),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.5,
-                (255, 255, 255),
-                1,
-                cv2.LINE_AA,
-            )
-            action_plots.append(action_plot)
-
-        action_plots = np.concatenate(action_plots, axis=0)
-        measurement_plot = np.zeros((plot_height, width, 3), dtype=np.uint8)
-
-        cv2.putText(
-            measurement_plot,
-            f"Last steer: {measurements[0]:.2f}",
-            (0, 10),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (255, 255, 255),
-            1,
-            cv2.LINE_AA,
-        )
-        cv2.putText(
-            measurement_plot,
-            f"Last throt: {measurements[1]:.2f}",
-            (0, 25),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (255, 255, 255),
-            1,
-            cv2.LINE_AA,
-        )
-        cv2.putText(
-            measurement_plot,
-            f"Last brake: {measurements[2]:.2f}",
-            (0, 40),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (255, 255, 255),
-            1,
-            cv2.LINE_AA,
-        )
-
-        if self.config.use_target_point:
-            cv2.putText(
-                measurement_plot,
-                f"TP: {measurements[8]:.1f} {measurements[9]:.1f}",
-                (0, 55),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.33,
-                (255, 255, 255),
-                1,
-                cv2.LINE_AA,
-            )
-
-        # cv2.putText(measurement_plot, f'Acc: {measurements[8]:.1f} {measurements[9]:.1f}', (0, 55),
-        #             cv2.FONT_HERSHEY_SIMPLEX, 0.33, (255, 255, 255), 1, cv2.LINE_AA)
-
-        cv2.putText(
-            measurement_plot,
-            f"Gear: {measurements[3]:.2f}",
-            (width // 2, 10),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (255, 255, 255),
-            1,
-            cv2.LINE_AA,
-        )
-        cv2.putText(
-            measurement_plot,
-            f"Speed: {measurements[4]:.1f} {measurements[5]:.1f}",
-            (width // 2, 25),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (255, 255, 255),
-            1,
-            cv2.LINE_AA,
-        )
-        cv2.putText(
-            measurement_plot,
-            f"F. speed: {measurements[6]:.2f}",
-            (width // 2, 40),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (255, 255, 255),
-            1,
-            cv2.LINE_AA,
-        )
-        cv2.putText(
-            measurement_plot,
-            f"Speed lim.: {measurements[7]:.2f}",
-            (width // 2, 55),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (255, 255, 255),
-            1,
-            cv2.LINE_AA,
-        )
-
-        action_plots = np.concatenate((measurement_plot, action_plots), axis=0)
-
-        obs_rendered = np.array(obs_rendered).copy()
-        cv2.putText(
-            obs_rendered,
-            f"Steer:{control[1]:.2f}",
-            (5, 10),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (0, 0, 0),
-            1,
-            cv2.LINE_AA,
-        )
-        cv2.putText(
-            obs_rendered,
-            f"Throt:{control[0]:.2f}",
-            (5, 25),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (0, 0, 0),
-            1,
-            cv2.LINE_AA,
-        )
-        cv2.putText(
-            obs_rendered,
-            f"Brake:{control[0]:.2f}",
-            (5, 40),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (0, 0, 0),
-            1,
-            cv2.LINE_AA,
-        )
-        cv2.putText(
-            obs_rendered,
-            f"Value:{value.item():.2f}",
-            (5, 55),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (0, 0, 0),
-            1,
-            cv2.LINE_AA,
-        )
-
-        cv2.putText(
-            obs_rendered,
-            f"timeout:{value_measurements[0]:.2f}",
-            (110, 10),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (0, 0, 0),
-            1,
-            cv2.LINE_AA,
-        )
-        cv2.putText(
-            obs_rendered,
-            f"blocked:{value_measurements[1]:.2f}",
-            (110, 25),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (0, 0, 0),
-            1,
-            cv2.LINE_AA,
-        )
-        cv2.putText(
-            obs_rendered,
-            f"route:{value_measurements[2]:.2f}",
-            (110, 40),
-            cv2.FONT_HERSHEY_SIMPLEX,
-            0.33,
-            (0, 0, 0),
-            1,
-            cv2.LINE_AA,
-        )
-
-        if self.config.use_extra_control_inputs:
-            cv2.putText(
-                obs_rendered,
-                f"wheel: {measurements[8]:.2f}",
-                (110, 140),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.33,
-                (255, 255, 255),
-                1,
-                cv2.LINE_AA,
-            )
-            cv2.putText(
-                obs_rendered,
-                f"error: {measurements[9]:.2f}",
-                (110, 155),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.33,
-                (255, 255, 255),
-                1,
-                cv2.LINE_AA,
-            )
-            cv2.putText(
-                obs_rendered,
-                f"deriv: {measurements[10]:.2f}",
-                (110, 170),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.33,
-                (255, 255, 255),
-                1,
-                cv2.LINE_AA,
-            )
-            cv2.putText(
-                obs_rendered,
-                f"integ: {measurements[11]:.2f}",
-                (110, 185),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.33,
-                (255, 255, 255),
-                1,
-                cv2.LINE_AA,
-            )
-
-        if self.config.use_world_model_loss:
-            pred_sem = pred_sem.cpu().numpy()
-            pred_measure = pred_measure.cpu().numpy()
-
-            bev_semantic_indices = np.argmax(pred_sem[0], axis=0)
-            converter = np.array(self.config.bev_classes_list)
-            pred_semantic = converter[bev_semantic_indices, ...].astype("uint8")
-            pred_measure = pred_measure[0]
-
-            cv2.putText(
-                pred_semantic,
-                f"Last steer: {pred_measure[0]:.2f}",
-                (0, 10),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.33,
-                (255, 255, 255),
-                1,
-                cv2.LINE_AA,
-            )
-            cv2.putText(
-                pred_semantic,
-                f"Last throt: {pred_measure[1]:.2f}",
-                (0, 25),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.33,
-                (255, 255, 255),
-                1,
-                cv2.LINE_AA,
-            )
-            cv2.putText(
-                pred_semantic,
-                f"Last brake: {pred_measure[2]:.2f}",
-                (0, 40),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.33,
-                (255, 255, 255),
-                1,
-                cv2.LINE_AA,
-            )
-
-            cv2.putText(
-                pred_semantic,
-                f"Gear: {pred_measure[3]:.2f}",
-                (width // 2, 10),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.33,
-                (255, 255, 255),
-                1,
-                cv2.LINE_AA,
-            )
-            cv2.putText(
-                pred_semantic,
-                f"Speed: {pred_measure[4]:.1f} {pred_measure[5]:.1f}",
-                (width // 2, 25),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                0.33,
-                (255, 255, 255),
-                1,
-                cv2.LINE_AA,
-            )
-            cv2.putText(
-                pred_semantic,
-                f"F.
speed: {pred_measure[6]:.2f}", - (width // 2, 40), - cv2.FONT_HERSHEY_SIMPLEX, - 0.33, - (255, 255, 255), - 1, - cv2.LINE_AA, - ) - cv2.putText( - pred_semantic, - f"Speed lim.: {pred_measure[7]:.2f}", - (width // 2, 55), - cv2.FONT_HERSHEY_SIMPLEX, - 0.33, - (255, 255, 255), - 1, - cv2.LINE_AA, - ) - return np.concatenate((action_plots, obs_rendered, pred_semantic), axis=1) - - return np.concatenate((action_plots, obs_rendered), axis=1) diff --git a/d123/simulation/gym/training.py b/d123/simulation/gym/training.py deleted file mode 100644 index 0e035f1c..00000000 --- a/d123/simulation/gym/training.py +++ /dev/null @@ -1,1097 +0,0 @@ -""" -NOTE @DanielDauner: -This file needs refactoring. The training loop is specific to the default environment. -I will leave it for the initial code release but hope to find time to fix it. -""" - -import datetime -import gc -import logging -import math -import os -import random -import re -import time -from collections import deque -from pathlib import Path - -import gymnasium as gym -import jsonpickle -import jsonpickle.ext.numpy as jsonpickle_numpy -import numpy as np -import schedulefree -import torch -import wandb -from carl_nuplan.common.logging import suppress_info_logs -from carl_nuplan.planning.script.builders.observation_building_builder import build_observation_builder -from carl_nuplan.planning.script.builders.reward_builder_builder import build_reward_builder -from carl_nuplan.planning.script.builders.scenario_sampler_builder import build_scenario_sampler -from carl_nuplan.planning.script.builders.simulation_building_builder import build_simulation_builder -from carl_nuplan.planning.script.builders.trajectory_building_builder import build_trajectory_builder -from gymnasium.envs.registration import register -from omegaconf import DictConfig, OmegaConf -from pytictoc import TicToc -from tensorboardX import SummaryWriter -from torch import nn, optim -from tqdm import tqdm - -from d123.simulation.gym.policy.ppo.ppo_config import GlobalConfig -from d123.simulation.gym.policy.ppo.ppo_model import PPOPolicy - -jsonpickle_numpy.register_handlers() -jsonpickle.set_encoder_options("json", sort_keys=True, indent=4) - -logger = logging.getLogger(__name__) - -REWARD_LOGGING: bool = True -COMFORT_LOGGING: bool = True - - -def save(model, optimizer, config, folder, model_file, optimizer_file): - model_file = os.path.join(folder, model_file) - torch.save(model.module.state_dict(), model_file) - - if optimizer is not None: - optimizer_file = os.path.join(folder, optimizer_file) - torch.save(optimizer.state_dict(), optimizer_file) - - json_config = jsonpickle.encode(config) - with open( - os.path.join(folder, "config_pickle.json"), - "wt", - encoding="utf-8", - ) as f2: - f2.write(json_config) - - -def make_env(cfg: DictConfig, config: GlobalConfig): - @suppress_info_logs - def thunk(idx: int = 0): - - scenario_sampler = build_scenario_sampler(cfg) - simulation_builder = build_simulation_builder(cfg) - trajectory_builder = build_trajectory_builder(cfg) - observation_builder = build_observation_builder(cfg) - reward_builder = build_reward_builder(cfg) - - env = gym.make( - "EnvironmentWrapper-v0", - scenario_sampler=scenario_sampler, - simulation_builder=simulation_builder, - trajectory_builder=trajectory_builder, - observation_builder=observation_builder, - reward_builder=reward_builder, - terminate_on_failure=cfg.debug, - ) - env = gym.wrappers.RecordEpisodeStatistics(env) - env = gym.wrappers.ClipAction(env) - - if config.normalize_rewards: - env = 
gym.wrappers.NormalizeReward(env, gamma=config.gamma) - env = gym.wrappers.TransformReward(env, lambda reward: np.clip(reward, -10, 10)) - return env - - return thunk - - -def run_training(cfg: DictConfig) -> None: - - register( - id="EnvironmentWrapper-v0", - entry_point="d123.simulation.gym.environment.environment_wrapper:EnvironmentWrapper", - max_episode_steps=None, - ) - config = GlobalConfig() - - # Torchrun initialization - # Use torchrun for starting because it has proper error handling. Local rank will be set automatically - rank = int(os.environ["RANK"]) # Rank across all processes - local_rank = int(os.environ["LOCAL_RANK"]) # Rank on Node - world_size = int(os.environ["WORLD_SIZE"]) # Number of processes - - logger.info(f"RANK, LOCAL_RANK and WORLD_SIZE in environ: {rank}/{local_rank}/{world_size}") - - local_batch_size = cfg.total_batch_size // world_size - local_bs_per_env = local_batch_size // cfg.num_envs_per_gpu - local_minibatch_size = cfg.total_minibatch_size // world_size - num_minibatches = local_batch_size // local_minibatch_size - - run_name = f"{cfg.experiment_name}__{cfg.seed}" - if rank == 0: - exp_folder = os.path.join(cfg.output_dir, f"{cfg.experiment_name}") - wandb_folder = os.path.join(exp_folder, "wandb") - - Path(exp_folder).mkdir(parents=True, exist_ok=True) - Path(wandb_folder).mkdir(parents=True, exist_ok=True) - - if cfg.track: - - wandb.init( - project=cfg.wandb_project_name, - entity=cfg.wandb_entity, - sync_tensorboard=True, - # config=vars(cfg), # FIXME - name=run_name, - monitor_gym=False, - allow_val_change=True, - save_code=False, - mode="online", - resume="auto", - dir=wandb_folder, - settings=wandb.Settings( - _disable_stats=True, _disable_meta=True - ), # Can get large if we log all the cpu cores. - ) - - writer = SummaryWriter(exp_folder) - writer.add_text( - "hyperparameters", - "|param|value|\n|-|-|\n%s" - % ("\n".join([f"|{key}|{value}|" for key, value in OmegaConf.to_container(cfg, resolve=True).items()])), - ) - - # TRY NOT TO MODIFY: seeding - random.seed(cfg.seed) - np.random.seed(cfg.seed) - torch.manual_seed(cfg.seed) - - logger.info(f"Is cuda available?: {torch.cuda.is_available()}") - if cfg.train_gpu_ids is None: - cfg.train_gpu_ids = list(range(torch.cuda.device_count())) - - # Load the config before overwriting values with current arguments - if cfg.load_file is not None: - load_folder = Path(cfg.load_file).parent.resolve() - with open(os.path.join(load_folder, "config_pickle.json"), "rt", encoding="utf-8") as f: - json_config = f.read() - # 4 ms, might need to move outside the agent. - loaded_config = jsonpickle.decode(json_config) - # Overwrite all properties that were set in the saved config. - config.__dict__.update(loaded_config.__dict__) - - # Configure config. Converts all arguments into config attributes - config.initialize(**OmegaConf.to_container(cfg, resolve=True)) - - if config.use_dd_ppo_preempt: - # Compute unique port within machine based on experiment name and seed. - experiment_id = int(re.findall(r"\d+", cfg.experiment_uid)[0]) - tcp_store_port = ((experiment_id * 1000) % 65534) + int(cfg.seed) + 5000 - # We use gloo, because nccl crashes when using multiple processes per GPU. 
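-        # NOTE: Worked example for the port arithmetic above, with hypothetical values: an
-        # experiment_uid containing "42" and seed 3 gives tcp_store_port =
-        # ((42 * 1000) % 65534) + 3 + 5000 = 47003, so concurrent experiments on the same
-        # machine land on different store ports.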
-        num_rollouts_done_store = torch.distributed.TCPStore("127.0.0.1", tcp_store_port, world_size, rank == 0)
-        torch.distributed.init_process_group(
-            backend="gloo" if cfg.cpu_collect else "nccl",
-            store=num_rollouts_done_store,
-            world_size=world_size,
-            rank=rank,
-            timeout=datetime.timedelta(minutes=15),
-        )
-        num_rollouts_done_store.set("num_done", "0")
-        logger.info(f"Rank:{rank}, TCP_Store_Port: {tcp_store_port}")
-    else:
-        torch.distributed.init_process_group(
-            backend="gloo" if cfg.cpu_collect else "nccl",
-            init_method="env://",
-            world_size=world_size,
-            rank=rank,
-            timeout=datetime.timedelta(minutes=15),
-        )
-
-    device = (
-        # torch.device(f"cuda:{cfg.train_gpu_ids[rank]}")
-        torch.device(f"cuda:{cfg.train_gpu_ids[rank]}")
-        if torch.cuda.is_available() and cfg.cuda
-        else torch.device("cpu")
-    )
-
-    if torch.cuda.is_available() and cfg.cuda:
-        torch.cuda.set_device(device)
-
-    torch.backends.cudnn.deterministic = cfg.torch_deterministic
-    torch.backends.cuda.matmul.allow_tf32 = config.allow_tf32
-    torch.backends.cudnn.benchmark = config.benchmark
-    torch.backends.cudnn.allow_tf32 = config.allow_tf32
-    # torch.set_float32_matmul_precision(config.matmul_precision)
-
-    if rank == 0:
-        json_config = jsonpickle.encode(config)
-        with open(os.path.join(exp_folder, "config_pickle.json"), "w") as f2:
-            f2.write(json_config)
-
-    # NOTE: need to update the config with the argparse arguments before creating the gym environment, because the gym env is constructed from the config.
-    if cfg.debug:
-        env = gym.vector.SyncVectorEnv([make_env(cfg=cfg, config=config) for _ in range(cfg.num_envs_per_gpu)])
-    else:
-        env = gym.vector.AsyncVectorEnv(
-            [make_env(cfg=cfg, config=config) for _ in range(cfg.num_envs_per_gpu)],
-            copy=False,
-        )
-    assert isinstance(env.single_action_space, gym.spaces.Box), "only continuous action space is supported"
-
-    agent = PPOPolicy(env.single_observation_space, env.single_action_space, config=config).to(device)
-
-    if config.compile_model:
-        agent = torch.compile(agent)
-
-    start_step = 0
-    if cfg.load_file is not None:
-        load_file_name = os.path.basename(cfg.load_file)
-        algo_step = re.findall(r"\d+", load_file_name)
-        if len(algo_step) > 0:
-            start_step = int(algo_step[0]) + 1  # That step was already finished.
-            logger.info(f"Start training from step: {start_step}")
-        agent.load_state_dict(torch.load(cfg.load_file, map_location=device), strict=True)
-
-    agent = torch.nn.parallel.DistributedDataParallel(
-        agent,
-        device_ids=None,
-        output_device=None,
-        broadcast_buffers=False,
-        find_unused_parameters=False,
-    )
-
-    # If we are resuming training, use the last learning rate from the config.
-    # If we start a fresh training run, set the current learning rate according to the arguments.
- if cfg.load_file is None: - config.current_learning_rate = cfg.learning_rate - - # if rank == 0: - # model_parameters = filter(lambda p: p.requires_grad, agent.parameters()) - # num_params = sum(np.prod(p.size()) for p in model_parameters) - # - # logger.info('Total trainable parameters: ', num_params) - if cfg.schedule_free: - optimizer = schedulefree.AdamWScheduleFree( - agent.parameters(), - lr=config.current_learning_rate, - betas=tuple(cfg.adam_betas), - eps=config.adam_eps, - weight_decay=config.weight_decay, - ) - - elif config.weight_decay > 0.0: - optimizer = optim.AdamW( - agent.parameters(), - lr=config.current_learning_rate, - betas=tuple(cfg.adam_betas), - eps=config.adam_eps, - weight_decay=config.weight_decay, - ) - else: - optimizer = optim.Adam( - agent.parameters(), - betas=tuple(cfg.adam_betas), - lr=config.current_learning_rate, - eps=config.adam_eps, - ) - - # Load optimizer - if cfg.load_file is not None: - optimizer.load_state_dict(torch.load(cfg.load_file.replace("model_", "optimizer_"), map_location=device)) - - if rank == 0: - writer.add_scalar("charts/restart", 1, config.global_step) # Log that a restart happened - - if config.cpu_collect: - device = "cpu" - - # ALGO Logic: Storage setup - obs = { - "bev_semantics": torch.zeros( - (local_bs_per_env, cfg.num_envs_per_gpu) + env.single_observation_space.spaces["bev_semantics"].shape, - dtype=torch.uint8, - device=device, - ), - "measurements": torch.zeros( - (local_bs_per_env, cfg.num_envs_per_gpu) + env.single_observation_space.spaces["measurements"].shape, - device=device, - ), - "value_measurements": torch.zeros( - (local_bs_per_env, cfg.num_envs_per_gpu) + env.single_observation_space.spaces["value_measurements"].shape, - device=device, - ), - } - actions = torch.zeros( - (local_bs_per_env, cfg.num_envs_per_gpu) + env.single_action_space.shape, - device=device, - ) - old_mus = torch.zeros( - (local_bs_per_env, cfg.num_envs_per_gpu) + env.single_action_space.shape, - device=device, - ) - old_sigmas = torch.zeros( - (local_bs_per_env, cfg.num_envs_per_gpu) + env.single_action_space.shape, - device=device, - ) - logprobs = torch.zeros((local_bs_per_env, cfg.num_envs_per_gpu), device=device) - rewards = torch.zeros((local_bs_per_env, cfg.num_envs_per_gpu), device=device) - dones = torch.zeros((local_bs_per_env, cfg.num_envs_per_gpu), device=device) - values = torch.zeros((local_bs_per_env, cfg.num_envs_per_gpu), device=device) - exp_n_steps = np.zeros((local_bs_per_env, cfg.num_envs_per_gpu), dtype=np.int32) - exp_suggest = np.zeros((local_bs_per_env, cfg.num_envs_per_gpu), dtype=np.int32) - - # TRY NOT TO MODIFY: start the game - reset_obs = env.reset(seed=[cfg.seed + rank * cfg.num_envs_per_gpu + i for i in range(cfg.num_envs_per_gpu)]) - next_obs = { - "bev_semantics": torch.tensor(reset_obs[0]["bev_semantics"], device=device, dtype=torch.uint8), - "measurements": torch.tensor(reset_obs[0]["measurements"], device=device, dtype=torch.float32), - "value_measurements": torch.tensor(reset_obs[0]["value_measurements"], device=device, dtype=torch.float32), - } - next_done = torch.zeros(cfg.num_envs_per_gpu, device=device) - next_lstm_state = ( - torch.zeros( - config.num_lstm_layers, - cfg.num_envs_per_gpu, - config.features_dim, - device=device, - ), - torch.zeros( - config.num_lstm_layers, - cfg.num_envs_per_gpu, - config.features_dim, - device=device, - ), - ) - num_updates = cfg.total_timesteps // cfg.total_batch_size - local_processed_samples = 0 - start_time = time.time() - agent.train() # TODO change train 
and eval
-
-    if rank == 0:
-        avg_returns = deque(maxlen=100)
-
-    # if config.use_hl_gauss_value_loss:
-    #     hl_gauss_bins = rl_u.hl_gaus_bins(config.hl_gauss_vmin, config.hl_gauss_vmax, config.hl_gauss_bucket_size, device)
-
-    for update in tqdm(range(start_step, num_updates), disable=rank != 0):
-        if cfg.debug:
-            print("WARNING: DEBUG MODE")
-
-        if config.cpu_collect:
-            device = "cpu"
-            agent.to(device)
-        # Free all data from the last iteration.
-        gc.collect()
-        with torch.no_grad():
-            torch.cuda.empty_cache()
-        gc.disable()
-
-        if config.use_dd_ppo_preempt:
-            num_rollouts_done_store.set("num_done", "0")
-
-        # Buffers we use to store returns and aggregate them later to rank 0 for logging.
-        total_returns = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-        total_lengths = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-        num_total_returns = torch.zeros(world_size, device=device, dtype=torch.int32, requires_grad=False)
-
-        # reward
-        if REWARD_LOGGING:
-            reward_progress = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            reward_red_light = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            reward_collision = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            reward_off_road = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            reward_lane_distance = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            reward_too_fast = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            reward_off_route = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            reward_comfort = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            reward_ttc = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-
-        if COMFORT_LOGGING:
-            comfort_lon_acceleration = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            comfort_lat_acceleration = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            comfort_jerk_metric = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            comfort_lon_jerk_metric = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            comfort_yaw_accel = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-            comfort_yaw_rate = torch.zeros(world_size, device=device, dtype=torch.float32, requires_grad=False)
-
-        initial_lstm_state = (next_lstm_state[0].clone(), next_lstm_state[1].clone())
-
-        # Anneal the learning rate if instructed to do so.
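-        # NOTE: Sketch of the schedules handled below (lr0 = config.learning_rate,
-        # frac = update / num_updates):
-        #   linear:         lr = lr0 * (1 - frac)              (frac uses update - 1)
-        #   step:           lr = lr0 * factor^k, with k the number of thresholds in
-        #                   lr_schedule_step_perc already passed
-        #   cosine:         lr = 0.5 * lr0 * (1 + cos(pi * frac))
-        #   cosine_restart: the same cosine curve, restarted on each interval in
-        #                   lr_schedule_cosine_restarts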
- if cfg.schedule_free: - optimizer.eval() - else: - if config.lr_schedule == "linear": - frac = 1.0 - (update - 1.0) / num_updates - config.current_learning_rate = frac * config.learning_rate - elif config.lr_schedule == "step": - frac = update / num_updates - lr_multiplier = 1.0 - for change_percentage in config.lr_schedule_step_perc: - if frac > change_percentage: - lr_multiplier *= config.lr_schedule_step_factor - config.current_learning_rate = lr_multiplier * config.learning_rate - elif config.lr_schedule == "cosine": - frac = update / num_updates - config.current_learning_rate = 0.5 * config.learning_rate * (1 + math.cos(frac * math.pi)) - elif config.lr_schedule == "cosine_restart": - frac = update / (num_updates + 1) # + 1 so it doesn't become 100 % - for idx, frac_restart in enumerate(config.lr_schedule_cosine_restarts): - if frac >= frac_restart: - current_idx = idx - base_frac = config.lr_schedule_cosine_restarts[current_idx] - length_current_interval = ( - config.lr_schedule_cosine_restarts[current_idx + 1] - - config.lr_schedule_cosine_restarts[current_idx] - ) - frac_current_iter = (frac - base_frac) / length_current_interval - config.current_learning_rate = 0.5 * config.learning_rate * (1 + math.cos(frac_current_iter * math.pi)) - - for param_group in optimizer.param_groups: - param_group["lr"] = config.current_learning_rate - - t0 = TicToc() # Data collect - t1 = TicToc() # Forward pass - t2 = TicToc() # Env step - t3 = TicToc() # Pre-processing - t4 = TicToc() # Train inter - t5 = TicToc() # Logging - t0.tic() - inference_times = [] - env_times = [] - for step in range(0, local_bs_per_env): - config.global_step += 1 * world_size * cfg.num_envs_per_gpu - local_processed_samples += 1 * world_size * cfg.num_envs_per_gpu - - obs["bev_semantics"][step] = next_obs["bev_semantics"] - obs["measurements"][step] = next_obs["measurements"] - obs["value_measurements"][step] = next_obs["value_measurements"] - dones[step] = next_done - - # ALGO LOGIC: action logic - with torch.no_grad(): - t1.tic() - ( - action, - logprob, - _, - value, - _, - mu, - sigma, - _, - _, - _, - next_lstm_state, - ) = agent.forward(next_obs, lstm_state=next_lstm_state, done=next_done) - if config.use_hl_gauss_value_loss: - # value_pdf = F.softmax(value, dim=1) - # value = torch.sum(value_pdf * hl_gauss_bins.unsqueeze(0), dim=1) - pass - inference_times.append(t1.tocvalue()) - values[step] = value.flatten() - actions[step] = action - logprobs[step] = logprob - old_mus[step] = mu - old_sigmas[step] = sigma - - # TRY NOT TO MODIFY: execute the game and log data. 
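-            # NOTE: Gymnasium's step() returns a 5-tuple; termination (true MDP end) and
-            # truncation (time limit) are merged into a single done flag below because the
-            # original PPO recipe does not bootstrap the two cases differently.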
-            t2.tic()
-            next_obs, reward, termination, truncation, info = env.step(action.cpu().numpy())
-            env_times.append(t2.tocvalue())
-
-            if rank == 0:
-                if "timing" in info.keys():
-                    if info["timing"][0] is not None:
-                        for key_, value_ in info["timing"][0].items():
-                            tab_ = "time_step" if "step" in key_ else "time_reset"
-                            writer.add_scalar(f"{tab_}/{key_}", np.mean(value_), config.global_step)
-
-            done = np.logical_or(termination, truncation)  # Not treated separately in original PPO
-            rewards[step] = torch.tensor(reward, device=device, dtype=torch.float32)
-            next_done = torch.tensor(done, device=device, dtype=torch.float32)
-            next_obs = {
-                "bev_semantics": torch.tensor(next_obs["bev_semantics"], device=device, dtype=torch.uint8),
-                "measurements": torch.tensor(next_obs["measurements"], device=device, dtype=torch.float32),
-                "value_measurements": torch.tensor(next_obs["value_measurements"], device=device, dtype=torch.float32),
-            }
-
-            if "final_info" in info.keys():
-
-                for idx, single_info in enumerate(info["final_info"]):
-
-                    if config.use_exploration_suggest:
-                        # Exploration loss
-                        exp_n_steps[step, idx] = single_info["n_steps"]
-                        exp_suggest[step, idx] = single_info["suggest"]
-
-                    # Sum up total returns and how often the env was reset during this iteration.
-                    if single_info is not None:
-                        if "episode" in single_info.keys():
-                            print(
-                                f"rank: {rank}, config.global_step={config.global_step}, episodic_return={single_info['episode']['r']}"
-                            )
-                            total_returns[rank] += single_info["episode"]["r"].item()
-                            total_lengths[rank] += single_info["episode"]["l"].item()
-                            num_total_returns[rank] += 1
-
-                        if REWARD_LOGGING and "reward" in single_info.keys():
-                            reward_progress[rank] += single_info["reward"]["reward_progress"]
-                            reward_red_light[rank] += single_info["reward"]["reward_red_light"]
-                            reward_collision[rank] += single_info["reward"]["reward_collision"]
-                            reward_off_road[rank] += single_info["reward"]["reward_off_road"]
-                            reward_lane_distance[rank] += single_info["reward"]["reward_lane_distance"]
-                            reward_too_fast[rank] += single_info["reward"]["reward_too_fast"]
-                            reward_off_route[rank] += single_info["reward"]["reward_off_route"]
-                            reward_comfort[rank] += single_info["reward"]["reward_comfort"]
-                            reward_ttc[rank] += single_info["reward"]["reward_ttc"]
-
-                        if COMFORT_LOGGING and "comfort" in single_info.keys():
-                            comfort_lon_acceleration[rank] += single_info["comfort"]["comfort_lon_acceleration"]
-                            comfort_lat_acceleration[rank] += single_info["comfort"]["comfort_lat_acceleration"]
-                            comfort_jerk_metric[rank] += single_info["comfort"]["comfort_jerk_metric"]
-                            comfort_lon_jerk_metric[rank] += single_info["comfort"]["comfort_lon_jerk_metric"]
-                            comfort_yaw_accel[rank] += single_info["comfort"]["comfort_yaw_accel"]
-                            comfort_yaw_rate[rank] += single_info["comfort"]["comfort_yaw_rate"]
-
-            if config.use_dd_ppo_preempt:
-                num_done = int(num_rollouts_done_store.get("num_done"))
-                min_steps = int(config.dd_ppo_min_perc * local_bs_per_env)
-                if (num_done / world_size) > config.dd_ppo_preempt_threshold and step > min_steps:
-                    logger.info(f"Rank:{rank}, Preempt at step: {step}, Num done: {num_done}")
-                    break  # End data collection early; the other workers are finished.
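-                # NOTE: Preemption arithmetic for the break above, with hypothetical values:
-                # with world_size=8, dd_ppo_preempt_threshold=0.6 and dd_ppo_min_perc=0.25, a
-                # worker stops once more than 60% of workers report done and it has itself
-                # collected at least 25% of local_bs_per_env steps; the shortened rollout is
-                # handled later through num_collected_steps.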
-
-        t0.toc(msg=f"Rank:{rank}, Data collection.")
-        print(f"Rank:{rank}, Total forward time {sum(inference_times)}")
-        print(f"Rank:{rank}, Total env time {sum(env_times)}")
-        t3.tic()
-
-        if config.use_dd_ppo_preempt:
-            num_rollouts_done_store.add("num_done", 1)
-
-        # In case of a dd-ppo preempt this can be smaller than the local batch size
-        num_collected_steps = step + 1
-
-        # bootstrap value if not done
-        with torch.no_grad():
-            # if config.use_hl_gauss_value_loss:
-            #     next_value = agent.module.get_value(next_obs, next_lstm_state, next_done)
-            #     value_pdf = F.softmax(next_value, dim=1)
-            #     next_value = torch.sum(value_pdf * hl_gauss_bins.unsqueeze(0), dim=1)
-            if config.use_hl_gauss_value_loss:
-                pass
-            else:
-                next_value = agent.module.get_value(next_obs, next_lstm_state, next_done).squeeze(1)
-            if cfg.gae:
-                advantages = torch.zeros_like(rewards, device=device)
-                lastgaelam = 0.0
-                for t in reversed(range(num_collected_steps)):
-                    if t == local_bs_per_env - 1:
-                        nextnonterminal = 1.0 - next_done
-                        nextvalues = next_value
-                    else:
-                        nextnonterminal = 1.0 - dones[t + 1]
-                        nextvalues = values[t + 1]
-                    delta = rewards[t] + cfg.gamma * nextvalues * nextnonterminal - values[t]
-                    advantages[t] = lastgaelam = delta + cfg.gamma * cfg.gae_lambda * nextnonterminal * lastgaelam
-                returns = advantages + values
-            else:
-                returns = torch.zeros_like(rewards, device=device)
-                for t in reversed(range(num_collected_steps)):
-                    if t == local_bs_per_env - 1:
-                        nextnonterminal = 1.0 - next_done
-                        next_return = next_value
-                    else:
-                        nextnonterminal = 1.0 - dones[t + 1]
-                        next_return = returns[t + 1]
-                    returns[t] = rewards[t] + cfg.gamma * nextnonterminal * next_return
-                advantages = returns - values
-
-        if config.cpu_collect:
-            device = (
-                torch.device(f"cuda:{cfg.train_gpu_ids[rank]}")
-                if torch.cuda.is_available() and cfg.cuda
-                else torch.device("cpu")
-            )
-            agent.to(device)
-
-        b_exploration_suggests = np.zeros((num_collected_steps, cfg.num_envs_per_gpu), dtype=np.int32)
-        if config.use_exploration_suggest:
-            for step in range(num_collected_steps):
-                n_steps = exp_n_steps[step][0]  # TODO
-                if n_steps > 0:
-                    n_start = max(0, step - n_steps)
-                    b_exploration_suggests[n_start:step] = exp_suggest[step]
-
-        if config.use_world_model_loss:  # TODO
-            b_wm_added_index = np.zeros(num_collected_steps, dtype=np.int32)
-            b_world_model_mask = torch.zeros(
-                num_collected_steps,
-                dtype=torch.float32,
-                device=device,
-                requires_grad=False,
-            )
-            invalid_frames = config.num_future_prediction
-            for step in reversed(range(num_collected_steps)):
-                if invalid_frames <= 0:
-                    b_wm_added_index[step] = config.num_future_prediction
-                    b_world_model_mask[step] = 1.0
-                else:
-                    invalid_frames -= 1
-
-                if dones[step]:
-                    invalid_frames = 5
-
-        b_obs = {
-            "bev_semantics": obs["bev_semantics"][:num_collected_steps].reshape(
-                (-1,) + env.single_observation_space.spaces["bev_semantics"].shape
-            ),
-            "measurements": obs["measurements"][:num_collected_steps].reshape(
-                (-1,) + env.single_observation_space.spaces["measurements"].shape
-            ),
-            "value_measurements": obs["value_measurements"][:num_collected_steps].reshape(
-                (-1,) + env.single_observation_space.spaces["value_measurements"].shape
-            ),
-        }
-        b_logprobs = logprobs[:num_collected_steps].reshape(-1)
-        b_actions = actions[:num_collected_steps].reshape((-1,) + env.single_action_space.shape)
-        b_dones = dones[:num_collected_steps].reshape(-1)  # TODO check if the pre-emption trick causes problems with the LSTM.
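-        # NOTE: Shape bookkeeping: with local_bs_per_env = T and num_envs_per_gpu = N, the
-        # rollout buffers are (T, N, ...); the reshapes above and below flatten them to
-        # (T*N, ...) so a single minibatch index addresses one transition.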
- b_advantages = advantages[:num_collected_steps].reshape(-1) - b_returns = returns[:num_collected_steps].reshape(-1) - b_values = values[:num_collected_steps].reshape(-1) - b_old_mus = old_mus[:num_collected_steps].reshape((-1,) + env.single_action_space.shape) - b_old_sigmas = old_sigmas[:num_collected_steps].reshape((-1,) + env.single_action_space.shape) - - # When the data was collected on the CPU, move it to GPU before training - if config.cpu_collect: - b_obs["bev_semantics"] = b_obs["bev_semantics"].to(device) - b_obs["measurements"] = b_obs["measurements"].to(device) - b_obs["value_measurements"] = b_obs["value_measurements"].to(device) - b_logprobs = b_logprobs.to(device) - b_actions = b_actions.to(device) - b_dones = b_dones.to(device) - b_advantages = b_advantages.to(device) - b_returns = b_returns.to(device) - b_values = b_values.to(device) - b_old_mus = b_old_mus.to(device) - b_old_sigmas = b_old_sigmas.to(device) - - # Aggregate returns to GPU 0 for logging and storing the best model. - # Gloo doesn't support AVG, so we implement it via sum / num returns - torch.distributed.all_reduce(total_returns, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(total_lengths, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(num_total_returns, op=torch.distributed.ReduceOp.SUM) - - # reward - if REWARD_LOGGING: - torch.distributed.all_reduce(reward_progress, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(reward_red_light, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(reward_collision, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(reward_off_road, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(reward_lane_distance, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(reward_too_fast, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(reward_off_route, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(reward_comfort, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(reward_ttc, op=torch.distributed.ReduceOp.SUM) - - if COMFORT_LOGGING: - torch.distributed.all_reduce(comfort_lon_acceleration, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(comfort_lat_acceleration, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(comfort_jerk_metric, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(comfort_lon_jerk_metric, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(comfort_yaw_accel, op=torch.distributed.ReduceOp.SUM) - torch.distributed.all_reduce(comfort_yaw_rate, op=torch.distributed.ReduceOp.SUM) - - if rank == 0: - num_total_returns_all_processes = torch.sum(num_total_returns) - # Only can log return if there was any episode that finished - if num_total_returns_all_processes > 0: - total_returns_all_processes = torch.sum(total_returns) - total_lengths_all_processes = torch.sum(total_lengths) - avg_return = total_returns_all_processes / num_total_returns_all_processes - avg_return = avg_return.item() - avg_length = total_lengths_all_processes / num_total_returns_all_processes - avg_length = avg_length.item() - - avg_returns.append(avg_return) - windowed_avg_return = sum(avg_returns) / len(avg_returns) - - writer.add_scalar("charts/episodic_return", avg_return, config.global_step) - writer.add_scalar( - "charts/windowed_avg_return", - windowed_avg_return, - config.global_step, - ) - writer.add_scalar("charts/episodic_length", avg_length, config.global_step) - - if 
REWARD_LOGGING: - reward_progress = torch.sum(reward_progress) / num_total_returns_all_processes - reward_red_light = torch.sum(reward_red_light) / num_total_returns_all_processes - reward_collision = torch.sum(reward_collision) / num_total_returns_all_processes - reward_off_road = torch.sum(reward_off_road) / num_total_returns_all_processes - reward_lane_distance = torch.sum(reward_lane_distance) / num_total_returns_all_processes - reward_too_fast = torch.sum(reward_too_fast) / num_total_returns_all_processes - reward_off_route = torch.sum(reward_off_route) / num_total_returns_all_processes - reward_comfort = torch.sum(reward_comfort) / num_total_returns_all_processes - reward_ttc = torch.sum(reward_ttc) / num_total_returns_all_processes - - writer.add_scalar("reward/progress", reward_progress.item(), config.global_step) - writer.add_scalar("reward/red_light", reward_red_light.item(), config.global_step) - writer.add_scalar("reward/collision", reward_collision.item(), config.global_step) - writer.add_scalar("reward/off_road", reward_off_road.item(), config.global_step) - writer.add_scalar( - "reward/lane_distance", - reward_lane_distance.item(), - config.global_step, - ) - writer.add_scalar("reward/too_fast", reward_too_fast.item(), config.global_step) - writer.add_scalar("reward/off_route", reward_off_route.item(), config.global_step) - writer.add_scalar("reward/comfort", reward_comfort.item(), config.global_step) - writer.add_scalar("reward/ttc", reward_ttc.item(), config.global_step) - - if COMFORT_LOGGING: - comfort_lon_acceleration = torch.sum(comfort_lon_acceleration) / num_total_returns_all_processes - comfort_lat_acceleration = torch.sum(comfort_lat_acceleration) / num_total_returns_all_processes - comfort_jerk_metric = torch.sum(comfort_jerk_metric) / num_total_returns_all_processes - comfort_lon_jerk_metric = torch.sum(comfort_lon_jerk_metric) / num_total_returns_all_processes - comfort_yaw_accel = torch.sum(comfort_yaw_accel) / num_total_returns_all_processes - comfort_yaw_rate = torch.sum(comfort_yaw_rate) / num_total_returns_all_processes - - writer.add_scalar( - "comfort/comfort_lon_acceleration", - comfort_lon_acceleration.item(), - config.global_step, - ) - writer.add_scalar( - "comfort/comfort_lat_acceleration", - comfort_lat_acceleration.item(), - config.global_step, - ) - writer.add_scalar( - "comfort/comfort_jerk_metric", - comfort_jerk_metric.item(), - config.global_step, - ) - writer.add_scalar( - "comfort/comfort_lon_jerk_metric", - comfort_lon_jerk_metric.item(), - config.global_step, - ) - writer.add_scalar( - "comfort/comfort_yaw_accel", - comfort_yaw_accel.item(), - config.global_step, - ) - writer.add_scalar( - "comfort/comfort_yaw_rate", - comfort_yaw_rate.item(), - config.global_step, - ) - - if windowed_avg_return >= config.max_training_score: - config.max_training_score = windowed_avg_return - # Same model could reach multiple high scores - if config.best_iteration != update: - save(agent, None, config, exp_folder, "model_best.pth", None) - config.best_iteration = update - - # Optimizing the policy and value network - if config.use_lstm: - assert cfg.num_envs_per_gpu % num_minibatches == 0 - assert not config.use_dd_ppo_preempt - assert not config.use_world_model_loss - - envsperbatch = cfg.num_envs_per_gpu // num_minibatches - envinds = np.arange(cfg.num_envs_per_gpu) - flatinds = np.arange(local_batch_size).reshape(local_bs_per_env, cfg.num_envs_per_gpu) - - b_inds_original = np.arange(num_collected_steps * cfg.num_envs_per_gpu) - - if 
config.use_dd_ppo_preempt: - b_inds_original = np.resize(b_inds_original, (local_batch_size,)) - - # if config.use_world_model_loss: - # b_inds_world_model_original = b_inds_original + b_wm_added_index[b_inds_original] # TODO - - clipfracs = [] - - t3.toc(msg=f"Rank:{rank}, Data pre-processing.") - t4.tic() - - if cfg.schedule_free: - optimizer.train() - - for latest_epoch in range(cfg.update_epochs): - approx_kl_divs = [] - if config.use_lstm: - np.random.shuffle(envinds) - else: - p = np.random.permutation(len(b_inds_original)) - b_inds = b_inds_original[p] - # if config.use_world_model_loss: - # b_inds_world_model = b_inds_world_model_original[p] - - total_steps = local_batch_size - step_size = local_minibatch_size - if config.use_lstm: - total_steps = cfg.num_envs_per_gpu - step_size = envsperbatch - - for start in range(0, total_steps, step_size): - if config.use_lstm: - end = start + envsperbatch - mbenvinds = envinds[start:end] - lstm_state = ( - initial_lstm_state[0][:, mbenvinds], - initial_lstm_state[1][:, mbenvinds], - ) - mb_inds = flatinds[:, mbenvinds].ravel() # be really careful about the index - else: - end = start + local_minibatch_size - mb_inds = b_inds[start:end] - lstm_state = None - - # if config.use_world_model_loss: - # mb_inds_world_model = b_inds_world_model[start:end] - if config.use_exploration_suggest: - b_exploration_suggests_sampled = b_exploration_suggests[mb_inds] - else: - b_exploration_suggests_sampled = None - b_obs_sampled = { - "bev_semantics": b_obs["bev_semantics"][mb_inds], - "measurements": b_obs["measurements"][mb_inds], - "value_measurements": b_obs["value_measurements"][mb_inds], - } - # Don't need action, so we don't unscale - ( - _, - newlogprob, - entropy, - newvalue, - exploration_loss, - _, - _, - distribution, - pred_sem, - pred_measure, - _, - ) = agent.forward( - b_obs_sampled, - actions=b_actions[mb_inds], - exploration_suggests=b_exploration_suggests_sampled, - lstm_state=lstm_state, - done=b_dones[mb_inds], - ) - logratio = newlogprob - b_logprobs[mb_inds] - ratio = logratio.exp() - - mb_advantages = b_advantages[mb_inds] - if cfg.norm_adv: - mb_advantages = (mb_advantages - mb_advantages.mean()) / (mb_advantages.std() + 1e-8) - - # Policy loss - pg_loss1 = -mb_advantages * ratio - pg_loss2 = -mb_advantages * torch.clamp(ratio, 1 - cfg.clip_coef, 1 + cfg.clip_coef) - pg_loss = torch.max(pg_loss1, pg_loss2).mean() - - # Value loss - if cfg.clip_vloss: - # Value clipping is not implemented with HL_Gauss loss - assert config.use_hl_gauss_value_loss is False - newvalue = newvalue.view(-1) - v_clipped = b_values[mb_inds] + torch.clamp( - newvalue - b_values[mb_inds], - -cfg.clip_coef, - cfg.clip_coef, - ) - v_loss_clipped = (v_clipped - b_returns[mb_inds]) ** 2 - v_loss_unclipped = (newvalue - b_returns[mb_inds]) ** 2 - v_loss_max = torch.max(v_loss_unclipped, v_loss_clipped) - v_loss = 0.5 * v_loss_max.mean() - else: - # if config.use_hl_gauss_value_loss: - # target_pdf = rl_u.hl_gaus_pdf(b_returns[mb_inds], config.hl_gauss_std, config.hl_gauss_vmin, - # config.hl_gauss_vmax, config.hl_gauss_bucket_size) - # v_loss = F.cross_entropy(newvalue, target_pdf) - if config.use_hl_gauss_value_loss: - pass - else: - newvalue = newvalue.view(-1) - v_loss = 0.5 * ((newvalue - b_returns[mb_inds]) ** 2).mean() - - # if config.use_world_model_loss: - # b_mask_sampled = b_world_model_mask[mb_inds_world_model] - # total_valid_items = torch.sum(b_mask_sampled) - # semantic_labels = image_to_class_labels(b_obs['bev_semantics'][mb_inds_world_model]) - # 
semantic_loss = F.cross_entropy(pred_sem, semantic_labels, reduction='none') - # semantic_loss = torch.mean(semantic_loss, dim=(1, 2)) * b_mask_sampled - # semantic_loss = torch.sum(semantic_loss) / total_valid_items - # measure_loss = F.l1_loss(pred_measure, b_obs['measurements'][mb_inds_world_model], reduction='none') - # measure_loss = torch.mean(measure_loss, dim=1) * b_mask_sampled - # measure_loss = torch.sum(measure_loss) / total_valid_items - # world_model_loss = 0.5 * semantic_loss + 0.5 * measure_loss - - entropy_loss = entropy.mean() - loss = pg_loss - config.ent_coef * entropy_loss + v_loss * config.vf_coef - if config.use_exploration_suggest: - loss = loss + cfg.expl_coef * exploration_loss - - # if config.use_world_model_loss: - # loss = loss + config.world_model_loss_weight * world_model_loss - - optimizer.zero_grad() - loss.backward() - nn.utils.clip_grad_norm_(agent.parameters(), cfg.max_grad_norm) - optimizer.step() - - old_mu_sampled = b_old_mus[mb_inds] - old_sigmas_sampled = b_old_sigmas[mb_inds] - with torch.no_grad(): - # calculate approx_kl http://joschu.net/blog/kl-approx.html - old_approx_kl = (-logratio).mean() - # approx_kl = ((ratio - 1) - logratio).mean() - - # We compute approx KL according to roach - old_distribution = agent.module.action_dist.proba_distribution(old_mu_sampled, old_sigmas_sampled) - kl_div = torch.distributions.kl_divergence(old_distribution.distribution, distribution) - approx_kl_divs.append(kl_div.mean()) - - clipfracs += [((ratio - 1.0).abs() > cfg.clip_coef).float().mean()] - - approx_kl = torch.mean(torch.stack(approx_kl_divs)) - # Gloo doesn't support AVG, so we implement it via sum / world size - torch.distributed.all_reduce(approx_kl, op=torch.distributed.ReduceOp.SUM) - approx_kl = approx_kl / world_size - if cfg.target_kl is not None and config.lr_schedule == "kl": - if approx_kl > cfg.target_kl: - if config.lr_schedule_step is not None: - config.kl_early_stop += 1 - if config.kl_early_stop >= config.lr_schedule_step: - config.current_learning_rate *= 0.5 - config.kl_early_stop = 0 - - break - - if cfg.schedule_free: - optimizer.eval() - - del b_obs # Remove large array - t4.toc(msg=f"Rank:{rank}, Training.") - t5.tic() - - config.latest_iteration = update - # Avg value to log over all Environments - # Sync with 3 envs takes 4 ms. 
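-        # NOTE: Every all_reduce below is a blocking collective, so all ranks must execute the
-        # same sequence even though only rank 0 writes the logs; skipping one on a single rank
-        # would deadlock the job.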
- # Gloo doesn't support AVG, so we implement it via sum / world size - torch.distributed.all_reduce(v_loss, op=torch.distributed.ReduceOp.SUM) - v_loss = v_loss / world_size - - torch.distributed.all_reduce(pg_loss, op=torch.distributed.ReduceOp.SUM) - pg_loss = pg_loss / world_size - - torch.distributed.all_reduce(entropy_loss, op=torch.distributed.ReduceOp.SUM) - entropy_loss = entropy_loss / world_size - - if config.use_exploration_suggest: - torch.distributed.all_reduce(exploration_loss, op=torch.distributed.ReduceOp.SUM) - exploration_loss = exploration_loss / world_size - - # if config.use_world_model_loss: - # torch.distributed.all_reduce(world_model_loss, op=torch.distributed.ReduceOp.SUM) - # world_model_loss = world_model_loss / world_size - - torch.distributed.all_reduce(old_approx_kl, op=torch.distributed.ReduceOp.SUM) - old_approx_kl = old_approx_kl / world_size - - torch.distributed.all_reduce(approx_kl, op=torch.distributed.ReduceOp.SUM) - approx_kl = approx_kl / world_size - - b_values = b_values[b_inds_original] - torch.distributed.all_reduce(b_values, op=torch.distributed.ReduceOp.SUM) - b_values = b_values / world_size - - b_returns = b_returns[b_inds_original] - torch.distributed.all_reduce(b_returns, op=torch.distributed.ReduceOp.SUM) - b_returns = b_returns / world_size - - b_advantages = b_advantages[b_inds_original] - torch.distributed.all_reduce(b_advantages, op=torch.distributed.ReduceOp.SUM) - b_advantages = b_advantages / world_size - - clipfracs = torch.mean(torch.stack(clipfracs)) - torch.distributed.all_reduce(clipfracs, op=torch.distributed.ReduceOp.SUM) - clipfracs = clipfracs / world_size - - if rank == 0: - save( - agent, - optimizer, - config, - exp_folder, - f"model_latest_{update:09d}.pth", - f"optimizer_latest_{update:09d}.pth", - ) - frac = update / num_updates - if config.current_eval_interval_idx < len(config.eval_intervals): - if frac >= config.eval_intervals[config.current_eval_interval_idx]: - save( - agent, - None, - config, - exp_folder, - f"model_eval_{update:09d}.pth", - None, - ) - config.current_eval_interval_idx += 1 - - # Cleanup file from last epoch - for file in os.listdir(exp_folder): - if file.startswith("model_latest_") and file.endswith(".pth"): - if file != f"model_latest_{update:09d}.pth": - old_model_file = os.path.join(exp_folder, file) - if os.path.isfile(old_model_file): - os.remove(old_model_file) - if file.startswith("optimizer_latest_") and file.endswith(".pth"): - if file != f"optimizer_latest_{update:09d}.pth": - old_model_file = os.path.join(exp_folder, file) - if os.path.isfile(old_model_file): - os.remove(old_model_file) - - y_pred, y_true = b_values.cpu().numpy(), b_returns.cpu().numpy() - var_y = np.var(y_true) - explained_var = np.nan if var_y == 0 else 1 - np.var(y_true - y_pred) / var_y - - # TRY NOT TO MODIFY: record rewards for plotting purposes - writer.add_scalar( - "charts/learning_rate", - optimizer.param_groups[0]["lr"], - config.global_step, - ) - writer.add_scalar("losses/value_loss", v_loss.item(), config.global_step) - writer.add_scalar("losses/policy_loss", pg_loss.item(), config.global_step) - writer.add_scalar("losses/entropy", entropy_loss.item(), config.global_step) - if config.use_exploration_suggest: - writer.add_scalar("losses/exploration", exploration_loss.item(), config.global_step) - # if config.use_world_model_loss: - # writer.add_scalar("losses/world_model", world_model_loss.item(), config.global_step) - writer.add_scalar("losses/old_approx_kl", old_approx_kl.item(), config.global_step) - 
writer.add_scalar("losses/approx_kl", approx_kl.item(), config.global_step) - writer.add_scalar("losses/clipfrac", clipfracs.item(), config.global_step) - writer.add_scalar("losses/explained_variance", explained_var, config.global_step) - writer.add_scalar("losses/latest_epoch", latest_epoch, config.global_step) - writer.add_scalar("charts/discounted_returns", b_returns.mean().item(), config.global_step) - writer.add_scalar("charts/advantages", b_advantages.mean().item(), config.global_step) - # Adjusted so it doesn't count the first epoch which is slower than the rest (converges faster) - writer.add_scalar( - "charts/SPS", - int(local_processed_samples / (time.time() - start_time)), - config.global_step, - ) - writer.add_scalar("charts/restart", 0, config.global_step) - - print(f"SPS: {int(local_processed_samples / (time.time() - start_time))}") - - t5.toc(msg=f"Rank:{rank}, Logging") - - env.close() - if rank == 0: - writer.close() - - save( - agent, - optimizer, - config, - exp_folder, - "model_final.pth", - "optimizer_final.pth", - ) - wandb.finish(exit_code=0, quiet=True) - logger.info("Done training.") diff --git a/d123/simulation/history/simulation_history.py b/d123/simulation/history/simulation_history.py deleted file mode 100644 index d40b7dc6..00000000 --- a/d123/simulation/history/simulation_history.py +++ /dev/null @@ -1,95 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from typing import List, Optional - -from d123.common.datatypes.recording.detection_recording import DetectionRecording -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2 -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.simulation.planning.planner_output.abstract_planner_output import AbstractPlannerOutput -from d123.simulation.time_controller.simulation_iteration import SimulationIteration - -# from nuplan.common.actor_state.ego_state import EgoState -# from nuplan.common.actor_state.state_representation import StateSE2 -# from nuplan.common.maps.abstract_map import AbstractMap -# from nuplan.common.maps.maps_datatypes import TrafficLightStatusData -# from nuplan.planning.simulation.observation.observation_type import Observation -# from nuplan.planning.simulation.simulation_time_controller.simulation_iteration import SimulationIteration -# from nuplan.planning.simulation.trajectory.abstract_trajectory import AbstractTrajectory - - -@dataclass(frozen=True) -class Simulation2DHistorySample: - """ - Single SimulationHistory sample point. - """ - - iteration: SimulationIteration - ego_state: EgoStateSE2 - planner_output: AbstractPlannerOutput - detections: DetectionRecording - - -class Simulation2DHistory: - """ - Simulation history including a sequence of simulation states. - """ - - def __init__(self, data: Optional[List[Simulation2DHistorySample]] = None) -> None: - """ - Construct the history - :param map_api: abstract map api for accessing the maps - :param data: A list of simulation data. 
- """ - - self.data: List[Simulation2DHistorySample] = data if data is not None else list() - self.scene: Optional[AbstractScene] = None - - def add_sample(self, sample: Simulation2DHistorySample) -> None: - """ - Add a sample to history - :param sample: one snapshot of a simulation - """ - self.data.append(sample) - - def last(self) -> Simulation2DHistorySample: - """ - :return: last sample from history, or raise if empty - """ - if not self.data: - raise RuntimeError("Data is empty!") - return self.data[-1] - - def reset(self, scene: AbstractScene) -> None: - """ - Clear the stored data - """ - self.data.clear() - self.scene = scene - - def __len__(self) -> int: - """ - Return the number of history samples as len(). - """ - return len(self.data) - - @property - def extract_ego_state(self) -> List[EgoStateSE2]: - """ - Extract ego states in simulation history. - :return An List of ego_states. - """ - return [sample.ego_state for sample in self.data] - - @property - def interval_seconds(self) -> float: - """ - Return the interval between SimulationHistorySamples. - :return The interval in seconds. - """ - if not self.data or len(self.data) < 1: - raise ValueError("Data is empty!") - elif len(self.data) < 2: - raise ValueError("Can't calculate the interval of a single-iteration simulation.") - - return float(self.data[1].iteration.time_s - self.data[0].iteration.time_s) # float cast is for mypy diff --git a/d123/simulation/history/simulation_history_buffer.py b/d123/simulation/history/simulation_history_buffer.py deleted file mode 100644 index 47a17ac5..00000000 --- a/d123/simulation/history/simulation_history_buffer.py +++ /dev/null @@ -1,194 +0,0 @@ -from __future__ import annotations - -from collections import deque -from typing import Deque, List, Optional, Tuple, Type - -from d123.common.datatypes.recording.abstract_recording import Recording -from d123.common.datatypes.recording.detection_recording import DetectionRecording -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2 -from d123.dataset.scene.abstract_scene import AbstractScene - - -class Simulation2DHistoryBuffer: - """ - This class is used to keep a rolling buffer of a given size. The buffer is a first-in first-out queue. Hence, the - oldest samples in the buffer are continuously replaced as new samples are appended. - """ - - def __init__( - self, - ego_state_buffer: Deque[EgoStateSE2], - recording_buffer: Deque[Recording], - sample_interval: Optional[float] = None, - ): - """ - Constructs a SimulationHistoryBuffer - :param ego_state_buffer: Past ego state trajectory including the state. - at the current time step [t_-N, ..., t_-1, t_0] - :param observations_buffer: Past observations including the observation. - at the current time step [t_-N, ..., t_-1, t_0]. - :param sample_interval: [s] the time interval between each sample, if given - """ - if not ego_state_buffer or not recording_buffer: - raise ValueError("Ego and observation buffers cannot be empty!") - - if len(ego_state_buffer) != len(recording_buffer): - raise ValueError( - "Ego and observations buffer is " - f"not the same length {len(ego_state_buffer) != len(recording_buffer)}!" 
- ) - - self._ego_state_buffer = ego_state_buffer - self._recording_buffer = recording_buffer - self._sample_interval = sample_interval - - @property - def ego_state_buffer(self) -> Deque[EgoStateSE2]: - """ - :return: current ego state buffer - """ - return self._ego_state_buffer - - @property - def recording_buffer(self) -> Deque[Recording]: - """ - :return: current observation buffer - """ - return self._recording_buffer - - @property - def size(self) -> int: - """ - :return: Size of the buffer. - """ - return len(self.ego_states) - - @property - def duration(self) -> Optional[float]: - """ - :return: [s] Duration of the buffer. - """ - return self.sample_interval * self.size if self.sample_interval else None - - @property - def current_state(self) -> Tuple[EgoStateSE2, Recording]: - """ - :return: current state of AV vehicle and its observations - """ - return self.ego_states[-1], self.recording_buffer[-1] - - @property - def sample_interval(self) -> Optional[float]: - """ - :return: the sample interval - """ - return self._sample_interval - - @sample_interval.setter - def sample_interval(self, sample_interval: float) -> None: - """ - Sets the sample interval of the buffer, raises if the sample interval was not None - :param sample_interval: The sample interval of the buffer - """ - assert self._sample_interval is None, "Can't overwrite a pre-existing sample-interval!" - self._sample_interval = sample_interval - - @property - def ego_states(self) -> List[EgoStateSE2]: - """ - :return: the ego state buffer in increasing temporal order where the last sample is the more recent sample - [t_-N, ..., t_-1, t_0] - """ - return list(self._ego_state_buffer) - - @property - def recordings(self) -> List[Recording]: - """ - :return: the recording buffer in increasing temporal order where the last sample is the more recent sample - [t_-N, ..., t_-1, t_0] - """ - return list(self._recording_buffer) - - def append(self, ego_state: EgoStateSE2, recording: Recording) -> None: - """ - Adds new samples to the buffers - :param ego_state: an ego state - :param recording: a recording - """ - self._ego_state_buffer.append(ego_state) - self._recording_buffer.append(recording) - - def extend(self, ego_states: List[EgoStateSE2], recordings: List[Recording]) -> None: - """ - Adds new samples to the buffers - :param ego_states: an ego states list - :param recordings: a recordings list - """ - if len(ego_states) != len(recordings): - raise ValueError(f"Ego and recordings are not the same length {len(ego_states) != len(recordings)}!") - self._ego_state_buffer.extend(ego_states) - self._recording_buffer.extend(recordings) - - def __len__(self) -> int: - """ - :return: the length of the buffer - @raise AssertionError if the length of each buffers are not the same - """ - return len(self._ego_state_buffer) - - @classmethod - def initialize_from_list( - cls, - buffer_size: int, - ego_states: List[EgoStateSE2], - recordings: List[Recording], - sample_interval: Optional[float] = None, - ) -> Simulation2DHistoryBuffer: - """ - Create history buffer from lists - :param buffer_size: size of buffer - :param ego_states: list of ego states - :param observations: list of observations - :param sample_interval: [s] the time interval between each sample, if given - :return: SimulationHistoryBuffer - """ - ego_state_buffer: Deque[EgoStateSE2] = deque(ego_states[-buffer_size:], maxlen=buffer_size) - recording_buffer: Deque[Recording] = deque(recordings[-buffer_size:], maxlen=buffer_size) - - return cls( - 
ego_state_buffer=ego_state_buffer, - recording_buffer=recording_buffer, - sample_interval=sample_interval, - ) - - @staticmethod - def initialize_from_scene( - buffer_size: int, scene: AbstractScene, recording_type: Type[Recording] - ) -> Simulation2DHistoryBuffer: - """ - Initializes ego_state_buffer and recording_buffer from scene - :param buffer_size: size of the buffer - :param scene: Simulation scene - :param recording_type: Recording type used for the simulation - """ - - if recording_type == DetectionRecording: - observation_getter = scene.get_detection_recording_at_iteration - # elif recording_type == SensorRecording: - # observation_getter = scenario.get_past_sensors - else: - raise ValueError(f"No matching recording type for {recording_type} for history!") - - history_iterations = [-iteration for iteration in range(1, scene.get_number_of_history_iterations() + 1)] - - past_observation = list(observation_getter(iteration) for iteration in history_iterations) - past_ego_states = list( - scene.get_ego_state_at_iteration(iteration).ego_state_se2 for iteration in history_iterations - ) - - return Simulation2DHistoryBuffer.initialize_from_list( - buffer_size=buffer_size, - ego_states=past_ego_states, - recordings=past_observation, - sample_interval=scene.log_metadata.timestep_seconds, - ) diff --git a/d123/simulation/metrics/sim_agents/histogram_metric.py b/d123/simulation/metrics/sim_agents/histogram_metric.py deleted file mode 100644 index cc4aeaef..00000000 --- a/d123/simulation/metrics/sim_agents/histogram_metric.py +++ /dev/null @@ -1,218 +0,0 @@ -from typing import Dict, List, Optional, Tuple - -import matplotlib.pyplot as plt -import numpy as np -import numpy.typing as npt - - -class HistogramIntersectionMetric: - def __init__(self, min_val: float, max_val: float, n_bins: int, name: str, weight: float = 1.0): - self.min_val = min_val - self.max_val = max_val - self._n_bins = n_bins - self._bin_edges = np.linspace(min_val, max_val, n_bins + 1) - self._weight = weight - self._name = name - - self._aggregate_objects: bool = False - self._independent_timesteps: bool = True - - def _create_histogram(self, data: npt.NDArray[np.float64], normalize: bool = True) -> np.ndarray: - hist, _ = np.histogram(data, bins=self._bin_edges) - - if normalize: - # Normalize to create probability distribution - hist = hist.astype(float) - if hist.sum() > 0: - hist = hist / hist.sum() - - return hist - - def _calculate_intersection(self, dist1: npt.NDArray[np.float64], dist2: npt.NDArray[np.float64]) -> float: - hist1 = self._create_histogram(dist1, normalize=True) - hist2 = self._create_histogram(dist2, normalize=True) - intersection = np.sum(np.minimum(hist1, hist2)) - return intersection - - def _calculate_bhattacharyya(self, dist1: npt.NDArray[np.int_], dist2: npt.NDArray[np.int_]) -> float: - hist1 = self._create_histogram(dist1, normalize=True) - hist2 = self._create_histogram(dist2, normalize=True) - bhattacharyya_coeff = np.sum(np.sqrt(hist1 * hist2)) - return bhattacharyya_coeff - - def compute( - self, dist1: npt.NDArray[np.float64], dist2: npt.NDArray[np.float64], log_mask: npt.NDArray[np.bool_] - ) -> Dict[str, float]: - assert dist1.shape[0] == dist2.shape[0], "Distributions must have the same number of objects" - assert dist1.ndim == 2 - assert dist2.ndim == 2 - assert log_mask.ndim == 2 - - if len(dist1) == 0: - return { - f"{self._name}_intersection": 1.0, - # f"{self._name}_bhattacharyya": 1.0, - f"{self._name}_score": self._weight * 1.0, - } - - intersection = 0.0 - bhattacharyya = 
0.0

-        if self._independent_timesteps:
-            # (n_objects, n_rollouts * n_steps)
-            for obj_dist1, obj_dist2, obj_mask in zip(dist1, dist2, log_mask):
-                intersection += self._calculate_intersection(obj_dist1[obj_mask], obj_dist2[obj_mask])
-                bhattacharyya += self._calculate_bhattacharyya(obj_dist1[obj_mask], obj_dist2[obj_mask])
-            intersection /= dist1.shape[0]  # Average intersection over all objects
-            bhattacharyya /= dist1.shape[0]  # Average Bhattacharyya coefficient over all objects
-
-        else:
-            raise NotImplementedError
-
-        return {
-            f"{self._name}_intersection": float(intersection),
-            # f"{self._name}_bhattacharyya": float(bhattacharyya),
-            f"{self._name}_score": self._weight * float(intersection),
-        }
-
-    def plot_histograms(
-        self,
-        dist1: npt.NDArray[np.float64],
-        dist2: npt.NDArray[np.float64],
-        mask: Optional[npt.NDArray[np.bool_]] = None,
-        labels: Optional[Tuple[str, str]] = None,
-        title: str = "Histogram Comparison",
-    ) -> None:
-        def _apply_mask(
-            data: npt.NDArray[np.float64], mask: Optional[npt.NDArray[np.bool_]]
-        ) -> npt.NDArray[np.float64]:
-            if mask is None:
-                return data.flatten()
-            flat_data = []
-            for obj_data, obj_mask in zip(data, mask):
-                flat_data.extend(obj_data[obj_mask].tolist())
-            return np.array(flat_data)
-
-        hist1 = self._create_histogram(_apply_mask(dist1, mask), normalize=True)
-        hist2 = self._create_histogram(_apply_mask(dist2, mask), normalize=True)
-
-        bin_centers = (self._bin_edges[:-1] + self._bin_edges[1:]) / 2
-        width = (self.max_val - self.min_val) / self._n_bins
-
-        plt.figure(figsize=(10, 6))
-
-        if labels is None:
-            labels = ("Distribution 1", "Distribution 2")
-
-        plt.bar(bin_centers, hist1, width, alpha=0.5, label=labels[0], color="blue")
-        plt.bar(bin_centers, hist2, width, alpha=0.5, label=labels[1], color="red")
-
-        plt.xlabel("Value")
-        plt.ylabel("Probability Density")
-        plt.title(title)
-        plt.legend()
-        plt.grid(True, alpha=0.3)
-        plt.show()
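For reference, the intersection score used above reduces to binning both samples on shared edges, normalizing, and summing the element-wise minimum: identical distributions score 1.0, disjoint ones 0.0. A standalone sketch (values chosen arbitrarily; the edges match the speed metric configured later):

import numpy as np

edges = np.linspace(0.0, 25.0, 11)  # shared bin edges
h1 = np.histogram([1.0, 2.0, 2.5], bins=edges)[0].astype(float)
h2 = np.histogram([2.0, 3.0, 24.0], bins=edges)[0].astype(float)
h1, h2 = h1 / h1.sum(), h2 / h2.sum()
intersection = float(np.minimum(h1, h2).sum())  # 2/3 of the mass overlaps here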
-
-
-class BinaryHistogramIntersectionMetric:
-    def __init__(self, name: str, weight: float = 1.0):
-        self._name = name
-        self._weight = weight
-        self.aggregate_objects: bool = False
-        self.independent_timesteps: bool = True
-
-    def _create_histogram(self, data: npt.NDArray[np.int_], normalize: bool = True) -> np.ndarray:
-        # Binary histogram: bins are [0, 1]
-        hist = np.zeros(2, dtype=float)
-        data = np.asarray(data)
-        hist[0] = np.sum(data == 0)
-        hist[1] = np.sum(data == 1)
-        if normalize and hist.sum() > 0:
-            hist = hist / hist.sum()
-        return hist
-
-    def _calculate_intersection(self, dist1: npt.NDArray[np.int_], dist2: npt.NDArray[np.int_]) -> float:
-        hist1 = self._create_histogram(dist1, normalize=True)
-        hist2 = self._create_histogram(dist2, normalize=True)
-        intersection = np.sum(np.minimum(hist1, hist2))
-        return intersection
-
-    def _calculate_bhattacharyya(self, dist1: npt.NDArray[np.int_], dist2: npt.NDArray[np.int_]) -> float:
-        hist1 = self._create_histogram(dist1, normalize=True)
-        hist2 = self._create_histogram(dist2, normalize=True)
-        bhattacharyya_coeff = np.sum(np.sqrt(hist1 * hist2))
-        return bhattacharyya_coeff
-
-    def compute(
-        self, dist1: npt.NDArray[np.int_], dist2: npt.NDArray[np.int_], log_mask: npt.NDArray[np.bool_]
-    ) -> Dict[str, float]:
-        assert dist1.shape[0] == dist2.shape[0], "Distributions must have the same number of objects"
-        assert dist1.ndim == 2
-        assert dist2.ndim == 2
-        assert log_mask.ndim == 2
-
-        if len(dist1) == 0:
-            return {
-                f"{self._name}_intersection": 1.0,
-                # f"{self._name}_bhattacharyya": 1.0,
-                f"{self._name}_score": self._weight * 1.0,
-            }
-
-        intersection = 0.0
-        bhattacharyya = 0.0
-
-        if self.independent_timesteps:
-            for obj_dist1, obj_dist2, obj_mask in zip(dist1, dist2, log_mask):
-                intersection += self._calculate_intersection(obj_dist1[obj_mask], obj_dist2[obj_mask])
-                bhattacharyya += self._calculate_bhattacharyya(obj_dist1[obj_mask], obj_dist2[obj_mask])
-            intersection /= dist1.shape[0]
-            bhattacharyya /= dist1.shape[0]
-        else:
-            raise NotImplementedError
-
-        return {
-            f"{self._name}_intersection": float(intersection),
-            # f"{self._name}_bhattacharyya": float(bhattacharyya),
-            f"{self._name}_score": self._weight * float(intersection),
-        }
-
-    def plot_histograms(
-        self,
-        dist1: npt.NDArray[np.int_],
-        dist2: npt.NDArray[np.int_],
-        mask: Optional[npt.NDArray[np.bool_]] = None,
-        labels: Optional[Tuple[str, str]] = None,
-        title: str = "Binary Histogram Comparison",
-    ) -> None:
-        def _apply_mask(data: npt.NDArray[np.int_], mask: Optional[npt.NDArray[np.bool_]]) -> npt.NDArray[np.int_]:
-            if mask is None:
-                return data.flatten()
-            flat_data = []
-            for obj_data, obj_mask in zip(data, mask):
-                flat_data.extend(obj_data[obj_mask].tolist())
-            return np.array(flat_data)
-
-        hist1 = self._create_histogram(_apply_mask(dist1, mask), normalize=True)
-        hist2 = self._create_histogram(_apply_mask(dist2, mask), normalize=True)
-
-        bin_centers = np.array([0, 1])
-        width = 0.4
-
-        plt.figure(figsize=(6, 4))
-
-        if labels is None:
-            labels = ("Distribution 1", "Distribution 2")
-
-        plt.bar(bin_centers - width / 2, hist1, width, alpha=0.5, label=labels[0], color="blue")
-        plt.bar(bin_centers + width / 2, hist2, width, alpha=0.5, label=labels[1], color="red")
-
-        plt.xlabel("Value")
-        plt.ylabel("Probability")
-        plt.title(title)
-        plt.xticks([0, 1])
-        plt.legend()
-        plt.grid(True, alpha=0.3)
-        plt.show()
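The next deleted file derives the collision feature by asking an occupancy map which boxes a polygon intersects; since the query presumably also returns the agent's own box, a collision is flagged only when more than one token comes back. A rough equivalent using shapely 2.0's STRtree as a stand-in for the project's occupancy_map, whose exact API the diff does not show:

from shapely import STRtree, box

polygons = [box(0, 0, 2, 2), box(1, 1, 3, 3), box(10, 10, 11, 11)]
tree = STRtree(polygons)
hits = tree.query(polygons[0], predicate="intersects")  # indices, incl. the query box itself
in_collision = len(hits) > 1  # True: polygons[0] also overlaps polygons[1]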
- """ - assert agent_array.ndim == 3 - assert agent_array.shape[-1] == len(BoundingBoxSE2Index) - assert agent_array.shape[1] == len(box_detections_list) - n_objects, n_iterations = agent_array.shape[:2] - collision_feature = np.zeros((n_objects, n_iterations), dtype=np.bool_) - - agent_polygon_array = bbse2_array_to_polygon_array(agent_array) - for iteration, box_detections in enumerate(box_detections_list): - occupancy_map = box_detections.occupancy_map - for agent_idx in range(n_objects): - agent_polygon = agent_polygon_array[agent_idx, iteration] - intersecting_tokens = occupancy_map.intersects(agent_polygon) - collision_feature[agent_idx, iteration] = len(intersecting_tokens) > 1 - - return collision_feature - - -def _get_object_distance_feature( - agent_array: npt.NDArray[np.float64], - agents_mask: npt.NDArray[np.bool], - box_detections_list: List[BoxDetectionWrapper], -) -> npt.NDArray[np.float64]: - """ - Extracts the collision feature from the agent array. - :param agent_array: The agent array containing bounding box information. - :return: A boolean array indicating collisions. - """ - assert agent_array.ndim == 3 - assert agent_array.shape[-1] == len(BoundingBoxSE2Index) - assert agent_array.shape[1] == len(box_detections_list) - n_objects, n_iterations = agent_array.shape[:2] - object_distance_feature = np.zeros((n_objects, n_iterations), dtype=np.float64) - - agent_polygon_array = bbse2_array_to_polygon_array(agent_array) - for iteration, box_detections in enumerate(box_detections_list): - if agents_mask[:, iteration].any(): - - occupancy_map = box_detections.occupancy_map - _, distances = occupancy_map.query_nearest( - agent_polygon_array[agents_mask[:, iteration], iteration], - exclusive=True, - return_distance=True, - all_matches=False, - ) - if len(distances) == 0: - distances = np.full((n_objects,), MAX_OBJECT_DISTANCE, dtype=np.float64) - - if len(agent_polygon_array[agents_mask[:, iteration]]) == 1: - distances = min(distances.min(), MAX_OBJECT_DISTANCE) - - object_distance_feature[agents_mask[:, iteration], iteration] = distances - - object_distance_feature = np.clip(object_distance_feature, 0.0, MAX_OBJECT_DISTANCE) - - return object_distance_feature diff --git a/d123/simulation/metrics/sim_agents/kinematics.py b/d123/simulation/metrics/sim_agents/kinematics.py deleted file mode 100644 index fb599db0..00000000 --- a/d123/simulation/metrics/sim_agents/kinematics.py +++ /dev/null @@ -1,165 +0,0 @@ -import numpy as np -import numpy.typing as npt -from scipy.signal import savgol_filter - -from d123.common.geometry.bounding_box.bounding_box_index import BoundingBoxSE2Index - -SECONDS_PER_ITERATION = 0.1 - - -def _masked_diff(y: npt.NDArray[np.float64], mask: npt.NDArray[np.bool], axis: int = 1) -> npt.NDArray[np.float64]: - """ - Computes the difference between successive elements of y, applying the mask. - :param y: The input array. - :param mask: A boolean mask indicating valid elements. - :return: An array of differences with the same shape as y. - """ - - diff = np.zeros_like(y, dtype=np.float64) - diff[:, 1:] = np.diff(y, axis=axis) - diff[:, 0] = diff[:, 1] - diff[~mask] = 0.0 - - return diff - - -def _get_linear_speed_from_agents_array( - agents_array: npt.NDArray[np.float64], mask: npt.NDArray[np.bool] -) -> npt.NDArray[np.float64]: - """ - Extracts the linear speed from the agents array. - :param agents_array: The agents array containing bounding box data. - :return: An array of linear speeds. 
- """ - assert agents_array.ndim == 3 - assert agents_array.shape[-1] == len(BoundingBoxSE2Index) - - n_agents, n_iterations = agents_array.shape[:2] - linear_speed = np.zeros((n_agents, n_iterations), dtype=np.float64) - linear_speed[:, 1:] = ( - np.linalg.norm(np.diff(agents_array[:, :, BoundingBoxSE2Index.XY], axis=1), axis=-1) / SECONDS_PER_ITERATION - ) - linear_speed[:, 0] = linear_speed[:, 1] - linear_speed[~mask] = 0.0 - - return linear_speed - - -def _get_linear_acceleration_from_agents_array( - agents_array: npt.NDArray[np.float64], mask: npt.NDArray[np.bool] -) -> npt.NDArray[np.float64]: - """ - Extracts the linear acceleration from the agents array. - :param agents_array: The agents array containing bounding box data. - :return: An array of linear accelerations. - """ - assert agents_array.ndim == 3 - assert agents_array.shape[-1] == len(BoundingBoxSE2Index) - - n_agents, n_iterations = agents_array.shape[:2] - linear_acceleration = np.zeros((n_agents, n_iterations), dtype=np.float64) - - linear_speed = _get_linear_speed_from_agents_array(agents_array, mask) - linear_acceleration[:, 1:] = np.diff(linear_speed, axis=1) / SECONDS_PER_ITERATION - linear_acceleration[:, 0] = linear_acceleration[:, 1] - linear_acceleration[~mask] = 0.0 - - return linear_acceleration - - -def _get_yaw_rate_from_agents_array( - agents_array: npt.NDArray[np.float64], mask: npt.NDArray[np.bool] -) -> npt.NDArray[np.float64]: - """ - Extracts the yaw rate from the agents array. - :param agents_array: The agents array containing bounding box data. - :param mask: A boolean mask indicating valid elements. - :return: An array of yaw rates. - """ - assert agents_array.ndim == 3 - assert agents_array.shape[-1] == len(BoundingBoxSE2Index) - - n_agents, n_iterations = agents_array.shape[:2] - headings = agents_array[:, :, BoundingBoxSE2Index.YAW] - heading_rate = _phase_unwrap(_masked_diff(headings, mask, axis=1)) / SECONDS_PER_ITERATION - return heading_rate - - -def _get_yaw_acceleration_from_agents_array( - agents_array: npt.NDArray[np.float64], mask: npt.NDArray[np.bool] -) -> npt.NDArray[np.float64]: - assert agents_array.ndim == 3 - assert agents_array.shape[-1] == len(BoundingBoxSE2Index) - - n_agents, n_iterations = agents_array.shape[:2] - yaw_rate = _get_yaw_rate_from_agents_array(agents_array, mask) - yaw_acceleration = np.zeros((n_agents, n_iterations), dtype=np.float64) - yaw_acceleration[:, 1:] = np.diff(yaw_rate, axis=1) / SECONDS_PER_ITERATION - yaw_acceleration[:, 0] = yaw_acceleration[:, 1] - yaw_acceleration[~mask] = 0.0 - return yaw_acceleration - - -def _phase_unwrap(headings: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: - """ - Returns an array of heading angles equal mod 2 pi to the input heading angles, - and such that the difference between successive output angles is less than or - equal to pi radians in absolute value - :param headings: An array of headings (radians) - :return The phase-unwrapped equivalent headings. - """ - # There are some jumps in the heading (e.g. from -np.pi to +np.pi) which causes approximation of yaw to be very large. - # We want unwrapped[j] = headings[j] - 2*pi*adjustments[j] for some integer-valued adjustments making the absolute value of - # unwrapped[j+1] - unwrapped[j] at most pi: - # -pi <= headings[j+1] - headings[j] - 2*pi*(adjustments[j+1] - adjustments[j]) <= pi - # -1/2 <= (headings[j+1] - headings[j])/(2*pi) - (adjustments[j+1] - adjustments[j]) <= 1/2 - # So adjustments[j+1] - adjustments[j] = round((headings[j+1] - headings[j]) / (2*pi)). 
-    two_pi = 2.0 * np.pi
-    adjustments = np.zeros_like(headings)
-    adjustments[..., 1:] = np.cumsum(np.round(np.diff(headings, axis=-1) / two_pi), axis=-1)
-    unwrapped = headings - two_pi * adjustments
-    return unwrapped
-
-
-def _approximate_derivatives(
-    y: npt.NDArray[np.float64],
-    x: npt.NDArray[np.float64],
-    window_length: int = 5,
-    poly_order: int = 2,
-    deriv_order: int = 1,
-    axis: int = -1,
-) -> npt.NDArray[np.float32]:
-    """
-    Given two equal-length sequences y and x, compute an approximation to the n-th
-    derivative of some function interpolating the (x, y) data points, and return its
-    values at the x's. We assume the x's are increasing and equally-spaced.
-    :param y: The dependent variable (say of length n)
-    :param x: The independent variable (must have the same length n). Must be strictly
-        increasing and equally-spaced.
-    :param window_length: The window length (default 5) of the Savitzky-Golay filter used;
-        must be odd and at least 3. Only the mean spacing of the x's is used, so they
-        should be equally-spaced.
-    :param poly_order: The degree (default 2) of the filter polynomial used. Must
-        be less than the window_length.
-    :param deriv_order: The order of derivative to compute (default 1)
-    :param axis: The axis of the array x along which the filter is to be applied. Default is -1.
-    :return: Derivatives.
-    """
-    window_length = min(window_length, len(x))
-
-    if not (poly_order < window_length):
-        raise ValueError(f"poly_order ({poly_order}) must be smaller than window_length ({window_length})!")
-
-    dx = np.diff(x, axis=-1)
-    if not (dx > 0).all():
-        raise RuntimeError("x is not monotonically increasing!")
-
-    dx = dx.mean()
-    derivative: npt.NDArray[np.float32] = savgol_filter(
-        y,
-        polyorder=poly_order,
-        window_length=window_length,
-        deriv=deriv_order,
-        delta=dx,
-        axis=axis,
-    )
-    return derivative
diff --git a/d123/simulation/metrics/sim_agents/map_based.py b/d123/simulation/metrics/sim_agents/map_based.py
deleted file mode 100644
index dbd9a16d..00000000
--- a/d123/simulation/metrics/sim_agents/map_based.py
+++ /dev/null
@@ -1,151 +0,0 @@
-from typing import Dict, Final, List
-
-import numpy as np
-import numpy.typing as npt
-import shapely
-
-from d123.common.geometry.base import StateSE2, StateSE2Index
-from d123.common.geometry.bounding_box.bounding_box_index import BoundingBoxSE2Index
-from d123.common.geometry.bounding_box.utils import Corners2DIndex, bbse2_array_to_corners_array
-from d123.common.geometry.utils import normalize_angle
-from d123.dataset.maps.abstract_map import AbstractMap
-from d123.dataset.maps.abstract_map_objects import AbstractLane
-from d123.dataset.maps.map_datatypes import MapLayer
-
-MAX_LANE_CENTER_DISTANCE: Final[float] = 10.0
-
-
-def _get_offroad_feature(
-    agents_array: npt.NDArray[np.float64], agents_mask: npt.NDArray[np.bool], map_api: AbstractMap
-) -> npt.NDArray[np.bool]:
-
-    assert agents_array.shape[-1] == len(BoundingBoxSE2Index)
-    n_objects, n_iterations = agents_array.shape[:2]
-
-    offroad_feature = np.zeros((n_objects, n_iterations), dtype=np.bool_)
-
-    agent_shapely_corners = shapely.creation.points(bbse2_array_to_corners_array(agents_array)).flatten()
-    corner_indices = np.arange(n_iterations * n_objects * len(Corners2DIndex)).reshape(
-        n_objects, n_iterations, len(Corners2DIndex)
-    )
-
-    output = map_api.query_object_ids(
-        agent_shapely_corners,
-        layers=[
-            MapLayer.INTERSECTION,
-            MapLayer.LANE_GROUP,
-            MapLayer.CARPARK,
-            MapLayer.GENERIC_DRIVABLE,
-        ],
-        predicate="within",
-    )
-    list_all_corners = []
-    for _, object_ids in output.items():
-
list_all_corners.extend(list(object_ids)) - set_of_all_corners = set(list_all_corners) - - for object_idx in range(n_objects): - for iteration in range(n_iterations): - if agents_mask[object_idx, iteration]: - corner_indices_ = set(corner_indices[object_idx, iteration]) - offroad_feature[object_idx, iteration] = not corner_indices_.issubset(set_of_all_corners) - - return offroad_feature - - -def _get_road_center_distance_feature( - agents_array: npt.NDArray[np.float64], agents_mask: npt.NDArray[np.bool], map_api: AbstractMap -) -> npt.NDArray[np.float64]: - - lane_dict: Dict[str, AbstractLane] = {} - - def get_lane_by_id(lane_id: str, lane_dict: Dict[str, AbstractLane]) -> AbstractLane: - if lane_id not in lane_dict.keys(): - lane_dict[lane_id] = map_api.get_map_object(lane_id, MapLayer.LANE) - return lane_dict[lane_id] - - assert agents_array.shape[-1] == len(BoundingBoxSE2Index) - n_objects, n_iterations = agents_array.shape[:2] - - agent_shapely_centers = shapely.creation.points(agents_array[..., BoundingBoxSE2Index.XY]).flatten() - agent_indices = np.arange(n_iterations * n_objects).reshape(n_objects, n_iterations) - - distances = np.full((n_objects, n_iterations), MAX_LANE_CENTER_DISTANCE, dtype=np.float64) - - nearest_query_output = map_api.query_nearest( - agent_shapely_centers, - layers=[MapLayer.LANE], - max_distance=MAX_LANE_CENTER_DISTANCE, - return_all=True, - return_distance=False, - exclusive=False, - )[MapLayer.LANE] - - for object_idx in range(n_objects): - for iteration in range(n_iterations): - agent_idx = agent_indices[object_idx, iteration] - - if (not agents_mask[object_idx, iteration]) or (agent_idx not in nearest_query_output.keys()): - continue - - lane_ids: List[str] = nearest_query_output[agent_idx] - lanes: List[AbstractLane] = [get_lane_by_id(lane_id, lane_dict) for lane_id in lane_ids] - - if len(lanes) == 1: - select_lane = lanes[0] - centerline = select_lane.centerline.polyline_se2 - projected_se2_array = centerline.interpolate(centerline.project(agent_shapely_centers[agent_idx])).array - - elif len(lanes) > 1: - - projected_se2s_array = np.zeros((len(lanes), len(StateSE2Index)), dtype=np.float64) - for lane_idx, lane in enumerate(lanes): - lane: AbstractLane - centerline = lane.centerline.polyline_se2 - projected_se2s_array[lane_idx] = centerline.interpolate( - centerline.project(agent_shapely_centers[agent_idx]) - ).array - se2_distances = circumference_distance_se2_array( - agents_array[object_idx, iteration, BoundingBoxSE2Index.SE2], - projected_se2s_array, - radius=agents_array[object_idx, iteration, BoundingBoxSE2Index.LENGTH] / 2, - ) - projected_se2_array = projected_se2s_array[np.argmin(se2_distances)] - else: - raise ValueError - - distances[object_idx, iteration] = np.linalg.norm( - agents_array[object_idx, iteration, BoundingBoxSE2Index.XY] - - projected_se2_array[BoundingBoxSE2Index.XY] - ) - - del lane_dict - return distances - - -def circumference_distance_se2(state1: StateSE2, state2: StateSE2, radius: float) -> float: - # TODO: Move this to a more appropriate location for general usage. - # Heuristic for defining distance/similarity between two SE2 states. - # Combines the 2D Euclidean distance with the circumference of the yaw difference. 
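# Worked example: two states 1.0 m apart with |yaw difference| = 0.5 rad and
# radius 2.0 m score 1.0 + 0.5 * 2.0 = 2.0 m, i.e. the heading error is
# weighted by the arc length it sweeps at the given radius.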
- positional_distance = np.linalg.norm(state1.point_2d.array - state2.point_2d.array) - abs_yaw_difference = np.abs(normalize_angle(state1.yaw - state2.yaw)) - rotation_distance = abs_yaw_difference * radius - return positional_distance + rotation_distance - - -def circumference_distance_se2_array( - state1_se2: npt.NDArray[np.float64], - state2_se2: npt.NDArray[np.float64], - radius: npt.NDArray[np.float64], -) -> npt.NDArray[np.float64]: - # TODO: Move this to a more appropriate location for general usage. - # Heuristic for defining distance/similarity between two SE2 states. - # Combines the 2D Euclidean distance with the circumference of the yaw difference. - positional_distance = np.linalg.norm(state1_se2[..., StateSE2Index.XY] - state2_se2[..., StateSE2Index.XY], axis=-1) - abs_yaw_difference = np.abs( - normalize_angle( - state1_se2[..., StateSE2Index.YAW] - state2_se2[..., StateSE2Index.YAW], - ) - ) - rotation_distance = abs_yaw_difference * radius - return positional_distance + rotation_distance diff --git a/d123/simulation/metrics/sim_agents/sim_agents.py b/d123/simulation/metrics/sim_agents/sim_agents.py deleted file mode 100644 index 94eeed2c..00000000 --- a/d123/simulation/metrics/sim_agents/sim_agents.py +++ /dev/null @@ -1,225 +0,0 @@ -from dataclasses import dataclass -from typing import Dict, List - -import numpy as np -import numpy.typing as npt - -from d123.common.datatypes.detection.detection import BoxDetection, BoxDetectionWrapper, DetectionType -from d123.common.geometry.bounding_box.bounding_box_index import BoundingBoxSE2Index -from d123.dataset.maps.abstract_map import AbstractMap -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.simulation.metrics.sim_agents.histogram_metric import ( - BinaryHistogramIntersectionMetric, - HistogramIntersectionMetric, -) -from d123.simulation.metrics.sim_agents.interaction_based import _get_collision_feature, _get_object_distance_feature -from d123.simulation.metrics.sim_agents.kinematics import ( - _get_linear_acceleration_from_agents_array, - _get_linear_speed_from_agents_array, - _get_yaw_acceleration_from_agents_array, - _get_yaw_rate_from_agents_array, -) -from d123.simulation.metrics.sim_agents.map_based import _get_offroad_feature, _get_road_center_distance_feature -from d123.simulation.metrics.sim_agents.utils import _get_log_agents_array, _get_rollout_agents_array - - -@dataclass -class SimAgentsData: - - mask: npt.NDArray[np.bool] - - # 1. Kinematics - speed: npt.NDArray[np.float64] - acceleration: npt.NDArray[np.float64] - yaw_rate: npt.NDArray[np.float64] - yaw_acceleration: npt.NDArray[np.float64] - - # 2. Interaction based - collision: npt.NDArray[np.bool] - object_distance: npt.NDArray[np.float64] - - # 3. 
Map based - offroad: npt.NDArray[np.bool] - center_distance: npt.NDArray[np.float64] - - -def get_sim_agents_metrics(scene: AbstractScene, agent_rollouts: List[BoxDetectionWrapper]) -> Dict[str, float]: - def get_agent_tokens(agent_rollout: List[BoxDetection]) -> List[str]: - return [ - box_detection.metadata.track_token - for box_detection in agent_rollout - if box_detection.metadata.detection_type == DetectionType.VEHICLE - ] - - # TODO: Add ego vehicle state to the metrics - log_rollouts: List[BoxDetectionWrapper] = [] - - for iteration in range(scene.get_number_of_iterations()): - background_detections = scene.get_box_detections_at_iteration(iteration).box_detections - ego_detection = scene.get_ego_state_at_iteration(iteration).box_detection - log_rollouts.append(BoxDetectionWrapper(background_detections + [ego_detection])) - - initial_agent_tokens = get_agent_tokens(agent_rollouts[0]) - log_agents_array, log_agents_mask = _get_log_agents_array(scene, initial_agent_tokens) - agents_array, agents_mask = _get_rollout_agents_array(agent_rollouts, initial_agent_tokens) - - log_agents_data = _extract_sim_agent_data(log_agents_array, log_agents_mask, log_rollouts, scene.map_api) - agents_data = _extract_sim_agent_data(agents_array, agents_mask, agent_rollouts, scene.map_api) - - results: Dict[str, float] = {} - - # 0. Other data - results.update(_collision_rate(log_agents_data, agents_data)) - results.update(_offroad_rate(log_agents_data, agents_data)) - - # 1. Kinematics metrics - # 1.1 Speed - speed_metric = HistogramIntersectionMetric(min_val=0.0, max_val=25.0, n_bins=10, name="speed", weight=0.05) - speed_result = speed_metric.compute(log_agents_data.speed, agents_data.speed, log_agents_data.mask) - results.update(speed_result) - - # 1.2 Acceleration - acceleration_metric = HistogramIntersectionMetric( - min_val=-12.0, max_val=12.0, n_bins=11, name="acceleration", weight=0.05 - ) - acceleration_result = acceleration_metric.compute( - log_agents_data.acceleration, - agents_data.acceleration, - log_agents_data.mask, - ) - results.update(acceleration_result) - - # 1.3 Yaw rate - yaw_rate_metric = HistogramIntersectionMetric( - min_val=-0.628, max_val=0.628, n_bins=11, name="yaw_rate", weight=0.05 - ) - yaw_rate_result = yaw_rate_metric.compute( - log_agents_data.yaw_rate, - agents_data.yaw_rate, - log_agents_data.mask, - ) - results.update(yaw_rate_result) - - # 1.4 Yaw acceleration - yaw_acceleration_metric = HistogramIntersectionMetric( - min_val=-3.14, max_val=3.14, n_bins=11, name="yaw_acceleration", weight=0.05 - ) - yaw_acceleration_result = yaw_acceleration_metric.compute( - log_agents_data.yaw_acceleration, - agents_data.yaw_acceleration, - log_agents_data.mask, - ) - results.update(yaw_acceleration_result) - - # 2. 
Interaction based - # 2.1 Collision - collision_metric = BinaryHistogramIntersectionMetric(name="collision", weight=0.25) - collision_results = collision_metric.compute( - log_agents_data.collision, - agents_data.collision, - log_agents_data.mask, - ) - results.update(collision_results) - # collision_metric.plot_histograms(logs_collision, agents_collision, log_agents_mask) - - # 2.2 TTC - # TODO: Implement TTC metric - - # 2.3 Object distance - object_distance_metric = HistogramIntersectionMetric( - min_val=0.0, max_val=40.0, n_bins=10, name="object_distance", weight=0.15 - ) - object_distance_results = object_distance_metric.compute( - log_agents_data.object_distance, - agents_data.object_distance, - log_agents_data.mask, - ) - results.update(object_distance_results) - - # 3. Map based - # 3.1 Offroad - offroad_metric = BinaryHistogramIntersectionMetric(name="offroad", weight=0.25) - offroad_results = offroad_metric.compute( - log_agents_data.offroad, - agents_data.offroad, - log_agents_data.mask, - ) - results.update(offroad_results) - - # 3.2 lane center distance - center_distance_metric = HistogramIntersectionMetric( - min_val=0.0, max_val=10.0, n_bins=10, name="center_distance", weight=0.15 - ) - center_distance_results = center_distance_metric.compute( - log_agents_data.center_distance, - agents_data.center_distance, - log_agents_data.mask, - ) - results.update(center_distance_results) - - # 3.3 Traffic light compliance - # TODO: Implement traffic light compliance metric - - results["meta_score"] = sum([score for name, score in results.items() if name.endswith("_score")]) - - return results - - -def _extract_sim_agent_data( - agents_array: npt.NDArray[np.float64], - agents_mask: npt.NDArray[np.bool], - rollout: List[BoxDetectionWrapper], - map_api: AbstractMap, -) -> SimAgentsData: - - assert agents_array.ndim == 3 - assert agents_array.shape[-1] == len(BoundingBoxSE2Index) - assert agents_array.shape[1] == len(rollout) - - # 1. Kinematics - speed = _get_linear_speed_from_agents_array(agents_array, agents_mask) - acceleration = _get_linear_acceleration_from_agents_array(agents_array, agents_mask) - yaw_rate = _get_yaw_rate_from_agents_array(agents_array, agents_mask) - yaw_acceleration = _get_yaw_acceleration_from_agents_array(agents_array, agents_mask) - - # 2. Interaction based - collision = _get_collision_feature(agents_array, rollout) - object_distance = _get_object_distance_feature(agents_array, agents_mask, rollout) - - # 3. 
Map based - offroad = _get_offroad_feature(agents_array, agents_mask, map_api) - center_distance = _get_road_center_distance_feature(agents_array, agents_mask, map_api) - - return SimAgentsData( - mask=agents_mask, - speed=speed, - acceleration=acceleration, - yaw_rate=yaw_rate, - yaw_acceleration=yaw_acceleration, - collision=collision, - object_distance=object_distance, - offroad=offroad, - center_distance=center_distance, - ) - - -def _collision_rate(log_agents_data: SimAgentsData, agents_data: SimAgentsData) -> Dict[str, float]: - - def _collision_rate(agents_data: SimAgentsData) -> npt.NDArray[np.bool_]: - return np.any(agents_data.collision, where=agents_data.mask, axis=1).mean() - - return { - "log_collision_rate": _collision_rate(log_agents_data), - "agents_collision_rate": _collision_rate(agents_data), - } - - -def _offroad_rate(log_agents_data: SimAgentsData, agents_data: SimAgentsData) -> Dict[str, float]: - - def _offroad_rate(agents_data_: SimAgentsData) -> npt.NDArray[np.bool_]: - return np.any(agents_data_.offroad, where=agents_data_.mask, axis=1).mean() - - return { - "log_offroad_rate": _offroad_rate(log_agents_data), - "agents_offroad_rate": _offroad_rate(agents_data), - } diff --git a/d123/simulation/metrics/sim_agents/utils.py b/d123/simulation/metrics/sim_agents/utils.py deleted file mode 100644 index cf40c5a6..00000000 --- a/d123/simulation/metrics/sim_agents/utils.py +++ /dev/null @@ -1,54 +0,0 @@ -from typing import List, Tuple - -import numpy as np -import numpy.typing as npt - -from d123.common.datatypes.detection.detection import BoxDetectionWrapper -from d123.common.geometry.bounding_box.bounding_box_index import BoundingBoxSE2Index -from d123.dataset.scene.abstract_scene import AbstractScene - - -def _get_log_agents_array( - scene: AbstractScene, agent_tokens: List[str] -) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]: - - log_agents_array = np.zeros( - (len(agent_tokens), scene.get_number_of_iterations(), len(BoundingBoxSE2Index)), - dtype=np.float64, - ) - log_agents_mask = np.zeros( - (len(agent_tokens), scene.get_number_of_iterations()), - dtype=bool, - ) - - for iteration in range(scene.get_number_of_iterations()): - box_detections = scene.get_box_detections_at_iteration(iteration) - for agent_idx, agent_token in enumerate(agent_tokens): - box_detection = box_detections.get_detection_by_track_token(agent_token) - if box_detection is not None: - log_agents_mask[agent_idx, iteration] = True - log_agents_array[agent_idx, iteration] = box_detection.bounding_box_se2.array - - return log_agents_array, log_agents_mask - - -def _get_rollout_agents_array( - agent_rollouts: List[BoxDetectionWrapper], agent_tokens: List[str] -) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64]]: - rollout_agents_array = np.zeros( - (len(agent_tokens), len(agent_rollouts), len(BoundingBoxSE2Index)), - dtype=np.float64, - ) - rollout_agents_mask = np.zeros( - (len(agent_tokens), len(agent_rollouts)), - dtype=bool, - ) - - for iteration, agent_rollout in enumerate(agent_rollouts): - for agent_idx, agent_token in enumerate(agent_tokens): - box_detection = agent_rollout.get_detection_by_track_token(agent_token) - if box_detection is not None: - rollout_agents_mask[agent_idx, iteration] = True - rollout_agents_array[agent_idx, iteration] = box_detection.bounding_box_se2.array - - return rollout_agents_array, rollout_agents_mask diff --git a/d123/simulation/observation/abstract_observation.py b/d123/simulation/observation/abstract_observation.py deleted file mode 100644 
index dfba70af..00000000
--- a/d123/simulation/observation/abstract_observation.py
+++ /dev/null
@@ -1,33 +0,0 @@
-import abc
-from abc import abstractmethod
-from typing import Optional, Type
-
-from d123.common.datatypes.recording.abstract_recording import Recording
-from d123.common.datatypes.recording.detection_recording import DetectionRecording
-from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2
-from d123.dataset.scene.abstract_scene import AbstractScene
-from d123.simulation.time_controller.simulation_iteration import SimulationIteration
-
-
-class AbstractObservation(abc.ABC):
-
-    # Whether the observation class requires the scene object to be passed at construction time.
-    # This can be set to true only for oracle planners and cannot be used for submissions.
-    requires_scene: bool = True
-
-    @abstractmethod
-    def recording_type(self) -> Type[Recording]:
-        pass
-
-    @abstractmethod
-    def reset(self, scene: Optional[AbstractScene]) -> DetectionRecording:
-        pass
-
-    @abstractmethod
-    def step(
-        self,
-        current_iteration: SimulationIteration,
-        next_iteration: SimulationIteration,
-        current_ego_state: EgoStateSE2,
-    ) -> DetectionRecording:
-        pass
diff --git a/d123/simulation/observation/agents_observation.py b/d123/simulation/observation/agents_observation.py
deleted file mode 100644
index a31d6d67..00000000
--- a/d123/simulation/observation/agents_observation.py
+++ /dev/null
@@ -1,87 +0,0 @@
-from typing import List, Optional, Tuple, Type
-
-from d123.common.datatypes.detection.detection import BoxDetection
-from d123.common.datatypes.detection.detection_types import DetectionType
-from d123.common.datatypes.recording.abstract_recording import Recording
-from d123.common.datatypes.recording.detection_recording import DetectionRecording
-from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2
-from d123.dataset.arrow.conversion import BoxDetectionWrapper
-from d123.dataset.scene.abstract_scene import AbstractScene
-from d123.simulation.agents.abstract_agents import AbstractAgents
-
-# from d123.simulation.agents.path_following import PathFollowingAgents
-from d123.simulation.agents.idm_agents import IDMAgents
-
-# from d123.simulation.agents.smart_agents import SMARTAgents
-from d123.simulation.observation.abstract_observation import AbstractObservation
-from d123.simulation.time_controller.simulation_iteration import SimulationIteration
-
-
-class AgentsObservation(AbstractObservation):
-
-    # Whether the observation class requires the scene object to be passed at construction time.
-    # This can be set to true only for oracle planners and cannot be used for submissions.
-    requires_scene: bool = True
-
-    def __init__(self, agents: Optional[AbstractAgents] = None) -> None:
-        super().__init__()
-        self._scene: Optional[AbstractScene] = None
-        # Falls back to IDM agents when no agent model is passed.
-        # self._agents: AbstractAgents = ConstantVelocityAgents()
-        self._agents: AbstractAgents = agents if agents is not None else IDMAgents()
-        # self._agents: AbstractAgents = SMARTAgents()
-
-    def recording_type(self) -> Type[Recording]:
-        return DetectionRecording
-
-    def reset(self, scene: Optional[AbstractScene]) -> DetectionRecording:
-        assert scene is not None, "Scene must be provided for the agents observation."
-        self._scene = scene
-        self._iteration = 0
-
-        cars, non_cars, _ = _filter_agents_by_type(
-            self._scene.get_box_detections_at_iteration(self._iteration),
-            detection_types=[DetectionType.VEHICLE],
-        )
-        cars = self._agents.reset(
-            map_api=self._scene.map_api,
-            target_agents=cars,
-            non_target_agents=non_cars,
-            scene=self._scene if self._agents.requires_scene else None,
-        )
-        return DetectionRecording(
-            box_detections=BoxDetectionWrapper(cars + non_cars),
-            traffic_light_detections=self._scene.get_traffic_light_detections_at_iteration(self._iteration),
-        )
-
-    def step(
-        self,
-        current_iteration: SimulationIteration,
-        next_iteration: SimulationIteration,
-        current_ego_state: EgoStateSE2,
-    ) -> DetectionRecording:
-        assert self._scene is not None, "Scene must be provided for the agents observation."
-        self._iteration += 1
-        _, non_cars, _ = _filter_agents_by_type(
-            self._scene.get_box_detections_at_iteration(self._iteration),
-            detection_types=[DetectionType.VEHICLE],
-        )
-        cars = self._agents.step(non_target_agents=non_cars)
-        return DetectionRecording(
-            box_detections=BoxDetectionWrapper(cars + non_cars),
-            traffic_light_detections=self._scene.get_traffic_light_detections_at_iteration(self._iteration),
-        )
-
-
-def _filter_agents_by_type(
-    detections: BoxDetectionWrapper, detection_types: List[DetectionType]
-) -> Tuple[List[BoxDetection], List[BoxDetection], List[int]]:
-
-    in_types, not_in_types, in_indices = [], [], []
-    for detection_idx, detection in enumerate(detections):
-        if detection.metadata.detection_type in detection_types:
-            in_types.append(detection)
-            in_indices.append(detection_idx)
-        else:
-            not_in_types.append(detection)
-
-    return in_types, not_in_types, in_indices
diff --git a/d123/simulation/observation/log_replay_observation.py b/d123/simulation/observation/log_replay_observation.py
deleted file mode 100644
index 6e0b6b85..00000000
--- a/d123/simulation/observation/log_replay_observation.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from typing import Optional, Type
-
-from d123.common.datatypes.recording.abstract_recording import Recording
-from d123.common.datatypes.recording.detection_recording import DetectionRecording
-from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2
-from d123.dataset.scene.abstract_scene import AbstractScene
-from d123.simulation.observation.abstract_observation import AbstractObservation
-from d123.simulation.time_controller.simulation_iteration import SimulationIteration
-
-
-class LogReplayObservation(AbstractObservation):
-
-    # Whether the observation class requires the scene object to be passed at construction time.
-    # This can be set to true only for oracle planners and cannot be used for submissions.
-    requires_scene: bool = True
-
-    def __init__(self) -> None:
-        """
-        Initialize the log replay observation.
-        """
-        super().__init__()
-        self._scene: Optional[AbstractScene] = None
-
-    def recording_type(self) -> Type[Recording]:
-        return DetectionRecording
-
-    def reset(self, scene: Optional[AbstractScene]) -> DetectionRecording:
-        assert scene is not None, "Scene must be provided for log replay observation."
- self._scene = scene - self._iteration = 0 - - return DetectionRecording( - box_detections=self._scene.get_box_detections_at_iteration(self._iteration), - traffic_light_detections=self._scene.get_traffic_light_detections_at_iteration(self._iteration), - ) - - def step( - self, - current_iteration: SimulationIteration, - next_iteration: SimulationIteration, - current_ego_state: EgoStateSE2, - ) -> DetectionRecording: - self._iteration += 1 - return DetectionRecording( - box_detections=self._scene.get_box_detections_at_iteration(self._iteration), - traffic_light_detections=self._scene.get_traffic_light_detections_at_iteration(self._iteration), - ) diff --git a/d123/simulation/planning/abstract_planner.py b/d123/simulation/planning/abstract_planner.py deleted file mode 100644 index 1746d857..00000000 --- a/d123/simulation/planning/abstract_planner.py +++ /dev/null @@ -1,40 +0,0 @@ -# TODO: Remove or implement this placeholder - - -from dataclasses import dataclass -from typing import List - -from d123.dataset.maps.abstract_map import AbstractMap -from d123.simulation.history.simulation_history_buffer import Simulation2DHistoryBuffer -from d123.simulation.time_controller.simulation_iteration import SimulationIteration - - -@dataclass(frozen=True) -class PlannerInitialization: - """ - This class represents required data to initialize a planner. - """ - - route_lane_group_ids: List[str] - map_api: AbstractMap - - -@dataclass(frozen=True) -class PlannerInput: - """ - Input to a planner for which a trajectory should be computed. - """ - - iteration: SimulationIteration - history: Simulation2DHistoryBuffer - - -class AbstractPlanner: - def __init__(self): - self._arg = None - - def step(self): - raise NotImplementedError - - def reset(self): - raise NotImplementedError diff --git a/d123/simulation/planning/planner_output/abstract_planner_output.py b/d123/simulation/planning/planner_output/abstract_planner_output.py deleted file mode 100644 index e5d2cf7f..00000000 --- a/d123/simulation/planning/planner_output/abstract_planner_output.py +++ /dev/null @@ -1,8 +0,0 @@ -import abc - - -class AbstractPlannerOutput(abc.ABC): - """Abstract class for planner output.""" - - def __init__(self): - pass diff --git a/d123/simulation/planning/planner_output/action_planner_output.py b/d123/simulation/planning/planner_output/action_planner_output.py deleted file mode 100644 index 0a62ed98..00000000 --- a/d123/simulation/planning/planner_output/action_planner_output.py +++ /dev/null @@ -1,29 +0,0 @@ -from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE2, EgoStateSE2 -from d123.common.geometry.vector import Vector2D -from d123.simulation.planning.planner_output.abstract_planner_output import AbstractPlannerOutput - - -class ActionPlannerOutput(AbstractPlannerOutput): - - def __init__(self, acceleration: float, steering_rate: float, ego_state: EgoStateSE2): - """ - Initializes the ActionTrajectory. - :param acceleration: Longitudinal acceleration [m/s^2]. - :param steering_rate: Steering rate [rad/s]. - :param ego_state: Ego state at the start of the action. 
- """ - - self._acceleration = acceleration - self._steering_rate = steering_rate - self._ego_state = ego_state - - @property - def dynamic_state_se2(self) -> DynamicStateSE2: - - return DynamicStateSE2( - velocity=self._ego_state.dynamic_state_se2.velocity, - acceleration=Vector2D(self._acceleration, 0.0), - angular_velocity=self._ego_state.dynamic_state_se2.angular_velocity, - tire_steering_rate=self._steering_rate, - angular_acceleration=0.0, - ) diff --git a/d123/simulation/planning/planner_output/trajectory_planner_output.py b/d123/simulation/planning/planner_output/trajectory_planner_output.py deleted file mode 100644 index 2a394d68..00000000 --- a/d123/simulation/planning/planner_output/trajectory_planner_output.py +++ /dev/null @@ -1,7 +0,0 @@ -from d123.simulation.planning.planner_output.abstract_planner_output import AbstractPlannerOutput - - -class TrajectoryPlannerOutput(AbstractPlannerOutput): - - def __init__(self): - pass diff --git a/d123/simulation/simulation_2d.py b/d123/simulation/simulation_2d.py deleted file mode 100644 index 0be93c05..00000000 --- a/d123/simulation/simulation_2d.py +++ /dev/null @@ -1,171 +0,0 @@ -from __future__ import annotations - -import logging -from typing import Any, Optional, Tuple, Type - -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.simulation.callback.abstract_callback import AbstractCallback -from d123.simulation.callback.multi_callback import MultiCallback -from d123.simulation.history.simulation_history import Simulation2DHistory, Simulation2DHistorySample -from d123.simulation.history.simulation_history_buffer import Simulation2DHistoryBuffer -from d123.simulation.planning.abstract_planner import PlannerInitialization, PlannerInput -from d123.simulation.planning.planner_output.abstract_planner_output import AbstractPlannerOutput -from d123.simulation.simulation_2d_setup import Simulation2DSetup - -logger = logging.getLogger(__name__) - - -class Simulation2D: - - def __init__( - self, - setup: Simulation2DSetup, - callback: Optional[AbstractCallback] = None, - ): - - self._setup: Simulation2DSetup = setup - self._callback = MultiCallback([]) if callback is None else callback - - # History where the steps of a simulation are stored - self._history = Simulation2DHistory() - - # The + 1 here is to account for duration. For example, 20 steps at 0.1s starting at 0s will have a duration - # of 1.9s. At 21 steps the duration will achieve the target 2s duration. - self._history_buffer: Optional[Simulation2DHistoryBuffer] = None - - # Flag that keeps track whether simulation is still running - self._is_simulation_running = True - - # Lazy loaded in `.reset()` method - self._scene: Optional[AbstractScene] = None - - def __reduce__(self) -> Tuple[Type[Simulation2D], Tuple[Any, ...]]: - """ - Hints on how to reconstruct the object when pickling. - :return: Object type and constructor arguments to be used. - """ - return self.__class__, (self._setup, self._callback) - - def is_simulation_running(self) -> bool: - """ - Check whether a simulation reached the end - :return True if simulation hasn't reached the end, otherwise false. - """ - return not self._setup.time_controller.reached_end() and self._is_simulation_running - - def reset(self, scene: AbstractScene) -> Tuple[PlannerInitialization, PlannerInput]: - """ - Reset all internal states of simulation. - """ - - # 1. Reset the scene object - self._scene = scene - - # 2. 
Reset history and setup - self._history = Simulation2DHistory() # TODO: refactor - self._history.reset(scene) - simulation_iteration = self._setup.time_controller.reset(self._scene) - observation = self._setup.observations.reset(self._scene) - ego_state = self._setup.ego_controller.reset(self._scene) - - # 3. Reinitialize history buffer - self._history_buffer = Simulation2DHistoryBuffer.initialize_from_scene( - self._scene.get_number_of_history_iterations(), - self._scene, - self._setup.observations.recording_type(), - ) - self._history_buffer.append(ego_state, observation) - - # 4. Restart simulation - self._is_simulation_running = True - - # 5. Fill planner input and initialization - planner_initialization = PlannerInitialization( - route_lane_group_ids=self._scene.get_route_lane_group_ids(simulation_iteration.index), - map_api=self._scene.map_api, - ) - planner_input = PlannerInput( - iteration=simulation_iteration, - history=self._history_buffer, - ) - - return planner_initialization, planner_input - - def step(self, planner_output: AbstractPlannerOutput) -> PlannerInput: - - if self._history_buffer is None: - raise RuntimeError("Simulation was not initialized!") - - if not self.is_simulation_running(): - raise RuntimeError("Simulation is not running, simulation can not be propagated!") - - # Measurements - current_iteration = self._setup.time_controller.get_iteration() - current_ego_state, current_observation = self._history_buffer.current_state - - # Add new sample to history - logger.debug(f"Adding to history: {current_iteration.index}") - self._history.add_sample( - Simulation2DHistorySample( - current_iteration, - current_ego_state, - planner_output, - current_observation, - ) - ) - - # Propagate state to next iteration - next_iteration, reached_end = self._setup.time_controller.step() - - # Propagate state - next_ego_state = self._setup.ego_controller.step( - current_iteration, - next_iteration, - current_ego_state, - planner_output, - ) - next_observation = self._setup.observations.step( - current_iteration, - next_iteration, - current_ego_state, - ) - - if reached_end: - self._is_simulation_running = False - - # Append new state into history buffer - self._history_buffer.append(next_ego_state, next_observation) - planner_input = PlannerInput(iteration=next_iteration, history=self._history_buffer) - return planner_input - - @property - def scene(self) -> Optional[AbstractScene]: - """ - :return: used scene in this simulation. - """ - return self._scene - - @property - def callback(self) -> AbstractCallback: - """ - :return: Callback for this simulation. - """ - return self._callback - - @property - def history(self) -> Simulation2DHistory: - """ - :return History from the simulation. - """ - return self._history - - @property - def history_buffer(self) -> Simulation2DHistoryBuffer: - """ - :return SimulationHistoryBuffer from the simulation. - """ - if self._history_buffer is None: - raise RuntimeError( - "_history_buffer is None. 
Please initialize the buffer by calling Simulation2D.reset()."
-            )
-        return self._history_buffer
diff --git a/d123/simulation/simulation_2d_setup.py b/d123/simulation/simulation_2d_setup.py
deleted file mode 100644
index 737d3e6c..00000000
--- a/d123/simulation/simulation_2d_setup.py
+++ /dev/null
@@ -1,26 +0,0 @@
-from dataclasses import dataclass
-
-from d123.simulation.controller.abstract_controller import AbstractEgoController
-from d123.simulation.observation.abstract_observation import AbstractObservation
-from d123.simulation.time_controller.abstract_time_controller import AbstractTimeController
-
-
-@dataclass
-class Simulation2DSetup:
-    """Setup class for constructing a 2D Simulation."""
-
-    time_controller: AbstractTimeController
-    observations: AbstractObservation
-    ego_controller: AbstractEgoController
-
-    def __post_init__(self) -> None:
-        """Post-initialization sanity checks."""
-        assert isinstance(
-            self.time_controller, AbstractTimeController
-        ), "Error: time_controller must inherit from AbstractTimeController!"
-        assert isinstance(
-            self.observations, AbstractObservation
-        ), "Error: observations must inherit from AbstractObservation!"
-        assert isinstance(
-            self.ego_controller, AbstractEgoController
-        ), "Error: ego_controller must inherit from AbstractEgoController!"
diff --git a/d123/simulation/time_controller/abstract_time_controller.py b/d123/simulation/time_controller/abstract_time_controller.py
deleted file mode 100644
index 788acfcb..00000000
--- a/d123/simulation/time_controller/abstract_time_controller.py
+++ /dev/null
@@ -1,47 +0,0 @@
-from __future__ import annotations
-
-import abc
-from typing import Tuple
-
-from d123.dataset.scene.abstract_scene import AbstractScene
-from d123.simulation.time_controller.simulation_iteration import SimulationIteration
-
-
-class AbstractTimeController(abc.ABC):
-    """
-    Generic simulation time manager.
-    """
-
-    @abc.abstractmethod
-    def get_iteration(self) -> SimulationIteration:
-        """
-        Get the current simulation iteration.
-        :return: The current simulation iteration and time point.
-        """
-
-    @abc.abstractmethod
-    def reset(self, scene: AbstractScene) -> SimulationIteration:
-        """
-        Reset the time controller (all internal states should be reset, if any).
-        """
-
-    @abc.abstractmethod
-    def step(self) -> Tuple[SimulationIteration, bool]:
-        """
-        Advance to the next simulation iteration.
-        :return: A tuple containing the next SimulationIteration and a boolean indicating if the simulation has reached its end.
-        """
-
-    @abc.abstractmethod
-    def reached_end(self) -> bool:
-        """
-        Check if we have reached the end of the simulation.
-        :return: True if the simulation reached its end state.
-        """
-
-    @abc.abstractmethod
-    def number_of_iterations(self) -> int:
-        """
-        The number of iterations the simulation should be running for.
-        :return: Number of iterations of the simulation.
- """ diff --git a/d123/simulation/time_controller/log_time_controller.py b/d123/simulation/time_controller/log_time_controller.py deleted file mode 100644 index b358a24c..00000000 --- a/d123/simulation/time_controller/log_time_controller.py +++ /dev/null @@ -1,44 +0,0 @@ -from typing import Optional, Tuple - -from d123.dataset.scene.abstract_scene import AbstractScene -from d123.simulation.time_controller.abstract_time_controller import ( - AbstractTimeController, -) -from d123.simulation.time_controller.simulation_iteration import SimulationIteration - - -class LogTimeController(AbstractTimeController): - """ - Class handling simulation time and completion. - """ - - def __init__(self): - """ - Initialize simulation control. - """ - self._current_iteration_index: int = 0 - self._scene: Optional[AbstractScene] = None - - def reset(self, scene: AbstractScene) -> SimulationIteration: - """Inherited, see superclass.""" - self._scene = scene - self._current_iteration_index = 0 - return self.get_iteration() - - def get_iteration(self) -> SimulationIteration: - """Inherited, see superclass.""" - scene_time = self._scene.get_timepoint_at_iteration(self._current_iteration_index) - return SimulationIteration(time_point=scene_time, index=self._current_iteration_index) - - def step(self) -> Tuple[SimulationIteration, bool]: - """Inherited, see superclass.""" - self._current_iteration_index += 1 - return self.get_iteration(), self.reached_end() - - def reached_end(self) -> bool: - """Inherited, see superclass.""" - return self._current_iteration_index >= self.number_of_iterations() - 1 - - def number_of_iterations(self) -> int: - """Inherited, see superclass.""" - return self._scene.get_number_of_iterations() diff --git a/d123/simulation/time_controller/simulation_iteration.py b/d123/simulation/time_controller/simulation_iteration.py deleted file mode 100644 index 7304bf78..00000000 --- a/d123/simulation/time_controller/simulation_iteration.py +++ /dev/null @@ -1,34 +0,0 @@ -from dataclasses import dataclass - -from d123.common.datatypes.time.time_point import TimePoint - - -@dataclass -class SimulationIteration: - """ - Simulation step time and index. - """ - - time_point: TimePoint # A time point along simulation - - # Iteration in the simulation, starting from 0. - # In closed loop this represents the n-th sample of the simulation. - index: int - - def __post_init__(self) -> None: - """Post-init index sanity check.""" - assert self.index >= 0, f"Iteration must be >= 0, but it is {self.index}!" - - @property - def time_us(self) -> int: - """ - :return: time in micro seconds. - """ - return int(self.time_point.time_us) - - @property - def time_s(self) -> float: - """ - :return: Time in seconds. 
- """ - return float(self.time_point.time_s) diff --git a/d123/tests/.gitkeep b/d123/tests/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/d123/training/feature_builder/smart_feature_builder.py b/d123/training/feature_builder/smart_feature_builder.py deleted file mode 100644 index 829f0498..00000000 --- a/d123/training/feature_builder/smart_feature_builder.py +++ /dev/null @@ -1,345 +0,0 @@ -from enum import IntEnum -from typing import Dict, Final, List, Optional, Tuple - -import numpy as np -import numpy.typing as npt -import shapely - -from d123.common.datatypes.detection.detection import BoxDetection, BoxDetectionWrapper -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.geometry.base import StateSE2, StateSE2Index -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2 -from d123.common.geometry.line.polylines import PolylineSE2 -from d123.common.geometry.transform.se2_array import convert_absolute_to_relative_se2_array -from d123.common.visualization.color.default import TrafficLightStatus -from d123.dataset.maps.abstract_map import MapLayer -from d123.dataset.maps.abstract_map_objects import ( - AbstractCarpark, - AbstractCrosswalk, - AbstractGenericDrivable, - AbstractLaneGroup, -) -from d123.dataset.scene.abstract_scene import AbstractScene - -# TODO: Hind feature builder behind abstraction. - - -class SMARTMapTokenType(IntEnum): - LANE = 0 - LANE_GROUP_BOUNDARY = 1 - CROSSWALK = 2 - CARPARK = 3 - GENERIC_DRIVABLE = 4 - - -class SMARTMapTokenPlType(IntEnum): - BOUNDARY = 0 - CENTERLINE = 1 - POLYGON = 2 - - -START_ITERATION: Final[int] = 0 - - -class SMARTFeatureBuilder: - def __init__(self): - pass - - def build_features(self, scene: AbstractScene): - - feature_dict = {"scenario_id": scene.token} - - # Optionally, you use a different origin instead - origin: StateSE2 = scene.get_ego_state_at_iteration(START_ITERATION).bounding_box.center.state_se2 - - map_features = _build_map_features(scene, origin) - feature_dict.update(map_features) - agent_features = _build_agent_features(scene, origin) - feature_dict.update(agent_features) - - return feature_dict - - -def _build_map_features(scene: AbstractScene, origin: StateSE2) -> Dict[str, np.ndarray]: - - # TODO: Add to config - width, height = 200, 200 - num_points = 3 - segment_length = 5.0 - - # create map extent polygon - map_bounding_box = BoundingBoxSE2(origin, height, width) - - map_api = scene.map_api - map_objects = map_api.query( - map_bounding_box.shapely_polygon, - layers=[ - MapLayer.LANE_GROUP, - MapLayer.CROSSWALK, - MapLayer.CARPARK, - MapLayer.GENERIC_DRIVABLE, - ], - predicate="intersects", - ) - - # Traffic light data - traffic_lights = scene.get_traffic_light_detections_at_iteration(START_ITERATION) - - traj_se2: List[npt.NDArray[np.float64]] = [] - types: List[int] = [] - pl_types: List[int] = [] - pl_light_types: List[int] = [] - - # 1. 
Add lane - for lane_group in map_objects[MapLayer.LANE_GROUP]: - lane_group: AbstractLaneGroup - is_intersection = lane_group.intersection is not None - - for boundary in [lane_group.right_boundary.polyline_se2, lane_group.left_boundary.polyline_se2]: - boundary_traj_se2 = _split_segments( - boundary, - num_points=num_points, - segment_length=segment_length, - map_bounding_box=map_bounding_box, - ) - traj_se2.extend(boundary_traj_se2) - types.extend([int(SMARTMapTokenType.LANE_GROUP_BOUNDARY)] * len(boundary_traj_se2)) - pl_types.extend([int(SMARTMapTokenPlType.BOUNDARY)] * len(boundary_traj_se2)) - pl_light_types.extend([int(TrafficLightStatus.OFF)] * len(boundary_traj_se2)) - - for lane in lane_group.lanes: - lane_traffic_light = traffic_lights.get_detection_by_lane_id(lane.id) - centerline = lane.centerline.polyline_se2 - lane_traj_se2 = _split_segments( - centerline, - num_points=num_points, - segment_length=segment_length, - map_bounding_box=map_bounding_box, - ) - - traj_se2.extend(lane_traj_se2) - types.extend([int(SMARTMapTokenType.LANE)] * len(lane_traj_se2)) - pl_types.extend([int(SMARTMapTokenPlType.CENTERLINE)] * len(lane_traj_se2)) - if lane_traffic_light is None: - if is_intersection: - pl_light_types.extend([int(TrafficLightStatus.UNKNOWN)] * len(lane_traj_se2)) - else: - pl_light_types.extend([int(TrafficLightStatus.OFF)] * len(lane_traj_se2)) - else: - pl_light_types.extend([int(lane_traffic_light.status)] * len(lane_traj_se2)) - - # 2. Crosswalks - for crosswalk in map_objects[MapLayer.CROSSWALK]: - crosswalk: AbstractCrosswalk - crosswalk_traj_se2 = _split_segments( - crosswalk.outline_3d.polyline_se2, - num_points=num_points, - segment_length=segment_length, - map_bounding_box=map_bounding_box, - ) - traj_se2.extend(crosswalk_traj_se2) - types.extend([int(SMARTMapTokenType.CROSSWALK)] * len(crosswalk_traj_se2)) - pl_types.extend([int(SMARTMapTokenPlType.POLYGON)] * len(crosswalk_traj_se2)) - pl_light_types.extend([int(TrafficLightStatus.OFF)] * len(crosswalk_traj_se2)) - - # 3. Parking - for carpark in map_objects[MapLayer.CARPARK]: - carpark: AbstractCarpark - carpark_traj_se2 = _split_segments( - carpark.outline_3d.polyline_se2, - num_points=num_points, - segment_length=segment_length, - map_bounding_box=map_bounding_box, - ) - traj_se2.extend(carpark_traj_se2) - types.extend([int(SMARTMapTokenType.CARPARK)] * len(carpark_traj_se2)) - pl_types.extend([int(SMARTMapTokenPlType.POLYGON)] * len(carpark_traj_se2)) - pl_light_types.extend([int(TrafficLightStatus.OFF)] * len(carpark_traj_se2)) - - # 4. 
Generic drivable
-    for generic_drivable in map_objects[MapLayer.GENERIC_DRIVABLE]:
-        generic_drivable: AbstractGenericDrivable
-        drivable_traj_se2 = _split_segments(
-            generic_drivable.outline_3d.polyline_se2,
-            num_points=num_points,
-            segment_length=segment_length,
-            map_bounding_box=map_bounding_box,
-        )
-        traj_se2.extend(drivable_traj_se2)
-        types.extend([int(SMARTMapTokenType.GENERIC_DRIVABLE)] * len(drivable_traj_se2))
-        pl_types.extend([int(SMARTMapTokenPlType.POLYGON)] * len(drivable_traj_se2))
-        pl_light_types.extend([int(TrafficLightStatus.OFF)] * len(drivable_traj_se2))
-
-    assert len(traj_se2) == len(types) == len(pl_types) == len(pl_light_types)
-
-    traj_se2 = np.array(traj_se2, dtype=np.float64)
-    types = np.array(types, dtype=np.uint8)
-    pl_types = np.array(pl_types, dtype=np.uint8)
-    pl_light_types = np.array(pl_light_types, dtype=np.uint8)
-    traj_se2 = convert_absolute_to_relative_se2_array(origin, traj_se2)
-
-    return {
-        "map_save": {
-            "traj_pos": traj_se2[..., StateSE2Index.XY],
-            "traj_theta": traj_se2[..., 0, StateSE2Index.YAW],
-        },
-        "pt_token": {
-            "type": types,
-            "pl_type": pl_types,
-            "light_type": pl_light_types,
-            "num_nodes": len(traj_se2),
-        },
-    }
-
-
-def _build_agent_features(scene: AbstractScene, origin: StateSE2) -> Dict[str, dict]:
-    iteration_indices = np.arange(
-        -scene.get_number_of_history_iterations(),
-        scene.get_number_of_iterations(),
-    )
-    num_steps = len(iteration_indices)
-
-    target_types = [DetectionType.VEHICLE, DetectionType.PEDESTRIAN, DetectionType.BICYCLE]
-    box_detections_list = [scene.get_box_detections_at_iteration(iteration) for iteration in iteration_indices]
-    target_detections: List[List[BoxDetection]] = []
-    target_indices: List[List[int]] = []
-    for box_detections in box_detections_list:
-        in_types, _, in_indices = _filter_agents_by_type(box_detections, target_types)
-        target_detections.append(in_types)
-        target_indices.append(in_indices)
-
-    other_start_iteration = scene.get_number_of_history_iterations()
-    initial_agents = [detection.metadata.track_token for detection in target_detections[other_start_iteration]]
-    initial_indices = target_indices[other_start_iteration]
-    num_agents = len(initial_agents) + 1  # + 1 for ego vehicle
-
-    def detection_type_to_index(detection_type: DetectionType) -> int:
-        if detection_type == DetectionType.VEHICLE:
-            return 0
-        elif detection_type == DetectionType.PEDESTRIAN:
-            return 1
-        elif detection_type == DetectionType.BICYCLE:
-            return 2
-        else:
-            raise ValueError(f"Unsupported detection type: {detection_type}")
-
-    # Fill role, id, type arrays
-    role = np.zeros((num_agents, 3), dtype=bool)
-    id = np.zeros((num_agents), dtype=np.int64)
-    type = np.zeros((num_agents), dtype=np.uint8)
-    extent = np.zeros((num_agents, 3), dtype=np.float32)
-
-    for agent_idx, agent_token in enumerate(initial_agents):
-        detection = box_detections_list[other_start_iteration].get_detection_by_track_token(agent_token)
-        assert detection is not None, f"Agent {agent_token} not found in initial detections."
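-        # Role layout (one-hot flags, matching the assignments here and below): column 0 is
-        # reserved for the ego vehicle, column 2 marks vehicles, and column 1 marks
-        # pedestrians and bicycles.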
-
-        role_idx = 2 if detection.metadata.detection_type == DetectionType.VEHICLE else 1
-        role[agent_idx, role_idx] = True
-        id[agent_idx] = initial_indices[agent_idx]
-        type[agent_idx] = detection_type_to_index(detection.metadata.detection_type)
-        extent[agent_idx] = [
-            detection.bounding_box.length,
-            detection.bounding_box.width,
-            1.0,
-        ]  # NOTE: fill height with 1.0 as placeholder (not always available)
-
-    # Fill ego vehicle data
-    role[-1, 0] = True
-    id[-1] = -1  # Use -1 for ego vehicle
-    type[-1] = detection_type_to_index(DetectionType.VEHICLE)
-
-    # Fill per-timestep trajectory arrays
-    valid_mask = np.zeros((num_agents, num_steps), dtype=bool)
-    position = np.zeros((num_agents, num_steps, 3), dtype=np.float64)
-    heading = np.zeros((num_agents, num_steps), dtype=np.float64)
-    velocity = np.zeros((num_agents, num_steps, 2), dtype=np.float64)
-
-    for time_idx, iteration in enumerate(iteration_indices):
-        for agent_idx, agent_token in enumerate(initial_agents):
-            detection = box_detections_list[time_idx].get_detection_by_track_token(agent_token)
-            if detection is None:
-                continue
-
-            valid_mask[agent_idx, time_idx] = True
-
-            state_se2 = detection.bounding_box.center.state_se2
-            local_se2_array = convert_absolute_to_relative_se2_array(origin, state_se2.array)
-            position[agent_idx, time_idx, :2] = local_se2_array[..., StateSE2Index.XY]
-            # position[agent_idx, time_idx, 2] = ...  # Is this the z dimension?
-            heading[agent_idx, time_idx] = local_se2_array[..., StateSE2Index.YAW]
-            velocity[agent_idx, time_idx, :] = detection.velocity.array[:2]  # already in the agent's local frame
-
-        # Fill ego vehicle data
-        ego_vehicle_state = scene.get_ego_state_at_iteration(iteration)
-        valid_mask[-1, time_idx] = True
-        local_se2_array = convert_absolute_to_relative_se2_array(
-            origin, ego_vehicle_state.bounding_box.center.state_se2.array
-        )
-        position[-1, time_idx, :2] = local_se2_array[..., StateSE2Index.XY]
-        # position[-1, time_idx, 2] = ...  # Is this the z dimension?
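-        # (Presumably yes: the third slot would be elevation; it stays 0.0 here because the
-        # SE2 states above carry no z information.)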
- heading[-1, time_idx] = local_se2_array[..., StateSE2Index.YAW] - velocity[-1, time_idx, :] = ego_vehicle_state.dynamic_state_se3.velocity.array[:2] # already in local of agent - - return { - "agent": { - "num_nodes": num_agents, - "valid_mask": valid_mask, - "role": role, - "id": id, - "type": type, - "position": position, - "heading": heading, - "velocity": velocity, - "shape": extent, # Placeholder for shape, if needed - } - } - - -def _split_segments( - polyline: PolylineSE2, - num_points: int, - segment_length: float, - map_bounding_box: Optional[BoundingBoxSE2] = None, -) -> List[npt.NDArray[np.float64]]: - - segments_distances = np.concatenate([np.arange(0.0, polyline.length, step=segment_length), [polyline.length]]) - polygon = map_bounding_box.shapely_polygon if map_bounding_box is not None else None - - segments = [] - for segment_start, segment_end in zip(segments_distances[:-1], segments_distances[1:]): - include_endpoint = True - poses = polyline.interpolate( - np.linspace( - segment_start, - segment_end, - num=num_points, - endpoint=include_endpoint, - ) - ) - if polygon is not None: - points_shapely = shapely.creation.points(poses[(0, -1), :2]) - in_map = any(polygon.contains(points_shapely)) - if not in_map: - continue - segments.append(poses) - - return segments - - -def _filter_agents_by_type( - detections: BoxDetectionWrapper, detection_types: List[DetectionType] -) -> Tuple[List[BoxDetection], List[BoxDetection], List[int]]: - - in_types, not_in_types, in_indices = [], [], [] - for detection_idx, detection in enumerate(detections): - if detection.metadata.detection_type in detection_types: - in_types.append(detection) - in_indices.append(detection_idx) - else: - not_in_types.append(detection) - - return in_types, not_in_types, in_indices diff --git a/d123/training/models/sim_agent/smart/__init__.py b/d123/training/models/sim_agent/smart/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/d123/training/models/sim_agent/smart/datamodules/__init__.py b/d123/training/models/sim_agent/smart/datamodules/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/d123/training/models/sim_agent/smart/datamodules/scalable_datamodule.py b/d123/training/models/sim_agent/smart/datamodules/scalable_datamodule.py deleted file mode 100644 index 41c318eb..00000000 --- a/d123/training/models/sim_agent/smart/datamodules/scalable_datamodule.py +++ /dev/null @@ -1,95 +0,0 @@ -from typing import Optional - -from lightning import LightningDataModule -from lightning.pytorch.utilities.types import EVAL_DATALOADERS, TRAIN_DATALOADERS -from torch_geometric.loader import DataLoader - -from d123.training.models.sim_agent.smart.datasets.scalable_dataset import MultiDataset - -from .target_builder import WaymoTargetBuilderTrain, WaymoTargetBuilderVal - - -class MultiDataModule(LightningDataModule): - def __init__( - self, - train_batch_size: int, - val_batch_size: int, - test_batch_size: int, - train_raw_dir: str, - val_raw_dir: str, - test_raw_dir: str, - val_tfrecords_splitted: str, - shuffle: bool, - num_workers: int, - pin_memory: bool, - persistent_workers: bool, - train_max_num: int, - ) -> None: - super(MultiDataModule, self).__init__() - self.train_batch_size = train_batch_size - self.val_batch_size = val_batch_size - self.test_batch_size = test_batch_size - self.shuffle = shuffle - self.num_workers = num_workers - self.pin_memory = pin_memory - self.persistent_workers = persistent_workers and num_workers > 0 - self.train_raw_dir = train_raw_dir - 
self.val_raw_dir = val_raw_dir
-        self.test_raw_dir = test_raw_dir
-        self.val_tfrecords_splitted = val_tfrecords_splitted
-
-        self.train_transform = WaymoTargetBuilderTrain(train_max_num)
-        self.val_transform = WaymoTargetBuilderVal()
-        self.test_transform = WaymoTargetBuilderVal()
-
-    def setup(self, stage: Optional[str] = None) -> None:
-        if stage == "fit" or stage is None:
-            self.train_dataset = MultiDataset(self.train_raw_dir, self.train_transform)
-            self.val_dataset = MultiDataset(
-                self.val_raw_dir,
-                self.val_transform,
-                tfrecord_dir=self.val_tfrecords_splitted,
-            )
-        elif stage == "validate":
-            self.val_dataset = MultiDataset(
-                self.val_raw_dir,
-                self.val_transform,
-                tfrecord_dir=self.val_tfrecords_splitted,
-            )
-        elif stage == "test":
-            self.test_dataset = MultiDataset(self.test_raw_dir, self.test_transform)
-        else:
-            raise ValueError(f"{stage} should be one of [fit, validate, test]")
-
-    def train_dataloader(self) -> TRAIN_DATALOADERS:
-        return DataLoader(
-            self.train_dataset,
-            batch_size=self.train_batch_size,
-            shuffle=self.shuffle,
-            num_workers=self.num_workers,
-            pin_memory=self.pin_memory,
-            persistent_workers=self.persistent_workers,
-            drop_last=False,
-        )
-
-    def val_dataloader(self) -> EVAL_DATALOADERS:
-        return DataLoader(
-            self.val_dataset,
-            batch_size=self.val_batch_size,
-            shuffle=False,
-            num_workers=self.num_workers,
-            pin_memory=self.pin_memory,  # False
-            persistent_workers=self.persistent_workers,
-            drop_last=False,
-        )
-
-    def test_dataloader(self) -> EVAL_DATALOADERS:
-        return DataLoader(
-            self.test_dataset,
-            batch_size=self.test_batch_size,
-            shuffle=False,
-            num_workers=self.num_workers,  # 0
-            pin_memory=self.pin_memory,  # False
-            persistent_workers=self.persistent_workers,
-            drop_last=False,
-        )
diff --git a/d123/training/models/sim_agent/smart/datamodules/target_builder.py b/d123/training/models/sim_agent/smart/datamodules/target_builder.py
deleted file mode 100644
index a62b6786..00000000
--- a/d123/training/models/sim_agent/smart/datamodules/target_builder.py
+++ /dev/null
@@ -1,62 +0,0 @@
-import numpy as np
-import torch
-from torch_geometric.data import HeteroData
-from torch_geometric.transforms import BaseTransform
-
-
-def _numpy_dict_to_torch(data: dict, device: torch.device = torch.device("cpu")) -> None:
-    """
-    Recursively convert numpy arrays in a dictionary to torch tensors, in place.
-    :param data: Dictionary with numpy arrays.
-    :param device: Device on which the tensors are created.
- """ - for key, value in data.items(): - if isinstance(value, np.ndarray): - data[key] = torch.tensor(value, device=device) - if data[key].dtype == torch.float64: - data[key] = data[key].to(torch.float32) - elif isinstance(value, dict): - _numpy_dict_to_torch(value) - - -class WaymoTargetBuilderTrain(BaseTransform): - def __init__(self, max_num: int) -> None: - super(WaymoTargetBuilderTrain, self).__init__() - self.step_current = 10 - self.max_num = max_num - - def __call__(self, data) -> HeteroData: - _numpy_dict_to_torch(data) - - pos = data["agent"]["position"] - av_index = torch.where(data["agent"]["role"][:, 0])[0].item() - distance = torch.norm(pos - pos[av_index], dim=-1) - - # we do not believe the perception out of range of 150 meters - data["agent"]["valid_mask"] = data["agent"]["valid_mask"] & (distance < 150) - - # we do not predict vehicle too far away from ego car - role_train_mask = data["agent"]["role"].any(-1) - extra_train_mask = (distance[:, self.step_current] < 100) & ( - data["agent"]["valid_mask"][:, self.step_current + 1 :].sum(-1) >= 5 - ) - - train_mask = extra_train_mask | role_train_mask - if train_mask.sum() > self.max_num: # too many vehicle - _indices = torch.where(extra_train_mask & ~role_train_mask)[0] - selected_indices = _indices[torch.randperm(_indices.size(0))[: self.max_num - role_train_mask.sum()]] - data["agent"]["train_mask"] = role_train_mask - data["agent"]["train_mask"][selected_indices] = True - else: - data["agent"]["train_mask"] = train_mask # [n_agent] - - return HeteroData(data) - - -class WaymoTargetBuilderVal(BaseTransform): - def __init__(self) -> None: - super(WaymoTargetBuilderVal, self).__init__() - - def __call__(self, data) -> HeteroData: - _numpy_dict_to_torch(data) - return HeteroData(data) diff --git a/d123/training/models/sim_agent/smart/datasets/__init__.py b/d123/training/models/sim_agent/smart/datasets/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/d123/training/models/sim_agent/smart/datasets/scalable_dataset.py b/d123/training/models/sim_agent/smart/datasets/scalable_dataset.py deleted file mode 100644 index 0e4b607d..00000000 --- a/d123/training/models/sim_agent/smart/datasets/scalable_dataset.py +++ /dev/null @@ -1,40 +0,0 @@ -import logging -import pickle -from pathlib import Path -from typing import Callable, List, Optional - -from torch_geometric.data import Dataset - -logger = logging.getLogger(__name__) - - -class MultiDataset(Dataset): - def __init__( - self, - raw_dir: str, - transform: Callable, - tfrecord_dir: Optional[str] = None, - ) -> None: - raw_dir = Path(raw_dir) - self._raw_paths = [p.as_posix() for p in sorted(raw_dir.glob("*"))] - self._num_samples = len(self._raw_paths) - - self._tfrecord_dir = Path(tfrecord_dir) if tfrecord_dir is not None else None - - logger.info("Length of {} dataset is ".format(raw_dir) + str(self._num_samples)) - super(MultiDataset, self).__init__(transform=transform, pre_transform=None, pre_filter=None) - - @property - def raw_paths(self) -> List[str]: - return self._raw_paths - - def len(self) -> int: - return self._num_samples - - def get(self, idx: int): - with open(self.raw_paths[idx], "rb") as handle: - data = pickle.load(handle) - - if self._tfrecord_dir is not None: - data["tfrecord_path"] = (self._tfrecord_dir / (data["scenario_id"] + ".tfrecords")).as_posix() - return data diff --git a/d123/training/models/sim_agent/smart/layers/__init__.py b/d123/training/models/sim_agent/smart/layers/__init__.py deleted file mode 100644 index 
e69de29b..00000000 diff --git a/d123/training/models/sim_agent/smart/layers/attention_layer.py b/d123/training/models/sim_agent/smart/layers/attention_layer.py deleted file mode 100644 index 9452662e..00000000 --- a/d123/training/models/sim_agent/smart/layers/attention_layer.py +++ /dev/null @@ -1,113 +0,0 @@ -from typing import Optional, Tuple, Union - -import torch -import torch.nn as nn -from torch_geometric.nn.conv import MessagePassing -from torch_geometric.utils import softmax - -from d123.training.models.sim_agent.smart.utils.weight_init import weight_init - - -class AttentionLayer(MessagePassing): - def __init__( - self, - hidden_dim: int, - num_heads: int, - head_dim: int, - dropout: float, - bipartite: bool, - has_pos_emb: bool, - **kwargs, - ) -> None: - super(AttentionLayer, self).__init__(aggr="add", node_dim=0, **kwargs) - self.num_heads = num_heads - self.head_dim = head_dim - self.has_pos_emb = has_pos_emb - self.scale = head_dim**-0.5 - - self.to_q = nn.Linear(hidden_dim, head_dim * num_heads) - self.to_k = nn.Linear(hidden_dim, head_dim * num_heads, bias=False) - self.to_v = nn.Linear(hidden_dim, head_dim * num_heads) - if has_pos_emb: - self.to_k_r = nn.Linear(hidden_dim, head_dim * num_heads, bias=False) - self.to_v_r = nn.Linear(hidden_dim, head_dim * num_heads) - self.to_s = nn.Linear(hidden_dim, head_dim * num_heads) - self.to_g = nn.Linear(head_dim * num_heads + hidden_dim, head_dim * num_heads) - self.to_out = nn.Linear(head_dim * num_heads, hidden_dim) - self.attn_drop = nn.Dropout(dropout) - self.ff_mlp = nn.Sequential( - nn.Linear(hidden_dim, hidden_dim * 4), - nn.ReLU(inplace=True), - nn.Dropout(dropout), - nn.Linear(hidden_dim * 4, hidden_dim), - ) - if bipartite: - self.attn_prenorm_x_src = nn.LayerNorm(hidden_dim) - self.attn_prenorm_x_dst = nn.LayerNorm(hidden_dim) - else: - self.attn_prenorm_x_src = nn.LayerNorm(hidden_dim) - self.attn_prenorm_x_dst = self.attn_prenorm_x_src - if has_pos_emb: - self.attn_prenorm_r = nn.LayerNorm(hidden_dim) - self.attn_postnorm = nn.LayerNorm(hidden_dim) - self.ff_prenorm = nn.LayerNorm(hidden_dim) - self.ff_postnorm = nn.LayerNorm(hidden_dim) - self.apply(weight_init) - - def forward( - self, - x: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], - r: Optional[torch.Tensor], - edge_index: torch.Tensor, - ) -> torch.Tensor: - if isinstance(x, torch.Tensor): - x_src = x_dst = self.attn_prenorm_x_src(x) - else: - x_src, x_dst = x - x_src = self.attn_prenorm_x_src(x_src) - x_dst = self.attn_prenorm_x_dst(x_dst) - x = x[1] - if self.has_pos_emb and r is not None: - r = self.attn_prenorm_r(r) - x = x + self.attn_postnorm(self._attn_block(x_src, x_dst, r, edge_index)) - x = x + self.ff_postnorm(self._ff_block(self.ff_prenorm(x))) - return x - - def message( - self, - q_i: torch.Tensor, - k_j: torch.Tensor, - v_j: torch.Tensor, - r: Optional[torch.Tensor], - index: torch.Tensor, - ptr: Optional[torch.Tensor], - ) -> torch.Tensor: - if self.has_pos_emb and r is not None: - k_j = k_j + self.to_k_r(r).view(-1, self.num_heads, self.head_dim) - v_j = v_j + self.to_v_r(r).view(-1, self.num_heads, self.head_dim) - sim = (q_i * k_j).sum(dim=-1) * self.scale - attn = softmax(sim, index, ptr) - self.attention_weight = attn.sum(-1).detach() - attn = self.attn_drop(attn) - return v_j * attn.unsqueeze(-1) - - def update(self, inputs: torch.Tensor, x_dst: torch.Tensor) -> torch.Tensor: - inputs = inputs.view(-1, self.num_heads * self.head_dim) - g = torch.sigmoid(self.to_g(torch.cat([inputs, x_dst], dim=-1))) - return inputs + g * 
(self.to_s(x_dst) - inputs) - - def _attn_block( - self, - x_src: torch.Tensor, - x_dst: torch.Tensor, - r: Optional[torch.Tensor], - edge_index: torch.Tensor, - ) -> torch.Tensor: - q = self.to_q(x_dst).view(-1, self.num_heads, self.head_dim) - k = self.to_k(x_src).view(-1, self.num_heads, self.head_dim) - v = self.to_v(x_src).view(-1, self.num_heads, self.head_dim) - agg = self.propagate(edge_index=edge_index, x_dst=x_dst, q=q, k=k, v=v, r=r) - return self.to_out(agg) - - def _ff_block(self, x: torch.Tensor) -> torch.Tensor: - return self.ff_mlp(x) diff --git a/d123/training/models/sim_agent/smart/layers/fourier_embedding.py b/d123/training/models/sim_agent/smart/layers/fourier_embedding.py deleted file mode 100644 index 7052c42e..00000000 --- a/d123/training/models/sim_agent/smart/layers/fourier_embedding.py +++ /dev/null @@ -1,88 +0,0 @@ -import math -from typing import List, Optional - -import torch -import torch.nn as nn - -from d123.training.models.sim_agent.smart.utils.weight_init import weight_init - - -class FourierEmbedding(nn.Module): - def __init__(self, input_dim: int, hidden_dim: int, num_freq_bands: int) -> None: - super(FourierEmbedding, self).__init__() - self.input_dim = input_dim - self.hidden_dim = hidden_dim - - self.freqs = nn.Embedding(input_dim, num_freq_bands) if input_dim != 0 else None - self.mlps = nn.ModuleList( - [ - nn.Sequential( - nn.Linear(num_freq_bands * 2 + 1, hidden_dim), - nn.LayerNorm(hidden_dim), - nn.ReLU(inplace=True), - nn.Linear(hidden_dim, hidden_dim), - ) - for _ in range(input_dim) - ] - ) - self.to_out = nn.Sequential( - nn.LayerNorm(hidden_dim), - nn.ReLU(inplace=True), - nn.Linear(hidden_dim, hidden_dim), - ) - self.apply(weight_init) - - def forward( - self, - continuous_inputs: Optional[torch.Tensor] = None, - categorical_embs: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - if continuous_inputs is None: - if categorical_embs is not None: - x = torch.stack(categorical_embs).sum(dim=0) - else: - raise ValueError("Both continuous_inputs and categorical_embs are None") - else: - x = continuous_inputs.unsqueeze(-1) * self.freqs.weight * 2 * math.pi - # Warning: if your data are noisy, don't use learnable sinusoidal embedding - x = torch.cat([x.cos(), x.sin(), continuous_inputs.unsqueeze(-1)], dim=-1) - continuous_embs: List[Optional[torch.Tensor]] = [None] * self.input_dim - for i in range(self.input_dim): - continuous_embs[i] = self.mlps[i](x[:, i]) - x = torch.stack(continuous_embs).sum(dim=0) - if categorical_embs is not None: - x = x + torch.stack(categorical_embs).sum(dim=0) - return self.to_out(x) - - -class MLPEmbedding(nn.Module): - def __init__(self, input_dim: int, hidden_dim: int) -> None: - super(MLPEmbedding, self).__init__() - self.input_dim = input_dim - self.hidden_dim = hidden_dim - self.mlp = nn.Sequential( - nn.Linear(input_dim, 128), - nn.LayerNorm(128), - nn.ReLU(inplace=True), - nn.Linear(128, hidden_dim), - nn.LayerNorm(hidden_dim), - nn.ReLU(inplace=True), - nn.Linear(hidden_dim, hidden_dim), - ) - self.apply(weight_init) - - def forward( - self, - continuous_inputs: Optional[torch.Tensor] = None, - categorical_embs: Optional[List[torch.Tensor]] = None, - ) -> torch.Tensor: - if continuous_inputs is None: - if categorical_embs is not None: - x = torch.stack(categorical_embs).sum(dim=0) - else: - raise ValueError("Both continuous_inputs and categorical_embs are None") - else: - x = self.mlp(continuous_inputs) - if categorical_embs is not None: - x = x + torch.stack(categorical_embs).sum(dim=0) - return x 
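A minimal usage sketch of FourierEmbedding (hypothetical sizes chosen for illustration; the real call sites, e.g. r_pt2pt_emb in SMARTMapDecoder further below, pass relative-pose edge features):

    import torch
    from d123.training.models.sim_agent.smart.layers.fourier_embedding import FourierEmbedding

    # Three continuous edge features (e.g. distance, relative angle, relative heading).
    emb = FourierEmbedding(input_dim=3, hidden_dim=64, num_freq_bands=8)
    r = torch.rand(100, 3)          # [n_edge, 3] continuous inputs
    out = emb(continuous_inputs=r)  # -> [100, 64] fused embedding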
diff --git a/d123/training/models/sim_agent/smart/layers/mlp_layer.py b/d123/training/models/sim_agent/smart/layers/mlp_layer.py
deleted file mode 100644
index e28b2d2d..00000000
--- a/d123/training/models/sim_agent/smart/layers/mlp_layer.py
+++ /dev/null
@@ -1,19 +0,0 @@
-import torch
-import torch.nn as nn
-
-from d123.training.models.sim_agent.smart.utils.weight_init import weight_init
-
-
-class MLPLayer(nn.Module):
-    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int) -> None:
-        super(MLPLayer, self).__init__()
-        self.mlp = nn.Sequential(
-            nn.Linear(input_dim, hidden_dim),
-            nn.LayerNorm(hidden_dim),
-            nn.ReLU(inplace=True),
-            nn.Linear(hidden_dim, output_dim),
-        )
-        self.apply(weight_init)
-
-    def forward(self, x: torch.Tensor) -> torch.Tensor:
-        return self.mlp(x)
diff --git a/d123/training/models/sim_agent/smart/metrics/__init__.py b/d123/training/models/sim_agent/smart/metrics/__init__.py
deleted file mode 100644
index 679bbfc1..00000000
--- a/d123/training/models/sim_agent/smart/metrics/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-# from d123.training.models.sim_agent.smart.metrics.wosac_metrics import WOSACMetrics
diff --git a/d123/training/models/sim_agent/smart/metrics/cross_entropy.py b/d123/training/models/sim_agent/smart/metrics/cross_entropy.py
deleted file mode 100644
index d4c6d55b..00000000
--- a/d123/training/models/sim_agent/smart/metrics/cross_entropy.py
+++ /dev/null
@@ -1,104 +0,0 @@
-from typing import Optional
-
-import torch
-from torch import Tensor, tensor
-from torch.nn.functional import cross_entropy
-from torchmetrics.metric import Metric
-
-from .utils import get_euclidean_targets, get_prob_targets
-
-
-class CrossEntropy(Metric):
-
-    is_differentiable = True
-    higher_is_better = False
-    full_state_update = False
-
-    def __init__(
-        self,
-        use_gt_raw: bool,
-        gt_thresh_scale_length: float,  # {"veh": 4.8, "cyc": 2.0, "ped": 1.0}
-        label_smoothing: float,
-        rollout_as_gt: bool,
-    ) -> None:
-        super().__init__()
-        self.use_gt_raw = use_gt_raw
-        self.gt_thresh_scale_length = gt_thresh_scale_length
-        self.label_smoothing = label_smoothing
-        self.rollout_as_gt = rollout_as_gt
-        self.add_state("loss_sum", default=tensor(0.0), dist_reduce_fx="sum")
-        self.add_state("count", default=tensor(0.0), dist_reduce_fx="sum")
-
-    def update(
-        self,
-        # ! action that goes from [(10->15), ..., (85->90)]
-        next_token_logits: Tensor,  # [n_agent, 16, n_token]
-        next_token_valid: Tensor,  # [n_agent, 16]
-        # ! for step {5, 10, ..., 90} and act [(0->5), (5->10), ..., (85->90)]
-        pred_pos: Tensor,  # [n_agent, 18, 2]
-        pred_head: Tensor,  # [n_agent, 18]
-        pred_valid: Tensor,  # [n_agent, 18]
-        # ! for step {5, 10, ..., 90}
-        gt_pos_raw: Tensor,  # [n_agent, 18, 2]
-        gt_head_raw: Tensor,  # [n_agent, 18]
-        gt_valid_raw: Tensor,  # [n_agent, 18]
-        # or use the tokenized gt
-        gt_pos: Tensor,  # [n_agent, 18, 2]
-        gt_head: Tensor,  # [n_agent, 18]
-        gt_valid: Tensor,  # [n_agent, 18]
-        # ! for tokenization
-        token_agent_shape: Tensor,  # [n_agent, 2]
-        token_traj: Tensor,  # [n_agent, n_token, 4, 2]
-        # ! for filtering interesting agents for training
-        train_mask: Optional[Tensor] = None,  # [n_agent]
-        # ! for rollout_as_gt
-        next_token_action: Optional[Tensor] = None,  # [n_agent, 16, 3]
-        **kwargs,
-    ) -> None:
-        # ! use raw or tokenized GT
-        if self.use_gt_raw:
-            gt_pos = gt_pos_raw
-            gt_head = gt_head_raw
-            gt_valid = gt_valid_raw
-
-        # ! GT is valid if it's close to the rollout.
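-        # Illustrative numbers (not from this diff): with the suggested vehicle scale of 4.8
-        # and a token_agent_shape[:, 1] entry of 2.0 m, the threshold is 4.8 * 2.0 = 9.6 m,
-        # so GT steps farther than 9.6 m from the rollout are marked invalid below.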
- if self.gt_thresh_scale_length > 0: - dist = torch.norm(pred_pos - gt_pos, dim=-1) # [n_agent, n_step] - _thresh = token_agent_shape[:, 1] * self.gt_thresh_scale_length # [n_agent] - gt_valid = gt_valid & (dist < _thresh.unsqueeze(1)) # [n_agent, n_step] - - # ! get prob_targets - euclidean_target, euclidean_target_valid = get_euclidean_targets( - pred_pos=pred_pos, - pred_head=pred_head, - pred_valid=pred_valid, - gt_pos=gt_pos, - gt_head=gt_head, - gt_valid=gt_valid, - ) - if self.rollout_as_gt and (next_token_action is not None): - euclidean_target = next_token_action - - prob_target = get_prob_targets( - target=euclidean_target, # [n_agent, n_step, 3] x,y,yaw in local - token_agent_shape=token_agent_shape, # [n_agent, 2] - token_traj=token_traj, # [n_agent, n_token, 4, 2] - ) # [n_agent, n_step, n_token] prob, last dim sum up to 1 - - loss = cross_entropy( - next_token_logits.transpose(1, 2), # [n_agent, n_token, n_step], logits - prob_target.transpose(1, 2), # [n_agent, n_token, n_step], prob - reduction="none", - label_smoothing=self.label_smoothing, - ) # [n_agent, n_step=16] - - # ! weighting final loss [n_agent, n_step] - loss_weighting_mask = next_token_valid & euclidean_target_valid - if self.training: - loss_weighting_mask &= train_mask.unsqueeze(1) # [n_agent, n_step] - - self.loss_sum += (loss * loss_weighting_mask).sum() - self.count += (loss_weighting_mask > 0).sum() - - def compute(self) -> Tensor: - return self.loss_sum / self.count diff --git a/d123/training/models/sim_agent/smart/metrics/ego_nll.py b/d123/training/models/sim_agent/smart/metrics/ego_nll.py deleted file mode 100644 index e4c2ad29..00000000 --- a/d123/training/models/sim_agent/smart/metrics/ego_nll.py +++ /dev/null @@ -1,120 +0,0 @@ -from typing import Optional - -import torch -from torch import Tensor, tensor -from torch.distributions import Categorical, Independent, MixtureSameFamily, Normal -from torchmetrics.metric import Metric - -from .utils import get_euclidean_targets - - -class EgoNLL(Metric): - - is_differentiable = True - higher_is_better = False - full_state_update = False - - def __init__( - self, - use_gt_raw: bool, - gt_thresh_scale_length: float, # {"veh": 4.8, "cyc": 2.0, "ped": 1.0} - hard_assignment: bool, - rollout_as_gt: bool, - ) -> None: - super().__init__() - self.use_gt_raw = use_gt_raw - self.gt_thresh_scale_length = gt_thresh_scale_length - self.hard_assignment = hard_assignment - self.rollout_as_gt = rollout_as_gt - self.add_state("loss_sum", default=tensor(0.0), dist_reduce_fx="sum") - self.add_state("count", default=tensor(0.0), dist_reduce_fx="sum") - - def update( - self, - # ! action that goes from [(10->15), ..., (85->90)] - ego_next_logits: Tensor, # [n_batch, 16, n_k_ego_gmm] - ego_next_poses: Tensor, # [n_batch, 16, n_k_ego_gmm, 3] - ego_next_valid: Tensor, # [n_batch, 16] - ego_next_cov: Tensor, # [2], one for pos, one for heading. - # ! for step {5, 10, ..., 90} and act [(0->5), (5->10), ..., (85->90)] - pred_pos: Tensor, # [n_batch, 18, 2] - pred_head: Tensor, # [n_batch, 18] - pred_valid: Tensor, # [n_batch, 18] - # ! for step {5, 10, ..., 90} - gt_pos_raw: Tensor, # [n_batch, 18, 2] - gt_head_raw: Tensor, # [n_batch, 18] - gt_valid_raw: Tensor, # [n_batch, 18] - # or use the tokenized gt - gt_pos: Tensor, # [n_batch, 18, 2] - gt_head: Tensor, # [n_batch, 18] - gt_valid: Tensor, # [n_batch, 18] - token_agent_shape: Tensor, # [n_agent, 2] - # ! for rollout_as_gt - next_token_action: Optional[Tensor] = None, # [n_batch, 16, 3] - **kwargs, - ) -> None: - # ! 
use raw or tokenized GT - if self.use_gt_raw: - gt_pos = gt_pos_raw - gt_head = gt_head_raw - gt_valid = gt_valid_raw - - # ! GT is valid if it's close to the rollout. - if self.gt_thresh_scale_length > 0: - dist = torch.norm(pred_pos - gt_pos, dim=-1) # [n_agent, n_step] - _thresh = token_agent_shape[:, 1] * self.gt_thresh_scale_length # [n_agent] - gt_valid = gt_valid & (dist < _thresh.unsqueeze(1)) # [n_agent, n_step] - - # ! get prob_targets - target, target_valid = get_euclidean_targets( - pred_pos=pred_pos, - pred_head=pred_head, - pred_valid=pred_valid, - gt_pos=gt_pos, - gt_head=gt_head, - gt_valid=gt_valid, - ) - if self.rollout_as_gt and (next_token_action is not None): - target = next_token_action - - # ! transform yaw angle to unit vector - ego_next_poses = torch.cat( - [ - ego_next_poses[..., :2], - ego_next_poses[..., [-1]].cos(), - ego_next_poses[..., [-1]].sin(), - ], - dim=-1, - ) - ego_next_poses = ego_next_poses.flatten(0, 1) # [n_batch*n_step, K, 4] - cov = ego_next_cov.repeat_interleave(2)[None, None, :].expand(*ego_next_poses.shape) # [n_batch*n_step, K, 4] - - n_batch, n_step = target_valid.shape - target = torch.cat( - [target[..., :2], target[..., [-1]].cos(), target[..., [-1]].sin()], dim=-1 - ) # [n_batch, n_step, 4] - target = target.flatten(0, 1) # [n_batch*n_step, 4] - - ego_next_logits = ego_next_logits.flatten(0, 1) # [n_batch*n_step, K] - if self.hard_assignment: - idx_hard_assign = (ego_next_poses - target.unsqueeze(1))[..., :2].norm(dim=-1).argmin(-1) - n_batch_step = idx_hard_assign.shape[0] - ego_next_poses = ego_next_poses[torch.arange(n_batch_step), idx_hard_assign].unsqueeze(1) - cov = cov[torch.arange(n_batch_step), idx_hard_assign].unsqueeze(1) - ego_next_logits = ego_next_logits[torch.arange(n_batch_step), idx_hard_assign].unsqueeze(1) - - gmm = MixtureSameFamily( - Categorical(logits=ego_next_logits), - Independent(Normal(ego_next_poses, cov), 1), - ) - - loss = -gmm.log_prob(target) # [n_batch*n_step] - loss = loss.view(n_batch, n_step) # [n_batch, n_step] - - loss_weighting_mask = target_valid & ego_next_valid # [n_batch, n_step] - - self.loss_sum += (loss * loss_weighting_mask).sum() - self.count += (loss_weighting_mask > 0).sum() - - def compute(self) -> Tensor: - return self.loss_sum / self.count diff --git a/d123/training/models/sim_agent/smart/metrics/gmm_ade.py b/d123/training/models/sim_agent/smart/metrics/gmm_ade.py deleted file mode 100644 index e31fe477..00000000 --- a/d123/training/models/sim_agent/smart/metrics/gmm_ade.py +++ /dev/null @@ -1,33 +0,0 @@ -import torch -from torch import Tensor, tensor -from torchmetrics import Metric - - -class GMMADE(Metric): - def __init__(self) -> None: - super(GMMADE, self).__init__() - self.add_state("sum", default=tensor(0.0), dist_reduce_fx="sum") - self.add_state("count", default=tensor(0.0), dist_reduce_fx="sum") - - def update( - self, - logits: Tensor, # [n_agent, n_step, n_k] - pred: Tensor, # [n_agent, n_step, n_k, 2] - target: Tensor, # [n_agent, n_step, 2] - valid: Tensor, # [n_agent, n_step] - ) -> None: - n_agent, n_step, _ = logits.shape - idx_max = logits.argmax(-1) # [n_agent, n_step] - pred_max = pred[ - torch.arange(n_agent).unsqueeze(1), - torch.arange(n_step).unsqueeze(0), - idx_max, - ] # [n_agent, n_step, 2] - - dist = torch.norm(pred_max - target, p=2, dim=-1) # [n_agent, n_step] - dist = ((dist * valid).sum(-1)) / (valid.sum(-1) + 1e-6) # [n_agent] - self.sum += dist.sum() - self.count += valid.any(-1).sum() - - def compute(self) -> torch.Tensor: - return self.sum / 
self.count
diff --git a/d123/training/models/sim_agent/smart/metrics/min_ade.py b/d123/training/models/sim_agent/smart/metrics/min_ade.py
deleted file mode 100644
index 85db59fb..00000000
--- a/d123/training/models/sim_agent/smart/metrics/min_ade.py
+++ /dev/null
@@ -1,28 +0,0 @@
-import torch
-from torch import Tensor, tensor
-from torchmetrics import Metric
-
-
-class minADE(Metric):
-    def __init__(self) -> None:
-        super(minADE, self).__init__()
-        self.add_state("sum", default=tensor(0.0), dist_reduce_fx="sum")
-        self.add_state("count", default=tensor(0.0), dist_reduce_fx="sum")
-
-    def update(
-        self,
-        pred: Tensor,  # [n_agent, n_rollout, n_step, 2]
-        target: Tensor,  # [n_agent, n_step, 2]
-        target_valid: Tensor,  # [n_agent, n_step]
-    ) -> None:
-
-        # [n_agent, n_rollout, n_step]
-        dist = torch.norm(pred - target.unsqueeze(1), p=2, dim=-1)
-        dist = (dist * target_valid.unsqueeze(1)).sum(-1).min(-1).values  # [n_agent]
-
-        dist = dist / (target_valid.sum(-1) + 1e-6)  # [n_agent]
-        self.sum += dist.sum()
-        self.count += target_valid.any(-1).sum()
-
-    def compute(self) -> torch.Tensor:
-        return self.sum / self.count
diff --git a/d123/training/models/sim_agent/smart/metrics/next_token_cls.py b/d123/training/models/sim_agent/smart/metrics/next_token_cls.py
deleted file mode 100644
index ddca37c5..00000000
--- a/d123/training/models/sim_agent/smart/metrics/next_token_cls.py
+++ /dev/null
@@ -1,27 +0,0 @@
-import torch
-from torchmetrics import Metric
-
-
-class TokenCls(Metric):
-    def __init__(self, max_guesses: int = 6, **kwargs) -> None:
-        super(TokenCls, self).__init__(**kwargs)
-        self.add_state("sum", default=torch.tensor(0.0), dist_reduce_fx="sum")
-        self.add_state("count", default=torch.tensor(0.0), dist_reduce_fx="sum")
-        self.max_guesses = max_guesses
-
-    def update(
-        self,
-        pred: torch.Tensor,  # next_token_logits: [n_agent, 16, n_token]
-        pred_valid: torch.Tensor,  # next_token_valid: [n_agent, 16]
-        target: torch.Tensor,  # next_token_idx_gt: [n_agent, 16]
-        target_valid: torch.Tensor,  # [n_agent, 16]
-    ) -> None:
-        target = target[..., None]
-        acc = (torch.topk(pred, k=self.max_guesses, dim=-1)[1] == target).any(dim=-1)
-        valid_mask = pred_valid & target_valid
-        acc = acc * valid_mask
-        self.sum += acc.sum()
-        self.count += valid_mask.sum()
-
-    def compute(self) -> torch.Tensor:
-        return self.sum / self.count
diff --git a/d123/training/models/sim_agent/smart/metrics/utils.py b/d123/training/models/sim_agent/smart/metrics/utils.py
deleted file mode 100644
index 39477b92..00000000
--- a/d123/training/models/sim_agent/smart/metrics/utils.py
+++ /dev/null
@@ -1,70 +0,0 @@
-from typing import Tuple
-
-import torch
-from torch import Tensor
-from torch.nn.functional import one_hot
-
-from d123.training.models.sim_agent.smart.utils.geometry import wrap_angle
-from d123.training.models.sim_agent.smart.utils.rollout import cal_polygon_contour, transform_to_local
-
-
-@torch.no_grad()
-def get_prob_targets(
-    target: Tensor,  # [n_agent, n_step, 3] x,y,yaw in local coord
-    token_agent_shape: Tensor,  # [n_agent, 2]
-    token_traj: Tensor,  # [n_agent, n_token, 4, 2]
-) -> Tensor:  # [n_agent, n_step, n_token] prob, last dim sum up to 1
-    # !
tokenize to index, then compute prob
-    contour = cal_polygon_contour(
-        target[..., :2],  # [n_agent, n_step, 2]
-        target[..., 2],  # [n_agent, n_step]
-        token_agent_shape[:, None, :],  # [n_agent, 1, 2]
-    )  # [n_agent, n_step, 4, 2] in local coord
-
-    # [n_agent, n_step, 1, 4, 2] - [n_agent, 1, n_token, 4, 2]
-    target_token_index = (
-        torch.norm(contour.unsqueeze(2) - token_traj[:, None, :, :, :], dim=-1).sum(-1).argmin(-1)
-    )  # [n_agent, n_step]
-
-    # [n_agent, n_step, n_token] bool
-    prob_target = one_hot(target_token_index, num_classes=token_traj.shape[1])
-    prob_target = prob_target.to(target.dtype)
-    return prob_target
-
-
-@torch.no_grad()
-def get_euclidean_targets(
-    pred_pos: Tensor,  # [n_agent, 18, 2]
-    pred_head: Tensor,  # [n_agent, 18]
-    pred_valid: Tensor,  # [n_agent, 18]
-    gt_pos: Tensor,  # [n_agent, 18, 2]
-    gt_head: Tensor,  # [n_agent, 18]
-    gt_valid: Tensor,  # [n_agent, 18]
-) -> Tuple[Tensor, Tensor]:
-    """
-    Return: action that goes from [(10->15), ..., (85->90)]
-    target: [n_agent, 16, 3], x,y,yaw
-    target_valid: [n_agent, 16]
-    """
-    gt_last_pos = gt_pos.roll(shifts=-1, dims=1).flatten(0, 1)
-    gt_last_head = gt_head.roll(shifts=-1, dims=1).flatten(0, 1)
-    gt_last_valid = gt_valid.roll(shifts=-1, dims=1)  # [n_agent, 18]
-    gt_last_valid[:, -1:] = False  # [n_agent, 18]
-
-    target_pos, target_head = transform_to_local(
-        pos_global=gt_last_pos.unsqueeze(1),  # [n_agent*18, 1, 2]
-        head_global=gt_last_head.unsqueeze(1),  # [n_agent*18, 1]
-        pos_now=pred_pos.flatten(0, 1),  # [n_agent*18, 2]
-        head_now=pred_head.flatten(0, 1),  # [n_agent*18]
-    )
-    target_valid = pred_valid & gt_last_valid  # [n_agent, 18]
-
-    target_pos = target_pos.squeeze(1).view(gt_pos.shape)  # [n_agent, 18, 2]
-    target_head = wrap_angle(target_head)  # [n_agent, 18]
-    target_head = target_head.squeeze(1).view(gt_head.shape)
-    target = torch.cat((target_pos, target_head.unsqueeze(-1)), dim=-1)
-
-    # truncate [(5->10), ..., (90->5)] to [(10->15), ..., (85->90)]
-    target = target[:, 1:-1]  # [n_agent, 16, 3], x,y,yaw
-    target_valid = target_valid[:, 1:-1]  # [n_agent, 16]
-    return target, target_valid
diff --git a/d123/training/models/sim_agent/smart/modules/__init__.py b/d123/training/models/sim_agent/smart/modules/__init__.py
deleted file mode 100644
index e69de29b..00000000
diff --git a/d123/training/models/sim_agent/smart/modules/smart_decoder.py b/d123/training/models/sim_agent/smart/modules/smart_decoder.py
deleted file mode 100644
index da680810..00000000
--- a/d123/training/models/sim_agent/smart/modules/smart_decoder.py
+++ /dev/null
@@ -1,745 +0,0 @@
-from typing import Dict
-
-import torch
-import torch.nn as nn
-from torch import Tensor
-from torch_cluster import radius, radius_graph
-from torch_geometric.utils import dense_to_sparse, subgraph
-
-from d123.training.models.sim_agent.smart.layers.attention_layer import AttentionLayer
-from d123.training.models.sim_agent.smart.layers.fourier_embedding import FourierEmbedding, MLPEmbedding
-from d123.training.models.sim_agent.smart.layers.mlp_layer import MLPLayer
-from d123.training.models.sim_agent.smart.smart_config import SMARTConfig, SMARTRolloutSampling
-from d123.training.models.sim_agent.smart.utils.geometry import angle_between_2d_vectors, wrap_angle
-from d123.training.models.sim_agent.smart.utils.rollout import sample_next_token_traj, transform_to_global
-from d123.training.models.sim_agent.smart.utils.weight_init import weight_init
-
-
-class SMARTDecoder(nn.Module):
-    def __init__(self, model_config: SMARTConfig, n_token_agent: int) -> None:
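-        # Composition (mirrors the calls in forward/inference below): the map decoder encodes
-        # the tokenized map once, and the agent decoder attends to those map features for
-        # next-token prediction.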
- super(SMARTDecoder, self).__init__() - self.map_encoder: SMARTMapDecoder = SMARTMapDecoder(model_config) - self.agent_encoder: SMARTAgentDecoder = SMARTAgentDecoder(model_config, n_token_agent=n_token_agent) - - def forward(self, tokenized_map: Dict[str, Tensor], tokenized_agent: Dict[str, Tensor]) -> Dict[str, Tensor]: - map_feature = self.map_encoder(tokenized_map) - pred_dict = self.agent_encoder(tokenized_agent, map_feature) - return pred_dict - - def inference( - self, - tokenized_map: Dict[str, Tensor], - tokenized_agent: Dict[str, Tensor], - sampling_scheme: SMARTRolloutSampling, - ) -> Dict[str, Tensor]: - map_feature = self.map_encoder(tokenized_map) - pred_dict = self.agent_encoder.inference(tokenized_agent, map_feature, sampling_scheme) - return pred_dict - - -class SMARTMapDecoder(nn.Module): - def __init__(self, model_config: SMARTConfig) -> None: - super(SMARTMapDecoder, self).__init__() - - self.model_config = model_config - self.pl2pl_radius = model_config.pl2pl_radius - self.num_layers = model_config.num_map_layers - - self.type_pt_emb = nn.Embedding(10, model_config.hidden_dim) - self.polygon_type_emb = nn.Embedding(4, model_config.hidden_dim) - self.light_pl_emb = nn.Embedding(5, model_config.hidden_dim) - - input_dim_r_pt2pt = 3 - self.r_pt2pt_emb = FourierEmbedding( - input_dim=input_dim_r_pt2pt, - hidden_dim=model_config.hidden_dim, - num_freq_bands=model_config.num_freq_bands, - ) - self.pt2pt_layers = nn.ModuleList( - [ - AttentionLayer( - hidden_dim=model_config.hidden_dim, - num_heads=model_config.num_heads, - head_dim=model_config.head_dim, - dropout=model_config.dropout, - bipartite=False, - has_pos_emb=True, - ) - for _ in range(model_config.num_map_layers) - ] - ) - - # map_token_traj_src: [n_token, 11, 2].flatten(0,1) - self.token_emb = MLPEmbedding(input_dim=22, hidden_dim=model_config.hidden_dim) - self.apply(weight_init) - - def forward(self, tokenized_map: Dict) -> Dict[str, torch.Tensor]: - pos_pt = tokenized_map["position"] - orient_pt = tokenized_map["orientation"] - orient_vector_pt = torch.stack([orient_pt.cos(), orient_pt.sin()], dim=-1) - pt_token_emb_src = self.token_emb(tokenized_map["token_traj_src"]) - x_pt = pt_token_emb_src[tokenized_map["token_idx"]] - - x_pt_categorical_embs = [ - self.type_pt_emb(tokenized_map["type"]), - self.polygon_type_emb(tokenized_map["pl_type"]), - self.light_pl_emb(tokenized_map["light_type"]), - ] - x_pt = x_pt + torch.stack(x_pt_categorical_embs).sum(dim=0) - edge_index_pt2pt = radius_graph( - x=pos_pt, - r=self.pl2pl_radius, - batch=tokenized_map["batch"], - loop=False, - max_num_neighbors=100, - ) - rel_pos_pt2pt = pos_pt[edge_index_pt2pt[0]] - pos_pt[edge_index_pt2pt[1]] - rel_orient_pt2pt = wrap_angle(orient_pt[edge_index_pt2pt[0]] - orient_pt[edge_index_pt2pt[1]]) - r_pt2pt = torch.stack( - [ - torch.norm(rel_pos_pt2pt[:, :2], p=2, dim=-1), - angle_between_2d_vectors( - ctr_vector=orient_vector_pt[edge_index_pt2pt[1]], - nbr_vector=rel_pos_pt2pt[:, :2], - ), - rel_orient_pt2pt, - ], - dim=-1, - ) - r_pt2pt = self.r_pt2pt_emb(continuous_inputs=r_pt2pt, categorical_embs=None) - for i in range(self.num_layers): - x_pt = self.pt2pt_layers[i](x_pt, r_pt2pt, edge_index_pt2pt) - - return { - "pt_token": x_pt, - "position": pos_pt, - "orientation": orient_pt, - "batch": tokenized_map["batch"], - } - - -class SMARTAgentDecoder(nn.Module): - def __init__(self, model_config: SMARTConfig, n_token_agent: int) -> None: - super(SMARTAgentDecoder, self).__init__() - self.model_config = model_config - self.hidden_dim = 
model_config.hidden_dim - self.num_historical_steps = model_config.num_historical_steps - self.num_future_steps = model_config.num_future_steps - self.time_span = ( - model_config.time_span if model_config.time_span is not None else model_config.num_historical_steps - ) - self.pl2a_radius = model_config.pl2a_radius - self.a2a_radius = model_config.a2a_radius - self.num_layers = model_config.num_agent_layers - self.shift = 5 - self.hist_drop_prob = model_config.hist_drop_prob - - input_dim_x_a = 2 - input_dim_r_t = 4 - input_dim_r_pt2a = 3 - input_dim_r_a2a = 3 - input_dim_token = 8 - - self.type_a_emb = nn.Embedding(3, model_config.hidden_dim) - self.shape_emb = MLPLayer(3, model_config.hidden_dim, model_config.hidden_dim) - - self.x_a_emb = FourierEmbedding( - input_dim=input_dim_x_a, - hidden_dim=model_config.hidden_dim, - num_freq_bands=model_config.num_freq_bands, - ) - self.r_t_emb = FourierEmbedding( - input_dim=input_dim_r_t, - hidden_dim=model_config.hidden_dim, - num_freq_bands=model_config.num_freq_bands, - ) - self.r_pt2a_emb = FourierEmbedding( - input_dim=input_dim_r_pt2a, - hidden_dim=model_config.hidden_dim, - num_freq_bands=model_config.num_freq_bands, - ) - self.r_a2a_emb = FourierEmbedding( - input_dim=input_dim_r_a2a, - hidden_dim=model_config.hidden_dim, - num_freq_bands=model_config.num_freq_bands, - ) - self.token_emb_veh = MLPEmbedding(input_dim=input_dim_token, hidden_dim=model_config.hidden_dim) - self.token_emb_ped = MLPEmbedding(input_dim=input_dim_token, hidden_dim=model_config.hidden_dim) - self.token_emb_cyc = MLPEmbedding(input_dim=input_dim_token, hidden_dim=model_config.hidden_dim) - self.fusion_emb = MLPEmbedding(input_dim=self.hidden_dim * 2, hidden_dim=self.hidden_dim) - - self.t_attn_layers = nn.ModuleList( - [ - AttentionLayer( - hidden_dim=model_config.hidden_dim, - num_heads=model_config.num_heads, - head_dim=model_config.head_dim, - dropout=model_config.dropout, - bipartite=False, - has_pos_emb=True, - ) - for _ in range(model_config.num_agent_layers) - ] - ) - self.pt2a_attn_layers = nn.ModuleList( - [ - AttentionLayer( - hidden_dim=model_config.hidden_dim, - num_heads=model_config.num_heads, - head_dim=model_config.head_dim, - dropout=model_config.dropout, - bipartite=True, - has_pos_emb=True, - ) - for _ in range(model_config.num_agent_layers) - ] - ) - self.a2a_attn_layers = nn.ModuleList( - [ - AttentionLayer( - hidden_dim=model_config.hidden_dim, - num_heads=model_config.num_heads, - head_dim=model_config.head_dim, - dropout=model_config.dropout, - bipartite=False, - has_pos_emb=True, - ) - for _ in range(model_config.num_agent_layers) - ] - ) - self.token_predict_head = MLPLayer( - input_dim=model_config.hidden_dim, hidden_dim=model_config.hidden_dim, output_dim=n_token_agent - ) - self.apply(weight_init) - - def agent_token_embedding( - self, - agent_token_index, # [n_agent, n_step] - trajectory_token_veh, # [n_token, 8] - trajectory_token_ped, # [n_token, 8] - trajectory_token_cyc, # [n_token, 8] - pos_a, # [n_agent, n_step, 2] - head_vector_a, # [n_agent, n_step, 2] - agent_type, # [n_agent] - agent_shape, # [n_agent, 3] - inference=False, - ): - n_agent, n_step, traj_dim = pos_a.shape - _device = pos_a.device - - veh_mask = agent_type == 0 - ped_mask = agent_type == 1 - cyc_mask = agent_type == 2 - # [n_token, hidden_dim] - agent_token_emb_veh = self.token_emb_veh(trajectory_token_veh) - agent_token_emb_ped = self.token_emb_ped(trajectory_token_ped) - agent_token_emb_cyc = self.token_emb_cyc(trajectory_token_cyc) - agent_token_emb = 
torch.zeros((n_agent, n_step, self.hidden_dim), device=_device, dtype=pos_a.dtype) - agent_token_emb[veh_mask] = agent_token_emb_veh[agent_token_index[veh_mask]] - agent_token_emb[ped_mask] = agent_token_emb_ped[agent_token_index[ped_mask]] - agent_token_emb[cyc_mask] = agent_token_emb_cyc[agent_token_index[cyc_mask]] - - motion_vector_a = torch.cat( - [ - pos_a.new_zeros(agent_token_index.shape[0], 1, traj_dim), - pos_a[:, 1:] - pos_a[:, :-1], - ], - dim=1, - ) # [n_agent, n_step, 2] - feature_a = torch.stack( - [ - torch.norm(motion_vector_a[:, :, :2], p=2, dim=-1), - angle_between_2d_vectors(ctr_vector=head_vector_a, nbr_vector=motion_vector_a[:, :, :2]), - ], - dim=-1, - ) # [n_agent, n_step, 2] - categorical_embs = [ - self.type_a_emb(agent_type.long()), - self.shape_emb(agent_shape), - ] # List of len=2, shape [n_agent, hidden_dim] - - x_a = self.x_a_emb( - continuous_inputs=feature_a.view(-1, feature_a.size(-1)), - categorical_embs=[v.repeat_interleave(repeats=n_step, dim=0) for v in categorical_embs], - ) # [n_agent*n_step, hidden_dim] - x_a = x_a.view(-1, n_step, self.hidden_dim) # [n_agent, n_step, hidden_dim] - - feat_a = torch.cat((agent_token_emb, x_a), dim=-1) - feat_a = self.fusion_emb(feat_a) - - if inference: - return ( - feat_a, # [n_agent, n_step, hidden_dim] - agent_token_emb, # [n_agent, n_step, hidden_dim] - agent_token_emb_veh, # [n_agent, hidden_dim] - agent_token_emb_ped, # [n_agent, hidden_dim] - agent_token_emb_cyc, # [n_agent, hidden_dim] - veh_mask, # [n_agent] - ped_mask, # [n_agent] - cyc_mask, # [n_agent] - categorical_embs, # List of len=2, shape [n_agent, hidden_dim] - ) - else: - return feat_a # [n_agent, n_step, hidden_dim] - - def build_temporal_edge( - self, - pos_a, # [n_agent, n_step, 2] - head_a, # [n_agent, n_step] - head_vector_a, # [n_agent, n_step, 2], - mask, # [n_agent, n_step] - inference_mask=None, # [n_agent, n_step] - ): - pos_t = pos_a.flatten(0, 1) - head_t = head_a.flatten(0, 1) - head_vector_t = head_vector_a.flatten(0, 1) - - if self.hist_drop_prob > 0 and self.training: - _mask_keep = torch.bernoulli(torch.ones_like(mask) * (1 - self.hist_drop_prob)).bool() - mask = mask & _mask_keep - - if inference_mask is not None: - mask_t = mask.unsqueeze(2) & inference_mask.unsqueeze(1) - else: - mask_t = mask.unsqueeze(2) & mask.unsqueeze(1) - - edge_index_t = dense_to_sparse(mask_t)[0] - edge_index_t = edge_index_t[:, edge_index_t[1] > edge_index_t[0]] - edge_index_t = edge_index_t[:, edge_index_t[1] - edge_index_t[0] <= self.time_span / self.shift] - rel_pos_t = pos_t[edge_index_t[0]] - pos_t[edge_index_t[1]] - rel_pos_t = rel_pos_t[:, :2] - rel_head_t = wrap_angle(head_t[edge_index_t[0]] - head_t[edge_index_t[1]]) - r_t = torch.stack( - [ - torch.norm(rel_pos_t, p=2, dim=-1), - angle_between_2d_vectors(ctr_vector=head_vector_t[edge_index_t[1]], nbr_vector=rel_pos_t), - rel_head_t, - edge_index_t[0] - edge_index_t[1], - ], - dim=-1, - ) - r_t = self.r_t_emb(continuous_inputs=r_t, categorical_embs=None) - return edge_index_t, r_t - - def build_interaction_edge( - self, - pos_a, # [n_agent, n_step, 2] - head_a, # [n_agent, n_step] - head_vector_a, # [n_agent, n_step, 2] - batch_s, # [n_agent*n_step] - mask, # [n_agent, n_step] - ): - mask = mask.transpose(0, 1).reshape(-1) - pos_s = pos_a.transpose(0, 1).flatten(0, 1) - head_s = head_a.transpose(0, 1).reshape(-1) - head_vector_s = head_vector_a.transpose(0, 1).reshape(-1, 2) - edge_index_a2a = radius_graph( - x=pos_s[:, :2], - r=self.a2a_radius, - batch=batch_s, - loop=False, - 
max_num_neighbors=300, - ) - edge_index_a2a = subgraph(subset=mask, edge_index=edge_index_a2a)[0] - rel_pos_a2a = pos_s[edge_index_a2a[0]] - pos_s[edge_index_a2a[1]] - rel_head_a2a = wrap_angle(head_s[edge_index_a2a[0]] - head_s[edge_index_a2a[1]]) - r_a2a = torch.stack( - [ - torch.norm(rel_pos_a2a[:, :2], p=2, dim=-1), - angle_between_2d_vectors( - ctr_vector=head_vector_s[edge_index_a2a[1]], - nbr_vector=rel_pos_a2a[:, :2], - ), - rel_head_a2a, - ], - dim=-1, - ) - r_a2a = self.r_a2a_emb(continuous_inputs=r_a2a, categorical_embs=None) - return edge_index_a2a, r_a2a - - def build_map2agent_edge( - self, - pos_pl, # [n_pl, 2] - orient_pl, # [n_pl] - pos_a, # [n_agent, n_step, 2] - head_a, # [n_agent, n_step] - head_vector_a, # [n_agent, n_step, 2] - mask, # [n_agent, n_step] - batch_s, # [n_agent*n_step] - batch_pl, # [n_pl*n_step] - ): - n_step = pos_a.shape[1] - mask_pl2a = mask.transpose(0, 1).reshape(-1) - pos_s = pos_a.transpose(0, 1).flatten(0, 1) - head_s = head_a.transpose(0, 1).reshape(-1) - head_vector_s = head_vector_a.transpose(0, 1).reshape(-1, 2) - pos_pl = pos_pl.repeat(n_step, 1) - orient_pl = orient_pl.repeat(n_step) - edge_index_pl2a = radius( - x=pos_s[:, :2], - y=pos_pl[:, :2], - r=self.pl2a_radius, - batch_x=batch_s, - batch_y=batch_pl, - max_num_neighbors=300, - ) - edge_index_pl2a = edge_index_pl2a[:, mask_pl2a[edge_index_pl2a[1]]] - rel_pos_pl2a = pos_pl[edge_index_pl2a[0]] - pos_s[edge_index_pl2a[1]] - rel_orient_pl2a = wrap_angle(orient_pl[edge_index_pl2a[0]] - head_s[edge_index_pl2a[1]]) - r_pl2a = torch.stack( - [ - torch.norm(rel_pos_pl2a[:, :2], p=2, dim=-1), - angle_between_2d_vectors( - ctr_vector=head_vector_s[edge_index_pl2a[1]], - nbr_vector=rel_pos_pl2a[:, :2], - ), - rel_orient_pl2a, - ], - dim=-1, - ) - r_pl2a = self.r_pt2a_emb(continuous_inputs=r_pl2a, categorical_embs=None) - return edge_index_pl2a, r_pl2a - - def forward( - self, - tokenized_agent: Dict[str, torch.Tensor], - map_feature: Dict[str, torch.Tensor], - ) -> Dict[str, torch.Tensor]: - mask = tokenized_agent["valid_mask"] - pos_a = tokenized_agent["sampled_pos"] - head_a = tokenized_agent["sampled_heading"] - head_vector_a = torch.stack([head_a.cos(), head_a.sin()], dim=-1) - n_agent, n_step = head_a.shape - - # ! get agent token embeddings - feat_a = self.agent_token_embedding( - agent_token_index=tokenized_agent["sampled_idx"], # [n_ag, n_step] - trajectory_token_veh=tokenized_agent["trajectory_token_veh"], - trajectory_token_ped=tokenized_agent["trajectory_token_ped"], - trajectory_token_cyc=tokenized_agent["trajectory_token_cyc"], - pos_a=pos_a, # [n_agent, n_step, 2] - head_vector_a=head_vector_a, # [n_agent, n_step, 2] - agent_type=tokenized_agent["type"], # [n_agent] - agent_shape=tokenized_agent["shape"], # [n_agent, 3] - ) # feat_a: [n_agent, n_step, hidden_dim] - - # ! 
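# NOTE: every edge type in this decoder is featurized with the same recipe: for an
# edge (src, dst), stack [distance, angle of the offset in dst's frame, relative
# heading] and pass it through a FourierEmbedding (temporal edges add a fourth
# time-gap term). A sketch with assumed tensors pos, head, head_vec and an
# edge_index of shape [2, n_edge]:
#   rel = pos[edge_index[0]] - pos[edge_index[1]]
#   r = torch.stack([
#       torch.norm(rel, p=2, dim=-1),
#       angle_between_2d_vectors(ctr_vector=head_vec[edge_index[1]], nbr_vector=rel),
#       wrap_angle(head[edge_index[0]] - head[edge_index[1]]),
#   ], dim=-1)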
build temporal, interaction and map2agent edges - edge_index_t, r_t = self.build_temporal_edge( - pos_a=pos_a, # [n_agent, n_step, 2] - head_a=head_a, # [n_agent, n_step] - head_vector_a=head_vector_a, # [n_agent, n_step, 2] - mask=mask, # [n_agent, n_step] - ) # edge_index_t: [2, n_edge_t], r_t: [n_edge_t, hidden_dim] - - batch_s = torch.cat( - [tokenized_agent["batch"] + tokenized_agent["num_graphs"] * t for t in range(n_step)], - dim=0, - ) # [n_agent*n_step] - batch_pl = torch.cat( - [map_feature["batch"] + tokenized_agent["num_graphs"] * t for t in range(n_step)], - dim=0, - ) # [n_pl*n_step] - - edge_index_a2a, r_a2a = self.build_interaction_edge( - pos_a=pos_a, # [n_agent, n_step, 2] - head_a=head_a, # [n_agent, n_step] - head_vector_a=head_vector_a, # [n_agent, n_step, 2] - batch_s=batch_s, # [n_agent*n_step] - mask=mask, # [n_agent, n_step] - ) # edge_index_a2a: [2, n_edge_a2a], r_a2a: [n_edge_a2a, hidden_dim] - - edge_index_pl2a, r_pl2a = self.build_map2agent_edge( - pos_pl=map_feature["position"], # [n_pl, 2] - orient_pl=map_feature["orientation"], # [n_pl] - pos_a=pos_a, # [n_agent, n_step, 2] - head_a=head_a, # [n_agent, n_step] - head_vector_a=head_vector_a, # [n_agent, n_step, 2] - mask=mask, # [n_agent, n_step] - batch_s=batch_s, # [n_agent*n_step] - batch_pl=batch_pl, # [n_pl*n_step] - ) - - # ! attention layers - # [n_step*n_pl, hidden_dim] - feat_map = map_feature["pt_token"].unsqueeze(0).expand(n_step, -1, -1).flatten(0, 1) - - for i in range(self.num_layers): - feat_a = feat_a.flatten(0, 1) # [n_agent*n_step, hidden_dim] - feat_a = self.t_attn_layers[i](feat_a, r_t, edge_index_t) - # [n_step*n_agent, hidden_dim] - feat_a = feat_a.view(n_agent, n_step, -1).transpose(0, 1).flatten(0, 1) - feat_a = self.pt2a_attn_layers[i]((feat_map, feat_a), r_pl2a, edge_index_pl2a) - feat_a = self.a2a_attn_layers[i](feat_a, r_a2a, edge_index_a2a) - feat_a = feat_a.view(n_step, n_agent, -1).transpose(0, 1) - - # ! 
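# NOTE: the attention loop below applies a fixed per-layer order: temporal
# self-attention over each agent's own history, then map-to-agent cross-attention,
# then agent-to-agent self-attention; only the flattening axis changes between
# them. Condensed sketch for one layer (names as in the surrounding code):
#   feat_a = t_attn(feat_a.flatten(0, 1), r_t, edge_index_t)
#   feat_a = feat_a.view(n_agent, n_step, -1).transpose(0, 1).flatten(0, 1)
#   feat_a = pt2a_attn((feat_map, feat_a), r_pl2a, edge_index_pl2a)
#   feat_a = a2a_attn(feat_a, r_a2a, edge_index_a2a)
#   feat_a = feat_a.view(n_step, n_agent, -1).transpose(0, 1)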
final mlp to get outputs - next_token_logits = self.token_predict_head(feat_a) - - return { - # action that goes from [(10->15), ..., (85->90)] - "next_token_logits": next_token_logits[:, 1:-1], # [n_agent, 16, n_token] - "next_token_valid": tokenized_agent["valid_mask"][:, 1:-1], # [n_agent, 16] - # for step {5, 10, ..., 90} and act [(0->5), (5->10), ..., (85->90)] - "pred_pos": tokenized_agent["sampled_pos"], # [n_agent, 18, 2] - "pred_head": tokenized_agent["sampled_heading"], # [n_agent, 18] - "pred_valid": tokenized_agent["valid_mask"], # [n_agent, 18] - # for step {5, 10, ..., 90} - "gt_pos_raw": tokenized_agent["gt_pos_raw"], # [n_agent, 18, 2] - "gt_head_raw": tokenized_agent["gt_head_raw"], # [n_agent, 18] - "gt_valid_raw": tokenized_agent["gt_valid_raw"], # [n_agent, 18] - # or use the tokenized gt - "gt_pos": tokenized_agent["gt_pos"], # [n_agent, 18, 2] - "gt_head": tokenized_agent["gt_heading"], # [n_agent, 18] - "gt_valid": tokenized_agent["valid_mask"], # [n_agent, 18] - } - - def inference( - self, - tokenized_agent: Dict[str, torch.Tensor], - map_feature: Dict[str, torch.Tensor], - sampling_scheme: SMARTRolloutSampling, - ) -> Dict[str, torch.Tensor]: - n_agent = tokenized_agent["valid_mask"].shape[0] - n_step_future_10hz = self.num_future_steps # 80 - n_step_future_2hz = n_step_future_10hz // self.shift # 16 - step_current_10hz = self.num_historical_steps - 1 # 10 - step_current_2hz = step_current_10hz // self.shift # 2 - - pos_a = tokenized_agent["gt_pos"][:, :step_current_2hz].clone() - head_a = tokenized_agent["gt_heading"][:, :step_current_2hz].clone() - head_vector_a = torch.stack([head_a.cos(), head_a.sin()], dim=-1) - pred_idx = tokenized_agent["gt_idx"].clone() - ( - feat_a, # [n_agent, step_current_2hz, hidden_dim] - agent_token_emb, # [n_agent, step_current_2hz, hidden_dim] - agent_token_emb_veh, # [n_agent, hidden_dim] - agent_token_emb_ped, # [n_agent, hidden_dim] - agent_token_emb_cyc, # [n_agent, hidden_dim] - veh_mask, # [n_agent] - ped_mask, # [n_agent] - cyc_mask, # [n_agent] - categorical_embs, # List of len=2, shape [n_agent, hidden_dim] - ) = self.agent_token_embedding( - agent_token_index=tokenized_agent["gt_idx"][:, :step_current_2hz], - trajectory_token_veh=tokenized_agent["trajectory_token_veh"], - trajectory_token_ped=tokenized_agent["trajectory_token_ped"], - trajectory_token_cyc=tokenized_agent["trajectory_token_cyc"], - pos_a=pos_a, - head_vector_a=head_vector_a, - agent_type=tokenized_agent["type"], - agent_shape=tokenized_agent["shape"], - inference=True, - ) - - if not self.training: - pred_traj_10hz = torch.zeros([n_agent, n_step_future_10hz, 2], dtype=pos_a.dtype, device=pos_a.device) - pred_head_10hz = torch.zeros([n_agent, n_step_future_10hz], dtype=pos_a.dtype, device=pos_a.device) - - pred_valid = tokenized_agent["valid_mask"].clone() - next_token_logits_list = [] - next_token_action_list = [] - feat_a_t_dict = {} - for t in range(n_step_future_2hz): # 0 -> 15 - t_now = step_current_2hz - 1 + t # 1 -> 16 - n_step = t_now + 1 # 2 -> 17 - - if t == 0: # init - hist_step = step_current_2hz - batch_s = torch.cat( - [tokenized_agent["batch"] + tokenized_agent["num_graphs"] * t for t in range(hist_step)], - dim=0, - ) - batch_pl = torch.cat( - [map_feature["batch"] + tokenized_agent["num_graphs"] * t for t in range(hist_step)], - dim=0, - ) - inference_mask = pred_valid[:, :n_step] - edge_index_t, r_t = self.build_temporal_edge( - pos_a=pos_a, - head_a=head_a, - head_vector_a=head_vector_a, - mask=pred_valid[:, :n_step], - ) - else: - 
hist_step = 1 - batch_s = tokenized_agent["batch"] - batch_pl = map_feature["batch"] - inference_mask = pred_valid[:, :n_step].clone() - inference_mask[:, :-1] = False - edge_index_t, r_t = self.build_temporal_edge( - pos_a=pos_a, - head_a=head_a, - head_vector_a=head_vector_a, - mask=pred_valid[:, :n_step], - inference_mask=inference_mask, - ) - edge_index_t[1] = (edge_index_t[1] + 1) // n_step - 1 - - # In the inference stage, we only infer the current step for the recurrent rollout - edge_index_pl2a, r_pl2a = self.build_map2agent_edge( - pos_pl=map_feature["position"], # [n_pl, 2] - orient_pl=map_feature["orientation"], # [n_pl] - pos_a=pos_a[:, -hist_step:], # [n_agent, hist_step, 2] - head_a=head_a[:, -hist_step:], # [n_agent, hist_step] - head_vector_a=head_vector_a[:, -hist_step:], # [n_agent, hist_step, 2] - mask=inference_mask[:, -hist_step:], # [n_agent, hist_step] - batch_s=batch_s, # [n_agent*hist_step] - batch_pl=batch_pl, # [n_pl*hist_step] - ) - edge_index_a2a, r_a2a = self.build_interaction_edge( - pos_a=pos_a[:, -hist_step:], # [n_agent, hist_step, 2] - head_a=head_a[:, -hist_step:], # [n_agent, hist_step] - head_vector_a=head_vector_a[:, -hist_step:], # [n_agent, hist_step, 2] - batch_s=batch_s, # [n_agent*hist_step] - mask=inference_mask[:, -hist_step:], # [n_agent, hist_step] - ) - - # ! attention layers - for i in range(self.num_layers): - # [n_agent, n_step, hidden_dim] - _feat_temporal = feat_a if i == 0 else feat_a_t_dict[i] - - if t == 0: # init, process hist_step together - _feat_temporal = self.t_attn_layers[i](_feat_temporal.flatten(0, 1), r_t, edge_index_t).view( - n_agent, n_step, -1 - ) - _feat_temporal = _feat_temporal.transpose(0, 1).flatten(0, 1) - - # [hist_step*n_pl, hidden_dim] - _feat_map = map_feature["pt_token"].unsqueeze(0).expand(hist_step, -1, -1).flatten(0, 1) - - _feat_temporal = self.pt2a_attn_layers[i]((_feat_map, _feat_temporal), r_pl2a, edge_index_pl2a) - _feat_temporal = self.a2a_attn_layers[i](_feat_temporal, r_a2a, edge_index_a2a) - _feat_temporal = _feat_temporal.view(n_step, n_agent, -1).transpose(0, 1) - feat_a_now = _feat_temporal[:, -1] # [n_agent, hidden_dim] - - if i + 1 < self.num_layers: - feat_a_t_dict[i + 1] = _feat_temporal - - else: # process one step - feat_a_now = self.t_attn_layers[i]( - (_feat_temporal.flatten(0, 1), _feat_temporal[:, -1]), - r_t, - edge_index_t, - ) - # * gives the same result as below, but is more efficient - # feat_a_now = self.t_attn_layers[i]( - # _feat_temporal.flatten(0, 1), r_t, edge_index_t - # ).view(n_agent, n_step, -1)[:, -1] - - feat_a_now = self.pt2a_attn_layers[i]( - (map_feature["pt_token"], feat_a_now), r_pl2a, edge_index_pl2a - ) - feat_a_now = self.a2a_attn_layers[i](feat_a_now, r_a2a, edge_index_a2a) - - # [n_agent, n_step, hidden_dim] - if i + 1 < self.num_layers: - feat_a_t_dict[i + 1] = torch.cat((feat_a_t_dict[i + 1], feat_a_now.unsqueeze(1)), dim=1) - - # ! get outputs - next_token_logits = self.token_predict_head(feat_a_now) - next_token_logits_list.append(next_token_logits) # [n_agent, n_token] - - next_token_idx, next_token_traj_all = sample_next_token_traj( - token_traj=tokenized_agent["token_traj"], - token_traj_all=tokenized_agent["token_traj_all"], - sampling_scheme=sampling_scheme, - # ! for most-likely sampling - next_token_logits=next_token_logits, - # !
for nearest-pos sampling - pos_now=pos_a[:, t_now], # [n_agent, 2] - head_now=head_a[:, t_now], # [n_agent] - pos_next_gt=tokenized_agent["gt_pos_raw"][:, n_step], # [n_agent, 2] - head_next_gt=tokenized_agent["gt_head_raw"][:, n_step], # [n_agent] - valid_next_gt=tokenized_agent["gt_valid_raw"][:, n_step], # [n_agent] - token_agent_shape=tokenized_agent["token_agent_shape"], # [n_token, 2] - ) # next_token_idx: [n_agent], next_token_traj_all: [n_agent, 6, 4, 2] - - diff_xy = next_token_traj_all[:, -1, 0] - next_token_traj_all[:, -1, 3] - next_token_action_list.append( - torch.cat( - [ - next_token_traj_all[:, -1].mean(1), # [n_agent, 2] - torch.arctan2(diff_xy[:, [1]], diff_xy[:, [0]]), # [n_agent, 1] - ], - dim=-1, - ) # [n_agent, 3] - ) - - token_traj_global = transform_to_global( - pos_local=next_token_traj_all.flatten(1, 2), # [n_agent, 6*4, 2] - head_local=None, - pos_now=pos_a[:, t_now], # [n_agent, 2] - head_now=head_a[:, t_now], # [n_agent] - )[0].view(*next_token_traj_all.shape) - - if not self.training: - pred_traj_10hz[:, t * 5 : (t + 1) * 5] = token_traj_global[:, 1:].mean(2) - diff_xy = token_traj_global[:, 1:, 0] - token_traj_global[:, 1:, 3] - pred_head_10hz[:, t * 5 : (t + 1) * 5] = torch.arctan2(diff_xy[:, :, 1], diff_xy[:, :, 0]) - - # ! get pos_a_next and head_a_next, spawn unseen agents - pos_a_next = token_traj_global[:, -1].mean(dim=1) - diff_xy_next = token_traj_global[:, -1, 0] - token_traj_global[:, -1, 3] - head_a_next = torch.arctan2(diff_xy_next[:, 1], diff_xy_next[:, 0]) - pred_idx[:, n_step] = next_token_idx - - # ! update tensors for next step - pred_valid[:, n_step] = pred_valid[:, t_now] - # pred_valid[:, n_step] = pred_valid[:, t_now] | mask_spawn - pos_a = torch.cat([pos_a, pos_a_next.unsqueeze(1)], dim=1) - head_a = torch.cat([head_a, head_a_next.unsqueeze(1)], dim=1) - head_vector_a_next = torch.stack([head_a_next.cos(), head_a_next.sin()], dim=-1) - head_vector_a = torch.cat([head_vector_a, head_vector_a_next.unsqueeze(1)], dim=1) - - # ! get agent_token_emb_next - agent_token_emb_next = torch.zeros_like(agent_token_emb[:, 0]) - agent_token_emb_next[veh_mask] = agent_token_emb_veh[next_token_idx[veh_mask]] - agent_token_emb_next[ped_mask] = agent_token_emb_ped[next_token_idx[ped_mask]] - agent_token_emb_next[cyc_mask] = agent_token_emb_cyc[next_token_idx[cyc_mask]] - agent_token_emb = torch.cat([agent_token_emb, agent_token_emb_next.unsqueeze(1)], dim=1) - - # !
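# NOTE: the rollout above is one recurrence per 0.5 s token: sample a token, map
# its contour to global coordinates, read the new pose off the corners, append,
# re-embed. Hedged pseudocode of the loop body (helper names are assumed):
#   for t in range(n_step_future):
#       logits = token_predict_head(feat_now)
#       idx, contour_local = sample_next_token_traj(...)
#       contour = transform_to_global(contour_local, None, pos_now, head_now)[0]
#       pos_now = contour[:, -1].mean(1)            # box center
#       d = contour[:, -1, 0] - contour[:, -1, 3]   # front-left minus back-left corner
#       head_now = torch.arctan2(d[:, 1], d[:, 0])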
get feat_a_next - motion_vector_a = pos_a[:, -1] - pos_a[:, -2] # [n_agent, 2] - x_a = torch.stack( - [ - torch.norm(motion_vector_a, p=2, dim=-1), - angle_between_2d_vectors(ctr_vector=head_vector_a[:, -1], nbr_vector=motion_vector_a), - ], - dim=-1, - ) - # [n_agent, hidden_dim] - x_a = self.x_a_emb(continuous_inputs=x_a, categorical_embs=categorical_embs) - # [n_agent, 1, 2*hidden_dim] - feat_a_next = torch.cat((agent_token_emb_next, x_a), dim=-1).unsqueeze(1) - feat_a_next = self.fusion_emb(feat_a_next) - feat_a = torch.cat([feat_a, feat_a_next], dim=1) - - out_dict = { - # action that goes from [(10->15), ..., (85->90)] - "next_token_logits": torch.stack(next_token_logits_list, dim=1), - "next_token_valid": pred_valid[:, 1:-1], # [n_agent, 16] - # for step {5, 10, ..., 90} and act [(0->5), (5->10), ..., (85->90)] - "pred_pos": pos_a, # [n_agent, 18, 2] - "pred_head": head_a, # [n_agent, 18] - "pred_valid": pred_valid, # [n_agent, 18] - "pred_idx": pred_idx, # [n_agent, 18] - # for step {5, 10, ..., 90} - "gt_pos_raw": tokenized_agent["gt_pos_raw"], # [n_agent, 18, 2] - "gt_head_raw": tokenized_agent["gt_head_raw"], # [n_agent, 18] - "gt_valid_raw": tokenized_agent["gt_valid_raw"], # [n_agent, 18] - # or use the tokenized gt - "gt_pos": tokenized_agent["gt_pos"], # [n_agent, 18, 2] - "gt_head": tokenized_agent["gt_heading"], # [n_agent, 18] - "gt_valid": tokenized_agent["valid_mask"], # [n_agent, 18] - # for shifting proxy targets by lr - "next_token_action": torch.stack(next_token_action_list, dim=1), - } - - if not self.training: # 10hz predictions for wosac evaluation and submission - out_dict["pred_traj_10hz"] = pred_traj_10hz - out_dict["pred_head_10hz"] = pred_head_10hz - pred_z = tokenized_agent["gt_z_raw"].unsqueeze(1) # [n_agent, 1] - out_dict["pred_z_10hz"] = pred_z.expand(-1, pred_traj_10hz.shape[1]) - - return out_dict diff --git a/d123/training/models/sim_agent/smart/smart.py b/d123/training/models/sim_agent/smart/smart.py deleted file mode 100644 index 159630cd..00000000 --- a/d123/training/models/sim_agent/smart/smart.py +++ /dev/null @@ -1,176 +0,0 @@ -import math - -import torch -from lightning import LightningModule -from torch.optim.lr_scheduler import LambdaLR - -from d123.training.models.sim_agent.smart.metrics.cross_entropy import CrossEntropy -from d123.training.models.sim_agent.smart.metrics.min_ade import minADE -from d123.training.models.sim_agent.smart.metrics.next_token_cls import TokenCls -from d123.training.models.sim_agent.smart.modules.smart_decoder import SMARTDecoder -from d123.training.models.sim_agent.smart.smart_config import SMARTConfig -from d123.training.models.sim_agent.smart.tokens.token_processor import TokenProcessor - -# from d123.training.models.sim_agent.smart.utils.finetune import set_model_for_finetuning - -# from src.utils.vis_waymo import VisWaymo -# from src.utils.wosac_utils import get_scenario_id_int_tensor, get_scenario_rollouts - - -class SMART(LightningModule): - def __init__(self, model_config: SMARTConfig) -> None: - super(SMART, self).__init__() - self.save_hyperparameters() - - self.config = model_config - self.lr = model_config.lr - self.lr_warmup_steps = model_config.lr_warmup_steps - self.lr_total_steps = model_config.lr_total_steps - self.lr_min_ratio = model_config.lr_min_ratio - self.num_historical_steps = model_config.num_historical_steps - self.log_epoch = -1 - self.val_open_loop = model_config.val_open_loop - self.val_closed_loop = model_config.val_closed_loop - self.token_processor = TokenProcessor( - 
map_token_file=model_config.map_token_file, - agent_token_file=model_config.agent_token_file, - map_token_sampling=model_config.map_token_sampling, - agent_token_sampling=model_config.agent_token_sampling, - ) - - self.encoder = SMARTDecoder(model_config=model_config, n_token_agent=self.token_processor.n_token_agent) - # set_model_for_finetuning(self.encoder, model_config.finetune) - - self.minADE = minADE() - self.TokenCls = TokenCls(max_guesses=5) - # self.wosac_metrics = WOSACMetrics("val_closed") - # self.wosac_submission = WOSACSubmission(**model_config.wosac_submission) - self.training_loss = CrossEntropy( - use_gt_raw=model_config.use_gt_raw, - gt_thresh_scale_length=model_config.gt_thresh_scale_length, - label_smoothing=model_config.label_smoothing, - rollout_as_gt=model_config.rollout_as_gt, - ) - - self.n_rollout_closed_val = model_config.n_rollout_closed_val - self.n_vis_batch = model_config.n_vis_batch - self.n_vis_scenario = model_config.n_vis_scenario - self.n_vis_rollout = model_config.n_vis_rollout - # self.n_batch_wosac_metric = model_config.n_batch_wosac_metric - - self.training_rollout_sampling = model_config.training_rollout_sampling - self.validation_rollout_sampling = model_config.validation_rollout_sampling - - def training_step(self, data, batch_idx): - tokenized_map, tokenized_agent = self.token_processor(data) - if self.training_rollout_sampling.num_k <= 0: - pred = self.encoder(tokenized_map, tokenized_agent) - else: - pred = self.encoder.inference( - tokenized_map, - tokenized_agent, - sampling_scheme=self.training_rollout_sampling, - ) - - loss = self.training_loss( - **pred, - token_agent_shape=tokenized_agent["token_agent_shape"], # [n_agent, 2] - token_traj=tokenized_agent["token_traj"], # [n_agent, n_token, 4, 2] - train_mask=data["agent"]["train_mask"], # [n_agent] - current_epoch=self.current_epoch, - ) - self.log("train/loss", loss, on_step=True, batch_size=1) - - return loss - - def validation_step(self, data, batch_idx): - tokenized_map, tokenized_agent = self.token_processor(data) - - # ! open-loop validation - if self.val_open_loop: - pred = self.encoder(tokenized_map, tokenized_agent) - loss = self.training_loss( - **pred, - token_agent_shape=tokenized_agent["token_agent_shape"], # [n_agent, 2] - token_traj=tokenized_agent["token_traj"], # [n_agent, n_token, 4, 2] - ) - - self.TokenCls.update( - # action that goes from [(10->15), ..., (85->90)] - pred=pred["next_token_logits"], # [n_agent, 16, n_token] - pred_valid=pred["next_token_valid"], # [n_agent, 16] - target=tokenized_agent["gt_idx"][:, 2:], - target_valid=tokenized_agent["valid_mask"][:, 2:], - ) - self.log( - "val_open/acc", - self.TokenCls, - on_epoch=True, - sync_dist=True, - batch_size=1, - ) - self.log("val_open/loss", loss, on_epoch=True, sync_dist=True, batch_size=1) - - # !
closed-loop validation - if self.val_closed_loop: - pred_traj, pred_z, pred_head = [], [], [] - for _ in range(self.n_rollout_closed_val): - pred = self.encoder.inference(tokenized_map, tokenized_agent, self.validation_rollout_sampling) - pred_traj.append(pred["pred_traj_10hz"]) - pred_z.append(pred["pred_z_10hz"]) - pred_head.append(pred["pred_head_10hz"]) - - pred_traj = torch.stack(pred_traj, dim=1) # [n_ag, n_rollout, n_step, 2] - pred_z = torch.stack(pred_z, dim=1) # [n_ag, n_rollout, n_step] - pred_head = torch.stack(pred_head, dim=1) # [n_ag, n_rollout, n_step] - self.minADE.update( - pred=pred_traj, - target=data["agent"]["position"][:, self.num_historical_steps :, : pred_traj.shape[-1]], - target_valid=data["agent"]["valid_mask"][:, self.num_historical_steps :], - ) - return pred_traj - - def on_validation_epoch_end(self): - pass - - def configure_optimizers(self): - - # TODO: add to hydra config - optimizer = torch.optim.AdamW(self.parameters(), lr=self.lr) - - def lr_lambda(current_step): - current_step = self.current_epoch + 1 - if current_step < self.lr_warmup_steps: - return self.lr_min_ratio + (1 - self.lr_min_ratio) * current_step / self.lr_warmup_steps - return self.lr_min_ratio + 0.5 * (1 - self.lr_min_ratio) * ( - 1.0 - + math.cos( - math.pi - * min( - 1.0, - (current_step - self.lr_warmup_steps) / (self.lr_total_steps - self.lr_warmup_steps), - ) - ) - ) - - lr_scheduler = LambdaLR(optimizer, lr_lambda=lr_lambda) - return [optimizer], [lr_scheduler] - - def test_step(self, data, batch_idx): - tokenized_map, tokenized_agent = self.token_processor(data) - - # ! only closed-loop validation - pred_traj, pred_z, pred_head = [], [], [] - for _ in range(self.n_rollout_closed_val): - pred = self.encoder.inference(tokenized_map, tokenized_agent, self.validation_rollout_sampling) - pred_traj.append(pred["pred_traj_10hz"]) - pred_z.append(pred["pred_z_10hz"]) - pred_head.append(pred["pred_head_10hz"]) - - pred_traj = torch.stack(pred_traj, dim=1) # [n_ag, n_rollout, n_step, 2] - pred_z = torch.stack(pred_z, dim=1) # [n_ag, n_rollout, n_step] - pred_head = torch.stack(pred_head, dim=1) # [n_ag, n_rollout, n_step] - return pred_traj, pred_z, pred_head - - def on_test_epoch_end(self): - pass diff --git a/d123/training/models/sim_agent/smart/smart_config.py b/d123/training/models/sim_agent/smart/smart_config.py deleted file mode 100644 index 47d122bc..00000000 --- a/d123/training/models/sim_agent/smart/smart_config.py +++ /dev/null @@ -1,68 +0,0 @@ -from dataclasses import dataclass, field -from typing import Optional - - -@dataclass -class SMARTRolloutSampling: - num_k: int = 1 - temp: float = 1.0 - criteria: Optional[str] = "topk_prob" # {topk_dist_sampled_with_prob, topk_prob, topk_prob_sampled_with_dist} - - -@dataclass -class SMARTConfig: - - lr: float = 0.0005 - lr_warmup_steps: int = 0 - lr_total_steps: int = 100000 - lr_min_ratio: float = 0.05 - - val_open_loop: bool = True - val_closed_loop: bool = True - - # Tokenizer - map_token_file: str = "map_traj_token5.pkl" - agent_token_file: str = "agent_vocab_555_s2.pkl" - - map_token_sampling: SMARTRolloutSampling = field( - default_factory=lambda: SMARTRolloutSampling(num_k=1, temp=1.0, criteria=None) - ) - agent_token_sampling: SMARTRolloutSampling = field( - default_factory=lambda: SMARTRolloutSampling(num_k=1, temp=1.0, criteria=None) - ) - - # Rollout Sampling - validation_rollout_sampling: SMARTRolloutSampling = field( - default_factory=lambda: SMARTRolloutSampling(num_k=5, temp=1.0, criteria="topk_prob") - ) -
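# NOTE: lr_lambda in configure_optimizers above is a standard linear-warmup plus
# cosine-decay schedule evaluated on a per-epoch clock (the LambdaLR-supplied step
# is overwritten by current_epoch + 1). Equivalent standalone form, with warmup w,
# horizon T and floor r:
#   def cosine_with_warmup(s: int, w: int, T: int, r: float) -> float:
#       if s < w:
#           return r + (1 - r) * s / w
#       return r + 0.5 * (1 - r) * (1.0 + math.cos(math.pi * min(1.0, (s - w) / (T - w))))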
training_rollout_sampling: SMARTRolloutSampling = field( - default_factory=lambda: SMARTRolloutSampling(num_k=-1, temp=1.0, criteria="topk_prob") - ) - - # Decoder - hidden_dim: int = 128 - num_freq_bands: int = 64 - num_heads: int = 8 - head_dim: int = 16 - dropout: float = 0.1 - hist_drop_prob: float = 0.1 - num_map_layers: int = 3 - num_agent_layers: int = 6 - pl2pl_radius: float = 10 - pl2a_radius: float = 30 - a2a_radius: float = 60 - time_span: Optional[int] = 30 - num_historical_steps: int = 11 - num_future_steps: int = 90 - - # train loss - use_gt_raw: bool = True - gt_thresh_scale_length: float = -1.0 # {"veh": 4.8, "cyc": 2.0, "ped": 1.0} - label_smoothing: float = 0.1 - rollout_as_gt: bool = False - - # else: - n_rollout_closed_val: int = 10 - n_vis_batch: int = 2 - n_vis_scenario: int = 2 - n_vis_rollout: int = 5 diff --git a/d123/training/models/sim_agent/smart/tokens/__init__.py b/d123/training/models/sim_agent/smart/tokens/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/d123/training/models/sim_agent/smart/tokens/token_processor.py b/d123/training/models/sim_agent/smart/tokens/token_processor.py deleted file mode 100644 index 93fa3a4d..00000000 --- a/d123/training/models/sim_agent/smart/tokens/token_processor.py +++ /dev/null @@ -1,339 +0,0 @@ -import os -import pickle -from typing import Dict, Tuple - -import torch -from torch import Tensor -from torch.distributions import Categorical -from torch_geometric.data import HeteroData - -from d123.training.models.sim_agent.smart.smart_config import SMARTRolloutSampling -from d123.training.models.sim_agent.smart.utils.geometry import wrap_angle -from d123.training.models.sim_agent.smart.utils.rollout import ( - cal_polygon_contour, - transform_to_global, - transform_to_local, -) - - -class TokenProcessor(torch.nn.Module): - def __init__( - self, - map_token_file: str, - agent_token_file: str, - map_token_sampling: SMARTRolloutSampling, - agent_token_sampling: SMARTRolloutSampling, - ) -> None: - super(TokenProcessor, self).__init__() - self.map_token_sampling = map_token_sampling - self.agent_token_sampling = agent_token_sampling - self.shift = 5 - - module_dir = os.path.dirname(__file__) - self.init_agent_token(os.path.join(module_dir, agent_token_file)) - self.init_map_token(os.path.join(module_dir, map_token_file)) - self.n_token_agent = self.agent_token_all_veh.shape[0] - - @torch.no_grad() - def forward(self, data: HeteroData) -> Tuple[Dict[str, Tensor], Dict[str, Tensor]]: - tokenized_map = self.tokenize_map(data) - tokenized_agent = self.tokenize_agent(data) - return tokenized_map, tokenized_agent - - def init_map_token(self, map_token_traj_path, argmin_sample_len=3) -> None: - map_token_traj = pickle.load(open(map_token_traj_path, "rb"))["traj_src"] - indices = torch.linspace(0, map_token_traj.shape[1] - 1, steps=argmin_sample_len).long() - - self.register_buffer( - "map_token_traj_src", - torch.tensor(map_token_traj, dtype=torch.float32).flatten(1, 2), - persistent=False, - ) # [n_token, 11*2] - - self.register_buffer( - "map_token_sample_pt", - torch.tensor(map_token_traj[:, indices], dtype=torch.float32).unsqueeze(0), - persistent=False, - ) # [1, n_token, 3, 2] - - def init_agent_token(self, agent_token_path) -> None: - agent_token_data = pickle.load(open(agent_token_path, "rb")) - for k, v in agent_token_data["token_all"].items(): - v = torch.tensor(v, dtype=torch.float32) - # [n_token, 6, 4, 2], countour, 10 hz - self.register_buffer(f"agent_token_all_{k}", v, persistent=False) - - def 
tokenize_map(self, data: HeteroData) -> Dict[str, Tensor]: - traj_pos = data["map_save"]["traj_pos"] # [n_pl, 3, 2] - traj_theta = data["map_save"]["traj_theta"] # [n_pl] - - traj_pos_local, _ = transform_to_local( - pos_global=traj_pos, # [n_pl, 3, 2] - head_global=None, # [n_pl, 1] - pos_now=traj_pos[:, 0], # [n_pl, 2] - head_now=traj_theta, # [n_pl] - ) - # [1, n_token, 3, 2] - [n_pl, 1, 3, 2] - dist = torch.sum( - (self.map_token_sample_pt - traj_pos_local.unsqueeze(1)) ** 2, - dim=(-2, -1), - ) # [n_pl, n_token] - - if self.training and (self.map_token_sampling.num_k > 1): - topk_dists, topk_indices = torch.topk( - dist, - self.map_token_sampling.num_k, - dim=-1, - largest=False, - sorted=False, - ) # [n_pl, K] - - topk_logits = (-1e-6 - topk_dists) / self.map_token_sampling.temp - _samples = Categorical(logits=topk_logits).sample() # [n_pl] in K - token_idx = topk_indices[torch.arange(len(_samples)), _samples].contiguous() - else: - token_idx = torch.argmin(dist, dim=-1) - - tokenized_map = { - "position": traj_pos[:, 0].contiguous(), # [n_pl, 2] - "orientation": traj_theta, # [n_pl] - "token_idx": token_idx, # [n_pl] - "token_traj_src": self.map_token_traj_src, # [n_token, 11*2] - "type": data["pt_token"]["type"].long(), # [n_pl] - "pl_type": data["pt_token"]["pl_type"].long(), # [n_pl] - "light_type": data["pt_token"]["light_type"].long(), # [n_pl] - "batch": data["pt_token"]["batch"], # [n_pl] - } - return tokenized_map - - def tokenize_agent(self, data: HeteroData) -> Dict[str, Tensor]: - """ - Args: data["agent"]: Dict - "valid_mask": [n_agent, n_step], bool - "role": [n_agent, 3], bool - "id": [n_agent], int64 - "type": [n_agent], uint8 - "position": [n_agent, n_step, 3], float32 - "heading": [n_agent, n_step], float32 - "velocity": [n_agent, n_step, 2], float32 - "shape": [n_agent, 3], float32 - """ - # ! collate width/length, traj tokens for current batch - agent_shape, token_traj_all, token_traj = self._get_agent_shape_and_token_traj(data["agent"]["type"]) - - # ! get raw trajectory data - valid = data["agent"]["valid_mask"] # [n_agent, n_step] - heading = data["agent"]["heading"] # [n_agent, n_step] - pos = data["agent"]["position"][..., :2].contiguous() # [n_agent, n_step, 2] - vel = data["agent"]["velocity"] # [n_agent, n_step, 2] - - # ! agent, specifically vehicle's heading can be 180 degree off. We fix it here. - heading = self._clean_heading(valid, heading) - # ! extrapolate to previous 5th step. - valid, pos, heading, vel = self._extrapolate_agent_to_prev_token_step(valid, pos, heading, vel) - - # ! prepare output dict - tokenized_agent = { - "num_graphs": data.num_graphs, - "type": data["agent"]["type"], - "shape": data["agent"]["shape"], - "ego_mask": data["agent"]["role"][:, 0], # [n_agent] - "token_agent_shape": agent_shape, # [n_agent, 2] - "batch": data["agent"]["batch"], - "token_traj_all": token_traj_all, # [n_agent, n_token, 6, 4, 2] - "token_traj": token_traj, # [n_agent, n_token, 4, 2] - # for step {5, 10, ..., 90} - "gt_pos_raw": pos[:, self.shift :: self.shift], # [n_agent, n_step=18, 2] - "gt_head_raw": heading[:, self.shift :: self.shift], # [n_agent, n_step=18] - "gt_valid_raw": valid[:, self.shift :: self.shift], # [n_agent, n_step=18] - } - # [n_token, 8] - for k in ["veh", "ped", "cyc"]: - tokenized_agent[f"trajectory_token_{k}"] = getattr(self, f"agent_token_all_{k}")[:, -1].flatten(1, 2) - - # ! 
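# NOTE: both tokenizers share one noisy-matching trick, used above for map tokens
# and below for agent tokens: take the K nearest vocabulary entries by distance
# and sample one from a distance-scaled Categorical instead of a hard argmin
# (training-time augmentation; K=1 reduces to argmin). Minimal sketch with an
# assumed dist of shape [n_item, n_token] and temperature temp:
#   topk_d, topk_i = torch.topk(dist, k, dim=-1, largest=False, sorted=False)
#   pick = Categorical(logits=-topk_d / temp).sample()
#   token_idx = topk_i[torch.arange(len(pick)), pick]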
match token for each agent - if not self.training: - # [n_agent] - tokenized_agent["gt_z_raw"] = data["agent"]["position"][:, 10, 2] - - token_dict = self._match_agent_token( - valid=valid, - pos=pos, - heading=heading, - agent_shape=agent_shape, - token_traj=token_traj, - ) - tokenized_agent.update(token_dict) - return tokenized_agent - - def _match_agent_token( - self, - valid: Tensor, # [n_agent, n_step] - pos: Tensor, # [n_agent, n_step, 2] - heading: Tensor, # [n_agent, n_step] - agent_shape: Tensor, # [n_agent, 2] - token_traj: Tensor, # [n_agent, n_token, 4, 2] - ) -> Dict[str, Tensor]: - """n_step_token=n_step//5 - n_step_token=18 for train with BC. - n_step_token=2 for val/test and train with closed-loop rollout. - Returns: Dict - # ! action that goes from [(0->5), (5->10), ..., (85->90)] - "valid_mask": [n_agent, n_step_token] - "gt_idx": [n_agent, n_step_token] - # ! at step [5, 10, 15, ..., 90] - "gt_pos": [n_agent, n_step_token, 2] - "gt_heading": [n_agent, n_step_token] - # ! noisy sampling for training data augmentation - "sampled_idx": [n_agent, n_step_token] - "sampled_pos": [n_agent, n_step_token, 2] - "sampled_heading": [n_agent, n_step_token] - """ - num_k = self.agent_token_sampling.num_k if self.training else 1 - n_agent, n_step = valid.shape - range_a = torch.arange(n_agent) - - prev_pos, prev_head = pos[:, 0], heading[:, 0] # [n_agent, 2], [n_agent] - prev_pos_sample, prev_head_sample = pos[:, 0], heading[:, 0] - - out_dict = { - "valid_mask": [], - "gt_idx": [], - "gt_pos": [], - "gt_heading": [], - "sampled_idx": [], - "sampled_pos": [], - "sampled_heading": [], - } - - for i in range(self.shift, n_step, self.shift): # [5, 10, 15, ..., 90] - _valid_mask = valid[:, i - self.shift] & valid[:, i] # [n_agent] - _invalid_mask = ~_valid_mask - out_dict["valid_mask"].append(_valid_mask) - - # ! gt_contour: [n_agent, 4, 2] in global coord - gt_contour = cal_polygon_contour(pos[:, i], heading[:, i], agent_shape) - gt_contour = gt_contour.unsqueeze(1) # [n_agent, 1, 4, 2] - - # ! tokenize without sampling - token_world_gt = transform_to_global( - pos_local=token_traj.flatten(1, 2), # [n_agent, n_token*4, 2] - head_local=None, - pos_now=prev_pos, # [n_agent, 2] - head_now=prev_head, # [n_agent] - )[0].view(*token_traj.shape) - token_idx_gt = torch.argmin(torch.norm(token_world_gt - gt_contour, dim=-1).sum(-1), dim=-1) # [n_agent] - # [n_agent, 4, 2] - token_contour_gt = token_world_gt[range_a, token_idx_gt] - - # update prev_pos, prev_head - prev_head = heading[:, i].clone() - dxy = token_contour_gt[:, 0] - token_contour_gt[:, 3] - prev_head[_valid_mask] = torch.arctan2(dxy[:, 1], dxy[:, 0])[_valid_mask] - prev_pos = pos[:, i].clone() - prev_pos[_valid_mask] = token_contour_gt.mean(1)[_valid_mask] - # add to output dict - out_dict["gt_idx"].append(token_idx_gt) - out_dict["gt_pos"].append(prev_pos.masked_fill(_invalid_mask.unsqueeze(1), 0)) - out_dict["gt_heading"].append(prev_head.masked_fill(_invalid_mask, 0)) - - # !
tokenize from sampled rollout state - if num_k == 1: # K=1 means no sampling - out_dict["sampled_idx"].append(out_dict["gt_idx"][-1]) - out_dict["sampled_pos"].append(out_dict["gt_pos"][-1]) - out_dict["sampled_heading"].append(out_dict["gt_heading"][-1]) - else: - # contour: [n_agent, n_token, 4, 2], 2 Hz, global coord - token_world_sample = transform_to_global( - pos_local=token_traj.flatten(1, 2), # [n_agent, n_token*4, 2] - head_local=None, - pos_now=prev_pos_sample, # [n_agent, 2] - head_now=prev_head_sample, # [n_agent] - )[0].view(*token_traj.shape) - - # dist: [n_agent, n_token] - dist = torch.norm(token_world_sample - gt_contour, dim=-1).mean(-1) - topk_dists, topk_indices = torch.topk(dist, num_k, dim=-1, largest=False, sorted=False) # [n_agent, K] - - topk_logits = (-1.0 * topk_dists) / self.agent_token_sampling.temp - _samples = Categorical(logits=topk_logits).sample() # [n_agent] in K - token_idx_sample = topk_indices[range_a, _samples] - token_contour_sample = token_world_sample[range_a, token_idx_sample] - - # update prev_pos_sample, prev_head_sample - prev_head_sample = heading[:, i].clone() - dxy = token_contour_sample[:, 0] - token_contour_sample[:, 3] - prev_head_sample[_valid_mask] = torch.arctan2(dxy[:, 1], dxy[:, 0])[_valid_mask] - prev_pos_sample = pos[:, i].clone() - prev_pos_sample[_valid_mask] = token_contour_sample.mean(1)[_valid_mask] - # add to output dict - out_dict["sampled_idx"].append(token_idx_sample) - out_dict["sampled_pos"].append(prev_pos_sample.masked_fill(_invalid_mask.unsqueeze(1), 0.0)) - out_dict["sampled_heading"].append(prev_head_sample.masked_fill(_invalid_mask, 0.0)) - out_dict = {k: torch.stack(v, dim=1) for k, v in out_dict.items()} - return out_dict - - @staticmethod - def _clean_heading(valid: Tensor, heading: Tensor) -> Tensor: - valid_pairs = valid[:, :-1] & valid[:, 1:] - for i in range(heading.shape[1] - 1): - heading_diff = torch.abs(wrap_angle(heading[:, i] - heading[:, i + 1])) - change_needed = (heading_diff > 1.5) & valid_pairs[:, i] - heading[:, i + 1][change_needed] = heading[:, i][change_needed] - return heading - - def _extrapolate_agent_to_prev_token_step( - self, - valid: Tensor, # [n_agent, n_step] - pos: Tensor, # [n_agent, n_step, 2] - heading: Tensor, # [n_agent, n_step] - vel: Tensor, # [n_agent, n_step, 2] - ) -> Tuple[Tensor, Tensor, Tensor, Tensor]: - # [n_agent], max will give the first True step - first_valid_step = torch.max(valid, dim=1).indices - - for i, t in enumerate(first_valid_step): # extrapolate to previous 5th step. - n_step_to_extrapolate = t % self.shift - if (t == 10) and (not valid[i, 10 - self.shift]): - # such that at least one token is valid in the history.
- n_step_to_extrapolate = self.shift - - if n_step_to_extrapolate > 0: - vel[i, t - n_step_to_extrapolate : t] = vel[i, t] - valid[i, t - n_step_to_extrapolate : t] = True - heading[i, t - n_step_to_extrapolate : t] = heading[i, t] - - for j in range(n_step_to_extrapolate): - pos[i, t - j - 1] = pos[i, t - j] - vel[i, t] * 0.1 - - return valid, pos, heading, vel - - def _get_agent_shape_and_token_traj(self, agent_type: Tensor) -> Tuple[Tensor, Tensor, Tensor]: - """ - agent_shape: [n_agent, 2] - token_traj_all: [n_agent, n_token, 6, 4, 2] - token_traj: [n_agent, n_token, 4, 2] - """ - agent_type_masks = { - "veh": agent_type == 0, - "ped": agent_type == 1, - "cyc": agent_type == 2, - } - agent_shape = 0.0 - token_traj_all = 0.0 - for k, mask in agent_type_masks.items(): - if k == "veh": - width = 2.0 - length = 4.8 - elif k == "cyc": - width = 1.0 - length = 2.0 - else: - width = 1.0 - length = 1.0 - agent_shape += torch.stack([width * mask, length * mask], dim=-1) - - token_traj_all += mask[:, None, None, None, None] * (getattr(self, f"agent_token_all_{k}").unsqueeze(0)) - - token_traj = token_traj_all[:, :, -1, :, :].contiguous() - return agent_shape, token_traj_all, token_traj diff --git a/d123/training/models/sim_agent/smart/tokens/traj_clustering.py b/d123/training/models/sim_agent/smart/tokens/traj_clustering.py deleted file mode 100644 index c93f4238..00000000 --- a/d123/training/models/sim_agent/smart/tokens/traj_clustering.py +++ /dev/null @@ -1,155 +0,0 @@ -import pickle -from pathlib import Path - -import lightning as L -import torch -from torch_geometric.data import HeteroData -from torch_geometric.loader import DataLoader -from tqdm import tqdm - -from d123.training.models.sim_agent.smart.datasets.scalable_dataset import MultiDataset -from d123.training.models.sim_agent.smart.tokens.token_processor import TokenProcessor -from d123.training.models.sim_agent.smart.utils.geometry import wrap_angle -from d123.training.models.sim_agent.smart.utils.rollout import cal_polygon_contour, transform_to_local - - -def Kdisk_cluster( - X, # [n_trajs, 4, 2], bbox of the last point of the segment - N, # int - tol, # float - a_pos, # [n_trajs, 6, 3], the complete segment - cal_mean_heading=True, -): - n_total = X.shape[0] - ret_traj_list = [] - - for i in range(N): - if i == 0: - choice_index = 0 # always include [0, 0, 0] - else: - choice_index = torch.randint(0, X.shape[0], (1,)).item() - x0 = X[choice_index] - # res_mask = torch.sum((X - x0) ** 2, dim=[1, 2]) / 4.0 > (tol**2) - res_mask = torch.norm(X - x0, dim=-1).mean(-1) > tol - if cal_mean_heading: - ret_traj = a_pos[~res_mask].mean(0, keepdim=True) - else: - ret_traj = a_pos[[choice_index]] - X = X[res_mask] - a_pos = a_pos[res_mask] - ret_traj_list.append(ret_traj) - - remain = X.shape[0] * 100.0 / n_total - n_inside = (~res_mask).sum().item() - print(f"{i=}, {remain=:.2f}%, {n_inside=}") - - return torch.cat(ret_traj_list, dim=0) # [N, 6, 3] - - -if __name__ == "__main__": - L.seed_everything(seed=2, workers=True) - n_trajs = 2048 * 100 # 2e5 - load_data_from_file = True - data_cache_path = Path("/root/.cache/SMART") - out_file_name = "agent_vocab_555_s2.pkl" - tol_dist = [0.05, 0.05, 0.05] # veh, ped, cyc - - # !
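# NOTE: _clean_heading above removes the ~180 degree flips that raw vehicle
# headings can exhibit between consecutive frames: whenever the wrapped difference
# of two valid neighbours exceeds 1.5 rad, the later frame inherits the earlier
# heading. A tiny worked example of the test it applies:
#   h = torch.tensor([0.0, 3.1])                # near-pi flip between frames
#   torch.abs(wrap_angle(h[0] - h[1])) > 1.5    # True -> frame 1 is reset to h[0]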
don't change these params - shift = 5 # motion token time dimension - num_cluster = 2048 # vocabulary size - n_step = 91 - data_file_path = data_cache_path / "kdisk_trajs.pkl" - if load_data_from_file: - with open(data_file_path, "rb") as f: - data = pickle.load(f) - else: - trajs = [ - torch.zeros([1, 6, 3], dtype=torch.float32), # veh - torch.zeros([1, 6, 3], dtype=torch.float32), # ped - torch.zeros([1, 6, 3], dtype=torch.float32), # cyc - ] - dataloader = DataLoader( - dataset=MultiDataset(raw_dir=data_cache_path / "training", transform=lambda x: HeteroData(x)), - batch_size=8, - shuffle=False, - num_workers=8, - drop_last=False, - ) - - with tqdm( - total=len(dataloader), - desc=f"n_trajs={n_trajs}", - postfix={"n_veh": 0, "n_ped": 0, "n_cyc": 0}, - ) as pbar: - - for data in dataloader: - valid_mask = data["agent"]["valid_mask"] - data["agent"]["heading"] = TokenProcessor._clean_heading(valid_mask, data["agent"]["heading"]) - - for i_ag in range(valid_mask.shape[0]): - if valid_mask[i_ag, :].sum() < 30: - continue - for t in range(0, n_step - shift, shift): - if valid_mask[i_ag, t] and valid_mask[i_ag, t + shift]: - _type = data["agent"]["type"][i_ag] - if trajs[_type].shape[0] < n_trajs: - pos = data["agent"]["position"][i_ag, t : t + shift + 1, :2] - head = data["agent"]["heading"][i_ag, t : t + shift + 1] - pos, head = transform_to_local( - pos_global=pos.unsqueeze(0), # [1, 6, 2] - head_global=head.unsqueeze(0), # [1, 6] - pos_now=pos[[0]], # [1, 2] - head_now=head[[0]], # [1] - ) - head = wrap_angle(head) - to_add = torch.cat([pos, head.unsqueeze(-1)], dim=-1) - - if not (((trajs[_type] - to_add).abs().sum([1, 2]) < 1e-2).any()): - trajs[_type] = torch.cat([trajs[_type], to_add], dim=0) - pbar.update(1) - pbar.set_postfix( - n_veh=trajs[0].shape[0], - n_ped=trajs[1].shape[0], - n_cyc=trajs[2].shape[0], - ) - if trajs[0].shape[0] == n_trajs and trajs[1].shape[0] == n_trajs and trajs[2].shape[0] == n_trajs: - break - - # [n_trajs, shift+1, [relative_x, relative_y, relative_theta]] - data = {"veh": trajs[0], "ped": trajs[1], "cyc": trajs[2]} - - with open(data_file_path, "wb") as f: - pickle.dump(data, f) - - res = {"token_all": {}} - - for k, v in data.items(): - if k == "veh": - width_length = torch.tensor([2.0, 4.8]) - elif k == "ped": - width_length = torch.tensor([1.0, 1.0]) - elif k == "cyc": - width_length = torch.tensor([1.0, 2.0]) - width_length = width_length.unsqueeze(0) # [1, 2] - - contour = cal_polygon_contour(pos=v[:, -1, :2], head=v[:, -1, 2], width_length=width_length) # [n_trajs, 4, 2] - - if k == "veh": - tol = tol_dist[0] - elif k == "ped": - tol = tol_dist[1] - elif k == "cyc": - tol = tol_dist[2] - print(k, tol) - ret_traj = Kdisk_cluster(X=contour, N=num_cluster, tol=tol, a_pos=v) - ret_traj[:, :, -1] = wrap_angle(ret_traj[:, :, -1]) - - contour = cal_polygon_contour( - pos=ret_traj[:, :, :2], # [N, 6, 2] - head=ret_traj[:, :, 2], # [N, 6] - width_length=width_length.unsqueeze(0), - ) - res["token_all"][k] = contour.numpy() - - with open(Path(__file__).resolve().parent / out_file_name, "wb") as f: - pickle.dump(res, f) diff --git a/d123/training/models/sim_agent/smart/utils/__init__.py b/d123/training/models/sim_agent/smart/utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/d123/training/models/sim_agent/smart/utils/finetune.py b/d123/training/models/sim_agent/smart/utils/finetune.py deleted file mode 100644 index 9db8e6f4..00000000 --- a/d123/training/models/sim_agent/smart/utils/finetune.py +++ /dev/null @@ -1,33 +0,0 @@ 
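# NOTE: Kdisk_cluster above is a greedy K-disk vocabulary builder: pick a segment
# (the all-zero segment first, then uniformly at random), average every segment
# whose mean corner distance lies within tol, drop that disk, and repeat until N
# tokens exist. Condensed sketch of one iteration:
#   while len(vocab) < N:
#       x0 = X[torch.randint(0, X.shape[0], (1,)).item()]
#       inside = torch.norm(X - x0, dim=-1).mean(-1) <= tol
#       vocab.append(a_pos[inside].mean(0))     # disk centroid becomes a token
#       X, a_pos = X[~inside], a_pos[~inside]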
-import logging - -import torch - -logger = logging.getLogger(__name__) - - -def set_model_for_finetuning(model: torch.nn.Module, finetune: bool) -> None: - def _unfreeze(module: torch.nn.Module) -> None: - for p in module.parameters(): - p.requires_grad = True - - if finetune: - for p in model.parameters(): - p.requires_grad = False - - try: - _unfreeze(model.agent_encoder.token_predict_head) - logger.info("Unfreezing token_predict_head") - except: # noqa: E722 - logger.info("No token_predict_head in model.agent_encoder") - - try: - _unfreeze(model.agent_encoder.gmm_logits_head) - _unfreeze(model.agent_encoder.gmm_pose_head) - # _unfreeze(model.agent_encoder.gmm_gmm_covpose_head) - logger.info("Unfreezing gmm heads") - except: # noqa: E722 - logger.info("No gmm_logits_head in model.agent_encoder") - - _unfreeze(model.agent_encoder.t_attn_layers) - _unfreeze(model.agent_encoder.pt2a_attn_layers) - _unfreeze(model.agent_encoder.a2a_attn_layers) diff --git a/d123/training/models/sim_agent/smart/utils/geometry.py b/d123/training/models/sim_agent/smart/utils/geometry.py deleted file mode 100644 index a351046c..00000000 --- a/d123/training/models/sim_agent/smart/utils/geometry.py +++ /dev/null @@ -1,14 +0,0 @@ -import math - -import torch - - -def angle_between_2d_vectors(ctr_vector: torch.Tensor, nbr_vector: torch.Tensor) -> torch.Tensor: - return torch.atan2( - ctr_vector[..., 0] * nbr_vector[..., 1] - ctr_vector[..., 1] * nbr_vector[..., 0], - (ctr_vector[..., :2] * nbr_vector[..., :2]).sum(dim=-1), - ) - - -def wrap_angle(angle: torch.Tensor, min_val: float = -math.pi, max_val: float = math.pi) -> torch.Tensor: - return min_val + (angle + max_val) % (max_val - min_val) diff --git a/d123/training/models/sim_agent/smart/utils/preprocess.py b/d123/training/models/sim_agent/smart/utils/preprocess.py deleted file mode 100644 index f15b68c9..00000000 --- a/d123/training/models/sim_agent/smart/utils/preprocess.py +++ /dev/null @@ -1,150 +0,0 @@ -from typing import Any, Dict - -import numpy as np -import torch -from scipy.interpolate import interp1d - - -def get_polylines_from_polygon(polygon: np.ndarray) -> np.ndarray: - # polygon: [4, 3] - l1 = np.linalg.norm(polygon[1, :2] - polygon[0, :2]) - l2 = np.linalg.norm(polygon[2, :2] - polygon[1, :2]) - - def _pl_interp_start_end(start: np.ndarray, end: np.ndarray) -> np.ndarray: - length = np.linalg.norm(start - end) - unit_vec = (end - start) / length - pl = [] - for i in range(int(length) + 1): # 4.5 -> 5 [0,1,2,3,4] - x, y, z = start + unit_vec * i - pl.append([x, y, z]) - pl.append([end[0], end[1], end[2]]) - return np.array(pl) - - if l1 > l2: - pl1 = _pl_interp_start_end(polygon[0], polygon[1]) - pl2 = _pl_interp_start_end(polygon[2], polygon[3]) - else: - pl1 = _pl_interp_start_end(polygon[0], polygon[3]) - pl2 = _pl_interp_start_end(polygon[2], polygon[1]) - return np.concatenate([pl1, pl1[::-1], pl2, pl2[::-1]], axis=0) - - -def _interplating_polyline(polylines, distance=0.5, split_distace=5): - # Calculate the cumulative distance along the path, up-sample the polyline to 0.5 meter - dist_along_path_list = [] - polylines_list = [] - euclidean_dists = np.linalg.norm(polylines[1:, :2] - polylines[:-1, :2], axis=-1) - euclidean_dists = np.concatenate([[0], euclidean_dists]) - breakpoints = np.where(euclidean_dists > 3)[0] - breakpoints = np.concatenate([[0], breakpoints, [polylines.shape[0]]]) - for i in range(1, breakpoints.shape[0]): - start = breakpoints[i - 1] - end = breakpoints[i] - 
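# NOTE: wrap_angle above maps any angle into [-pi, pi) by modular arithmetic, and
# angle_between_2d_vectors is atan2(cross, dot) of two 2D vectors. Quick numeric
# check of the wrapping:
#   wrap_angle(torch.tensor([4.0]))     # -> 4.0 - 2*pi, approx -2.283
#   wrap_angle(torch.tensor([-4.0]))    # -> -4.0 + 2*pi, approx 2.283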
dist_along_path_list.append(np.cumsum(euclidean_dists[start:end]) - euclidean_dists[start]) - polylines_list.append(polylines[start:end]) - - multi_polylines_list = [] - for idx in range(len(dist_along_path_list)): - if len(dist_along_path_list[idx]) < 2: - continue - dist_along_path = dist_along_path_list[idx] - polylines_cur = polylines_list[idx] - # Create interpolation functions for x and y coordinates - fxy = interp1d(dist_along_path, polylines_cur, axis=0) - - # Create an array of distances at which to interpolate - new_dist_along_path = np.arange(0, dist_along_path[-1], distance) - new_dist_along_path = np.concatenate([new_dist_along_path, dist_along_path[[-1]]]) - - # Combine the new x and y coordinates into a single array - new_polylines = fxy(new_dist_along_path) - polyline_size = int(split_distace / distance) - if new_polylines.shape[0] >= (polyline_size + 1): - padding_size = (new_polylines.shape[0] - (polyline_size + 1)) % polyline_size - final_index = (new_polylines.shape[0] - (polyline_size + 1)) // polyline_size + 1 - else: - padding_size = new_polylines.shape[0] - final_index = 0 - multi_polylines = None - new_polylines = torch.from_numpy(new_polylines) - new_heading = torch.atan2( - new_polylines[1:, 1] - new_polylines[:-1, 1], - new_polylines[1:, 0] - new_polylines[:-1, 0], - ) - new_heading = torch.cat([new_heading, new_heading[-1:]], -1)[..., None] - new_polylines = torch.cat([new_polylines, new_heading], -1) - if new_polylines.shape[0] >= (polyline_size + 1): - multi_polylines = new_polylines.unfold(dimension=0, size=polyline_size + 1, step=polyline_size) - multi_polylines = multi_polylines.transpose(1, 2) - multi_polylines = multi_polylines[:, ::5, :] - if padding_size >= 3: - last_polyline = new_polylines[final_index * polyline_size :] - last_polyline = last_polyline[torch.linspace(0, last_polyline.shape[0] - 1, steps=3).long()] - if multi_polylines is not None: - multi_polylines = torch.cat([multi_polylines, last_polyline.unsqueeze(0)], dim=0) - else: - multi_polylines = last_polyline.unsqueeze(0) - if multi_polylines is None: - continue - multi_polylines_list.append(multi_polylines) - if len(multi_polylines_list) > 0: - multi_polylines_list = torch.cat(multi_polylines_list, dim=0).to(torch.float32) - else: - multi_polylines_list = None - return multi_polylines_list - - -def preprocess_map(map_data: Dict[str, Any]) -> Dict[str, Any]: - pt2pl = map_data[("map_point", "to", "map_polygon")]["edge_index"] - split_polyline_type = [] - split_polyline_pos = [] - split_polyline_theta = [] - split_polygon_type = [] - split_light_type = [] - - for i in sorted(torch.unique(pt2pl[1])): - index = pt2pl[0, pt2pl[1] == i] - if len(index) <= 2: - continue - - polygon_type = map_data["map_polygon"]["type"][i] - light_type = map_data["map_polygon"]["light_type"][i] - cur_type = map_data["map_point"]["type"][index] - cur_pos = map_data["map_point"]["position"][index, :2] - - # assert len(np.unique(cur_type)) == 1 - - split_polyline = _interplating_polyline(cur_pos.numpy()) - if split_polyline is None: - continue - split_polyline_pos.append(split_polyline[..., :2]) - split_polyline_theta.append(split_polyline[..., 2]) - split_polyline_type.append(cur_type[0].repeat(split_polyline.shape[0])) - split_polygon_type.append(polygon_type.repeat(split_polyline.shape[0])) - split_light_type.append(light_type.repeat(split_polyline.shape[0])) - - data = {} - if len(split_polyline_pos) == 0: # add dummy empty map - data["map_save"] = { - # 6e4 such that it's within the range of float16. 
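# NOTE: _interplating_polyline above resamples each polyline at 0.5 m spacing via
# interp1d, unfolds it into 11-point (5 m) windows with stride 10, and keeps every
# 5th point, i.e. 3 points per stored segment. Sketch of the splitting step on an
# assumed [n_pts, 3] tensor pl:
#   segs = pl.unfold(dimension=0, size=11, step=10).transpose(1, 2)  # [n_seg, 11, 3]
#   segs = segs[:, ::5, :]                                           # [n_seg, 3, 3]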
- "traj_pos": torch.zeros([1, 3, 2], dtype=torch.float32) + 6e4, - "traj_theta": torch.zeros([1], dtype=torch.float32), - } - data["pt_token"] = { - "type": torch.tensor([0], dtype=torch.uint8), - "pl_type": torch.tensor([0], dtype=torch.uint8), - "light_type": torch.tensor([0], dtype=torch.uint8), - "num_nodes": 1, - } - else: - data["map_save"] = { - "traj_pos": torch.cat(split_polyline_pos, dim=0), # [num_nodes, 3, 2] - "traj_theta": torch.cat(split_polyline_theta, dim=0)[:, 0], # [num_nodes] - } - data["pt_token"] = { - "type": torch.cat(split_polyline_type, dim=0), # [num_nodes], uint8 - "pl_type": torch.cat(split_polygon_type, dim=0), # [num_nodes], uint8 - "light_type": torch.cat(split_light_type, dim=0), # [num_nodes], uint8 - "num_nodes": data["map_save"]["traj_pos"].shape[0], - } - return data diff --git a/d123/training/models/sim_agent/smart/utils/rollout.py b/d123/training/models/sim_agent/smart/utils/rollout.py deleted file mode 100644 index 041b5acb..00000000 --- a/d123/training/models/sim_agent/smart/utils/rollout.py +++ /dev/null @@ -1,260 +0,0 @@ -from typing import Optional, Tuple - -import torch -from omegaconf import DictConfig -from torch import Tensor -from torch.distributions import Categorical, Independent, MixtureSameFamily, Normal - -from d123.training.models.sim_agent.smart.smart_config import SMARTRolloutSampling - - -@torch.no_grad() -def cal_polygon_contour( - pos: Tensor, # [n_agent, n_step, n_target, 2] - head: Tensor, # [n_agent, n_step, n_target] - width_length: Tensor, # [n_agent, 1, 1, 2] -) -> Tensor: # [n_agent, n_step, n_target, 4, 2] - x, y = pos[..., 0], pos[..., 1] # [n_agent, n_step, n_target] - width, length = width_length[..., 0], width_length[..., 1] # [n_agent, 1 ,1] - - half_cos = 0.5 * head.cos() # [n_agent, n_step, n_target] - half_sin = 0.5 * head.sin() # [n_agent, n_step, n_target] - length_cos = length * half_cos # [n_agent, n_step, n_target] - length_sin = length * half_sin # [n_agent, n_step, n_target] - width_cos = width * half_cos # [n_agent, n_step, n_target] - width_sin = width * half_sin # [n_agent, n_step, n_target] - - left_front_x = x + length_cos - width_sin - left_front_y = y + length_sin + width_cos - left_front = torch.stack((left_front_x, left_front_y), dim=-1) - - right_front_x = x + length_cos + width_sin - right_front_y = y + length_sin - width_cos - right_front = torch.stack((right_front_x, right_front_y), dim=-1) - - right_back_x = x - length_cos + width_sin - right_back_y = y - length_sin - width_cos - right_back = torch.stack((right_back_x, right_back_y), dim=-1) - - left_back_x = x - length_cos - width_sin - left_back_y = y - length_sin + width_cos - left_back = torch.stack((left_back_x, left_back_y), dim=-1) - - polygon_contour = torch.stack((left_front, right_front, right_back, left_back), dim=-2) - - return polygon_contour - - -def transform_to_global( - pos_local: Tensor, # [n_agent, n_step, 2] - head_local: Optional[Tensor], # [n_agent, n_step] - pos_now: Tensor, # [n_agent, 2] - head_now: Tensor, # [n_agent] -) -> Tuple[Tensor, Optional[Tensor]]: - cos, sin = head_now.cos(), head_now.sin() - rot_mat = torch.zeros((head_now.shape[0], 2, 2), device=head_now.device) - rot_mat[:, 0, 0] = cos - rot_mat[:, 0, 1] = sin - rot_mat[:, 1, 0] = -sin - rot_mat[:, 1, 1] = cos - - pos_global = torch.bmm(pos_local, rot_mat) # [n_agent, n_step, 2]*[n_agent, 2, 2] - pos_global = pos_global + pos_now.unsqueeze(1) - if head_local is None: - head_global = None - else: - head_global = head_local + head_now.unsqueeze(1) - return 
pos_global, head_global - - -def transform_to_local( - pos_global: Tensor, # [n_agent, n_step, 2] - head_global: Optional[Tensor], # [n_agent, n_step] - pos_now: Tensor, # [n_agent, 2] - head_now: Tensor, # [n_agent] -) -> Tuple[Tensor, Optional[Tensor]]: - cos, sin = head_now.cos(), head_now.sin() - rot_mat = torch.zeros((head_now.shape[0], 2, 2), device=head_now.device) - rot_mat[:, 0, 0] = cos - rot_mat[:, 0, 1] = -sin - rot_mat[:, 1, 0] = sin - rot_mat[:, 1, 1] = cos - - pos_local = pos_global - pos_now.unsqueeze(1) - pos_local = torch.bmm(pos_local, rot_mat) # [n_agent, n_step, 2]*[n_agent, 2, 2] - if head_global is None: - head_local = None - else: - head_local = head_global - head_now.unsqueeze(1) - return pos_local, head_local - - -def sample_next_token_traj( - token_traj: Tensor, # [n_agent, n_token, 4, 2] - token_traj_all: Tensor, # [n_agent, n_token, 6, 4, 2] - sampling_scheme: SMARTRolloutSampling, - # ! for most-likely sampling - next_token_logits: Tensor, # [n_agent, n_token], with grad - # ! for nearest-pos sampling, sampling near to GT - pos_now: Tensor, # [n_agent, 2] - head_now: Tensor, # [n_agent] - pos_next_gt: Tensor, # [n_agent, 2] - head_next_gt: Tensor, # [n_agent] - valid_next_gt: Tensor, # [n_agent] - token_agent_shape: Tensor, # [n_agent, 2] -) -> Tuple[Tensor, Tensor]: - """ - Returns: - next_token_traj_all: [n_agent, 6, 4, 2], local coord - next_token_idx: [n_agent], without grad - """ - range_a = torch.arange(next_token_logits.shape[0]) - next_token_logits = next_token_logits.detach() - - if sampling_scheme.criteria == "topk_prob" or sampling_scheme.criteria == "topk_prob_sampled_with_dist": - topk_logits, topk_indices = torch.topk(next_token_logits, sampling_scheme.num_k, dim=-1, sorted=False) - if sampling_scheme.criteria == "topk_prob_sampled_with_dist": - # ! gt_contour: [n_agent, 4, 2] in global coord - gt_contour = cal_polygon_contour(pos_next_gt, head_next_gt, token_agent_shape) - gt_contour = gt_contour.unsqueeze(1) # [n_agent, 1, 4, 2] - token_world_sample = token_traj[range_a.unsqueeze(1), topk_indices] - token_world_sample = transform_to_global( - pos_local=token_world_sample.flatten(1, 2), - head_local=None, - pos_now=pos_now, # [n_agent, 2] - head_now=head_now, # [n_agent] - )[0].view(*token_world_sample.shape) - - # dist: [n_agent, n_token] - dist = torch.norm(token_world_sample - gt_contour, dim=-1).mean(-1) - topk_logits = topk_logits.masked_fill(valid_next_gt.unsqueeze(1), 0.0) - 1.0 * dist.masked_fill( - ~valid_next_gt.unsqueeze(1), 0.0 - ) - elif sampling_scheme.criteria == "topk_dist_sampled_with_prob": - # ! 
gt_contour: [n_agent, 4, 2] in global coord - gt_contour = cal_polygon_contour(pos_next_gt, head_next_gt, token_agent_shape) - gt_contour = gt_contour.unsqueeze(1) # [n_agent, 1, 4, 2] - token_world_sample = transform_to_global( - pos_local=token_traj.flatten(1, 2), # [n_agent, n_token*4, 2] - head_local=None, - pos_now=pos_now, # [n_agent, 2] - head_now=head_now, # [n_agent] - )[0].view(*token_traj.shape) - - _invalid = ~valid_next_gt - # dist: [n_agent, n_token] - dist = torch.norm(token_world_sample - gt_contour, dim=-1).mean(-1) - _logits = -1.0 * dist.masked_fill(_invalid.unsqueeze(1), 0.0) - - if _invalid.any(): - _logits[_invalid] = next_token_logits[_invalid] - _, topk_indices = torch.topk(_logits, sampling_scheme.num_k, dim=-1, sorted=False) # [n_agent, K] - topk_logits = next_token_logits[range_a.unsqueeze(1), topk_indices] - - else: - raise ValueError(f"Invalid criteria: {sampling_scheme.criteria}") - - # topk_logits, topk_indices: [n_agent, K] - topk_logits = topk_logits / sampling_scheme.temp - samples = Categorical(logits=topk_logits).sample() # [n_agent] in K - next_token_idx = topk_indices[range_a, samples] - next_token_traj_all = token_traj_all[range_a, next_token_idx] - - return next_token_idx, next_token_traj_all - - -def sample_next_gmm_traj( - token_traj: Tensor, # [n_agent, n_token, 4, 2] - token_traj_all: Tensor, # [n_agent, n_token, 6, 4, 2] - sampling_scheme: DictConfig, - # ! for most-likely sampling - ego_mask: Tensor, # [n_agent], bool, ego_mask.sum()==n_batch - ego_next_logits: Tensor, # [n_batch, n_k_ego_gmm] - ego_next_poses: Tensor, # [n_batch, n_k_ego_gmm, 3] - ego_next_cov: Tensor, # [2], one for pos, one for heading. - # ! for nearest-pos sampling, sampling near to GT - pos_now: Tensor, # [n_agent, 2] - head_now: Tensor, # [n_agent] - pos_next_gt: Tensor, # [n_agent, 2] - head_next_gt: Tensor, # [n_agent] - valid_next_gt: Tensor, # [n_agent] - token_agent_shape: Tensor, # [n_agent, 2] - next_token_idx: Tensor, # [n_agent] -) -> Tuple[Tensor, Tensor]: - """ - Returns: - next_token_traj_all: [n_agent, 6, 4, 2], local coord - next_token_idx: [n_agent], without grad - """ - n_agent = token_traj.shape[0] - n_batch = ego_next_logits.shape[0] - next_token_traj_all = token_traj_all[torch.arange(n_agent), next_token_idx] - - # ! 
sample only the ego-vehicle - assert sampling_scheme.criteria == "topk_prob" or sampling_scheme.criteria == "topk_prob_sampled_with_dist" - topk_logits, topk_indices = torch.topk( - ego_next_logits, sampling_scheme.num_k, dim=-1, sorted=False - ) # [n_agent, k], [n_agent, k] - ego_pose_topk = ego_next_poses[torch.arange(n_batch).unsqueeze(1), topk_indices] # [n_batch, k, 3] - - if sampling_scheme.criteria == "topk_prob_sampled_with_dist": - # udpate topk_logits - gt_contour = cal_polygon_contour( - pos_next_gt[ego_mask], - head_next_gt[ego_mask], - token_agent_shape[ego_mask], - ) # [n_batch, 4, 2] in global coord - gt_contour = gt_contour.unsqueeze(1) # [n_batch, 1, 4, 2] - - ego_pos_global, ego_head_global = transform_to_global( - pos_local=ego_pose_topk[:, :, :2], # [n_batch, k, 2] - head_local=ego_pose_topk[:, :, -1], # [n_batch, k] - pos_now=pos_now[ego_mask], # [n_batch, 2] - head_now=head_now[ego_mask], # [n_batch] - ) - ego_contour = cal_polygon_contour( - ego_pos_global, # [n_batch, k, 2] - ego_head_global, # [n_batch, k] - token_agent_shape[ego_mask].unsqueeze(1), - ) # [n_batch, k, 4, 2] in global coord - - dist = torch.norm(ego_contour - gt_contour, dim=-1).mean(-1) # [n_batch, k] - topk_logits = topk_logits.masked_fill(valid_next_gt[ego_mask].unsqueeze(1), 0.0) - 1.0 * dist.masked_fill( - ~valid_next_gt[ego_mask].unsqueeze(1), 0.0 - ) - - topk_logits = topk_logits / sampling_scheme.temp_mode # [n_batch, k] - ego_pose_topk = torch.cat( - [ - ego_pose_topk[..., :2], - ego_pose_topk[..., [-1]].cos(), - ego_pose_topk[..., [-1]].sin(), - ], - dim=-1, - ) - cov = ( - (ego_next_cov * sampling_scheme.temp_cov).repeat_interleave(2)[None, None, :].expand(*ego_pose_topk.shape) - ) # [n_batch, k, 4] - gmm = MixtureSameFamily(Categorical(logits=topk_logits), Independent(Normal(ego_pose_topk, cov), 1)) - ego_sample = gmm.sample() # [n_batch, 4] - - ego_contour_local = cal_polygon_contour( - ego_sample[:, :2], # [n_batch, 2] - torch.arctan2(ego_sample[:, -1], ego_sample[:, -2]), # [n_batch] - token_agent_shape[ego_mask], # [n_batch, 2] - ) # [n_batch, 4, 2] in local coord - - ego_token_local = token_traj[ego_mask] # [n_batch, n_token, 4, 2] - - dist = torch.norm(ego_contour_local.unsqueeze(1) - ego_token_local, dim=-1).mean(-1) # [n_batch, n_token] - next_token_idx[ego_mask] = dist.argmin(-1) - - ego_contour_local # [n_batch, 4, 2] in local coord - ego_countour_start = next_token_traj_all[ego_mask][:, 0] # [n_batch, 4, 2] - n_step = next_token_traj_all.shape[1] - diff = (ego_contour_local - ego_countour_start) / (n_step - 1) - ego_token_interp = [ego_countour_start + diff * i for i in range(n_step)] - # [n_batch, 6, 4, 2] - next_token_traj_all[ego_mask] = torch.stack(ego_token_interp, dim=1) - - return next_token_idx, next_token_traj_all diff --git a/d123/training/models/sim_agent/smart/utils/weight_init.py b/d123/training/models/sim_agent/smart/utils/weight_init.py deleted file mode 100644 index a507cf30..00000000 --- a/d123/training/models/sim_agent/smart/utils/weight_init.py +++ /dev/null @@ -1,69 +0,0 @@ -import torch.nn as nn - - -def weight_init(m: nn.Module) -> None: - if isinstance(m, nn.Linear): - nn.init.xavier_uniform_(m.weight) - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, (nn.Conv1d, nn.Conv2d, nn.Conv3d)): - fan_in = m.in_channels / m.groups - fan_out = m.out_channels / m.groups - bound = (6.0 / (fan_in + fan_out)) ** 0.5 - nn.init.uniform_(m.weight, -bound, bound) - if m.bias is not None: - nn.init.zeros_(m.bias) - elif isinstance(m, nn.Embedding): - 
nn.init.normal_(m.weight, mean=0.0, std=0.02) - elif isinstance(m, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d)): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.LayerNorm): - nn.init.ones_(m.weight) - nn.init.zeros_(m.bias) - elif isinstance(m, nn.MultiheadAttention): - if m.in_proj_weight is not None: - fan_in = m.embed_dim - fan_out = m.embed_dim - bound = (6.0 / (fan_in + fan_out)) ** 0.5 - nn.init.uniform_(m.in_proj_weight, -bound, bound) - else: - nn.init.xavier_uniform_(m.q_proj_weight) - nn.init.xavier_uniform_(m.k_proj_weight) - nn.init.xavier_uniform_(m.v_proj_weight) - if m.in_proj_bias is not None: - nn.init.zeros_(m.in_proj_bias) - nn.init.xavier_uniform_(m.out_proj.weight) - if m.out_proj.bias is not None: - nn.init.zeros_(m.out_proj.bias) - if m.bias_k is not None: - nn.init.normal_(m.bias_k, mean=0.0, std=0.02) - if m.bias_v is not None: - nn.init.normal_(m.bias_v, mean=0.0, std=0.02) - elif isinstance(m, (nn.LSTM, nn.LSTMCell)): - for name, param in m.named_parameters(): - if "weight_ih" in name: - for ih in param.chunk(4, 0): - nn.init.xavier_uniform_(ih) - elif "weight_hh" in name: - for hh in param.chunk(4, 0): - nn.init.orthogonal_(hh) - elif "weight_hr" in name: - nn.init.xavier_uniform_(param) - elif "bias_ih" in name: - nn.init.zeros_(param) - elif "bias_hh" in name: - nn.init.zeros_(param) - nn.init.ones_(param.chunk(4, 0)[1]) - elif isinstance(m, (nn.GRU, nn.GRUCell)): - for name, param in m.named_parameters(): - if "weight_ih" in name: - for ih in param.chunk(3, 0): - nn.init.xavier_uniform_(ih) - elif "weight_hh" in name: - for hh in param.chunk(3, 0): - nn.init.orthogonal_(hh) - elif "bias_ih" in name: - nn.init.zeros_(param) - elif "bias_hh" in name: - nn.init.zeros_(param) diff --git a/docs/_static/logo_black.png b/docs/_static/logo_black.png new file mode 100644 index 00000000..6717f5f8 Binary files /dev/null and b/docs/_static/logo_black.png differ diff --git a/docs/_static/logo_white.png b/docs/_static/logo_white.png new file mode 100644 index 00000000..16aec842 Binary files /dev/null and b/docs/_static/logo_white.png differ diff --git a/docs/conf.py b/docs/conf.py index 9d7e8c5f..0479a8ec 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -6,10 +6,11 @@ # -- Project information ----------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information -project = "d123" + +project = "py123d" copyright = "2025, 123D Contributors" author = "123D Contributors" -release = "v0.0.6" +release = "v0.0.7" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration @@ -18,8 +19,11 @@ "sphinx.ext.duration", "sphinx.ext.doctest", "sphinx.ext.autodoc", + "sphinx.ext.intersphinx", "sphinx.ext.autosummary", "sphinx.ext.intersphinx", + "sphinx.ext.napoleon", + "sphinx_copybutton", "myst_parser", ] @@ -45,24 +49,46 @@ # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # -html_theme = "sphinx_rtd_theme" +html_theme = "furo" +html_title = "" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". 
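# Annotation (editorial, not part of this diff): "sphinx.ext.intersphinx",
# enabled above, only resolves cross-project references once an
# intersphinx_mapping is configured. A minimal sketch; the project entries are
# illustrative assumptions, not part of this configuration:
#
# intersphinx_mapping = {
#     "python": ("https://docs.python.org/3", None),
#     "numpy": ("https://numpy.org/doc/stable/", None),
# }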
html_static_path = ["_static"]
 
-html_theme_options = {
-    "analytics_anonymize_ip": False,
-    "logo_only": False,
-    "display_version": True,
-    "prev_next_buttons_location": "bottom",
-    "style_external_links": False,
-    "vcs_pageview_mode": "",
-    # Toc options
-    "collapse_navigation": True,
-    "sticky_navigation": True,
-    "navigation_depth": 3,
-    "includehidden": True,
-    "titles_only": False,
+html_theme_options = {}
+
+autodoc_typehints = "both"
+autodoc_class_signature = "separated"
+autodoc_default_options = {
+    "members": True,
+    "member-order": "bysource",
+    "undoc-members": True,
+    "inherited-members": False,
+    "exclude-members": "__init__, __post_init__, __new__",
+    "imported-members": True,
 }
+
+
+# Custom CSS for theming; "custom.css" is served from html_static_path
+html_css_files = [
+    "custom.css",
+]
+
+# Additional theme options for logos and the sidebar
+html_theme_options.update(
+    {
+        "light_logo": "logo_black.png",
+        "dark_logo": "logo_white.png",
+        "sidebar_hide_name": True,
+    }
+)
diff --git a/docs/datasets/av2.rst b/docs/datasets/av2.rst
index ab01edfb..3c09f329 100644
--- a/docs/datasets/av2.rst
+++ b/docs/datasets/av2.rst
@@ -5,7 +5,6 @@ Argoverse 2
 
 .. image:: https://www.argoverse.org/assets/images/reference_images/av2_vehicle.jpg
    :alt: Dataset sample image
-   :width: 290px
 
 | **Paper:** `Name of Paper `_
 | **Download:** `Documentation `_
@@ -29,7 +28,7 @@ Installation
 .. code-block:: bash
 
     # Example installation commands
-    pip install d123[dataset_name]
+    pip install py123d[dataset_name]
 
     # or
     wget https://example.com/dataset.zip
diff --git a/docs/datasets/carla.rst b/docs/datasets/carla.rst
index 3ca7ff09..ccc921f1 100644
--- a/docs/datasets/carla.rst
+++ b/docs/datasets/carla.rst
@@ -29,7 +29,7 @@ Installation
 .. code-block:: bash
 
     # Example installation commands
-    pip install d123[dataset_name]
+    pip install py123d[dataset_name]
 
     # or
     wget https://example.com/dataset.zip
diff --git a/docs/datasets/index.rst b/docs/datasets/index.rst
index ead97169..851fbf32 100644
--- a/docs/datasets/index.rst
+++ b/docs/datasets/index.rst
@@ -6,7 +6,7 @@ Brief overview of the datasets section...
 This section provides comprehensive documentation for various autonomous driving and computer vision datasets. Each dataset entry includes installation instructions, available data types, known issues, and proper citation formats.
 
 .. toctree::
-   :maxdepth: 0
+   :maxdepth: 1
 
    av2
    nuplan
diff --git a/docs/datasets/kitti-360.rst b/docs/datasets/kitti-360.rst
index 76100d27..d4fc8a9f 100644
--- a/docs/datasets/kitti-360.rst
+++ b/docs/datasets/kitti-360.rst
@@ -7,12 +7,12 @@ KiTTI-360
    :alt: Dataset sample image
    :width: 290px
 
-   | **Paper:** `Name of Paper `_
-   | **Download:** `Documentation `_
-   | **Code:** [Code]
-   | **Documentation:** [License type]
+   | **Paper:** `KITTI-360: A Novel Dataset and Benchmarks for Urban Scene Understanding in 2D and 3D `_
+   | **Download:** `www.cvlibs.net/datasets/kitti-360 `_
+   | **Code:** `www.github.com/autonomousvision/kitti360Scripts `_
+   | **Documentation:** `kitti-360 Document`_
    | **License:** [License type]
-   | **Duration:** [Duration here]
+   | **Size:** 320k images
    | **Supported Versions:** [Yes/No/Conditions]
    | **Redistribution:** [Yes/No/Conditions]
 
@@ -29,7 +29,7 @@ Installation
 
 .. 
code-block:: bash
 
     # Example installation commands
-    pip install d123[dataset_name]
+    pip install py123d[dataset_name]
 
     # or
     wget https://example.com/dataset.zip
diff --git a/docs/datasets/nuplan.rst b/docs/datasets/nuplan.rst
index 065c26a5..94b50f08 100644
--- a/docs/datasets/nuplan.rst
+++ b/docs/datasets/nuplan.rst
@@ -1,5 +1,5 @@
 nuPlan
------
+------
 
 .. sidebar:: nuPlan
 
@@ -29,7 +29,7 @@ Installation
 .. code-block:: bash
 
     # Example installation commands
-    pip install d123[dataset_name]
+    pip install py123d[dataset_name]
 
     # or
     wget https://example.com/dataset.zip
diff --git a/docs/datasets/nuscenes.rst b/docs/datasets/nuscenes.rst
index 1f4e1621..638c5ad6 100644
--- a/docs/datasets/nuscenes.rst
+++ b/docs/datasets/nuscenes.rst
@@ -29,7 +29,7 @@ Installation
 .. code-block:: bash
 
     # Example installation commands
-    pip install d123[dataset_name]
+    pip install py123d[dataset_name]
 
     # or
     wget https://example.com/dataset.zip
diff --git a/docs/datasets/template.rst b/docs/datasets/template.rst
index d38723ed..29797269 100644
--- a/docs/datasets/template.rst
+++ b/docs/datasets/template.rst
@@ -29,7 +29,7 @@ Installation
 .. code-block:: bash
 
     # Example installation commands
-    pip install d123[dataset_name]
+    pip install py123d[dataset_name]
 
     # or
     wget https://example.com/dataset.zip
diff --git a/docs/datasets/wopd.rst b/docs/datasets/wopd.rst
index 8dc276a8..3b00d2e6 100644
--- a/docs/datasets/wopd.rst
+++ b/docs/datasets/wopd.rst
@@ -29,10 +29,21 @@ Installation
 .. code-block:: bash
 
     # Example installation commands
-    pip install d123[dataset_name]
+    pip install py123d[dataset_name]
 
     # or
     wget https://example.com/dataset.zip
+
+.. code-block:: bash
+
+    conda create -n py123d_waymo python=3.10
+    conda activate py123d_waymo
+    pip install -e .[waymo]
+
+    # pip install protobuf==6.30.2
+    # pip install tensorflow==2.13.0
+    # pip install waymo-open-dataset-tf-2-12-0==1.6.6
+
 Available Data
 ~~~~~~~~~~~~~~
diff --git a/docs/contributing.md b/docs/development/contributing.md
similarity index 87%
rename from docs/contributing.md
rename to docs/development/contributing.md
index e447d876..e7ac92fe 100644
--- a/docs/contributing.md
+++ b/docs/development/contributing.md
@@ -1,22 +1,22 @@
 # Contributing to 123D
 
-Thank you for your interest in contributing to 123D! This guide will help you get started with the development process.
+Contributions to 123D are highly encouraged! This guide will help you get started with the development process.
 
 ## Getting Started
 
 ### 1. Clone the Repository
 
 ```sh
-git clone git@github.com:DanielDauner/d123.git
-cd d123
+git clone git@github.com:DanielDauner/py123d.git
+cd py123d
 ```
 
 ### 2. Install the pip-package
 
 ```sh
-conda env create -f environment.yml --name d123_dev # Optional
-conda activate d123_dev
+conda env create -f environment.yml --name py123d_dev # Optional
+conda activate py123d_dev
 pip install -e .[dev]
 pre-commit install
 ```
 
@@ -27,7 +27,7 @@ pre-commit install
 
 ### 3. Managing dependencies
 
-One principal of 123D is to keep *minimal dependencies*. However, various datasets require problematic (or even outdated) dependencies in order to load or preprocess the dataset. In this case, you can add optional dependencies to the `pyproject.toml` install file. You can follow examples of Waymo or nuPlan. These optional dependencies can be install with
+One principle of 123D is to keep *minimal dependencies*. However, various datasets require dependencies in order to load or preprocess the dataset. In this case, you can add optional dependencies to the `pyproject.toml` install file. You can follow the examples of Waymo/nuPlan. These optional dependencies can be installed with
 
 ```sh
 pip install -e .[dev,waymo,nuplan]
 ```
 
@@ -42,7 +42,7 @@
 import numpy.typing as npt
 
 def load_camera_from_outdated_dataset(file_path: str) -> npt.NDArray[np.uint8]:
     try:
-        from outdated_dataset import load_camera_image
+        from optional_dataset import load_camera_image
     except ImportError:
         raise ImportError(
             "Optional dependency 'outdated_dataset' is required to load camera images from this dataset. "
diff --git a/docs/development/index.rst b/docs/development/index.rst
new file mode 100644
index 00000000..579ccee4
--- /dev/null
+++ b/docs/development/index.rst
@@ -0,0 +1,7 @@
+Development
+===========
+
+.. toctree::
+   :maxdepth: 1
+
+   contributing
diff --git a/docs/geometry.rst b/docs/geometry.rst
new file mode 100644
index 00000000..0a3215e3
--- /dev/null
+++ b/docs/geometry.rst
@@ -0,0 +1,87 @@
+
+Geometry
+========
+
+Geometric Primitives
+--------------------
+
+Points
+~~~~~~
+.. autoclass:: py123d.geometry.Point2D()
+
+.. autoclass:: py123d.geometry.Point3D()
+
+Vectors
+~~~~~~~
+.. autoclass:: py123d.geometry.Vector2D()
+
+.. autoclass:: py123d.geometry.Vector3D()
+
+Special Euclidean Group
+~~~~~~~~~~~~~~~~~~~~~~~
+.. autoclass:: py123d.geometry.StateSE2()
+
+.. autoclass:: py123d.geometry.StateSE3()
+
+Bounding Boxes
+~~~~~~~~~~~~~~
+.. autoclass:: py123d.geometry.BoundingBoxSE2()
+
+.. autoclass:: py123d.geometry.BoundingBoxSE3()
+
+Indexing Enums
+~~~~~~~~~~~~~~
+.. autoclass:: py123d.geometry.Point2DIndex()
+
+.. autoclass:: py123d.geometry.Point3DIndex()
+
+.. autoclass:: py123d.geometry.Vector2DIndex()
+
+.. autoclass:: py123d.geometry.Vector3DIndex()
+
+.. autoclass:: py123d.geometry.StateSE2Index()
+
+.. autoclass:: py123d.geometry.StateSE3Index()
+
+.. autoclass:: py123d.geometry.BoundingBoxSE2Index()
+
+.. autoclass:: py123d.geometry.BoundingBoxSE3Index()
+
+.. autoclass:: py123d.geometry.Corners2DIndex()
+
+.. autoclass:: py123d.geometry.Corners3DIndex()
+
+
+Transformations
+---------------
+
+Transformations in 2D
+~~~~~~~~~~~~~~~~~~~~~
+.. autofunction:: py123d.geometry.transform.convert_absolute_to_relative_se2_array
+
+.. autofunction:: py123d.geometry.transform.convert_relative_to_absolute_se2_array
+
+.. autofunction:: py123d.geometry.transform.translate_se2_along_body_frame
+
+.. autofunction:: py123d.geometry.transform.translate_se2_along_x
+
+.. autofunction:: py123d.geometry.transform.translate_se2_along_y
+
+
+Transformations in 3D
+~~~~~~~~~~~~~~~~~~~~~
+.. autofunction:: py123d.geometry.transform.convert_absolute_to_relative_se3_array
+
+.. autofunction:: py123d.geometry.transform.convert_relative_to_absolute_se3_array
+
+.. autofunction:: py123d.geometry.transform.translate_se3_along_body_frame
+
+.. autofunction:: py123d.geometry.transform.translate_se3_along_x
+
+.. autofunction:: py123d.geometry.transform.translate_se3_along_y
+
+.. autofunction:: py123d.geometry.transform.translate_se3_along_z
+
+Occupancy Map
+-------------
+.. autoclass:: py123d.geometry.OccupancyMap2D()
diff --git a/docs/index.rst b/docs/index.rst
index b169f012..a9e1d197 100644
--- a/docs/index.rst
+++ b/docs/index.rst
@@ -13,10 +13,27 @@ documentation for details.
 
 .. toctree::
    :maxdepth: 1
-   :caption: Contents:
+   :caption: Overview:
 
    installation
    datasets/index
-   schema
+
+
+.. toctree::
+   :maxdepth: 2
+   :caption: API Reference:
+
+   geometry
+
+
+.. 
toctree:: + :maxdepth: 1 + :caption: Visualization: + visualization - contributing + +.. toctree:: + :maxdepth: 1 + :caption: Development: + + development/index diff --git a/docs/installation.md b/docs/installation.md index 95d09a04..39846467 100644 --- a/docs/installation.md +++ b/docs/installation.md @@ -1,10 +1,9 @@ - # Installation -Note, the following installation assumes the following folder structure: +Note, the following installation assumes the following folder structure: TODO UPDATE ``` -~/d123_workspace -├── d123 +~/py123d_workspace +├── py123d ├── exp │ └── ... └── data @@ -25,24 +24,14 @@ Note, the following installation assumes the following folder structure: ``` -First you need to create a new conda environment and install `d123` as editable pip package. +First you need to create a new conda environment and install `py123d` as editable pip package. ```bash -conda env create -f environment.yml -conda activate d123 +conda create -n py123d_dev python=3.12 +conda activate py123d_dev pip install -e . ``` Next, you need add the following environment variables in your `.bashrc`: ```bash -export D123_DEVKIT_ROOT="$HOME/d123_workspace/d123" -export D123_MAPS_ROOT="$HOME/d123_workspace/data/maps" -export D123_DATA_ROOT="$HOME/d123_workspace/data" -export D123_EXP_ROOT="$HOME/d123_workspace/exp" - -# CARLA -export CARLA_SIMULATOR_ROOT="$HOME/carla_workspace/carla_garage/carla" - -# nuPlan -export NUPLAN_DATA_ROOT="/path/to/nuplan/dataset" -export NUPLAN_MAPS_ROOT="/path/to/nuplan/dataset/maps" +export PY123D_DATA_ROOT="$HOME/py123d_workspace/data" ``` diff --git a/docs/schema.md b/docs/schema.md deleted file mode 100644 index b3d44a6e..00000000 --- a/docs/schema.md +++ /dev/null @@ -1,15 +0,0 @@ - -# Schema - -TODO - diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 53e458bc..00000000 --- a/environment.yml +++ /dev/null @@ -1,7 +0,0 @@ -name: d123 -channels: - - conda-forge -dependencies: - - python=3.12 - - pip=25.1.1 - - nb_conda_kernels diff --git a/examples/01_viser.py b/examples/01_viser.py new file mode 100644 index 00000000..a91005cf --- /dev/null +++ b/examples/01_viser.py @@ -0,0 +1,37 @@ +from py123d.common.multithreading.worker_sequential import Sequential +from py123d.datatypes.scene.arrow.arrow_scene_builder import ArrowSceneBuilder +from py123d.datatypes.scene.scene_filter import SceneFilter +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType +from py123d.visualization.viser.viser_viewer import ViserViewer + +if __name__ == "__main__": + # splits = ["kitti360_train"] + # splits = ["nuscenes-mini_val", "nuscenes-mini_train"] + # splits = ["nuplan-mini_test", "nuplan-mini_train", "nuplan-mini_val"] + # splits = ["nuplan_private_test"] + # splits = ["carla_test"] + splits = ["wopd_val"] + # splits = ["av2-sensor_train"] + # splits = ["pandaset_test", "pandaset_val", "pandaset_train"] + # log_names = ["2021.08.24.13.12.55_veh-45_00386_00472"] + # log_names = ["2013_05_28_drive_0000_sync"] + # log_names = ["2013_05_28_drive_0000_sync"] + log_names = None + # scene_uuids = ["87bf69e4-f2fb-5491-99fa-8b7e89fb697c"] + scene_uuids = None + + scene_filter = SceneFilter( + split_names=splits, + log_names=log_names, + scene_uuids=scene_uuids, + duration_s=None, + history_s=0.0, + timestamp_threshold_s=None, + shuffle=True, + pinhole_camera_types=[PinholeCameraType.PCAM_F0], + ) + scene_builder = ArrowSceneBuilder() + worker = Sequential() + scenes = scene_builder.get_scenes(scene_filter, worker) + print(f"Found {len(scenes)} scenes") + 
visualization_server = ViserViewer(scenes, scene_index=0) diff --git a/notebooks/av2/delete_me.ipynb b/notebooks/av2/delete_me.ipynb deleted file mode 100644 index e28ef5ef..00000000 --- a/notebooks/av2/delete_me.ipynb +++ /dev/null @@ -1,480 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "import matplotlib.pyplot as plt\n", - "from pathlib import Path\n", - "\n", - "import numpy as np\n", - "import io\n", - "\n", - "from PIL import Image\n", - "\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "# split = \"test\"\n", - "\n", - "\n", - "split = \"train\"\n", - "# split = \"val\"\n", - "\n", - "split_folder = Path(f\"/media/nvme1/argoverse/sensor_mini/{split}\")\n", - "log_folder = sorted(list(split_folder.iterdir()))[0]\n", - "\n", - "log_folder.name" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "def _ls(path: Path):\n", - " \"\"\"List all files in the given path.\"\"\"\n", - " return [f.name for f in path.iterdir()]\n", - "\n", - "\n", - "def get_arrow_from_file(file_path: Path):\n", - " if file_path.suffix == \".parquet\":\n", - " import pyarrow.parquet as pq\n", - " return pq.read_table(file_path)\n", - " elif file_path.suffix == \".feather\":\n", - " import pyarrow.feather as feather\n", - " return feather.read_feather(file_path)\n", - " else:\n", - " raise ValueError(f\"Unsupported file type: {file_path.suffix}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "\n", - "# 1. Calibration \n", - "calibration_folder = log_folder / \"calibration\"\n", - "\n", - "# 1.1 -> ego to sensor transformation\n", - "egovehicle_SE3_sensor_file = log_folder / \"calibration\" / \"egovehicle_SE3_sensor.feather\"\n", - "egovehicle_se3_sensor_table = get_arrow_from_file(egovehicle_SE3_sensor_file)\n", - "\n", - "egovehicle_se3_sensor_table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "# 1.2 -> intrinsic parameters\n", - "intrinsics_file = log_folder / \"calibration\" / \"intrinsics.feather\"\n", - "intrinsics_table = get_arrow_from_file(intrinsics_file)\n", - "\n", - "\n", - "intrinsics_table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "# 2. Ego Vehicle\n", - "city_SE3_egovehicle_file = log_folder / \"city_SE3_egovehicle.feather\"\n", - "city_se3_egovehicle_table = get_arrow_from_file(city_SE3_egovehicle_file)\n", - "\n", - "city_se3_egovehicle_table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "# # 3. Map\n", - "# # map_folder = log_folder / \"map\"\n", - "# # print(_ls(map_folder))\n", - "\n", - "# # # 4. sensors\n", - "# # print(_ls(log_folder))\n", - "\n", - "# # from d123.dataset.dataset_specific.av2.av2_data_converter import AV2SensorDataConverter\n", - "# from d123.dataset.dataset_specific.av2.av2_data_converter import AV2SensorDataConverter\n", - "\n", - "# # AV2SensorDataConverter([])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "# 5. 
Annotations\n", - "annotations_file = log_folder / \"annotations.feather\"\n", - "annotations_table = get_arrow_from_file(annotations_file)\n", - "\n", - "# print(_ls(annotations_folder))\n", - "annotations_table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [ - "camera_name = \"ring_side_left\"\n", - "\n", - "camera_folder = log_folder / \"sensors\" / \"cameras\" / camera_name\n", - "camera_files = sorted(list(camera_folder.iterdir()))\n", - "\n", - "\n", - "def jpg_to_array(file_path):\n", - " image = np.array(Image.open(io.BytesIO(file_path.read_bytes())))\n", - " return image\n", - "\n", - "plt.imshow(jpg_to_array(camera_files[1]))\n", - "print(len(camera_files))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "lidar_folder = log_folder / \"sensors\" / \"lidar\" \n", - "lidar_files = sorted(list(lidar_folder.iterdir()))\n", - "\n", - "\n", - "get_arrow_from_file(lidar_files[0])\n", - "\n", - "print(lidar_files)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [ - "# Testing sensor syn dataframes\n", - "\n", - "from typing import Optional\n", - "from d123.dataset.dataset_specific.av2.av2_constants import AV2_CAMERA_TYPE_MAPPING\n", - "from d123.dataset.dataset_specific.av2.av2_helper import build_sensor_dataframe, build_synchronization_dataframe\n", - "\n", - "\n", - "sensor_df = build_sensor_dataframe(log_folder)\n", - "synchronization_df = build_synchronization_dataframe(sensor_df)\n", - "dataset_dir = split_folder.parent\n", - "\n", - "\n", - "def find_closest_target_fpath(\n", - " split: str,\n", - " log_id: str,\n", - " src_sensor_name: str,\n", - " src_timestamp_ns: int,\n", - " target_sensor_name: str,\n", - ") -> Optional[Path]:\n", - " \"\"\"Find the file path to the target sensor from a source sensor.\"\"\"\n", - " if synchronization_df is None:\n", - " raise RuntimeError(\"Requested synchronized data, but the synchronization database has not been created.\")\n", - "\n", - " src_timedelta_ns = pd.Timedelta(src_timestamp_ns)\n", - " src_to_target_records = synchronization_df.loc[(split, log_id, src_sensor_name)].set_index(src_sensor_name)\n", - " index = src_to_target_records.index\n", - " if src_timedelta_ns not in index:\n", - " # This timestamp does not correspond to any lidar sweep.\n", - " return None\n", - "\n", - " # Grab the synchronization record.\n", - " target_timestamp_ns = src_to_target_records.loc[src_timedelta_ns, target_sensor_name]\n", - " if pd.isna(target_timestamp_ns):\n", - " # No match was found within tolerance.\n", - " return None\n", - "\n", - " sensor_dir = dataset_dir / split / log_id / \"sensors\"\n", - " valid_cameras = list(AV2_CAMERA_TYPE_MAPPING.keys())\n", - " timestamp_ns_str = str(target_timestamp_ns.asm8.item())\n", - " if target_sensor_name in valid_cameras:\n", - " target_path = sensor_dir / \"cameras\" / target_sensor_name / f\"{timestamp_ns_str}.jpg\"\n", - " else:\n", - " target_path = sensor_dir / target_sensor_name / f\"{timestamp_ns_str}.feather\"\n", - " return target_path" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "11", - "metadata": {}, - "outputs": [], - "source": [ - "# split=\"train\"\n", - "# log_id=\"00a6ffc1-6ce9-3bc3-a060-6006e9893a1a\"\n", - "# src_sensor_name=\"ring_front_center\"\n", - "# src_timestamp_ns=315967376959702000\n", - "# 
target_sensor_name=\"lidar\"\n", - "\n", - "# src_to_target_records = synchronization_df.loc[(\"train\", \"\", src_sensor_name)]\n", - "# # synchronization_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "12", - "metadata": {}, - "outputs": [], - "source": [ - "lidar_timestamp_ns_list = [int(path.stem) for path in lidar_files]\n", - "\n", - "\n", - "\n", - "\n", - "for lidar_timestamp_ns in lidar_timestamp_ns_list:\n", - "\n", - " fpath = find_closest_target_fpath(\n", - " split=\"train\",\n", - " log_id=log_folder.name,\n", - " src_sensor_name=\"lidar\",\n", - " src_timestamp_ns=lidar_timestamp_ns,\n", - " target_sensor_name=\"ring_front_center\",\n", - " )\n", - " if fpath is None:\n", - " continue\n", - " # print(fpath)\n", - "\n", - " egovehicle_se3_sensor_table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "13", - "metadata": {}, - "outputs": [], - "source": [ - "for _, row in egovehicle_se3_sensor_table.iterrows():\n", - " row = row.to_dict()\n", - " print(row)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "14", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "15", - "metadata": {}, - "outputs": [], - "source": [ - "import cv2\n", - "from pyquaternion import Quaternion\n", - "\n", - "lidar_timestamps = [int(f.stem) for f in lidar_files]\n", - "camera_timestamps = [int(f.stem) for f in camera_files]\n", - "\n", - "\n", - "def get_slice_with_timestamp_ns(dataframe: pd.DataFrame, timestamp_ns: int):\n", - " \"\"\"Get the index of the closest timestamp to the target timestamp.\"\"\"\n", - " return dataframe[dataframe[\"timestamp_ns\"] == timestamp_ns]\n", - "\n", - "\n", - "def find_nearest_timestamp(target_ns, timestamp_list):\n", - " timestamp_array = np.array(timestamp_list, dtype=np.int64)\n", - " idx = np.argmin(np.abs(timestamp_array - np.int64(target_ns)))\n", - " return int(timestamp_array[idx])\n", - "\n", - "# for lidar_timestamp in lidar_timestamps:\n", - "# slice = get_slice_with_timestamp_ns(annotations_table, lidar_timestamp)\n", - "# assert len(slice) >= 1\n", - "\n", - "\n", - "\n", - "# ego_pose = city_SE3_egovehicle_table[city_SE3_egovehicle_table[\"timestamp_ns\"] == lidar_timestamps[10]]\n", - "# ego_pose_dict = ego_pose.iloc[0].to_dict()\n", - "\n", - "\n", - "annotations_slice = get_slice_with_timestamp_ns(annotations_table, lidar_timestamps[10])\n", - "for _, row in annotations_slice.iterrows():\n", - "# qw = row[\"qw\"]\n", - "# qx = row[\"qx\"]\n", - "# qy = row[\"qy\"]\n", - "# qz = row[\"qz\"]\n", - "# tx_m = row[\"tx_m\"]\n", - "# ty_m = row[\"ty_m\"]\n", - "# tz_m = row[\"tz_m\"]\n", - " print(row.to_dict())\n", - "\n", - "annotations_slice\n", - "\n", - "# qw\tqx\tqy\tqz\ttx_m\tty_m\ttz_m\n", - "# # def jpg_to_array(file_path):\n", - "\n", - "# camera_frames = []\n", - "# for lidar_timestamp in lidar_timestamps:\n", - "# camera_stamp_at_lidar = find_nearest_timestamp(lidar_timestamp, camera_timestamps)\n", - "# image = jpg_to_array(camera_folder / f\"{camera_stamp_at_lidar}.jpg\")\n", - "# camera_frames.append(image)\n", - " \n", - "# print(len(camera_frames))\n", - "# height, width, _ = camera_frames[0].shape\n", - "# video_path = \"camera_frames_video.mp4\"\n", - "# fourcc = cv2.VideoWriter_fourcc(*'mp4v')\n", - "# out = cv2.VideoWriter(video_path, fourcc, 10, (width, height))\n", - "\n", - "# for frame in camera_frames:\n", - "# out.write(cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))\n", - "\n", - "# 
out.release()\n", - "# print(f\"Saved video to {video_path}\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "16", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "from pyquaternion import Quaternion\n", - "from d123.common.datatypes.detection.detection_types import DetectionType\n", - "from d123.common.geometry.base import StateSE2\n", - "from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2\n", - "from d123.common.visualization.color.config import PlotConfig\n", - "from d123.common.visualization.color.default import BOX_DETECTION_CONFIG\n", - "from d123.common.visualization.matplotlib.utils import add_shapely_polygon_to_ax\n", - "\n", - "\n", - "\n", - "fig, ax = plt.subplots(1, 1, figsize=(10, 10))\n", - "\n", - "for cuboid in sensor_data.annotations:\n", - " yaw, pitch, roll = Quaternion(matrix=cuboid.dst_SE3_object.rotation).yaw_pitch_roll\n", - " center = StateSE2(cuboid.dst_SE3_object.translation[0], cuboid.dst_SE3_object.translation[1], yaw)\n", - " bounding_box = BoundingBoxSE2(center, cuboid.length_m, cuboid.width_m)\n", - " add_shapely_polygon_to_ax(ax, bounding_box.shapely_polygon, BOX_DETECTION_CONFIG[DetectionType.VEHICLE])\n", - "\n", - "ax.set_aspect(\"equal\")\n", - "\n", - "radius = 200\n", - "ax.set_xlim(-radius, radius)\n", - "ax.set_ylim(-radius, radius)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "17", - "metadata": {}, - "outputs": [], - "source": [ - "bounding_box.shapely_polygon\n", - "\n", - "bounding_box.corners_array" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "18", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "sensor_cache = \"/home/daniel/.cache/av2/sensor_cache.feather\"\n", - "get_arrow_from_file(Path(sensor_cache))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "19", - "metadata": {}, - "outputs": [], - "source": [ - "synchronization_cache = \"/home/daniel/.cache/av2/synchronization_cache.feather\"\n", - "synchronization_cache = get_arrow_from_file(Path(synchronization_cache))\n", - "\n", - "synchronization_cache[\"sensor_name\"].unique()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20", - "metadata": {}, - "outputs": [], - "source": [ - "CAM_SHUTTER_INTERVAL_MS" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/av2/delete_me_map.ipynb b/notebooks/av2/delete_me_map.ipynb deleted file mode 100644 index 03e62838..00000000 --- a/notebooks/av2/delete_me_map.ipynb +++ /dev/null @@ -1,313 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "from pathlib import Path\n", - "\n", - "import numpy as np\n", - "import io\n", - "\n", - "from PIL import Image\n", - "\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "# split = \"test\"\n", - "\n", - "\n", - "split = \"train\"\n", - "# split = \"val\"\n", - "\n", - "split_folder = 
Path(f\"/media/nvme1/argoverse/sensor_mini/{split}\")\n", - "log_folder = sorted(list(split_folder.iterdir()))[2]\n", - "\n", - "log_folder.name" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "for lane_segment_id, lane_segment_dict in log_map_archive[\"lane_segments\"].items():\n", - " print(f\"Lane Segment ID: {lane_segment_id}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "from typing import Dict, List\n", - "\n", - "from d123.common.geometry.line.polylines import Polyline3D\n", - "from d123.dataset.dataset_specific.av2.av2_map_conversion import _extract_lane_group_dict\n", - "\n", - "\n", - "def _extract_polyline(data: List[Dict[str, float]], close: bool = False) -> Polyline3D:\n", - " polyline = np.array([[p[\"x\"], p[\"y\"], p[\"z\"]] for p in data], dtype=np.float64)\n", - " if close:\n", - " polyline = np.vstack([polyline, polyline[0]])\n", - "\n", - " return Polyline3D.from_array(polyline)\n", - "\n", - "\n", - "map_folder = log_folder / \"map\"\n", - "log_map_archive_path = next(map_folder.glob(\"log_map_archive_*.json\"))\n", - "\n", - "with open(log_map_archive_path, \"r\") as f:\n", - " log_map_archive = json.load(f)\n", - "\n", - "drivable_areas: Dict[int, Polyline3D] = {}\n", - "\n", - "for drivable_area_id, drivable_area_dict in log_map_archive[\"drivable_areas\"].items():\n", - " # keys: [\"area_boundary\", \"id\"]\n", - " drivable_areas[int(drivable_area_id)] = _extract_polyline(drivable_area_dict[\"area_boundary\"], close=True)\n", - "\n", - "for lane_segment_id, lane_segment_dict in log_map_archive[\"lane_segments\"].items():\n", - " # keys = [\n", - " # \"id\",\n", - " # \"is_intersection\",\n", - " # \"lane_type\",\n", - " # \"left_lane_boundary\",\n", - " # \"left_lane_mark_type\",\n", - " # \"right_lane_boundary\",\n", - " # \"right_lane_mark_type\",\n", - " # \"successors\",\n", - " # \"predecessors\",\n", - " # \"right_neighbor_id\",\n", - " # \"left_neighbor_id\",\n", - " # ]\n", - " lane_segment_dict[\"left_lane_boundary\"] = _extract_polyline(lane_segment_dict[\"left_lane_boundary\"])\n", - " lane_segment_dict[\"right_lane_boundary\"] = _extract_polyline(lane_segment_dict[\"right_lane_boundary\"])\n", - "\n", - "for crosswalk_id, crosswalk_dict in log_map_archive[\"pedestrian_crossings\"].items():\n", - " # keys = [\"id\", \"outline\"]\n", - " # https://github.com/argoverse/av2-api/blob/6b22766247eda941cb1953d6a58e8d5631c561da/src/av2/map/pedestrian_crossing.py\n", - "\n", - " p1, p2 = np.array([[p[\"x\"], p[\"y\"], p[\"z\"]] for p in crosswalk_dict[\"edge1\"]], dtype=np.float64)\n", - " p3, p4 = np.array([[p[\"x\"], p[\"y\"], p[\"z\"]] for p in crosswalk_dict[\"edge2\"]], dtype=np.float64)\n", - " crosswalk_dict[\"outline\"] = Polyline3D.from_array(np.array([p1, p2, p4, p3, p1], dtype=np.float64))\n", - "\n", - "\n", - "lane_group_dict = _extract_lane_group_dict(log_map_archive[\"lane_segments\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Any\n", - "\n", - "import shapely\n", - "\n", - "from d123.common.geometry.base import Point3DIndex\n", - "import geopandas as gpd\n", - "\n", - "from d123.common.geometry.line.polylines import Polyline2D\n", - "from d123.common.geometry.occupancy_map import OccupancyMap2D\n", - "\n", - "import numpy.typing as npt\n", - "\n", - "\n", - "def 
_interpolate_z_on_segment(point: shapely.Point, segment_coords: npt.NDArray[np.float64]) -> float:\n", - " \"\"\"Interpolate Z coordinate along a 3D line segment.\"\"\"\n", - " p1, p2 = segment_coords[0], segment_coords[1]\n", - "\n", - " # Project point onto segment\n", - " segment_vec = p2[:2] - p1[:2]\n", - " point_vec = np.array([point.x, point.y]) - p1[:2]\n", - "\n", - " # Handle degenerate case\n", - " segment_length_sq = np.dot(segment_vec, segment_vec)\n", - " if segment_length_sq == 0:\n", - " return p1[2]\n", - "\n", - " # Calculate projection parameter\n", - " t = np.dot(point_vec, segment_vec) / segment_length_sq\n", - " t = np.clip(t, 0, 1) # Clamp to segment bounds\n", - "\n", - " # Interpolate Z\n", - " return p1[2] + t * (p2[2] - p1[2])\n", - "\n", - "\n", - "def _extract_intersection_dict(\n", - " lanes: Dict[int, Any], lane_group_dict: Dict[int, Any], max_distance: float = 0.01\n", - ") -> Dict[str, Any]:\n", - "\n", - " # 1. Collect all lane groups where at least one lane is marked as an intersection.\n", - " lane_group_intersection_dict = {}\n", - " for lane_group_id, lane_group in lane_group_dict.items():\n", - " is_intersection_lanes = [lanes[str(lane_id)][\"is_intersection\"] for lane_id in lane_group[\"lane_ids\"]]\n", - " if any(is_intersection_lanes):\n", - " lane_group_intersection_dict[lane_group_id] = lane_group\n", - "\n", - " # 2. Merge polygons of lane groups that are marked as intersections.\n", - " lane_group_intersection_geometry = {\n", - " lane_group_id: shapely.Polygon(lane_group[\"outline\"].array[:, Point3DIndex.XY])\n", - " for lane_group_id, lane_group in lane_group_intersection_dict.items()\n", - " }\n", - " intersection_polygons = gpd.GeoSeries(lane_group_intersection_geometry).union_all()\n", - "\n", - " # 3. Collect all intersection polygons and their lane group IDs.\n", - " intersection_dict = {}\n", - " for intersection_idx, intersection_polygon in enumerate(intersection_polygons.geoms):\n", - " if intersection_polygon.is_empty:\n", - " continue\n", - " lane_group_ids = [\n", - " lane_group_id\n", - " for lane_group_id, lane_group_polygon in lane_group_intersection_geometry.items()\n", - " if intersection_polygon.intersects(lane_group_polygon)\n", - " ]\n", - " intersection_dict[f\"intersection_{intersection_idx}\"] = {\n", - " \"id\": intersection_idx,\n", - " \"outline_2d\": Polyline2D.from_array(np.array(list(intersection_polygon.exterior.coords), dtype=np.float64)),\n", - " \"lane_group_ids\": lane_group_ids,\n", - " }\n", - "\n", - " # 4. 
Lift intersection outlines to 3D.\n", - " boundary_segments = []\n", - " for lane_group in lane_group_intersection_dict.values():\n", - " coords = np.array(lane_group[\"outline\"].linestring.coords, dtype=np.float64).reshape(-1, 1, 3)\n", - " segment_coords_boundary = np.concatenate([coords[:-1], coords[1:]], axis=1)\n", - " boundary_segments.append(segment_coords_boundary)\n", - "\n", - " boundary_segments = np.concatenate(boundary_segments, axis=0)\n", - " boundary_segment_linestrings = shapely.creation.linestrings(boundary_segments)\n", - " occupancy_map = OccupancyMap2D(boundary_segment_linestrings)\n", - "\n", - " for intersection_id, intersection_data in intersection_dict.items():\n", - " points_2d = intersection_data[\"outline_2d\"].array\n", - " points_3d = np.zeros((len(points_2d), 3), dtype=np.float64)\n", - " points_3d[:, :2] = points_2d\n", - "\n", - " query_points = shapely.creation.points(points_2d)\n", - " results = occupancy_map.query_nearest(query_points, max_distance=max_distance, exclusive=True)\n", - " for query_idx, geometry_idx in zip(*results):\n", - " query_point = query_points[query_idx]\n", - " segment_coords = boundary_segments[geometry_idx]\n", - " best_z = _interpolate_z_on_segment(query_point, segment_coords)\n", - " points_3d[query_idx, 2] = best_z\n", - "\n", - " intersection_dict[intersection_id][\"outline_3d\"] = Polyline3D.from_array(points_3d)\n", - "\n", - " return intersection_dict\n", - "\n", - "\n", - "intersection_dict = _extract_intersection_dict(log_map_archive[\"lane_segments\"], lane_group_dict)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "from matplotlib import cm\n", - "from matplotlib.colors import Normalize\n", - "\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "\n", - "for intersection_id, values in intersection_dict.items():\n", - "\n", - " outline = values[\"outline_3d\"].array\n", - " print(outline[:, 2].min(), outline[:, 2].max())\n", - "\n", - " # Normalize z values to [0, 1] for colormap mapping\n", - " norm = Normalize(vmin=outline[:, 2].min(), vmax=outline[:, 2].max())\n", - " colors = cm.viridis(norm(outline[:, 2]))\n", - "\n", - " # Plot each segment with its corresponding color\n", - " for i in range(len(outline) - 1):\n", - " ax.plot(outline[i : i + 2, 0], outline[i : i + 2, 1], color=colors[i], linewidth=2)\n", - "\n", - "ax.set_aspect(\"equal\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "\n", - "\n", - "for lane in log_map_archive[\"lane_segments\"].values():\n", - " left_boundary = lane[\"left_lane_boundary\"]\n", - " right_boundary = lane[\"right_lane_boundary\"]\n", - "\n", - " polygon = np.concatenate([left_boundary.array, right_boundary.array[::-1]]).reshape(-1, 3)[:, :2]\n", - " print(polygon)\n", - " ax.fill(\n", - " polygon[:, 0], polygon[:, 1], alpha=0.5, edgecolor=\"black\", color=\"red\" if lane[\"is_intersection\"] else \"blue\"\n", - " )\n", - "\n", - " # if left_boundary and right_boundary:\n", - " # ax.plot(left_boundary.array[:, 0], left_boundary.array[:, 1], color=\"blue\", linewidth=1)\n", - " # ax.plot(right_boundary.array[:, 0], right_boundary.array[:, 1], color=\"red\", linewidth=1)\n", - "\n", - "ax.set_title(\"Lane Segments\")\n", - "ax.set_xlabel(\"X\")\n", - "ax.set_ylabel(\"Y\")\n", - "ax.set_aspect(\"equal\")\n", - 
"plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/viz/bev_matplotlib.ipynb b/notebooks/bev_matplotlib.ipynb similarity index 52% rename from notebooks/viz/bev_matplotlib.ipynb rename to notebooks/bev_matplotlib.ipynb index 1feef159..e7bbc334 100644 --- a/notebooks/viz/bev_matplotlib.ipynb +++ b/notebooks/bev_matplotlib.ipynb @@ -7,11 +7,12 @@ "metadata": {}, "outputs": [], "source": [ - "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n", - "from d123.dataset.scene.scene_filter import SceneFilter\n", + "from py123d.datatypes.scene.arrow.arrow_scene_builder import ArrowSceneBuilder\n", + "from py123d.datatypes.scene.scene_filter import SceneFilter\n", "\n", - "from d123.common.multithreading.worker_sequential import Sequential\n", - "from d123.common.datatypes.sensor.camera import CameraType" + "\n", + "from py123d.common.multithreading.worker_sequential import Sequential\n", + "# from py123d.datatypes.sensors.pinhole_camera_type import PinholeCameraType" ] }, { @@ -21,36 +22,30 @@ "metadata": {}, "outputs": [], "source": [ - "# split = \"nuplan_private_test\"\n", - "# log_names = [\"2021.09.29.17.35.58_veh-44_00066_00432\"]\n", - "\n", - "\n", - "# splits = [\"wopd_train\"]\n", - "# splits = [\"carla\"]\n", - "# splits = [\"nuplan_private_test\"]\n", - "splits = [\"av2-sensor-mini_train\"]\n", - "# log_names = None\n", - "\n", - "\n", + "# splits = [\"kitti360_train\"]\n", + "# splits = [\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n", + "# splits = [\"nuplan-mini_test\", \"nuplan-mini_train\", \"nuplan-mini_val\"]\n", + "# splits = [\"carla_test\"]\n", + "# splits = [\"wopd_val\"]\n", + "# splits = [\"av2-sensor_train\"]\n", + "splits = [\"pandaset_test\", \"pandaset_val\", \"pandaset_train\"]\n", "log_names = None\n", - "scene_tokens = None\n", + "scene_uuids = None\n", "\n", "scene_filter = SceneFilter(\n", " split_names=splits,\n", " log_names=log_names,\n", - " scene_tokens=scene_tokens,\n", - " duration_s=10,\n", + " scene_uuids=scene_uuids,\n", + " duration_s=None,\n", " history_s=0.0,\n", - " timestamp_threshold_s=20,\n", + " timestamp_threshold_s=30.0,\n", " shuffle=True,\n", - " # camera_types=[CameraType.CAM_F0],\n", + " # camera_types=[PinholeCameraType.CAM_F0],\n", ")\n", - "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n", + "scene_builder = ArrowSceneBuilder()\n", "worker = Sequential()\n", - "# worker = RayDistributed()\n", "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "\n", - "print(f\"Found {len(scenes)} scenes\")" + "print(f\"Found {len(scenes)} scenes\")\n" ] }, { @@ -61,24 +56,26 @@ "outputs": [], "source": [ "from typing import List, Optional, Tuple\n", + "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", - "from d123.common.geometry.base import Point2D\n", - "from d123.common.visualization.color.color import BLACK, DARK_GREY, DARKER_GREY, LIGHT_GREY, NEW_TAB_10, TAB_10\n", - "from d123.common.visualization.color.config import PlotConfig\n", - "from 
d123.common.visualization.color.default import CENTERLINE_CONFIG, MAP_SURFACE_CONFIG, ROUTE_CONFIG\n", - "from d123.common.visualization.matplotlib.observation import (\n", + "\n", + "from py123d.geometry import Point2D\n", + "from py123d.visualization.color.color import BLACK, DARK_GREY, DARKER_GREY, LIGHT_GREY, NEW_TAB_10, TAB_10\n", + "from py123d.visualization.color.config import PlotConfig\n", + "from py123d.visualization.color.default import CENTERLINE_CONFIG, MAP_SURFACE_CONFIG, ROUTE_CONFIG\n", + "from py123d.visualization.matplotlib.observation import (\n", " add_box_detections_to_ax,\n", " add_default_map_on_ax,\n", " add_ego_vehicle_to_ax,\n", " add_traffic_lights_to_ax,\n", ")\n", - "from d123.common.visualization.matplotlib.utils import add_shapely_linestring_to_ax, add_shapely_polygon_to_ax\n", - "from d123.dataset.maps.abstract_map import AbstractMap\n", - "from d123.dataset.maps.abstract_map_objects import AbstractLane, AbstractLaneGroup\n", - "from d123.dataset.maps.gpkg.gpkg_map_objects import GPKGIntersection\n", - "from d123.dataset.maps.map_datatypes import MapLayer\n", - "from d123.dataset.scene.abstract_scene import AbstractScene\n", + "from py123d.visualization.matplotlib.utils import add_shapely_linestring_to_ax, add_shapely_polygon_to_ax\n", + "from py123d.datatypes.maps.abstract_map import AbstractMap\n", + "from py123d.datatypes.maps.abstract_map_objects import AbstractLane, AbstractLaneGroup\n", + "from py123d.datatypes.maps.gpkg.gpkg_map_objects import GPKGIntersection\n", + "from py123d.datatypes.maps.map_datatypes import MapLayer\n", + "from py123d.datatypes.scene.abstract_scene import AbstractScene\n", "\n", "\n", "import shapely.geometry as geom\n", @@ -115,9 +112,9 @@ ")\n", "\n", "ROAD_EDGE_CONFIG: PlotConfig = PlotConfig(\n", - " fill_color=DARKER_GREY.set_brightness(0.0),\n", + " fill_color=DARKER_GREY,\n", " fill_color_alpha=1.0,\n", - " line_color=DARKER_GREY.set_brightness(0.0),\n", + " line_color=DARKER_GREY,\n", " line_color_alpha=1.0,\n", " line_width=1.0,\n", " line_style=\"-\",\n", @@ -143,22 +140,22 @@ " route_lane_group_ids: Optional[List[int]] = None,\n", ") -> None:\n", " layers: List[MapLayer] = [\n", - " MapLayer.LANE,\n", + " # MapLayer.LANE,\n", " MapLayer.LANE_GROUP,\n", " MapLayer.GENERIC_DRIVABLE,\n", " MapLayer.CARPARK,\n", - " MapLayer.CROSSWALK,\n", - " MapLayer.INTERSECTION,\n", + " # MapLayer.CROSSWALK,\n", + " # MapLayer.INTERSECTION,\n", " MapLayer.WALKWAY,\n", " MapLayer.ROAD_EDGE,\n", - " # MapLayer.ROAD_LINE,\n", + " MapLayer.ROAD_LINE,\n", " ]\n", " x_min, x_max = point_2d.x - radius, point_2d.x + radius\n", " y_min, y_max = point_2d.y - radius, point_2d.y + radius\n", " patch = geom.box(x_min, y_min, x_max, y_max)\n", " map_objects_dict = map_api.query(geometry=patch, layers=layers, predicate=\"intersects\")\n", + " # print(map_objects_dict[MapLayer.ROAD_EDGE])\n", "\n", - " done = False\n", " for layer, map_objects in map_objects_dict.items():\n", " for map_object in map_objects:\n", " try:\n", @@ -166,7 +163,7 @@ " MapLayer.GENERIC_DRIVABLE,\n", " MapLayer.CARPARK,\n", " MapLayer.CROSSWALK,\n", - " MapLayer.INTERSECTION,\n", + " # MapLayer.INTERSECTION,\n", " MapLayer.WALKWAY,\n", " ]:\n", " add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, MAP_SURFACE_CONFIG[layer])\n", @@ -175,44 +172,54 @@ " map_object: AbstractLaneGroup\n", " add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, MAP_SURFACE_CONFIG[layer])\n", "\n", + " if map_object.intersection is not None:\n", + " add_shapely_polygon_to_ax(ax, 
map_object.intersection.shapely_polygon, ROUTE_CONFIG)\n", "\n", - " if layer in [MapLayer.LANE]:\n", - " add_shapely_linestring_to_ax(ax, map_object.centerline.linestring, CENTERLINE_CONFIG)\n", + " for lane in map_object.lanes:\n", + " add_shapely_linestring_to_ax(ax, lane.centerline.linestring, CENTERLINE_CONFIG)\n", "\n", - " # if layer in [MapLayer.ROAD_EDGE]:\n", - " # add_shapely_linestring_to_ax(ax, map_object.polyline_3d.linestring, ROAD_EDGE_CONFIG)\n", + " # if layer in [MapLayer.LANE]:\n", + " # add_shapely_linestring_to_ax(ax, map_object.centerline.linestring, CENTERLINE_CONFIG)\n", + " # add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, MAP_SURFACE_CONFIG[layer])\n", "\n", - " # if layer in [MapLayer.ROAD_LINE]:\n", - " # # line_type = int(map_object.road_line_type)\n", - " # # plt_config = PlotConfig(\n", - " # # fill_color=NEW_TAB_10[line_type % (len(NEW_TAB_10) - 1)],\n", - " # # fill_color_alpha=1.0,\n", - " # # line_color=NEW_TAB_10[line_type % (len(NEW_TAB_10) - 1)],\n", - " # # line_color_alpha=1.0,\n", - " # # line_width=1.5,\n", - " # # line_style=\"-\",\n", - " # # zorder=10,\n", - " # # )\n", - " # add_shapely_linestring_to_ax(ax, map_object.polyline_3d.linestring, ROAD_LINE_CONFIG)\n", + " if layer in [MapLayer.ROAD_EDGE]:\n", + " add_shapely_linestring_to_ax(ax, map_object.polyline_3d.linestring, ROAD_EDGE_CONFIG)\n", + "\n", + " if layer in [MapLayer.ROAD_LINE]:\n", + " # line_type = int(map_object.road_line_type)\n", + " # plt_config = PlotConfig(\n", + " # fill_color=NEW_TAB_10[line_type % (len(NEW_TAB_10) - 1)],\n", + " # fill_color_alpha=1.0,\n", + " # line_color=NEW_TAB_10[line_type % (len(NEW_TAB_10) - 1)],\n", + " # line_color_alpha=1.0,\n", + " # line_width=1.5,\n", + " # line_style=\"-\",\n", + " # zorder=10,\n", + " # )\n", + " add_shapely_linestring_to_ax(ax, map_object.polyline_3d.linestring, ROAD_LINE_CONFIG)\n", "\n", " except Exception:\n", " import traceback\n", "\n", - " print(f\"Error adding map object of type {layer.name} and id {map_object.id}\")\n", + " print(f\"Error adding map object of type {layer.name} and id {map_object.object_id}\")\n", " traceback.print_exc()\n", "\n", - " ax.set_title(f\"Map: {map_api.map_name}\")\n", + " # ax.set_title(f\"Map: {map_api.map_name}\")\n", "\n", "\n", "def _plot_scene_on_ax(ax: plt.Axes, scene: AbstractScene, iteration: int = 0, radius: float = 80) -> plt.Axes:\n", "\n", " ego_vehicle_state = scene.get_ego_state_at_iteration(iteration)\n", " box_detections = scene.get_box_detections_at_iteration(iteration)\n", + " map_api = scene.get_map_api()\n", "\n", " point_2d = ego_vehicle_state.bounding_box.center.state_se2.point_2d\n", - " # add_debug_map_on_ax(ax, scene.map_api, point_2d, radius=radius, route_lane_group_ids=None)\n", - " add_default_map_on_ax(ax, scene.map_api, point_2d, radius=radius, route_lane_group_ids=None)\n", - " # add_traffic_lights_to_ax(ax, traffic_light_detections, scene.map_api)\n", + " if map_api is not None:\n", + " # add_debug_map_on_ax(ax, scene.get_map_api(), point_2d, radius=radius, route_lane_group_ids=None)\n", + "\n", + "\n", + " add_default_map_on_ax(ax, map_api, point_2d, radius=radius, route_lane_group_ids=None)\n", + " # add_traffic_lights_to_ax(ax, traffic_light_detections, scene.get_map_api())\n", "\n", " add_box_detections_to_ax(ax, box_detections)\n", " add_ego_vehicle_to_ax(ax, ego_vehicle_state)\n", @@ -236,19 +243,18 @@ " return fig, ax\n", "\n", "\n", - "scene_index = 9\n", - "iteration = 99\n", - "fig, ax = plot_scene_at_iteration(scenes[scene_index], 
iteration=iteration, radius=30)\n", - "plt.show()\n", - "\n", - "# camera = scenes[scene_index].get_camera_at_iteration(\n", - "# iteration=iteration, camera_type=CameraType.CAM_F0\n", - "# )\n", + "# scene_index =\n", + "iteration = 1\n", "\n", - "# plt.imshow(camera.image, cmap=\"gray\", vmin=0, vmax=255)\n", - "# # fig.savefig(f\"/home/daniel/scene_{scene_index}_iteration_1.pdf\", dpi=300, bbox_inches=\"tight\")\n", + "scale = 10\n", + "fig, ax = plt.subplots(1, 1, figsize=(scale, scale))\n", + "scene = np.random.choice(scenes)\n", + "_plot_scene_on_ax(ax, scene, iteration, radius=80)\n", + "# _plot_scene_on_ax(ax[1], scene, iteration, radius=50)\n", + "# _plot_scene_on_ax(ax[2], scene, iteration,\n", + "# radius=100)\n", "\n", - "# scenes[scene_index].log_name" + "plt.show()" ] }, { @@ -257,83 +263,12 @@ "id": "3", "metadata": {}, "outputs": [], - "source": [ - "scene = scenes[scene_index]\n", - "\n", - "\n", - "scene.get_camera_at_iteration(camera_type=CameraType.CAM_F0, iteration=0)\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.conversion.map.road_edge.road_edge_2d_utils import get_road_edge_linear_rings\n", - "from d123.dataset.maps.gpkg.gpkg_map import GPKGMap\n", - "\n", - "\n", - "map_api: GPKGMap = scenes[scene_index].map_api\n", - "\n", - "drivable_polygons = map_api._gpd_dataframes[MapLayer.LANE]\n", - "\n", - "\n", - "\n", - "linear_rings = get_road_edge_linear_rings(drivable_polygons.geometry.tolist())\n", - "rings_lengths = [ring.length for ring in linear_rings]\n", - "max_length_idx = np.argmax(rings_lengths)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "size = 16\n", - "fig, ax = plt.subplots(figsize=(size, size))\n", - "\n", - "for idx, ring in enumerate(linear_rings):\n", - " if idx == max_length_idx:\n", - " ax.plot(*ring.xy, color=\"black\", linewidth=2, label=\"Longest Road Edge\")\n", - " else:\n", - " ax.plot(*ring.xy)\n", - "\n", - "\n", - "ax.set_aspect(\"equal\", adjustable=\"box\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], "source": [] } ], "metadata": { "kernelspec": { - "display_name": "d123", + "display_name": "py123d", "language": "python", "name": "python3" }, @@ -347,7 +282,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.11" + "version": "3.12.12" } }, "nbformat": 4, diff --git a/notebooks/bev_render.ipynb b/notebooks/bev_render.ipynb new file mode 100644 index 00000000..1bc41014 --- /dev/null +++ b/notebooks/bev_render.ipynb @@ -0,0 +1,131 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "from py123d.datatypes.scene.arrow.arrow_scene_builder import ArrowSceneBuilder\n", + "from py123d.datatypes.scene.scene_filter import SceneFilter\n", + "\n", + "from py123d.common.multithreading.worker_sequential import Sequential\n", + "# from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "# splits = [\"kitti360\"]\n", + "# splits = 
[\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n", + "# splits = [\"nuplan-mini_test\", \"nuplan-mini_train\", \"nuplan-mini_val\"]\n", + "# splits = [\"nuplan_private_test\"]\n", + "# splits = [\"carla_test\"]\n", + "splits = [\"wopd_val\"]\n", + "# splits = [\"av2-sensor_train\"]\n", + "# splits = [\"pandaset_test\", \"pandaset_val\", \"pandaset_train\"]\n", + "# log_names = [\"2021.08.24.13.12.55_veh-45_00386_00472\"]\n", + "# log_names = [\"2013_05_28_drive_0000_sync\"]\n", + "# log_names = [\"2013_05_28_drive_0000_sync\"]\n", + "log_names = None\n", + "scene_uuids = [\"9727e2b3-46b0-51bd-84a9-c516c0993045\"]\n", + "\n", + "scene_filter = SceneFilter(\n", + " split_names=splits,\n", + " log_names=log_names,\n", + " scene_uuids=scene_uuids,\n", + " duration_s=None,\n", + " history_s=0.0,\n", + " timestamp_threshold_s=None,\n", + " shuffle=True,\n", + " # camera_types=[PinholeCameraType.CAM_F0],\n", + ")\n", + "scene_builder = ArrowSceneBuilder()\n", + "worker = Sequential()\n", + "scenes = scene_builder.get_scenes(scene_filter, worker)\n", + "\n", + "scenes = [scene for scene in scenes if scene.uuid in scene_uuids]\n", + "print(f\"Found {len(scenes)} scenes\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "from py123d.visualization.matplotlib.plots import render_scene_animation\n", + "\n", + "for i in [0]:\n", + " render_scene_animation(scenes[i], output_path=\"test\", format=\"mp4\", fps=20, step=1, radius=50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py123d", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/camera_matplotlib.ipynb b/notebooks/camera_matplotlib.ipynb new file mode 100644 index 00000000..b33cfdd8 --- /dev/null +++ b/notebooks/camera_matplotlib.ipynb @@ -0,0 +1,162 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "from py123d.datatypes.scene.arrow.arrow_scene_builder import ArrowSceneBuilder\n", + "from py123d.datatypes.scene.scene_filter import SceneFilter\n", + "\n", + "from py123d.common.multithreading.worker_sequential import Sequential\n", + "from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "# splits = [\"wopd_val\"]\n", + "# splits = [\"carla_test\"]\n", + "# splits = [\"nuplan-mini_test\"]\n", + "# splits = [\"av2-sensor-mini_train\"]\n", + "\n", + "\n", + "splits = [\"pandaset_train\"]\n", + "# log_names 
= None\n", + "\n", + "\n", + "log_names = None\n", + "scene_uuids = None\n", + "\n", + "scene_filter = SceneFilter(\n", + " split_names=splits,\n", + " log_names=log_names,\n", + " scene_uuids=scene_uuids,\n", + " duration_s=6.0,\n", + " history_s=0.0,\n", + " timestamp_threshold_s=20,\n", + " shuffle=True,\n", + " pinhole_camera_types=[PinholeCameraType.PCAM_F0],\n", + ")\n", + "scene_builder = ArrowSceneBuilder()\n", + "worker = Sequential()\n", + "# worker = RayDistributed()\n", + "scenes = scene_builder.get_scenes(scene_filter, worker)\n", + "\n", + "print(f\"Found {len(scenes)} scenes\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib import pyplot as plt\n", + "from py123d.datatypes.scene.abstract_scene import AbstractScene\n", + "from py123d.visualization.matplotlib.camera import add_box_detections_to_camera_ax, add_camera_ax\n", + "\n", + "iteration = 0\n", + "scene = scenes[0]\n", + "\n", + "scene: AbstractScene\n", + "print(scene.uuid, scene.available_pinhole_camera_types)\n", + "\n", + "scale = 3.0\n", + "fig, ax = plt.subplots(2, 3, figsize=(scale * 6, scale * 2.5))\n", + "\n", + "\n", + "camera_ax_mapping = {\n", + " PinholeCameraType.PCAM_L0: ax[0, 0],\n", + " PinholeCameraType.PCAM_F0: ax[0, 1],\n", + " PinholeCameraType.PCAM_R0: ax[0, 2],\n", + " PinholeCameraType.PCAM_L1: ax[1, 0],\n", + " PinholeCameraType.PCAM_B0: ax[1, 1],\n", + " PinholeCameraType.PCAM_R1: ax[1, 2],\n", + "}\n", + "\n", + "\n", + "for camera_type, ax_ in camera_ax_mapping.items():\n", + " camera = scene.get_pinhole_camera_at_iteration(iteration, camera_type)\n", + " box_detections = scene.get_box_detections_at_iteration(iteration)\n", + " ego_state = scene.get_ego_state_at_iteration(iteration)\n", + "\n", + " add_box_detections_to_camera_ax(\n", + " ax_,\n", + " camera,\n", + " box_detections,\n", + " ego_state,\n", + " )\n", + " ax_.set_title(f\"Camera: {camera_type.name}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py123d", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/camera_render.ipynb b/notebooks/camera_render.ipynb new file mode 100644 index 00000000..4365c424 --- /dev/null +++ b/notebooks/camera_render.ipynb @@ -0,0 +1,165 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "from py123d.datatypes.scene.arrow.arrow_scene_builder import ArrowSceneBuilder\n", + "from py123d.datatypes.scene.scene_filter import SceneFilter\n", + "\n", + "from 
py123d.common.multithreading.worker_sequential import Sequential\n",
+    "from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType\n",
+    "\n",
+    "KITTI360_DATA_ROOT = \"/home/daniel/kitti_360/KITTI-360\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# splits = [\"kitti360\"]\n",
+    "# splits = [\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n",
+    "# splits = [\"nuplan-mini_test\", \"nuplan-mini_train\", \"nuplan-mini_val\"]\n",
+    "# splits = [\"nuplan_private_test\"]\n",
+    "# splits = [\"carla_test\"]\n",
+    "splits = [\"wopd_val\"]\n",
+    "# splits = [\"av2-sensor_train\"]\n",
+    "# splits = [\"pandaset_test\", \"pandaset_val\", \"pandaset_train\"]\n",
+    "# log_names = [\"2021.08.24.13.12.55_veh-45_00386_00472\"]\n",
+    "# log_names = [\"2013_05_28_drive_0000_sync\"]\n",
+    "# log_names = [\"2013_05_28_drive_0000_sync\"]\n",
+    "log_names = None\n",
+    "scene_uuids = [\"9727e2b3-46b0-51bd-84a9-c516c0993045\"]\n",
+    "\n",
+    "scene_filter = SceneFilter(\n",
+    "    split_names=splits,\n",
+    "    log_names=log_names,\n",
+    "    scene_uuids=scene_uuids,\n",
+    "    duration_s=None,\n",
+    "    history_s=0.0,\n",
+    "    timestamp_threshold_s=None,\n",
+    "    shuffle=True,\n",
+    "    # pinhole_camera_types=[PinholeCameraType.PCAM_F0],\n",
+    ")\n",
+    "scene_builder = ArrowSceneBuilder()\n",
+    "worker = Sequential()\n",
+    "scenes = scene_builder.get_scenes(scene_filter, worker)\n",
+    "\n",
+    "scenes = [scene for scene in scenes if scene.uuid in scene_uuids]\n",
+    "print(f\"Found {len(scenes)} scenes\")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from matplotlib import pyplot as plt\n",
+    "from py123d.datatypes.scene.abstract_scene import AbstractScene\n",
+    "from py123d.visualization.matplotlib.camera import add_box_detections_to_camera_ax, add_camera_ax\n",
+    "import imageio\n",
+    "import numpy as np\n",
+    "\n",
+    "iteration = 0\n",
+    "scene = scenes[0]\n",
+    "\n",
+    "scene: AbstractScene\n",
+    "fps = 15 # frames per second\n",
+    "output_file = f\"camera_{scene.log_metadata.split}_{scene.uuid}.mp4\"\n",
+    "\n",
+    "writer = imageio.get_writer(output_file, fps=fps)\n",
+    "\n",
+    "scale = 3.0\n",
+    "fig, ax = plt.subplots(2, 3, figsize=(scale * 6, scale * 2.5))\n",
+    "\n",
+    "\n",
+    "camera_type = PinholeCameraType.PCAM_F0\n",
+    "\n",
+    "for i in range(scene.number_of_iterations):\n",
+    "    camera = scene.get_pinhole_camera_at_iteration(i, camera_type)\n",
+    "    box_detections = scene.get_box_detections_at_iteration(i)\n",
+    "    ego_state = scene.get_ego_state_at_iteration(i)\n",
+    "\n",
+    "    _, image = add_box_detections_to_camera_ax(\n",
+    "        None,\n",
+    "        camera,\n",
+    "        box_detections,\n",
+    "        ego_state,\n",
+    "        return_image=True,\n",
+    "    )\n",
+    "    writer.append_data(image)\n",
+    "\n",
+    "writer.close()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "py123d",
+ "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/notebooks/carla/vehicle_params.ipynb b/notebooks/carla/vehicle_params.ipynb deleted file mode 100644 index 319b0ae8..00000000 --- a/notebooks/carla/vehicle_params.ipynb +++ /dev/null @@ -1,127 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "import gzip\n", - "import json\n", - "from typing import Dict\n", - "\n", - "\n", - "path = Path(\"/home/daniel/carla_workspace/data/_Rep0_longest1_route0_07_04_10_18_47/boxes\")\n", - "\n", - "def _load_json_gz(path: Path) -> Dict:\n", - " \"\"\"Helper function to load a gzipped JSON file.\"\"\"\n", - " with gzip.open(path, \"rt\") as f:\n", - " data = json.load(f)\n", - " return data\n", - "\n", - "bounding_box_paths = sorted([bb_path for bb_path in path.iterdir()])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "width = [] \n", - "length = [] \n", - "height = []\n", - "wheel_base = [] \n", - "rear_axle_to_center_vertical = [] \n", - "rear_axle_to_center_longitudinal = [] \n", - "for bb_path in bounding_box_paths:\n", - " bb_dict = _load_json_gz(bb_path)\n", - " vehicle_parameters = bb_dict[\"vehicle_parameters\"]\n", - " width.append(vehicle_parameters[\"width\"])\n", - " length.append(vehicle_parameters[\"length\"])\n", - " height.append(vehicle_parameters[\"height\"])\n", - " wheel_base.append(vehicle_parameters[\"wheel_base\"])\n", - " rear_axle_to_center_vertical.append(vehicle_parameters[\"rear_axle_to_center_vertical\"])\n", - " rear_axle_to_center_longitudinal.append(vehicle_parameters[\"rear_axle_to_center_longitudinal\"])\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "print(\"width\", np.mean(width), np.std(width))\n", - "print(\"length\", np.mean(length), np.std(length))\n", - "print(\"height\", np.mean(height), np.std(height))\n", - "print(\"wheel_base\", np.mean(wheel_base), np.std(wheel_base))\n", - "print(\"rear_axle_to_center_vertical\", np.mean(rear_axle_to_center_vertical), np.std(rear_axle_to_center_vertical))\n", - "print(\"rear_axle_to_center_longitudinal\", np.mean(rear_axle_to_center_longitudinal), np.std(rear_axle_to_center_longitudinal))\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "print(\"width\", round(np.median(width), 5), np.std(width))\n", - "print(\"length\", round(np.median(length), 5), np.std(length))\n", - "print(\"height\", round(np.median(height), 5), np.std(height))\n", - "print(\"wheel_base\", round(np.median(wheel_base), 5), np.std(wheel_base))\n", - "print(\"rear_axle_to_center_vertical\", round(np.median(rear_axle_to_center_vertical), 5), np.std(rear_axle_to_center_vertical))\n", - "print(\"rear_axle_to_center_longitudinal\", round(np.median(rear_axle_to_center_longitudinal), 5), 
np.std(rear_axle_to_center_longitudinal))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "vehicle_parameters[\"vehicle_name\"]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/dataset/dataset.ipynb b/notebooks/dataset/dataset.ipynb deleted file mode 100644 index 83f602a1..00000000 --- a/notebooks/dataset/dataset.ipynb +++ /dev/null @@ -1,148 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "s3_uri = \"d123testing/nuplan_private_test/2021.07.25.16.16.23_veh-26_02446_02589.arrow\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "import pyarrow as pa\n", - "import pyarrow.fs as fs\n", - "import pyarrow.dataset as ds\n", - "\n", - "import os\n", - "\n", - "s3_fs = fs.S3FileSystem(\n", - " access_key=os.environ.get('AWS_ACCESS_KEY_ID'),\n", - " secret_key=os.environ.get('AWS_SECRET_ACCESS_KEY'),\n", - " region=os.environ.get('AWS_DEFAULT_REGION')\n", - ")\n", - "from d123.common.utils.timer import Timer\n", - "\n", - "\n", - "timer = Timer()\n", - "timer.start()\n", - "\n", - "dataset = ds.dataset(f\"{s3_uri}\", format=\"ipc\", filesystem=s3_fs)\n", - "timer.log(\"1. Dataset loaded\")\n", - "\n", - "# Get all column names and remove the ones you want to drop\n", - "all_columns = dataset.schema.names\n", - "columns_to_keep = [col for col in all_columns if col not in [\"front_cam_demo\", \"front_cam_transform\"]]\n", - "timer.log(\"2. Columns filtered\")\n", - "\n", - "table = dataset.to_table(columns=columns_to_keep)\n", - "timer.log(\"3. Table created\")\n", - "# Save locally\n", - "with pa.ipc.new_file(\"filtered_file.arrow\", table.schema) as writer:\n", - " writer.write_table(table)\n", - "timer.log(\"4. Table saved locally\")\n", - "\n", - "timer.end()\n", - "timer.stats(verbose=False)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "timer = Timer()\n", - "timer.start()\n", - "\n", - "table = dataset.take([i for i in range(0, 100, 1)], columns=all_columns)\n", - "timer.log(\"3. Table created\")\n", - "\n", - "# Save locally\n", - "with pa.ipc.new_file(\"filtered_file_v2.arrow\", table.schema) as writer:\n", - " writer.write_table(table)\n", - "\n", - "timer.log(\"4. 
Table saved locally\")\n", - "timer.end()\n", - "timer.stats(verbose=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "# with pa.memory_map(\"filtered_file_v2.arrow\", \"r\") as source:\n", - "# mmap_table = pa.ipc.open_file(source).read_all()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "# import pyarrow.parquet as pq\n", - "# import pyarrow.dataset as ds\n", - "# import pyarrow.dataset as ds\n", - "# import pyarrow as pa\n", - "# from huggingface_hub import hf_hub_download\n", - "# from io import BytesIO\n", - "\n", - "# # Download specific file from the repo\n", - "# file_path = hf_hub_download(\n", - "# repo_id=\"DanielDauner/delete_me\",\n", - "# filename=\"2021.05.25.14.16.10_veh-35_01690_02183.arrow\", # or whatever the file is named\n", - "# repo_type=\"dataset\"\n", - "# )\n", - "\n", - "# # Read with PyArrow\n", - "# with pa.memory_map(file_path, \"r\") as source:\n", - "# mmap_table = pa.ipc.open_file(source).read_all()\n", - "\n", - "# # Create dataset and slice\n", - "# sliced_table = mmap_table.slice(0, 1000) # First 1000 rows\n", - "# sliced_table" - ] - }, - { - "cell_type": "markdown", - "id": "5", - "metadata": {}, - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.21" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/dataset/parquet_testing.ipynb b/notebooks/dataset/parquet_testing.ipynb deleted file mode 100644 index 13b3fed7..00000000 --- a/notebooks/dataset/parquet_testing.ipynb +++ /dev/null @@ -1,133 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "import pyarrow as pa\n", - "\n", - "import pyarrow.ipc as ipc\n", - "import pyarrow.parquet as pq\n", - "\n", - "import os, psutil\n", - "\n", - "def print_memory_usage():\n", - " process = psutil.Process(os.getpid())\n", - " memory_info = process.memory_info()\n", - " print(f\"Memory usage: {memory_info.rss / 1024 ** 2:.2f} MB\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "with pa.memory_map(\"filtered_file_v2.arrow\", \"r\") as source:\n", - " reader = ipc.RecordBatchFileReader(source)\n", - " table = reader.read_all()\n", - "\n", - "pq.write_table(table, \"filtered_file_v2.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "print_memory_usage()\n", - "\n", - "parquet_table = pq.read_table(\"/home/daniel/Downloads/episode_000000.parquet\")\n", - "\n", - "\n", - "print_memory_usage()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "print_memory_usage()\n", - "\n", - "with pa.memory_map(\"filtered_file_v2.arrow\", \"r\") as source:\n", - " reader = ipc.RecordBatchFileReader(source)\n", - " arrow_table = reader.read_all()\n", - "\n", - "\n", - "print_memory_usage()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - 
"outputs": [], - "source": [ - "import requests\n", - "\n", - "parquet_web_file = \"https://huggingface.co/datasets/yaak-ai/L2D/raw/main/data/chunk-000/episode_000000.parquet\"\n", - "response = requests.get(parquet_web_file)\n", - "with pa.BufferReader(response.content) as source:\n", - " web_table = pq.read_table(source)\n", - "\n", - "\n", - "web_table" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "import pyarrow as pa\n", - "import pyarrow.fs as fs\n", - "import pyarrow.dataset as ds\n", - "import pyarrow as pa\n", - "\n", - "# s3_fs = fs.S3FileSystem()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/deprecated/arrow.ipynb b/notebooks/deprecated/arrow.ipynb deleted file mode 100644 index 8fa5d9bb..00000000 --- a/notebooks/deprecated/arrow.ipynb +++ /dev/null @@ -1,178 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "import os, psutil\n", - "\n", - "def print_memory_usage():\n", - " process = psutil.Process(os.getpid())\n", - " memory_info = process.memory_info()\n", - " print(f\"Memory usage: {memory_info.rss / 1024 ** 2:.2f} MB\")\n", - "\n", - "print_memory_usage()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "from geoarrow.pyarrow import io\n", - "import tempfile\n", - "import geopandas\n", - "import os\n", - "\n", - "\n", - "import shapely.geometry as geom\n", - "from d123.dataset.maps.gpkg.gpkg_map import GPKGMap\n", - "from d123.dataset.maps.map_datatypes import MapLayer\n", - "\n", - "\n", - "temp_gpkg = \"/home/daniel/d123_workspace/data/maps/carla_town01.gpkg\"\n", - "\n", - "print_memory_usage()\n", - "map_api = GPKGMap(temp_gpkg)\n", - "print_memory_usage()\n", - "map_api.initialize()\n", - "print_memory_usage()\n", - "\n", - "\n", - "table = map_api._gpd_dataframes[MapLayer.LANE]\n", - "\n", - "# import pyarrow as pa\n", - "# pa.table(table).schema\n", - "\n", - "print_memory_usage()\n", - "inter = table.sindex.query(geom.box(-10, -10, 10, 10), predicate='intersects')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "import geoarrow.pyarrow as ga\n", - "\n", - "\n", - "from shapely import node\n", - "from shapely.strtree import STRtree\n", - "ga_array = ga.as_geoarrow([str(i) for i in pa_table[\"left_boundary\"]])\n", - "# ga_array.wkb()\n", - "print_memory_usage()\n", - "\n", - "STRtree([el.to_shapely() for el in ga_array], node_capacity=10)\n", - "print_memory_usage()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "import pyarrow.ipc as ipc\n", - "import pyarrow as pa\n", - "\n", - "# Save to Arrow IPC file\n", - "with 
pa.OSFile(\"data.arrow\", \"wb\") as sink:\n", - " with ipc.new_file(sink, pa_table.schema) as writer:\n", - " writer.write_table(pa_table)\n", - "\n", - "# Load with memory mapping\n", - "with pa.memory_map(\"data.arrow\", \"r\") as source:\n", - " pa_table = ipc.open_file(source).read_all()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "import geoarrow.pandas as gap\n", - "\n", - "\n", - "gap.GeoArrowExtensionDtype" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "print_memory_usage()\n", - "# Load with memory mapping\n", - "with pa.memory_map('data.arrow', 'r') as source:\n", - " table_mmap = ipc.open_file(source).read_all()\n", - "print_memory_usage()\n", - "\n", - "STRtree([el.to_shapely() for el in table_mmap[\"geometry\"]], node_capacity=10)\n", - "print_memory_usage()\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "from shapely.geometry import mapping\n", - "table_mmap[\"geom\"].apply(lambda geom: mapping(geom.to_shapely()) if geom else None)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/deprecated/collect_sim_metrics_gym.ipynb b/notebooks/deprecated/collect_sim_metrics_gym.ipynb deleted file mode 100644 index 40100cd6..00000000 --- a/notebooks/deprecated/collect_sim_metrics_gym.ipynb +++ /dev/null @@ -1,154 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n", - "from d123.dataset.scene.scene_filter import SceneFilter\n", - "from d123.common.multithreading.worker_sequential import Sequential\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "import os, psutil\n", - "import numpy as np\n", - "\n", - "def print_memory_usage():\n", - " process = psutil.Process(os.getpid())\n", - " memory_info = process.memory_info()\n", - " print(f\"Memory usage: {memory_info.rss / 1024 ** 2:.2f} MB\")\n", - "\n", - "\n", - "num_scenes = 100\n", - "split = \"nuplan_mini_test\"\n", - "\n", - "# log_names = [\"2021.06.07.12.54.00_veh-35_01843_02314\"]\n", - "scene_tokens = None\n", - "# scene_tokens = [\"1b48bd8c60b3790a\"]\n", - "log_names = None\n", - "\n", - "scene_filter = SceneFilter(split_names=[split], log_names=log_names, scene_tokens=scene_tokens, duration_s=8.1, history_s=1.0)\n", - "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n", - "worker = Sequential()\n", - "# worker = RayDistributed()\n", - "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "\n", - "np.random.seed(42)\n", - "scenes = list(np.random.choice(scenes, size=num_scenes, replace=False))\n", - "\n", - "# scenes = [scene for scene in scenes if scene.token ==\"be74699d84ec5662\"]\n", 
- "\n", - "experiment_name = None\n", - "print([scene.token for scene in scenes])\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "import trace\n", - "import traceback\n", - "from tqdm import tqdm\n", - "from d123.dataset.arrow.conversion import BoxDetectionWrapper, DetectionType\n", - "from d123.simulation.gym.demo_gym_env import DemoGymEnv\n", - "from d123.simulation.observation.agents_observation import _filter_agents_by_type\n", - "\n", - "from typing import List\n", - "\n", - "from d123.simulation.metrics.sim_agents.sim_agents import get_sim_agents_metrics\n", - "\n", - "from d123.dataset.arrow.conversion import BoxDetection, DetectionType\n", - "from d123.dataset.recording.detection.detection import BoxDetectionWrapper\n", - "\n", - "import time\n", - "\n", - "import pandas as pd\n", - "\n", - "action = [1.0, 0.1] # Placeholder action, replace with actual action logic\n", - "env = DemoGymEnv(scenes)\n", - "\n", - "results = []\n", - "\n", - "for scene in tqdm(scenes):\n", - " try:\n", - "\n", - " agent_rollouts = []\n", - "\n", - " map_api, ego_state, detection_observation, current_scene = env.reset(scene)\n", - " initial_ego_state = ego_state\n", - " agent_rollouts.append(detection_observation.box_detections)\n", - "\n", - " result = {}\n", - " result[\"token\"] = scene.token\n", - " for i in range(150):\n", - " ego_state, detection_observation, end = env.step(action)\n", - " agent_rollouts.append(detection_observation.box_detections)\n", - " if end:\n", - " break\n", - " result.update(get_sim_agents_metrics(current_scene, agent_rollouts))\n", - " results.append(result)\n", - " except Exception as e:\n", - " print(current_scene.token)\n", - " traceback.print_exc()\n", - " continue\n", - "\n", - "\n", - "\n", - "df = pd.DataFrame(results)\n", - "avg_row = df.drop(columns=['token']).mean(numeric_only=True)\n", - "avg_row['token'] = 'average'\n", - "df = pd.concat([df, pd.DataFrame([avg_row])], ignore_index=True)\n", - "df.to_csv(\"test.csv\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/deprecated/extraction_testing.ipynb b/notebooks/deprecated/extraction_testing.ipynb deleted file mode 100644 index d782d7ba..00000000 --- a/notebooks/deprecated/extraction_testing.ipynb +++ /dev/null @@ -1,166 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "from functools import partial\n", - "from pathlib import Path\n", - "from typing import Iterator, List, Optional, Set, Union\n", - "\n", - "\n", - "import pyarrow as pa\n", - "from d123.common.multithreading.worker_pool import WorkerPool\n", - "\n", - "from d123.dataset.arrow.helper import open_arrow_arrow_table\n", - "from d123.dataset.dataset_specific.nuplan.nuplan_data_processor import worker_map\n", - "from 
d123.dataset.logs.log_metadata import LogMetadata\n", - "from d123.dataset.scene.abstract_scene import AbstractScene\n", - "from d123.dataset.scene.arrow_scene import ArrowScene, SceneExtractionInfo\n", - "from d123.dataset.scene.scene_filter import SceneFilter\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "DURATION_SECONDS = 10.0 \n", - "HISTORY_SECONDS = 3.0\n", - "ITERATION_DURATION_SECONDS = 0.1\n", - "\n", - "\n", - "\n", - "log_path = \"/home/daniel/d123_workspace/data/nuplan_mini_val/2021.06.07.12.54.00_veh-35_01843_02314.arrow\"\n", - "\n", - "recording_table = open_arrow_arrow_table(log_path)\n", - "log_metadata = LogMetadata.from_arrow_table(recording_table)\n", - "\n", - "\n", - "# scene_tokens = [str(token) for token in np.random.choice(recording_table.column(\"token\").to_pylist(), size=10)]\n", - "scene_tokens = [\"\"]\n", - "timestamp_threshold_s: float = 10.0\n", - "# timestamp_threshold_s = None\n", - "filter = SceneFilter(scene_tokens=scene_tokens, timestamp_threshold_s=timestamp_threshold_s)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "scene_tokens" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "def _get_scene_extraction_info(log_path: str, filter: SceneFilter) -> List[SceneExtractionInfo]:\n", - " scene_extraction_infos: List[SceneExtractionInfo] = []\n", - "\n", - " recording_table = open_arrow_arrow_table(log_path)\n", - " log_metadata = LogMetadata.from_arrow_table(recording_table)\n", - "\n", - " # 1. Filter map name\n", - " if filter.map_names is not None and log_metadata.map_name not in filter.map_names:\n", - " return scene_extraction_infos\n", - "\n", - " start_idx = int(filter.history_s / log_metadata.timestep_seconds)\n", - " end_idx = len(recording_table) - int(filter.duration_s / log_metadata.timestep_seconds)\n", - "\n", - " scene_token_set = set(filter.scene_tokens) if filter.scene_tokens else None\n", - "\n", - " for idx in range(start_idx, end_idx):\n", - " scene_extraction_info: Optional[SceneExtractionInfo] = None\n", - "\n", - " if scene_token_set is None:\n", - " scene_extraction_info = SceneExtractionInfo(\n", - " initial_token=str(recording_table[\"token\"][idx]),\n", - " initial_idx=idx,\n", - " duration_s=filter.duration_s,\n", - " history_s=filter.history_s,\n", - " iteration_duration_s=ITERATION_DURATION_SECONDS,\n", - " )\n", - " elif str(recording_table[\"token\"][idx]) in scene_token_set:\n", - " scene_extraction_info = SceneExtractionInfo(\n", - " initial_token=str(recording_table[\"token\"][idx]),\n", - " initial_idx=idx,\n", - " duration_s=filter.duration_s,\n", - " history_s=filter.history_s,\n", - " iteration_duration_s=ITERATION_DURATION_SECONDS,\n", - " )\n", - "\n", - " if scene_extraction_info is not None:\n", - " # TODO: add more options\n", - " if filter.timestamp_threshold_s is not None and len(scene_extraction_infos) > 0:\n", - " iteration_delta = idx - scene_extraction_infos[-1].initial_idx\n", - " if (iteration_delta * log_metadata.timestep_seconds) < filter.timestamp_threshold_s:\n", - " continue\n", - "\n", - " scene_extraction_infos.append(scene_extraction_info)\n", - "\n", - " del recording_table, log_metadata\n", - " return scene_extraction_infos\n", - "\n", - "\n", - "scenes = _get_scene_extraction_info(log_path, filter)\n", - "len(scenes)\n", 
- "\n", - "# 4580" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "_get_scene_extraction_info(log_path, filter)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/deprecated/map_conversion/test_nuplan_conversion.ipynb b/notebooks/deprecated/map_conversion/test_nuplan_conversion.ipynb deleted file mode 100644 index 32805c97..00000000 --- a/notebooks/deprecated/map_conversion/test_nuplan_conversion.ipynb +++ /dev/null @@ -1,71 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from d123.dataset.dataset_specific.nuplan.nuplan_map_conversion import NuPlanMapConverter, MAP_LOCATIONS\n", - "\n", - "\n", - "\n", - "\n", - "# file_to_delete = Path(\"nuplan_us-pa-pittsburgh-hazelwood.gpkg\")\n", - "# file_to_delete = Path(\"nuplan_us-pa-pittsburgh-hazelwood.gpkg\")\n", - "# if file_to_delete.exists():\n", - "# file_to_delete.unlink()\n", - "\n", - "# NuPlanMapConverter().convert()\n", - "root = Path(\"/home/daniel/d123_workspace/data\") / \"maps\"\n", - "\n", - "\n", - "for map_name in MAP_LOCATIONS:\n", - " print(f\"Converting {map_name} map...\")\n", - " file_to_delete = root / f\"nuplan_{map_name}.gpkg\"\n", - " if file_to_delete.exists():\n", - " file_to_delete.unlink()\n", - "\n", - " NuPlanMapConverter(root).convert(map_name=map_name)\n", - " print(f\"Converting {map_name} map... 
done\")\n", - "\n", - "# NUPLAN_MAPS_ROOT = os.environ[\"NUPLAN_MAPS_ROOT\"]\n", - "\n", - "# map_file_path = Path(NUPLAN_MAPS_ROOT) / \"us-pa-pittsburgh-hazelwood\" / \"9.17.1937\" / \"map.gpkg\"\n", - "\n", - "# meassure RAM\n", - "\n", - "# gdf_in_pixel_coords = gpd.read_file(map_file_path, layer=\"baseline_paths\", fid_as_index=True)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/deprecated/map_conversion/test_opendrive_conversion.ipynb b/notebooks/deprecated/map_conversion/test_opendrive_conversion.ipynb deleted file mode 100644 index ac7aaded..00000000 --- a/notebooks/deprecated/map_conversion/test_opendrive_conversion.ipynb +++ /dev/null @@ -1,191 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# import matplotlib.pyplot as plt\n", - "# from shapely.geometry import LineString, Polygon, Point\n", - "# import numpy as np\n", - "\n", - "# # from typing import List\n", - "\n", - "# # from d123.dataset.conversion.map.opendrive.elements.lane import Lane, LaneSection\n", - "# # from d123.dataset.conversion.map.opendrive.elements.reference import Border" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from d123.dataset.conversion.map.opendrive.parser.opendrive import OpenDrive\n", - "\n", - "CARLA_MAP_LOCATIONS = [\n", - " \"Town01\", # A small, simple town with a river and several bridges.\n", - " \"Town02\", # A small simple town with a mixture of residential and commercial buildings.\n", - " \"Town03\", # A larger, urban map with a roundabout and large junctions.\n", - " \"Town04\", # A small town embedded in the mountains with a special \"figure of 8\" infinite highway.\n", - " \"Town05\", # Squared-grid town with cross junctions and a bridge. It has multiple lanes per direction. Useful to perform lane changes.\n", - " \"Town06\", # Long many lane highways with many highway entrances and exits. It also has a Michigan left.\n", - " \"Town07\", # A rural environment with narrow roads, corn, barns and hardly any traffic lights.\n", - " \"Town08\", # Secret \"unseen\" town used for the Leaderboard challenge\n", - " \"Town09\", # Secret \"unseen\" town used for the Leaderboard challenge\n", - " \"Town10HD\", # A downtown urban environment with skyscrapers, residential buildings and an ocean promenade.\n", - " \"Town11\", # A Large Map that is undecorated. 
Serves as a proof of concept for the Large Maps feature.\n", - " \"Town12\", # A Large Map with numerous different regions, including high-rise, residential and rural environments.\n", - " \"Town13\", # ???\n", - " \"Town14\", # Secret \"unseen\" town used for the Leaderboard challenge ???\n", - " \"Town15\", # ???\n", - "]\n", - "\n", - "AVAILABLE_CARLA_MAP_LOCATIONS = [\n", - " \"Town01\", # A small, simple town with a river and several bridges.\n", - " \"Town02\", # A small simple town with a mixture of residential and commercial buildings.\n", - " \"Town03\", # A larger, urban map with a roundabout and large junctions.\n", - " \"Town04\", # A small town embedded in the mountains with a special \"figure of 8\" infinite highway.\n", - " \"Town05\", # Squared-grid town with cross junctions and a bridge. It has multiple lanes per direction. Useful to perform lane changes.\n", - " \"Town06\", # Long many lane highways with many highway entrances and exits. It also has a Michigan left.\n", - " \"Town07\", # A rural environment with narrow roads, corn, barns and hardly any traffic lights.\n", - " \"Town10HD\", # A downtown urban environment with skyscrapers, residential buildings and an ocean promenade.\n", - " \"Town11\", # A Large Map that is undecorated. Serves as a proof of concept for the Large Maps feature.\n", - " \"Town12\", # A Large Map with numerous different regions, including high-rise, residential and rural environments.\n", - " \"Town13\", # ???\n", - " \"Town15\", # ???\n", - "]\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# town_name.lower()" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.conversion.map.opendrive.opendrive_converter import OpenDriveConverter\n", - "\n", - "\n", - "\n", - "MAP_ROOT = Path(\"/home/daniel/d123_workspace/data\") / \"maps\"\n", - "\n", - "for town_name in AVAILABLE_CARLA_MAP_LOCATIONS:\n", - " map_name = town_name.lower()\n", - " print(f\"Start {map_name} map...\")\n", - " file_to_delete = MAP_ROOT / f\"carla_{map_name}.gpkg\"\n", - " if file_to_delete.exists():\n", - " print(f\"Unlink {file_to_delete} map...\")\n", - " file_to_delete.unlink()\n", - " print(f\"Unlink {file_to_delete} map... done\")\n", - "\n", - " if town_name not in [\"Town11\", \"Town12\", \"Town13\", \"Town15\"]:\n", - " carla_maps_root = Path(\"/home/daniel/carla_workspace/carla_garage/carla/CarlaUE4/Content/Carla/Maps/OpenDrive\")\n", - " carla_map_path = carla_maps_root / f\"{town_name}.xodr\"\n", - "\n", - " else:\n", - " carla_map_path = f\"/home/daniel/carla_workspace/carla_garage/carla/CarlaUE4/Content/Carla/Maps/{town_name}/OpenDrive/{town_name}.xodr\"\n", - "\n", - " print(f\"Parsing {map_name} map...\")\n", - " opendrive = OpenDrive.parse_from_file(carla_map_path)\n", - " print(f\"Parsing {map_name} map... done\")\n", - "\n", - " print(f\"Converting {map_name} map... done\")\n", - " converter = OpenDriveConverter(opendrive)\n", - " converter.run(f\"carla_{town_name.lower()}\")\n", - " print(f\"Converting {map_name} map... 
done\")\n", - " # break\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "converter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.maps.map_datatypes import MapLayer\n", - "\n", - "\n", - "str(MapLayer.GENERIC_DRIVABLE).split(\".\")[-1].lower()\n", - "\n", - "\n", - "MapLayer.GENERIC_DRIVABLE.name\n", - "\n", - "MapLayer.deserialize(MapLayer.GENERIC_DRIVABLE.name)\n", - "\n", - "\n", - "MapLayer.GENERIC_DRIVABLE.name.lower().islower()\n", - "\n", - "\n", - "AVAILABLE_MAP_LAYERS = list(MapLayer)\n", - "AVAILABLE_MAP_LAYERS\n", - "\n", - "pyogrio.read_dataframe()." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gdf = converter._extract_walkways_dataframe()\n", - "gdf" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/deprecated/nuplan_map_dataframe.ipynb b/notebooks/deprecated/nuplan_map_dataframe.ipynb deleted file mode 100644 index 113d8391..00000000 --- a/notebooks/deprecated/nuplan_map_dataframe.ipynb +++ /dev/null @@ -1,570 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from nuplan.common.maps.nuplan_map.map_factory import get_maps_api, get_maps_db\n", - "\n", - "MAP_LOCATIONS = {\"sg-one-north\", \"us-ma-boston\", \"us-nv-las-vegas-strip\", \"us-pa-pittsburgh-hazelwood\"}" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "length = {0: 1, 1: 2, 2: 3, 3: 4, 4: 0}\n", - "max(length, key=length.get)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd\n", - "import pyogrio\n", - "\n", - "\n", - "# Path to your .gpkg file\n", - "# path_to_gpkg = \"/home/daniel/nuplan/dataset/maps/us-nv-las-vegas-strip/9.15.1915/map.gpkg\"\n", - "path_to_gpkg = \"/home/daniel/nuplan/dataset/maps/us-ma-boston/9.12.1817/map.gpkg\"\n", - "# path_to_gpkg = \"/home/daniel/nuplan/dataset/maps/us-pa-pittsburgh-hazelwood/9.17.1937/map.gpkg\"\n", - "# path_to_gpkg = \"/home/daniel/nuplan/dataset/maps/sg-one-north/9.17.1964/map.gpkg\"\n", - "\n", - "\n", - "# List all available layers\n", - "layers = [\n", - " \"baseline_paths\",\n", - " \"carpark_areas\",\n", - " \"generic_drivable_areas\",\n", - " \"dubins_nodes\",\n", - " \"lane_connectors\",\n", - " \"intersections\",\n", - " \"boundaries\",\n", - " \"crosswalks\",\n", - " \"lanes_polygons\",\n", - " \"lane_group_connectors\",\n", - " \"lane_groups_polygons\",\n", - " \"walkways\",\n", - " \"gen_lane_connectors_scaled_width_polygons\",\n", - " \"meta\",\n", - "]\n", - "\n", - "\n", - "def load_layer(layer_name: str) -> gpd.geodataframe:\n", - "\n", - " map_meta = gpd.read_file(path_to_gpkg, layer=\"meta\", engine=\"pyogrio\")\n", - " projection_system = map_meta[map_meta[\"key\"] == 
\"projectedCoordSystem\"][\"value\"].iloc[0]\n", - "\n", - " gdf_in_pixel_coords = pyogrio.read_dataframe(path_to_gpkg, layer=layer_name, fid_as_index=True)\n", - " gdf_in_utm_coords = gdf_in_pixel_coords.to_crs(projection_system)\n", - "\n", - " # For backwards compatibility, cast the index to string datatype.\n", - " # and mirror it to the \"fid\" column.\n", - " gdf_in_utm_coords.index = gdf_in_utm_coords.index.map(str)\n", - " gdf_in_utm_coords[\"fid\"] = gdf_in_utm_coords.index\n", - "\n", - " return gdf_in_utm_coords\n", - "\n", - "\n", - "pyogrio.read_dataframe(path_to_gpkg)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "\n", - "layers = [\n", - " \"baseline_paths\",\n", - " \"carpark_areas\",\n", - " \"generic_drivable_areas\",\n", - " \"dubins_nodes\",\n", - " \"lane_connectors\",\n", - " \"intersections\",\n", - " \"boundaries\",\n", - " \"crosswalks\",\n", - " \"lanes_polygons\",\n", - " \"lane_group_connectors\",\n", - " \"lane_groups_polygons\",\n", - " \"road_segments\",\n", - " \"stop_polygons\",\n", - " \"traffic_lights\",\n", - " \"walkways\",\n", - " \"gen_lane_connectors_scaled_width_polygons\",\n", - " \"meta\",\n", - "]\n", - "\n", - "\n", - "def non_nan_set(series: pd.Series) -> set:\n", - " return set(series.dropna().values)\n", - "\n", - "\n", - "lanes = load_layer(\"intersections\")\n", - "boundaries = load_layer(\"boundaries\")\n", - "\n", - "\n", - "def get_lane(id: int):\n", - " return lanes[lanes[\"fid\"] == str(id)]\n", - "\n", - "\n", - "def get_right_boundary(lane):\n", - " return boundaries[boundaries[\"fid\"] == str(lane[\"right_boundary_fid\"].iloc[0])]\n", - "\n", - "\n", - "def get_left_boundary(lane):\n", - " return boundaries[boundaries[\"fid\"] == str(lane[\"left_boundary_fid\"].iloc[0])]\n", - "\n", - "\n", - "# boundaries\n", - "\n", - "# correct_lane_fid = 47716\n", - "# error_lane_fid = 48508\n", - "\n", - "# correct_lane_df = get_lane(correct_lane_fid)\n", - "# error_lane_df = get_lane(error_lane_fid)\n", - "\n", - "\n", - "# 46552,46553\n", - "# test_id = 46553\n", - "\n", - "print(lanes.keys())\n", - "non_nan_set(lanes.intersection_type_fid)\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "baseline_paths = load_layer(\"baseline_paths\")\n", - "\n", - "\n", - "import matplotlib.pyplot as plt\n", - "from networkx import center\n", - "\n", - "# Create a figure\n", - "fig, ax = plt.subplots(figsize=(15, 12))\n", - "\n", - "# Get unique path_type_fid values\n", - "path_types = baseline_paths[\"path_type_fid\"].unique()\n", - "\n", - "# Define colors for different path types\n", - "colors = [\"blue\", \"red\", \"green\", \"brown\", \"purple\", \"orange\", \"pink\"]\n", - "\n", - "# Plot each path type with a different color\n", - "target_label = 0\n", - "for i, path_type in enumerate(path_types):\n", - " # Filter baseline_paths by path_type\n", - " paths_of_type = baseline_paths[baseline_paths[\"path_type_fid\"] == path_type]\n", - "\n", - " if path_type == target_label:\n", - " alpha = 1.0\n", - " target_fids = paths_of_type.fid.tolist()\n", - " else:\n", - " alpha = 0.5\n", - "\n", - " # Plot these paths with a specific color\n", - " paths_of_type.plot(ax=ax, color=colors[i % len(colors)], label=f\"Path Type {path_type}\", alpha=alpha)\n", - "\n", - "# Add title and legend\n", - "ax.set_title(\"Baseline Paths by Path Type\", fontsize=16)\n", - "ax.legend()\n", - 
"ax.set_xlabel(\"X Coordinate\")\n", - "ax.set_ylabel(\"Y Coordinate\")\n", - "ax.axis(\"equal\") # Maintain aspect ratio\n", - "\n", - "center_zoom = False # Set to True to zoom into the center of the data\n", - "fid_zoom = target_fids[25]\n", - "\n", - "if center_zoom:\n", - " # Get the bounds of the data for better focusing\n", - " x_min, y_min, x_max, y_max = baseline_paths.total_bounds\n", - "\n", - " # Calculate center coordinates\n", - " center_x = (x_min + x_max) / 2\n", - " center_y = (y_min + y_max) / 2\n", - "\n", - " # Set axis limits to zoom into the center (using 30% of the total range)\n", - " range_x = x_max - x_min\n", - " range_y = y_max - y_min\n", - " zoom_factor = 2.0\n", - "\n", - " ax.set_xlim(center_x - range_x * zoom_factor / 2, center_x + range_x * zoom_factor / 2)\n", - " ax.set_ylim(center_y - range_y * zoom_factor / 2, center_y + range_y * zoom_factor / 2)\n", - "\n", - "elif fid_zoom:\n", - " # Filter to get the specific element with the given FID\n", - " specific_path = baseline_paths[baseline_paths[\"fid\"] == str(fid_zoom)]\n", - "\n", - " if not specific_path.empty:\n", - " # Get the bounds of the specific path\n", - " x_min, y_min, x_max, y_max = specific_path.total_bounds\n", - "\n", - " # Add some padding around the element\n", - " padding = 100 # meters\n", - " ax.set_xlim(x_min - padding, x_max + padding)\n", - " ax.set_ylim(y_min - padding, y_max + padding)\n", - "\n", - " # Highlight the specific element\n", - " # specific_path.plot(ax=ax, color=\"yellow\", linewidth=3, zorder=10)\n", - "\n", - " # Update title to show we're zoomed to a specific FID\n", - " ax.set_title(f\"Baseline Path - FID {fid_zoom}\", fontsize=16)\n", - " else:\n", - " print(f\"FID {fid_zoom} not found in baseline_paths\")\n", - "\n", - "\n", - "# Add a title to indicate we're looking at the center\n", - "ax.set_title(\"Baseline Paths by Path Type (Center Zoom)\", fontsize=16)\n", - "\n", - "\n", - "# Convert the specific path coordinates to WGS84 (latitude/longitude) for Google Maps\n", - "if fid_zoom and not specific_path.empty:\n", - " # Create a copy to avoid modifying the original\n", - " wgs84_path = specific_path.copy()\n", - "\n", - " # Convert from the current projection to WGS84 (EPSG:4326)\n", - " wgs84_path = wgs84_path.to_crs(\"EPSG:4326\")\n", - "\n", - " # Get the centroid of the path for easier lookup\n", - " centroid = wgs84_path.geometry.iloc[0].centroid\n", - "\n", - " # Display the coordinates\n", - " print(f\"\\nGoogle Maps coordinates for FID {fid_zoom}:\")\n", - " print(f\"Latitude: {centroid.y}, Longitude: {centroid.x}\")\n", - " print(f\"Google Maps link: https://www.google.com/maps?q={centroid.y},{centroid.x}\")\n", - "\n", - " # Add a text annotation showing coordinates on the plot\n", - " ax.annotate(\n", - " f\"Lat: {centroid.y:.6f}, Lon: {centroid.x:.6f}\",\n", - " xy=(0.05, 0.05),\n", - " xycoords=\"axes fraction\",\n", - " bbox=dict(boxstyle=\"round,pad=0.5\", fc=\"white\", alpha=0.8),\n", - " fontsize=10,\n", - " )\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "baseline_paths = load_layer(\"baseline_paths\")\n", - "intersections = load_layer(\"intersections\") # Load the intersections layer\n", - "\n", - "import matplotlib.pyplot as plt\n", - "from networkx import center\n", - "\n", - "# Create a figure\n", - "fig, ax = plt.subplots(figsize=(15, 12))\n", - "\n", - "# Get unique intersection types\n", - "intersection_types = 
intersections[\"intersection_type_fid\"].unique()\n", - "\n", - "# Define colors for different intersection types\n", - "colors = [\"blue\", \"red\", \"green\", \"brown\", \"purple\", \"orange\", \"pink\"]\n", - "\n", - "# Plot each intersection type with a different color\n", - "target_label = 2 # Target intersection type to highlight\n", - "for i, intersection_type in enumerate(intersection_types):\n", - " # Filter intersections by type\n", - " intersections_of_type = intersections[intersections[\"intersection_type_fid\"] == intersection_type]\n", - "\n", - " if intersection_type == target_label:\n", - " alpha = 1.0\n", - " target_fids = intersections_of_type.fid.tolist()\n", - " else:\n", - " alpha = 0.5\n", - "\n", - " # Plot these intersections with a specific color\n", - " intersections_of_type.plot(ax=ax, color=colors[i % len(colors)], \n", - " label=f\"Intersection Type {intersection_type}\", alpha=alpha)\n", - "\n", - "# Add title and legend\n", - "ax.set_title(\"Intersections by Type\", fontsize=16)\n", - "\n", - "ax.set_xlabel(\"X Coordinate\")\n", - "ax.set_ylabel(\"Y Coordinate\")\n", - "ax.axis(\"equal\") # Maintain aspect ratio\n", - "\n", - "center_zoom = False # Set to True to zoom into the center of the data\n", - "fid_zoom = target_fids[0] if 'target_fids' in locals() and len(target_fids) > 0 else None\n", - "\n", - "if center_zoom:\n", - " # Get the bounds of the data for better focusing\n", - " x_min, y_min, x_max, y_max = intersections.total_bounds\n", - "\n", - " # Calculate center coordinates\n", - " center_x = (x_min + x_max) / 2\n", - " center_y = (y_min + y_max) / 2\n", - "\n", - " # Set axis limits to zoom into the center (using 30% of the total range)\n", - " range_x = x_max - x_min\n", - " range_y = y_max - y_min\n", - " zoom_factor = 2.0\n", - "\n", - " ax.set_xlim(center_x - range_x * zoom_factor / 2, center_x + range_x * zoom_factor / 2)\n", - " ax.set_ylim(center_y - range_y * zoom_factor / 2, center_y + range_y * zoom_factor / 2)\n", - "\n", - "elif fid_zoom:\n", - " # Filter to get the specific intersection with the given FID\n", - " specific_intersection = intersections[intersections[\"fid\"] == fid_zoom]\n", - "\n", - " if not specific_intersection.empty:\n", - " # Get the bounds of the specific intersection\n", - " x_min, y_min, x_max, y_max = specific_intersection.total_bounds\n", - "\n", - " # Add some padding around the element\n", - " padding = 100 # meters\n", - " ax.set_xlim(x_min - padding, x_max + padding)\n", - " ax.set_ylim(y_min - padding, y_max + padding)\n", - "\n", - " # Update title to show we're zoomed to a specific FID\n", - " ax.set_title(f\"Intersection - FID {fid_zoom} (Type {specific_intersection['intersection_type_fid'].iloc[0]})\", fontsize=16)\n", - " else:\n", - " print(f\"FID {fid_zoom} not found in intersections\")\n", - "\n", - "\n", - "# Add a title if not zooming to a specific FID\n", - "if not fid_zoom:\n", - " ax.set_title(\"Intersections by Type\", fontsize=16)\n", - "\n", - "# Convert the specific intersection coordinates to WGS84 (latitude/longitude) for Google Maps\n", - "if fid_zoom and 'specific_intersection' in locals() and not specific_intersection.empty:\n", - " # Create a copy to avoid modifying the original\n", - " wgs84_intersection = specific_intersection.copy()\n", - "\n", - " # Convert from the current projection to WGS84 (EPSG:4326)\n", - " wgs84_intersection = wgs84_intersection.to_crs(\"EPSG:4326\")\n", - "\n", - " # Get the centroid of the intersection for easier lookup\n", - " centroid = 
wgs84_intersection.geometry.iloc[0].centroid\n", - "\n", - "    # Display the coordinates\n", - "    print(f\"\\nGoogle Maps coordinates for Intersection FID {fid_zoom}:\")\n", - "    print(f\"Latitude: {centroid.y}, Longitude: {centroid.x}\")\n", - "    print(f\"Google Maps link: https://www.google.com/maps?q={centroid.y},{centroid.x}\")\n", - "\n", - "    # Add a text annotation showing coordinates on the plot\n", - "    ax.annotate(\n", - "        f\"Lat: {centroid.y:.6f}, Lon: {centroid.x:.6f}\",\n", - "        xy=(0.05, 0.05),\n", - "        xycoords=\"axes fraction\",\n", - "        bbox=dict(boxstyle=\"round,pad=0.5\", fc=\"white\", alpha=0.8),\n", - "        fontsize=10,\n", - "    )\n", - "ax.legend()\n", - "plt.tight_layout()\n", - "plt.show()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def _get_set(df, column: str):\n", - "    \"\"\"\n", - "    Returns a set of unique values in the specified column of the DataFrame.\n", - "    \"\"\"\n", - "    return set(df[column].unique())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "_get_set(lanes, \"lane_index\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "lanes[lanes[\"left_boundary_fid\"] == test_id].keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "get_left_boundary(correct_lane_df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "get_right_boundary(correct_lane_df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "get_left_boundary(error_lane_df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "get_right_boundary(error_lane_df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "lane_df = load_layer(\"generic_drivable_areas\")\n", - "lane_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "carpark_df = load_layer(\"carpark_areas\")\n", - "carpark_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "gen_lane_connectors_scaled_width_polygons_df = load_layer(\"boundaries\")\n", - "\n", - "gen_lane_connectors_scaled_width_polygons_df[gen_lane_connectors_scaled_width_polygons_df[\"fid\"] == \"17950\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "[None] * 10" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd\n", - "\n", - "lane_df = load_layer(\"baseline_paths\")\n", - "lane_df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def mps_to_mph(mps: float) -> float:\n", - "    \"\"\"\n", - "    Convert meters per second to miles per hour.\n", - "    :param mps: speed in meters per second [m/s]\n", - "    :return: speed in miles per hour [mi/h]\n", - "    \"\"\"\n", - "    return mps / 0.44704\n", - "\n", - "\n", - "mps_to_mph(6.705409029950827)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# fix, ax = plt.subplots(figsize=(10, 10))\n", - "\n", - "# type = 
3\n", - "\n", - "# for i in np.random.choice(len(geoms[type]), 10):\n", - "# ax.plot(*geoms[type][i].coords.xy, color=\"blue\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "geoms[2][1]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "geoms[3][1]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/notebooks/deprecated/scene_rendering.ipynb b/notebooks/deprecated/scene_rendering.ipynb deleted file mode 100644 index 0a44912d..00000000 --- a/notebooks/deprecated/scene_rendering.ipynb +++ /dev/null @@ -1,115 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from d123.dataset.scene.arrow_scene import ArrowScene\n", - "from d123.common.visualization.matplotlib.plots import plot_scene_at_iteration\n", - "\n", - "\n", - "\n", - "# log_name = \"2021.06.07.12.54.00_veh-35_01843_02314\"\n", - "# log_file = Path(f\"/home/daniel/d123_workspace/data/nuplan_mini_val/{log_name}.arrow\")\n", - "\n", - "log_name = \"_Rep0_longest1_route0_07_04_10_18_47\"\n", - "log_file = Path(f\"/home/daniel/d123_workspace/data/carla/{log_name}.arrow\")\n", - "scene = ArrowScene(log_file)\n", - "\n", - "plot_scene_at_iteration(scene, iteration=1000)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "scene._vehicle_parameters" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "import traceback\n", - "from d123.common.visualization.matplotlib.plots import render_scene_animation\n", - "\n", - "output_path = Path(\"/home/daniel/d123_logs_videos\")\n", - "# render_scene_as_mp4(scene, output_path, fps=30, end_idx=10000, step=5, dpi=100)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "# Create an mp4 animation with a specific FPS\n", - "import traceback\n", - "from d123.common.visualization.matplotlib.plots import render_scene_animation\n", - "\n", - "\n", - "output_path = Path(\"/home/daniel/d123_logs_videos\")\n", - "log_path = Path(\"/home/daniel/d123_workspace/data/nuplan_mini_val\")\n", - "for log_file in log_path.iterdir():\n", - " scene = ArrowScene(log_file)\n", - " try:\n", - " render_scene_animation(scene, output_path, fps=30, end_idx=None, step=5)\n", - " except Exception as e:\n", - " traceback.print_exc()\n", - " erroneous_file = output_path / f\"{log_name}.mp4\"\n", - " if erroneous_file.exists():\n", - " erroneous_file.unlink()\n", - " del scene\n", - " # break" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - 
"codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/deprecated/test_intesection_polygons.ipynb b/notebooks/deprecated/test_intesection_polygons.ipynb deleted file mode 100644 index 4329d274..00000000 --- a/notebooks/deprecated/test_intesection_polygons.ipynb +++ /dev/null @@ -1,293 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "from shapely.geometry import LineString, Polygon, MultiPolygon\n", - "from shapely.ops import unary_union, polygonize\n", - "import matplotlib.pyplot as plt\n", - "import geopandas as gpd\n", - "\n", - "from shapely.geometry import Point" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "def extract_intersection_outline(lanes):\n", - "\n", - " # Step 1: Extract all boundary line segments\n", - " all_polygons = []\n", - " for lane in lanes:\n", - " all_polygons.append(lane['polygon'])\n", - " \n", - " # Step 2: Merge all boundaries and extract the enclosed polygons\n", - " merged_boundaries = unary_union(all_polygons)\n", - "\n", - " # Step 3: Generate polygons from the merged lines\n", - " polygons = list(polygonize(merged_boundaries))\n", - " \n", - " if not polygons:\n", - " # If no polygon is formed, use buffer-based approach\n", - " return buffer_based_outline(lanes)\n", - " \n", - " # Step 4: Select the polygon that represents the intersection\n", - " # Usually it's the largest polygon\n", - " if len(polygons) == 1:\n", - " return polygons[0]\n", - " else:\n", - " # Take the largest polygon if there are multiple\n", - " return max(polygons, key=lambda p: p.area)\n", - "\n", - "def buffer_based_outline(lanes):\n", - " \"\"\"\n", - " Alternative approach using buffer operations when line-based\n", - " polygonization doesn't work well.\n", - " \"\"\"\n", - " # Create a small buffer around each boundary\n", - " buffer_distance = 0.5 # Adjust based on your data's scale\n", - " buffered_lanes = []\n", - " \n", - " for lane in lanes:\n", - " # Buffer each lane as a whole\n", - " left = lane['left_boundary']\n", - " right = lane['right_boundary']\n", - " \n", - " # Create a lane polygon by buffering both sides and connecting ends\n", - " if left.coords[0] != right.coords[0]: # Check if start points need connecting\n", - " start_connector = LineString([left.coords[0], right.coords[0]])\n", - " all_parts = [left, right, start_connector]\n", - " else:\n", - " all_parts = [left, right]\n", - " \n", - " if left.coords[-1] != right.coords[-1]: # Check if end points need connecting\n", - " end_connector = LineString([left.coords[-1], right.coords[-1]])\n", - " all_parts.append(end_connector)\n", - " \n", - " lane_outline = unary_union([line.buffer(buffer_distance) for line in all_parts])\n", - " buffered_lanes.append(lane_outline)\n", - " \n", - " # Merge all lane buffers\n", - " intersection_area = unary_union(buffered_lanes)\n", - " \n", - " # Extract the exterior boundary\n", - " if isinstance(intersection_area, MultiPolygon):\n", - " largest_polygon = max(intersection_area.geoms, key=lambda p: p.area)\n", - " return Polygon(largest_polygon.exterior.coords)\n", - " else:\n", - " return 
Polygon(intersection_area.exterior.coords)\n", - "\n", - "def is_boundary_part_of_outline(boundary, outline, tolerance=1e-8):\n", - " \"\"\"\n", - " Determine if a boundary linestring is part of the intersection outline.\n", - " \n", - " Parameters:\n", - " boundary -- A shapely LineString representing a lane boundary\n", - " outline -- A shapely Polygon representing the intersection outline\n", - " tolerance -- Distance tolerance for considering a point on the outline\n", - " \n", - " Returns:\n", - " Boolean indicating if the boundary contributes to the outline\n", - " \"\"\"\n", - " # Sample points along the boundary\n", - " num_points = min(20, len(boundary.coords))\n", - " sample_indices = np.linspace(0, len(boundary.coords) - 1, num_points).astype(int)\n", - " sample_points = [boundary.coords[i] for i in sample_indices]\n", - " \n", - " # Check if sampled points are on the outline\n", - " outline_boundary = outline.exterior\n", - " points_on_outline = 0\n", - " \n", - " for point in sample_points:\n", - " distance = outline_boundary.distance(Point(point))\n", - " if distance <= tolerance:\n", - " points_on_outline += 1\n", - " \n", - " # If most points are on the outline, consider it part of the outline\n", - " return points_on_outline / num_points > 0.7\n", - "\n", - "def identify_outline_boundaries(lanes, intersection_outline):\n", - " \"\"\"\n", - " Identify which lane boundaries contribute to the intersection outline.\n", - " \n", - " Parameters:\n", - " lanes -- List of dictionaries with 'left_boundary' and 'right_boundary' keys\n", - " intersection_outline -- Shapely Polygon representing the outline\n", - " \n", - " Returns:\n", - " List of boundary linestrings that form the outline\n", - " \"\"\"\n", - " \n", - " \n", - " outline_boundaries = []\n", - " \n", - " for i, lane in enumerate(lanes):\n", - " # Check left boundary\n", - " if is_boundary_part_of_outline(lane['left_boundary'], intersection_outline):\n", - " outline_boundaries.append({\n", - " 'lane_index': i,\n", - " 'boundary_type': 'left',\n", - " 'linestring': lane['left_boundary']\n", - " })\n", - " \n", - " # Check right boundary\n", - " if is_boundary_part_of_outline(lane['right_boundary'], intersection_outline):\n", - " outline_boundaries.append({\n", - " 'lane_index': i,\n", - " 'boundary_type': 'right',\n", - " 'linestring': lane['right_boundary']\n", - " })\n", - " \n", - " return outline_boundaries\n", - "\n", - "def visualize_intersection(lanes, intersection_outline=None, outline_boundaries=None):\n", - " \"\"\"\n", - " Visualize the intersection, its outline, and contributing boundaries.\n", - " \"\"\"\n", - " plt.figure(figsize=(12, 10))\n", - " \n", - " # Plot all lane boundaries\n", - " for i, lane in enumerate(lanes):\n", - " plt.plot(*lane['left_boundary'].xy, 'b-', alpha=0.5, label='Left boundary' if i == 0 else \"\")\n", - " plt.plot(*lane['right_boundary'].xy, 'g-', alpha=0.5, label='Right boundary' if i == 0 else \"\")\n", - " \n", - " # Plot the intersection outline if provided\n", - " if intersection_outline:\n", - " plt.plot(*intersection_outline.exterior.xy, 'r-', linewidth=2, label='Intersection outline')\n", - " \n", - " # Highlight the contributing boundaries if provided\n", - " if outline_boundaries:\n", - " for boundary in outline_boundaries:\n", - " color = 'purple' if boundary['boundary_type'] == 'right' else 'orange'\n", - " plt.plot(*boundary['linestring'].xy, color=color, linewidth=2.5, \n", - " label=f\"{boundary['boundary_type'].capitalize()} boundary in outline\" if boundary == 
outline_boundaries[0] else \"\")\n", - " \n", - " plt.axis('equal')\n", - " plt.grid(True)\n", - " plt.legend()\n", - " plt.title('Intersection with Lane Boundaries and Outline')\n", - " plt.show()\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "lane_group_df = gpd.read_file(\"/home/daniel/d123_workspace/d123/notebooks/carla_town05.gpkg\", layer=\"lane_group\")\n", - "lane_group_df = lane_group_df[~lane_group_df[\"intersection_id\"].isna()]\n", - "\n", - "\n", - "intersection_ids = list(set(lane_group_df[\"intersection_id\"]))\n", - "intersection_id = intersection_ids[2]\n", - "intersection_df = lane_group_df[lane_group_df[\"intersection_id\"] == intersection_id]\n", - "\n", - "# fig, ax = plt.subplots(figsize=(10, 10))\n", - "# intersection_df.plot(ax=ax)\n", - "# intersection_df\n", - "\n", - "print(intersection_id)\n", - "\n", - "\n", - "left_boundaries = list(intersection_df[\"left_boundary\"])\n", - "right_boundaries = list(intersection_df[\"right_boundary\"])\n", - "polygons = list(intersection_df[\"geometry\"])\n", - "lanes = []\n", - "for i in range(len(left_boundaries)):\n", - " lane = {\n", - " 'left_boundary': left_boundaries[i],\n", - " 'right_boundary': right_boundaries[i],\n", - " \"polygon\": polygons[i]\n", - " }\n", - " lanes.append(lane)\n", - "\n", - "# lanes = [\n", - "# {\n", - "# 'left_boundary': LineString([(0, 0), (10, 0)]),\n", - "# 'right_boundary': LineString([(0, 5), (10, 5)])\n", - "# },\n", - "# {\n", - "# 'left_boundary': LineString([(10, 0), (20, 0)]),\n", - "# 'right_boundary': LineString([(10, 5), (20, 5)])\n", - "# },\n", - "# # Add more lanes as needed\n", - "# ]\n", - "\n", - "\n", - "outline = extract_intersection_outline(lanes)\n", - "# outline_boundaries = identify_outline_boundaries(lanes, outline)\n", - "# visualize_intersection(lanes, outline, outline_boundaries)\n", - "\n", - "print(isinstance(outline, MultiPolygon))\n", - "\n", - "\n", - "polygons = list(polygonize(outline))\n", - "\n", - "polygons[0]\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "left_boundaries" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "lane_group_df = gpd.read_file(\"/home/daniel/d123_workspace/d123/notebooks/carla_town05.gpkg\", layer=\"intersection\")\n", - "lane_group_df[\"lane_group_ids\"][0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.13.3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/deprecated/test_scene_builder.ipynb b/notebooks/deprecated/test_scene_builder.ipynb deleted file mode 100644 index 00de24f9..00000000 --- a/notebooks/deprecated/test_scene_builder.ipynb +++ /dev/null @@ -1,258 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n", - "from d123.dataset.scene.scene_filter import 
SceneFilter\n", - "\n", - "from d123.common.multithreading.worker_sequential import Sequential\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "# log_names = [\"2021.06.07.12.54.00_veh-35_01843_02314\"]\n", - "# log_names = [\"_Rep0_bench2drive220_route2_06_12_20_50_31\"]\n", - "log_names = None\n", - "split = \"nuplan_mini_val\"\n", - "scene_tokens = [\"8445a99210185a81\"]\n", - "scene_filter = SceneFilter(split_names=[split], log_names=log_names, duration_s=8.1, scene_tokens=scene_tokens)\n", - "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n", - "worker = Sequential() \n", - "# worker = RayDistributed()\n", - "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "\n", - "print(len(scenes))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from d123.common.visualization.matplotlib.plots import render_scene_animation\n", - "\n", - "output_path = Path(\"/home/daniel/d123_route_testing\")\n", - "for scene in scenes:\n", - " render_scene_animation(scene, output_path, fps=20, end_idx=None, step=1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "# from pathlib import Path\n", - "# from typing import Optional, Tuple\n", - "\n", - "# import matplotlib.animation as animation\n", - "# import matplotlib.pyplot as plt\n", - "# from tqdm import tqdm\n", - "\n", - "# from d123.common.geometry.base import Point2D, StateSE2\n", - "# from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2\n", - "# from d123.common.visualization.color.default import EGO_VEHICLE_CONFIG\n", - "# from d123.common.visualization.matplotlib.observation import (\n", - "# add_bounding_box_to_ax,\n", - "# add_box_detections_to_ax,\n", - "# add_default_map_on_ax,\n", - "# add_traffic_lights_to_ax,\n", - "# )\n", - "# from d123.dataset.arrow.conversion import TrafficLightDetectionWrapper\n", - "# from d123.dataset.maps.abstract_map import AbstractMap\n", - "# from d123.dataset.observation.detection.detection import BoxDetectionWrapper\n", - "# from d123.dataset.scene.abstract_scene import AbstractScene\n", - "# from nuplan.common.actor_state.ego_state import EgoState\n", - "# import io\n", - "# from PIL import Image\n", - "\n", - "\n", - "# def add_ego_vehicle_to_ax_(ax: plt.Axes, ego_state: EgoState) -> None:\n", - "# bounding_box = BoundingBoxSE2(\n", - "# center=StateSE2(*ego_state.center),\n", - "# length=ego_state.car_footprint.length,\n", - "# width=ego_state.car_footprint.width,\n", - "# )\n", - "# add_bounding_box_to_ax(ax, bounding_box, EGO_VEHICLE_CONFIG)\n", - "\n", - "\n", - "# def _plot_scene_on_ax(\n", - "# ax: plt.Axes,\n", - "# map_api: AbstractMap,\n", - "# ego_state: EgoState,\n", - "# box_detections: BoxDetectionWrapper,\n", - "# traffic_light_detections: TrafficLightDetectionWrapper,\n", - "# radius: float = 80,\n", - "# ) -> plt.Axes:\n", - "\n", - "# point_2d = Point2D(ego_state.center.x, ego_state.center.y)\n", - "# add_default_map_on_ax(ax, map_api, point_2d, radius=radius)\n", - "# add_traffic_lights_to_ax(ax, traffic_light_detections, map_api)\n", - "\n", - "# add_box_detections_to_ax(ax, box_detections)\n", - "# add_ego_vehicle_to_ax_(ax, ego_state)\n", - 
"\n", - "# ax.set_xlim(point_2d.x - radius, point_2d.x + radius)\n", - "# ax.set_ylim(point_2d.y - radius, point_2d.y + radius)\n", - "\n", - "\n", - "# ax.set_aspect(\"equal\", adjustable=\"box\")\n", - "# return ax\n", - "\n", - "# def plot_scene_to_image(\n", - "# map_api: AbstractMap,\n", - "# ego_state: EgoState,\n", - "# box_detections: BoxDetectionWrapper,\n", - "# traffic_light_detections: TrafficLightDetectionWrapper,\n", - "# radius: float = 80,\n", - "# figsize: Tuple[int, int] = (8, 8),\n", - "# ) -> Image:\n", - "\n", - "# fig, ax = plt.subplots(figsize=figsize)\n", - "# _plot_scene_on_ax(ax, map_api, ego_state, box_detections, traffic_light_detections, radius)\n", - "# ax.set_aspect(\"equal\", adjustable=\"box\")\n", - "# plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05)\n", - "# # plt.tight_layout()\n", - " \n", - "# buf = io.BytesIO()\n", - "# fig.savefig(buf, format=\"png\", bbox_inches=\"tight\")\n", - "# plt.close(fig)\n", - "# buf.seek(0)\n", - "# img = Image.open(buf)\n", - "# return img\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "# from d123.simulation.gym.demo_gym_env import DemoGymEnv\n", - "\n", - "# images = []\n", - "# action = [1.0, 0.1] # Placeholder action, replace with actual action logic\n", - "# env = DemoGymEnv(scenes)\n", - "# map_api, ego_state, detection_observation = env.reset()\n", - "\n", - "# images.append(\n", - "# plot_scene_to_image(\n", - "# map_api,\n", - "# ego_state,\n", - "# detection_observation.box_detections,\n", - "# detection_observation.traffic_light_detections,\n", - "# )\n", - "# )\n", - "\n", - "# for i in range(150):\n", - "# ego_state, detection_observation, end = env.step(action)\n", - "# images.append(\n", - "# plot_scene_to_image(\n", - "# map_api,\n", - "# ego_state,\n", - "# detection_observation.box_detections,\n", - "# detection_observation.traffic_light_detections,\n", - "# )\n", - "# )\n", - "# if end:\n", - "# print(\"End of scene reached.\")\n", - "# break\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "# import numpy as np\n", - "\n", - "\n", - "# def create_gif(images, output_path, duration=100):\n", - "# \"\"\"\n", - "# Create a GIF from a list of PIL images.\n", - "\n", - "# Args:\n", - "# images (list): List of PIL.Image objects.\n", - "# output_path (str): Path to save the GIF.\n", - "# duration (int): Duration between frames in milliseconds.\n", - "# \"\"\"\n", - "# if images:\n", - "# print(len(images))\n", - "# images_p = [img.convert('P', palette=Image.ADAPTIVE) for img in images]\n", - "# images_p[0].save(\n", - "# output_path,\n", - "# save_all=True,\n", - "# append_images=images_p[1:],\n", - "# duration=duration,\n", - "# loop=0\n", - "# )\n", - "\n", - "# create_gif(images, f\"{split}_{np.random.randint(0, 1000)}_{action}.gif\", duration=20)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": 
"ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/deprecated/test_waypoints.ipynb b/notebooks/deprecated/test_waypoints.ipynb deleted file mode 100644 index 86d2ea9c..00000000 --- a/notebooks/deprecated/test_waypoints.ipynb +++ /dev/null @@ -1,167 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n", - "from d123.dataset.scene.scene_filter import SceneFilter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.common.multithreading.worker_sequential import Sequential" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "import os, psutil\n", - "\n", - "def print_memory_usage():\n", - " process = psutil.Process(os.getpid())\n", - " memory_info = process.memory_info()\n", - " print(f\"Memory usage: {memory_info.rss / 1024 ** 2:.2f} MB\")\n", - "\n", - "print_memory_usage()\n", - "# log_names = [\"2021.06.07.12.54.00_veh-35_01843_02314\"]\n", - "# log_names = [\"_Rep0_bench2drive220_route2_06_12_20_50_31\"]\n", - "log_names = [\"_Rep0_longest1_route0_06_13_17_21_21\"]\n", - "split = \"carla\"\n", - "scene_filter = SceneFilter(split_names=[split], log_names=log_names, timestamp_threshold_s=None, duration_s=None)\n", - "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n", - "worker = Sequential() \n", - "# worker = RayDistributed()\n", - "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "\n", - "print(len(scenes))\n", - "print_memory_usage()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "from shapely.geometry import LineString\n", - "import numpy as np\n", - "from matplotlib import pyplot as plt\n", - "from d123.dataset.dataset_specific.carla.carla_data_processor import _load_json_gz \n", - "from d123.common.visualization.matplotlib.plots import _plot_scene_on_ax\n", - "json_dict = _load_json_gz(\"/home/daniel/carla_workspace/data/_Rep0_longest1_route0_06_13_17_21_21/boxes/0000000002.json.gz\")\n", - "json_dict\n", - "\n", - "\n", - "scene = scenes[0]\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "_plot_scene_on_ax(ax, scene, 4000, 200)\n", - "\n", - "\n", - "# x,y = json_dict[\"pos_global\"]\n", - "\n", - "# target_points = np.array([json_dict[\"target_point\"], json_dict[\"target_point_next\"],])\n", - "# route = np.array(json_dict[\"route\"])\n", - "# route[...,1] *= (-1)\n", - "# route = route[:500]\n", - "# target_points[...,1] *= (-1)\n", - "# for point in route:\n", - "# print(point)\n", - "\n", - "# linestring = LineString(route)\n", - "\n", - "\n", - "# ax.plot(*route.T, color=\"green\", marker=\"o\", markersize=1, zorder=20, label=\"route\")\n", - "# ax.plot(*target_points.T, color=\"red\", marker=\"o\", markersize=10, zorder=20, label=\"route\")\n", - "# ax.legend()\n", - "# print(route[0,0], route[0,1], linestring.length)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "len(route)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "# list(json_dict.keys())\n", - "\n", - "x,y = json_dict[\"pos_global\"]\n", - "\n", - "route = 
np.array([json_dict[\"target_point\"], json_dict[\"target_point_next\"],])\n", - "route[...,0] += x\n", - "route[...,1] += y\n", - "\n", - "route[...,1] *= (-1)\n", - "\n", - "ax.plot(*route, color=\"red\", marker=\"o\", markersize=10, zorder=20, label=\"Target Points\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "json_dict.keys()\n", - "\n", - "json_dict[\"route\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "route.tolist()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/gym/test_gym.ipynb b/notebooks/gym/test_gym.ipynb deleted file mode 100644 index bb6cf7dc..00000000 --- a/notebooks/gym/test_gym.ipynb +++ /dev/null @@ -1,300 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n", - "from d123.dataset.scene.scene_filter import SceneFilter\n", - "\n", - "from d123.common.multithreading.worker_sequential import Sequential\n", - "# from d123.common.multithreading.worker_ray import RayDistributed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "import os, psutil\n", - "\n", - "\n", - "def print_memory_usage():\n", - " process = psutil.Process(os.getpid())\n", - " memory_info = process.memory_info()\n", - " print(f\"Memory usage: {memory_info.rss / 1024 ** 2:.2f} MB\")\n", - "\n", - "\n", - "split = \"nuplan_mini_val\"\n", - "\n", - "# log_names = [\"2021.06.07.12.54.00_veh-35_01843_02314\"]\n", - "scene_tokens = None\n", - "# scene_tokens = [\"2283aea39bc1505e\"]\n", - "log_names = None\n", - "\n", - "scene_filter = SceneFilter(\n", - " split_names=[split], log_names=log_names, scene_tokens=scene_tokens, duration_s=15.1, history_s=1.0\n", - ")\n", - "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n", - "worker = Sequential()\n", - "# worker = RayDistributed()\n", - "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "\n", - "print(len(scenes))\n", - "\n", - "for scene in scenes[:10]:\n", - " print(scene.log_name, scene.token)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from typing import Optional, Tuple\n", - "\n", - "import matplotlib.animation as animation\n", - "import matplotlib.pyplot as plt\n", - "from tqdm import tqdm\n", - "\n", - "from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2\n", - "from d123.common.geometry.base import Point2D, StateSE2\n", - "from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2\n", - "from d123.common.visualization.color.default import EGO_VEHICLE_CONFIG\n", - "from d123.common.visualization.matplotlib.observation import (\n", - " 
add_bounding_box_to_ax,\n", - " add_box_detections_to_ax,\n", - " add_default_map_on_ax,\n", - " add_traffic_lights_to_ax,\n", - " add_ego_vehicle_to_ax,\n", - ")\n", - "from d123.dataset.arrow.conversion import TrafficLightDetectionWrapper\n", - "from d123.dataset.maps.abstract_map import AbstractMap\n", - "from d123.common.datatypes.detection.detection import BoxDetectionWrapper\n", - "from d123.dataset.scene.abstract_scene import AbstractScene\n", - "import io\n", - "from PIL import Image\n", - "\n", - "\n", - "\n", - "def _plot_scene_on_ax(\n", - " ax: plt.Axes,\n", - " map_api: AbstractMap,\n", - " ego_state: EgoStateSE2,\n", - " initial_ego_state: Optional[EgoStateSE2],\n", - " box_detections: BoxDetectionWrapper,\n", - " traffic_light_detections: TrafficLightDetectionWrapper,\n", - " radius: float = 120,\n", - ") -> plt.Axes:\n", - "\n", - " if initial_ego_state is not None:\n", - " point_2d = initial_ego_state.center.point_2d\n", - " else:\n", - " point_2d = ego_state.center.point_2d\n", - " add_default_map_on_ax(ax, map_api, point_2d, radius=radius)\n", - " add_traffic_lights_to_ax(ax, traffic_light_detections, map_api)\n", - "\n", - " add_box_detections_to_ax(ax, box_detections)\n", - " add_ego_vehicle_to_ax(ax, ego_state)\n", - "\n", - " ax.set_xlim(point_2d.x - radius, point_2d.x + radius)\n", - " ax.set_ylim(point_2d.y - radius, point_2d.y + radius)\n", - "\n", - " ax.set_aspect(\"equal\", adjustable=\"box\")\n", - " return ax\n", - "\n", - "\n", - "def plot_scene_to_image(\n", - " map_api: AbstractMap,\n", - " ego_state: EgoStateSE2,\n", - " initial_ego_state: Optional[EgoStateSE2],\n", - " box_detections: BoxDetectionWrapper,\n", - " traffic_light_detections: TrafficLightDetectionWrapper,\n", - " radius: float = 120,\n", - " figsize: Tuple[int, int] = (8, 8),\n", - ") -> Image:\n", - "\n", - " fig, ax = plt.subplots(figsize=figsize)\n", - " _plot_scene_on_ax(ax, map_api, ego_state, initial_ego_state, box_detections, traffic_light_detections, radius)\n", - " ax.set_aspect(\"equal\", adjustable=\"box\")\n", - " plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05)\n", - " # plt.tight_layout()\n", - "\n", - " buf = io.BytesIO()\n", - " fig.savefig(buf, format=\"png\", bbox_inches=\"tight\")\n", - " plt.close(fig)\n", - " buf.seek(0)\n", - " img = Image.open(buf)\n", - " return img" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.arrow.conversion import DetectionType\n", - "from d123.simulation.gym.gym_env import GymEnvironment\n", - "from d123.simulation.observation.agents_observation import _filter_agents_by_type\n", - "\n", - "import time\n", - "\n", - "images = []\n", - "agent_rollouts = []\n", - "plot: bool = True\n", - "action = [1.0, -0.0] # Placeholder action, replace with actual action logic\n", - "env = GymEnvironment(scenes)\n", - "\n", - "start = time.time()\n", - "\n", - "map_api, ego_state, detection_observation, current_scene = env.reset(None)\n", - "initial_ego_state = ego_state\n", - "cars, _, _ = _filter_agents_by_type(detection_observation.box_detections, detection_types=[DetectionType.VEHICLE])\n", - "agent_rollouts.append(BoxDetectionWrapper(cars))\n", - "if plot:\n", - " images.append(\n", - " plot_scene_to_image(\n", - " map_api,\n", - " ego_state,\n", - " initial_ego_state,\n", - " detection_observation.box_detections,\n", - " detection_observation.traffic_light_detections,\n", - " )\n", - " )\n", - "\n", - "\n", - "for i in range(160):\n", - " 
ego_state, detection_observation, end = env.step(action)\n", - " cars, _, _ = _filter_agents_by_type(detection_observation.box_detections, detection_types=[DetectionType.VEHICLE])\n", - " agent_rollouts.append(BoxDetectionWrapper(cars))\n", - " if plot:\n", - " images.append(\n", - " plot_scene_to_image(\n", - " map_api,\n", - " ego_state,\n", - " initial_ego_state,\n", - " detection_observation.box_detections,\n", - " detection_observation.traffic_light_detections,\n", - " )\n", - " )\n", - " if end:\n", - " print(\"End of scene reached.\")\n", - " break\n", - "\n", - "time_s = time.time() - start\n", - "print(time_s)\n", - "print(151/ time_s)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "151/46.239747524261475" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "\n", - "def create_gif(images, output_path, duration=100):\n", - " \"\"\"\n", - " Create a GIF from a list of PIL images.\n", - "\n", - " Args:\n", - " images (list): List of PIL.Image objects.\n", - " output_path (str): Path to save the GIF.\n", - " duration (int): Duration between frames in milliseconds.\n", - " \"\"\"\n", - " if images:\n", - " print(len(images))\n", - " images_p = [img.convert(\"P\", palette=Image.ADAPTIVE) for img in images]\n", - " images_p[0].save(output_path, save_all=True, append_images=images_p[1:], duration=duration, loop=0)\n", - "\n", - "\n", - "if plot:\n", - " create_gif(images, f\"{split}_{current_scene.token}.gif\", duration=20)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "from typing import List\n", - "\n", - "from d123.simulation.metrics.sim_agents.sim_agents import get_sim_agents_metrics\n", - "\n", - "\n", - "\n", - "\n", - "result = get_sim_agents_metrics(current_scene, agent_rollouts)\n", - "result\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/gym/test_simulation_2d.ipynb b/notebooks/gym/test_simulation_2d.ipynb deleted file mode 100644 index 60c22ceb..00000000 --- a/notebooks/gym/test_simulation_2d.ipynb +++ /dev/null @@ -1,421 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n", - "from d123.dataset.scene.scene_filter import SceneFilter\n", - "\n", - "from d123.common.multithreading.worker_sequential import Sequential\n", - "# from d123.common.multithreading.worker_ray import RayDistributed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "split = \"nuplan_mini_val\"\n", - "\n", - "log_names = None\n", - "scene_tokens = None\n", - "\n", - "# log_names = [\"2021.06.07.12.54.00_veh-35_01843_02314\"]\n", - "# scene_tokens = 
[\"2283aea39bc1505e\"]\n", - "\n", - "scene_filter = SceneFilter(\n", - " split_names=[split],\n", - " log_names=log_names,\n", - " scene_tokens=scene_tokens,\n", - " duration_s=30.1,\n", - " history_s=1.0,\n", - " timestamp_threshold_s=10,\n", - ")\n", - "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n", - "worker = Sequential()\n", - "# worker = RayDistributed()\n", - "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "\n", - "print(f\"Number of scenes: {len(scenes)}\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.simulation.controller.action_controller import ActionController\n", - "from d123.simulation.controller.motion_model.kinematic_bicycle_model import KinematicBicycleModel\n", - "from d123.simulation.observation.log_replay_observation import LogReplayObservation\n", - "from d123.simulation.simulation_2d_setup import Simulation2DSetup\n", - "from d123.simulation.simulation_2d import Simulation2D\n", - "\n", - "\n", - "from d123.simulation.time_controller.log_time_controller import LogTimeController\n", - "\n", - "\n", - "def get_simulation_2d_setup():\n", - " return Simulation2DSetup(\n", - " time_controller=LogTimeController(),\n", - " observations=LogReplayObservation(),\n", - " ego_controller=ActionController(KinematicBicycleModel()),\n", - " )\n", - "\n", - "\n", - "simulation_2d_setup = get_simulation_2d_setup()\n", - "simulation2d = Simulation2D(simulation_2d_setup)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "# reset\n", - "import torch\n", - "from d123.simulation.gym.environment.output_converter.action_output_converter import ActionOutputConverter\n", - "from d123.simulation.gym.policy.ppo.ppo_config import GlobalConfig\n", - "from d123.simulation.gym.policy.ppo.ppo_model import PPOPolicy\n", - "\n", - "\n", - "import numpy as np\n", - "import numpy.typing as npt\n", - "\n", - "\n", - "from d123.simulation.gym.environment.gym_observation.raster.raster_gym_observation import RasterGymObservation\n", - "from d123.simulation.gym.environment.gym_observation.raster.raster_renderer import RasterRenderer\n", - "from d123.simulation.gym.environment.helper.environment_area import RectangleEnvironmentArea\n", - "from d123.simulation.planning.abstract_planner import PlannerInitialization, PlannerInput\n", - "from d123.simulation.planning.planner_output.action_planner_output import ActionPlannerOutput\n", - "\n", - "\n", - "environment_area = RectangleEnvironmentArea()\n", - "gym_observation = RasterGymObservation(environment_area, RasterRenderer(environment_area), inference=True)\n", - "gym_observation.reset()\n", - "\n", - "checkpoint_path = \"/home/daniel/carl_workspace/carl_nuplan/checkpoints/nuplan_51892_1B/model_best.pth\"\n", - "config = GlobalConfig()\n", - "output_converter = ActionOutputConverter()\n", - "\n", - "device = \"cpu\"\n", - "\n", - "agent = PPOPolicy(\n", - " gym_observation.get_observation_space(),\n", - " output_converter.get_action_space(),\n", - " config=config,\n", - ").to(device)\n", - "\n", - "state_dict = torch.load(checkpoint_path, map_location=device)\n", - "agent.load_state_dict(state_dict, strict=True)\n", - "agent.to(device)\n", - "agent.eval()\n", - "\n", - "\n", - "def forward_agent(\n", - " planner_initialization: PlannerInitialization,\n", - " planner_input: PlannerInput,\n", - " last_action: npt.NDArray[np.float32],\n", - ") -> 
npt.NDArray[np.float32]:\n", - "    info = {\"last_action\": last_action}\n", - "    obs = gym_observation.get_gym_observation(planner_input, planner_initialization, info)\n", - "\n", - "    obs_tensor = {\n", - "        \"bev_semantics\": torch.Tensor(obs[\"bev_semantics\"][None, ...]).to(device, dtype=torch.float32),\n", - "        \"measurements\": torch.Tensor(obs[\"measurements\"][None, ...]).to(device, dtype=torch.float32),\n", - "        \"value_measurements\": torch.Tensor(obs[\"value_measurements\"][None, ...]).to(device, dtype=torch.float32),\n", - "    }\n", - "\n", - "    with torch.no_grad():\n", - "        (action, _, _, _, _, _, _, _, _, _, _) = agent.forward(\n", - "            obs_tensor, deterministic=True, lstm_state=None, done=None\n", - "        )\n", - "    action = action.squeeze().detach().cpu().numpy()\n", - "    print(action)\n", - "    return action\n", - "\n", - "\n", - "last_action = [0.0, 0.0]  # placeholder\n", - "rasters = []\n", - "\n", - "idx = 1\n", - "planner_initialization, current_planner_input = simulation2d.reset(scenes[idx])\n", - "\n", - "\n", - "\n", - "def _get_action(planner_input: PlannerInput) -> ActionPlannerOutput:\n", - "    ego_state, _ = planner_input.history.current_state\n", - "    return ActionPlannerOutput(0.5, 0.0, ego_state)\n", - "\n", - "\n", - "while simulation2d.is_simulation_running():\n", - "\n", - "    # 1. trigger planner\n", - "    # planner_output = _get_action(current_planner_input)\n", - "\n", - "    last_action = forward_agent(planner_initialization, current_planner_input, last_action)\n", - "    planner_output = output_converter.get_planner_output(\n", - "        action=last_action,\n", - "        ego_state=current_planner_input.history.current_state[0],\n", - "        info={},\n", - "    )\n", - "    print(planner_output._acceleration, planner_output._steering_rate)\n", - "\n", - "    # 2. step simulation\n", - "    current_planner_input = simulation2d.step(planner_output)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "initial_ego_state = simulation2d.history.data[0]\n", - "# iteration\n", - "# ego_state\n", - "# planner_output\n", - "# detections\n", - "\n", - "len(simulation2d.history.data)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from typing import List, Union\n", - "\n", - "import numpy as np\n", - "import numpy.typing as npt\n", - "from PIL import Image\n", - "\n", - "\n", - "from d123.common.visualization.color.color import (\n", - "    BLACK,\n", - "    DARK_GREY,\n", - "    DARKER_GREY,\n", - "    ELLIS_5,\n", - "    LIGHT_GREY,\n", - "    NEW_TAB_10,\n", - ")\n", - "\n", - "\n", - "def numpy_images_to_gif(\n", - "    numpy_images: List[npt.NDArray[np.uint8]], gif_path: Union[str, Path], duration: int = 50\n", - ") -> None:\n", - "    \"\"\"\n", - "    Helper function to convert images into a GIF file.\n", - "    :param numpy_images: list of images as uint8 numpy arrays.\n", - "    :param gif_path: output path for the GIF file.\n", - "    :param duration: duration between frames in milliseconds, defaults to 50\n", - "    \"\"\"\n", - "    pil_images = [Image.fromarray(img) for img in numpy_images]\n", - "    pil_images[0].save(gif_path, save_all=True, append_images=pil_images[1:], duration=duration, loop=0)\n", - "\n", - "\n", - "def image_to_rbg(image: npt.NDArray[np.uint8]) -> npt.NDArray[np.uint8]:\n", - "    \"\"\"\n", - "    Helper function to convert an observation image to RGB format.\n", - "    :param image: semantic BEV observation raster of shape (channels, width, height).\n", - "    :return: RGB image as a (width, height, 3) uint8 array.\n", - "    \"\"\"\n", - "    _, 
width, height = image.shape\n", - " rgb_image = np.zeros((width, height, 3), dtype=np.uint8)\n", - " rgb_image.fill(255)\n", - " # drivable area\n", - " rgb_image[image[0] > 0] = LIGHT_GREY.rgb\n", - " rgb_image[image[1] > 0] = DARK_GREY.rgb\n", - " rgb_image[image[2] > 0] = BLACK.rgb\n", - " rgb_image[image[5] > 0] = DARKER_GREY.rgb\n", - "\n", - " rgb_image[image[3] == 80] = NEW_TAB_10[4].rgb\n", - " rgb_image[image[3] == 255] = NEW_TAB_10[2].rgb\n", - " # rgb_image[image[4] > 0] = ELLIS_5[1].rgb\n", - " rgb_image[image[6] > 0] = ELLIS_5[4].rgb\n", - " rgb_image[image[7] > 0] = NEW_TAB_10[6].rgb\n", - " rgb_image[image[8] > 0] = ELLIS_5[0].rgb\n", - "\n", - " rgb_image = np.rot90(rgb_image[::-1])\n", - " return rgb_image\n", - "\n", - "\n", - "# numpy_images_to_gif(\n", - "# [image_to_rbg(raster[\"bev_semantics\"]) for raster in rasters],\n", - "# gif_path=\"/home/daniel/d123_workspace/d123/notebooks/simulation_2d.gif\",\n", - "# duration=100,\n", - "# )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from typing import Optional, Tuple, Union\n", - "\n", - "import matplotlib.animation as animation\n", - "import matplotlib.pyplot as plt\n", - "from tqdm import tqdm\n", - "\n", - "from d123.common.visualization.matplotlib.observation import (\n", - " add_box_detections_to_ax,\n", - " add_default_map_on_ax,\n", - " add_ego_vehicle_to_ax,\n", - " add_traffic_lights_to_ax,\n", - ")\n", - "from d123.dataset.scene.abstract_scene import AbstractScene\n", - "from d123.simulation.history.simulation_history import Simulation2DHistory, Simulation2DHistorySample\n", - "from d123.simulation.planning.abstract_planner import PlannerInitialization\n", - "\n", - "\n", - "def _plot_simulation_history_sample_on_ax(\n", - " ax: plt.Axes,\n", - " simulation_history: Simulation2DHistory,\n", - " iteration: int = 0,\n", - " radius: float = 80,\n", - ") -> plt.Axes:\n", - "\n", - " sample = simulation_history.data[iteration]\n", - " map_api = simulation_history.scene.map_api\n", - "\n", - " ego_state = sample.ego_state\n", - " # planner_output = sample.planner_output\n", - " detections = sample.detections\n", - "\n", - " point_2d = ego_state.center.point_2d\n", - " add_default_map_on_ax(ax, map_api, point_2d, radius=radius, route_lane_group_ids=None)\n", - " add_traffic_lights_to_ax(ax, detections.traffic_light_detections, map_api)\n", - "\n", - " add_box_detections_to_ax(ax, detections.box_detections)\n", - " add_ego_vehicle_to_ax(ax, ego_state)\n", - "\n", - " ax.set_xlim(point_2d.x - radius, point_2d.x + radius)\n", - " ax.set_ylim(point_2d.y - radius, point_2d.y + radius)\n", - "\n", - " ax.set_aspect(\"equal\", adjustable=\"box\")\n", - " ax.set_title(f\"Iteration {iteration}\")\n", - " return ax\n", - "\n", - "\n", - "def plot_simulation_history_at_iteration(\n", - " simulation_history: Simulation2DHistory,\n", - " iteration: int = 0,\n", - " radius: float = 80,\n", - ") -> Tuple[plt.Figure, plt.Axes]:\n", - "\n", - " fig, ax = plt.subplots(figsize=(10, 10))\n", - " _plot_simulation_history_sample_on_ax(\n", - " ax,\n", - " simulation_history,\n", - " iteration,\n", - " radius,\n", - " )\n", - " return fig, ax\n", - "\n", - "\n", - "def render_simulation_history_animation(\n", - " simulation_history: Simulation2DHistory,\n", - " output_path: Union[str, Path],\n", - " start_idx: int = 0,\n", - " end_idx: Optional[int] = None,\n", - " step: int = 10,\n", - " fps: float = 20.0,\n", - " dpi: int = 
300,\n", - "    format: str = \"mp4\",\n", - "    radius: float = 100,\n", - ") -> None:\n", - "    assert format in [\"mp4\", \"gif\"], \"Format must be either 'mp4' or 'gif'.\"\n", - "    output_path = Path(output_path)\n", - "    output_path.mkdir(parents=True, exist_ok=True)\n", - "\n", - "    simulation_history.scene.open()\n", - "    if end_idx is None:\n", - "        end_idx = len(simulation_history)\n", - "\n", - "    fig, ax = plt.subplots(figsize=(10, 10))\n", - "\n", - "    def update(i):\n", - "        ax.clear()\n", - "        _plot_simulation_history_sample_on_ax(ax, simulation_history, i, radius)\n", - "        plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05)\n", - "        pbar.update(1)\n", - "\n", - "    frames = list(range(start_idx, end_idx, step))\n", - "    pbar = tqdm(total=len(frames), desc=f\"Rendering {simulation_history.scene.log_name} as {format}\")\n", - "    ani = animation.FuncAnimation(fig, update, frames=frames, repeat=False)\n", - "\n", - "    ani.save(\n", - "        output_path / f\"{simulation_history.scene.log_name}_{simulation_history.scene.token}.{format}\",\n", - "        writer=\"ffmpeg\",\n", - "        fps=fps,\n", - "        dpi=dpi,\n", - "    )\n", - "    plt.close(fig)\n", - "    simulation_history.scene.close()\n", - "\n", - "\n", - "render_simulation_history_animation(\n", - "    simulation_history=simulation2d.history,\n", - "    output_path=Path(\"/home/daniel/d123_workspace/d123/notebooks/animations\"),\n", - "    format=\"mp4\",\n", - "    step=1,\n", - "    fps=20.0,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/nuplan/nuplan_sensor_loading.ipynb b/notebooks/nuplan/nuplan_sensor_loading.ipynb deleted file mode 100644 index 3fc654d8..00000000 --- a/notebooks/nuplan/nuplan_sensor_loading.ipynb +++ /dev/null @@ -1,501 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import glob\n", - "from pathlib import Path\n", - "import numpy as np\n", - "import pyarrow as pa\n", - "import pyarrow.ipc as ipc\n", - "from PIL import Image" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "\n", - "np.zeros((3,3)).tolist()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.dataset_specific.nuplan.nuplan_data_converter import NuplanDataConverter" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "def process_images_to_arrow(input_folder, output_file, batch_size=100):\n", - "    \"\"\"\n", - "    Process JPEG images from a folder and save them as arrays in Arrow IPC format.\n", - "\n", - "    Args:\n", - "        input_folder (str): Path to folder containing JPEG images\n", - "        output_file (str): Path to output Arrow IPC file\n", - "        batch_size (int): Number of images to process in 
each batch\n", - " \"\"\"\n", - "\n", - " # Get all JPEG files in the folder\n", - " jpeg_extensions = [\"*.jpg\", \"*.jpeg\", \"*.JPG\", \"*.JPEG\"]\n", - " image_files = []\n", - "\n", - " for ext in jpeg_extensions:\n", - " image_files.extend(glob.glob(os.path.join(input_folder, ext)))\n", - "\n", - " if not image_files:\n", - " print(f\"No JPEG files found in {input_folder}\")\n", - " return\n", - "\n", - " print(f\"Found {len(image_files)} JPEG files\")\n", - "\n", - " # Create Arrow schema\n", - " schema = pa.schema(\n", - " [\n", - " # pa.field(\"filename\", pa.string()),\n", - " pa.field(\"image_array\", pa.binary()),\n", - " # pa.field(\"height\", pa.int32()),\n", - " # pa.field(\"width\", pa.int32()),\n", - " # pa.field(\"channels\", pa.int32()),\n", - " ]\n", - " )\n", - "\n", - " # Open Arrow IPC writer\n", - " with open(output_file, \"wb\") as f:\n", - " writer = ipc.new_file(f, schema)\n", - "\n", - " # Process images in batches\n", - " for i in range(0, len(image_files), batch_size):\n", - " batch_files = image_files[i : i + batch_size]\n", - "\n", - " # Lists to store batch data\n", - " filenames = []\n", - " image_arrays = []\n", - " image_bytes = []\n", - " heights = []\n", - " widths = []\n", - " channels = []\n", - "\n", - " print(f\"Processing batch {i//batch_size + 1}/{(len(image_files)-1)//batch_size + 1}\")\n", - "\n", - " for img_path in batch_files:\n", - " try:\n", - " # Load image\n", - " # with Image.open(img_path) as img:\n", - " # # Convert to RGB if not already\n", - " # if img.mode != \"RGB\":\n", - " # img = img.convert(\"RGB\")\n", - " with open(img_path, \"rb\") as f:\n", - " jpg_bytes = f.read()\n", - "\n", - " # Convert to numpy array\n", - " # img_array = np.array(img)\n", - "\n", - " # Store image data\n", - " filenames.append(os.path.basename(img_path))\n", - " image_bytes.append(jpg_bytes)\n", - " # image_arrays.append(img_array.flatten().tolist())\n", - " # heights.append(img_array.shape[0])\n", - " # widths.append(img_array.shape[1])\n", - " # channels.append(img_array.shape[2])\n", - "\n", - " except Exception as e:\n", - " print(f\"Error processing {img_path}: {e}\")\n", - " continue\n", - "\n", - " # Create Arrow arrays for this batch\n", - " if filenames: # Only create batch if we have valid images\n", - " batch_data = pa.record_batch(\n", - " [\n", - " # pa.array(filenames),\n", - " pa.array(image_bytes),\n", - " # pa.array(heights),\n", - " # pa.array(widths),\n", - " # pa.array(channels),\n", - " ],\n", - " schema=schema,\n", - " )\n", - "\n", - " # Write batch to file\n", - " writer.write_batch(batch_data)\n", - "\n", - " writer.close()\n", - "\n", - " print(f\"Successfully saved images to {output_file}\")\n", - "\n", - "\n", - "def read_arrow_file(arrow_file):\n", - " \"\"\"\n", - " Read and display info about the Arrow IPC file.\n", - "\n", - " Args:\n", - " arrow_file (str): Path to Arrow IPC file\n", - " \"\"\"\n", - " with open(arrow_file, \"rb\") as f:\n", - " reader = ipc.open_file(f)\n", - "\n", - " print(f\"Schema: {reader.schema}\")\n", - " print(f\"Number of record batches: {reader.num_record_batches}\")\n", - "\n", - " total_images = 0\n", - " for i in range(reader.num_record_batches):\n", - " batch = reader.get_batch(i)\n", - " total_images += len(batch)\n", - " print(f\"Batch {i}: {len(batch)} images\")\n", - "\n", - " print(f\"Total images: {total_images}\")\n", - "\n", - " # Show first few filenames as example\n", - " if reader.num_record_batches > 0:\n", - " first_batch = reader.get_batch(0)\n", - " print(f\"First few 
filenames: {first_batch['filename'].slice(0, 5).to_pylist() if 'filename' in first_batch.schema.names else 'n/a (filename column not stored)'}\")\n",
-    "\n",
-    "\n",
-    "def reconstruct_image(arrow_file, row_index, output_path):\n",
-    "    \"\"\"\n",
-    "    Reconstruct and save an image from the Arrow file.\n",
-    "\n",
-    "    The writer above stores only raw JPEG bytes in the 'image_array'\n",
-    "    column, so images are addressed by row index and decoded with PIL.\n",
-    "\n",
-    "    Args:\n",
-    "        arrow_file (str): Path to Arrow IPC file\n",
-    "        row_index (int): Index of the image to reconstruct\n",
-    "        output_path (str): Path to save reconstructed image\n",
-    "    \"\"\"\n",
-    "    import io  # not part of the imports in the first cell\n",
-    "\n",
-    "    with open(arrow_file, \"rb\") as f:\n",
-    "        reader = ipc.open_file(f)\n",
-    "\n",
-    "        offset = 0\n",
-    "        for i in range(reader.num_record_batches):\n",
-    "            batch = reader.get_batch(i)\n",
-    "            if row_index < offset + len(batch):\n",
-    "                # Decode the JPEG bytes of the requested row\n",
-    "                jpg_bytes = batch[\"image_array\"][row_index - offset].as_py()\n",
-    "                img = Image.open(io.BytesIO(jpg_bytes))\n",
-    "                img.save(output_path)\n",
-    "                print(f\"Image reconstructed and saved to {output_path}\")\n",
-    "                return\n",
-    "            offset += len(batch)\n",
-    "\n",
-    "    print(f\"Row {row_index} not found in Arrow file\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# process_images_to_arrow(\"/mnt/nvme/nuplan/dataset/nuplan-v1.1/sensor/2021.07.01.20.35.47_veh-38_00016_00281/F0\", \"test.arrow\", 10)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "5",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# # Read IPC file\n",
-    "# import io\n",
-    "\n",
-    "# import time\n",
-    "\n",
-    "# # with pa.OSFile(\"test.arrow\", 'rb') as source:\n",
-    "# #     with ipc.open_file(source) as reader:\n",
-    "# #         table = reader.read_all()\n",
-    "\n",
-    "# with pa.ipc.open_file(\n",
-    "#     pa.memory_map(\"/home/daniel/d123_workspace/data/nuplan_private_test/2021.07.25.16.16.23_veh-26_02446_02589.arrow\")\n",
-    "# ) as reader:\n",
-    "#     # This doesn't load data into memory yet!\n",
-    "#     table = reader.read_all()\n",
-    "\n",
-    "\n",
-    "# print(len(table))\n",
-    "# start = time.time()\n",
-    "# # Extract JPG data\n",
-    "# jpg_data = table[\"front_cam_demo\"][500].as_py()\n",
-    "# read_image = Image.open(io.BytesIO(jpg_data))\n",
-    "\n",
-    "# # read_image = read_image.convert(\"RGB\")  # Ensure it's in RGB format\n",
-    "# read_image = np.array(read_image)\n",
-    "# print(read_image.dtype)\n",
-    "# print(f\"Image loaded in {time.time() - start:.4f} seconds\")\n",
-    "\n",
-    "# import matplotlib.pyplot as plt\n",
-    "\n",
-    "# plt.imshow(read_image)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "6",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from nuplan.database.nuplan_db_orm.nuplandb import NuPlanDB\n",
-    "\n",
-    "NUPLAN_DATA_ROOT = Path(os.environ[\"NUPLAN_DATA_ROOT\"])\n",
-    "NUPLAN_SENSOR_ROOT = Path(\"/media/nvme1/nuplan/dataset/nuplan-v1.1/sensor_blobs\")\n",
-    "\n",
-    "\n",
-    "log_path = \"/media/nvme1/nuplan/dataset/nuplan-v1.1/splits/private_test/2021.07.01.21.22.09_veh-14_00016_00656.db\"\n",
-    "log_db = NuPlanDB(NUPLAN_DATA_ROOT, str(log_path), None)\n",
-    "\n",
-    "\n",
-    "log_db.lidar_pc.filename"
-   ]
-  },
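Note: the writer above stores each image as raw JPEG bytes in a single binary `image_array` column. A minimal sketch of reading that layout back and decoding one frame, assuming pyarrow and PIL as imported in the first cell and the placeholder path `test.arrow`:

```python
import io

import pyarrow as pa
import pyarrow.ipc as ipc
from PIL import Image

# Memory-map the IPC file so record batches are not copied into RAM up front.
with pa.memory_map("test.arrow") as source:  # placeholder path
    with ipc.open_file(source) as reader:
        table = reader.read_all()

# Each row of "image_array" holds the raw JPEG bytes of one image.
jpg_bytes = table["image_array"][0].as_py()
image = Image.open(io.BytesIO(jpg_bytes))  # decoded on demand by PIL
print(image.size, image.mode)
```

-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7",
-   "metadata": {},
-   "outputs": [],
-   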
"source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [ - "import io\n", - "import pickle\n", - "from nuplan.database.nuplan_db.nuplan_scenario_queries import get_images_from_lidar_tokens, get_cameras\n", - "from nuplan.planning.scenario_builder.nuplan_db.nuplan_scenario import NuPlanScenario, CameraChannel, LidarChannel\n", - "\n", - "from nuplan.database.utils.pointclouds.lidar import LidarPointCloud, PointCloud\n", - "from pyquaternion import Quaternion\n", - "\n", - "\n", - "NUPLAN_DB_PATH = \"/media/nvme1/nuplan/dataset/nuplan-v1.1/splits/private_test\"\n", - "\n", - "\n", - "def get_log_cam_info(log):\n", - " log_name = log.logfile\n", - " log_file = os.path.join(NUPLAN_DB_PATH, log_name + \".db\")\n", - "\n", - " log_cam_infos = {}\n", - " for cam in get_cameras(log_file, [str(CameraChannel.F0.value)]):\n", - " intrinsics = np.array(pickle.loads(cam.intrinsic))\n", - " translation = np.array(pickle.loads(cam.translation))\n", - " rotation = np.array(pickle.loads(cam.rotation))\n", - " rotation = Quaternion(rotation).rotation_matrix\n", - " distortion = np.array(pickle.loads(cam.distortion))\n", - " c = dict(\n", - " intrinsic=intrinsics,\n", - " distortion=distortion,\n", - " translation=translation,\n", - " rotation=rotation,\n", - " )\n", - " log_cam_infos[cam.token] = c\n", - "\n", - " return log_cam_infos\n", - "\n", - "\n", - "images = []\n", - "for lidar_pc in log_db.lidar_pc[::2]:\n", - "\n", - " # front_image = get_images_from_lidar_tokens(log_path, [lidar_pc.token], [str(CameraChannel.F0.value)])\n", - " # parameters = get_log_cam_info(log_db.log)\n", - " # print(parameters)\n", - "\n", - " # images.append(list(front_image))\n", - " lidar_path = NUPLAN_SENSOR_ROOT / lidar_pc.filename\n", - " with open(lidar_path, \"rb\") as fp:\n", - " buffer = io.BytesIO(fp.read())\n", - "\n", - " pcd_data = buffer.read() # type: ignore\n", - "\n", - " # headers_list = []\n", - " # with io.BytesIO(pcd_data) as stream:\n", - " # while True:\n", - " # line = stream.readline().decode('utf8').strip()\n", - " # print(line)\n", - " # if line.startswith('#'):\n", - " # continue\n", - " # columns = line.split()\n", - " # key = columns[0].lower()\n", - " # val = columns[1:] if len(columns) > 2 else columns[1]\n", - " # headers_list.append((key, val))\n", - "\n", - " # if key == 'data':\n", - " # break\n", - "\n", - " pc = PointCloud.parse(pcd_data)\n", - "\n", - " # print(pc.header)\n", - " # pcd_data\n", - "\n", - " # break\n", - "\n", - " # print(isinstance(pcd_data, bytes))\n", - "\n", - " # lidar_point_cloud = LidarPointCloud.from_buffer(buffer, \"pcd\")\n", - " # print(lidar_point_cloud.points.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [ - "parameters[\"0872b6c896e85f9f\"][\"rotation\"]\n", - "\n", - "\n", - "# intrinsics = np.array([[1.545e03, 0.000e00, 9.600e02], [0.000e00, 1.545e03, 5.600e02], [0.000e00, 0.000e00, 1.000e00]])\n", - "# distortion = np.array([-0.356123, 0.172545, -0.00213, 0.000464, -0.05231])\n", - "# translation = np.array([ 1.66433035e+00, -1.32379618e-03, 1.57190200e+00])\n", - "# rotation = np.array(\n", - "# [\n", - "# [-0.00395669, -0.03969443, 0.99920403],\n", - "# [-0.99971496, -0.02336898, -0.00488707],\n", - "# [0.02354437, -0.99893856, -0.03959065],\n", - "# ]\n", - "# )\n", - "# distortion\n", - "\n", - "np.array(\n", - " [\n", - " [-0.00395669, -0.03969443, 0.99920403],\n", - " [-0.99971496, 
-0.02336898, -0.00488707],\n", - " [0.02354437, -0.99893856, -0.03959065],\n", - " ]\n", - ")\n", - "np.array(\n", - " [\n", - " [-0.00395669, -0.03969443, 0.99920403],\n", - " [-0.99971496, -0.02336898, -0.00488707],\n", - " [0.02354437, -0.99893856, -0.03959065],\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "\n", - "\n", - "for cam in get_cameras(log_path, [str(channel.value) for channel in CameraChannel]):\n", - " print(pickle.loads(cam.translation))\n", - " print(pickle.loads(cam.translation))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "11", - "metadata": {}, - "outputs": [], - "source": [ - "9.600e02, 1920 / 2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "12", - "metadata": {}, - "outputs": [], - "source": [ - "import cv2\n", - "\n", - "sensor_root = Path(\"/mnt/nvme/nuplan/dataset/nuplan-v1.1/sensor\")\n", - "\n", - "frames = []\n", - "for image in images:\n", - " if len(image) == 0:\n", - " continue\n", - "\n", - " jpg_name = image[0].filename_jpg\n", - " jpg_path = sensor_root / jpg_name\n", - " with open(jpg_path, \"rb\") as f:\n", - " jpg_data = f.read()\n", - " read_image = Image.open(io.BytesIO(jpg_data))\n", - " read_image = np.array(read_image)\n", - " # Convert RGB to BGR for OpenCV\n", - " frame = cv2.cvtColor(read_image, cv2.COLOR_RGB2BGR)\n", - " frames.append(frame)\n", - "\n", - "# Define video writer\n", - "height, width, _ = frames[0].shape\n", - "out = cv2.VideoWriter(f\"{log_db.name}.mp4\", cv2.VideoWriter_fourcc(*\"mp4v\"), 20, (width, height))\n", - "\n", - "for frame in frames:\n", - " out.write(frame)\n", - "out.release()\n", - "print(\"Video saved as output.mp4\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "13", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/scene_rendering.ipynb b/notebooks/scene_rendering.ipynb deleted file mode 100644 index dad53954..00000000 --- a/notebooks/scene_rendering.ipynb +++ /dev/null @@ -1,179 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n", - "from d123.dataset.scene.scene_filter import SceneFilter\n", - "\n", - "from d123.common.multithreading.worker_sequential import Sequential\n", - "\n", - "# from d123.common.multithreading.worker_ray import RayDistributed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "import os, psutil\n", - "\n", - "\n", - "def print_memory_usage():\n", - " process = psutil.Process(os.getpid())\n", - " memory_info = process.memory_info()\n", - " print(f\"Memory usage: {memory_info.rss / 1024 ** 2:.2f} MB\")\n", - "\n", - "split = \"nuplan_mini_val\"\n", - "# split = \"carla\"\n", - "\n", - "# log_names = [\"2021.06.07.12.54.00_veh-35_01843_02314\"]\n", - "scene_tokens = None\n", - "# scene_tokens = [\"2283aea39bc1505e\"]\n", - 
"log_names = None\n", - "\n", - "scene_filter = SceneFilter(\n", - " split_names=[split],\n", - " log_names=log_names,\n", - " scene_tokens=scene_tokens,\n", - " duration_s=15.1,\n", - " history_s=1.0,\n", - ")\n", - "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n", - "worker = Sequential()\n", - "# worker = RayDistributed()\n", - "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "\n", - "print(\"num scenes found\", len(scenes))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Tuple\n", - "from d123.common.datatypes.detection.detection import BoxDetection\n", - "from d123.common.datatypes.detection.detection_types import DYNAMIC_DETECTION_TYPES, STATIC_DETECTION_TYPES\n", - "from d123.common.geometry.base import StateSE2\n", - "from d123.common.geometry.transform.tranform_2d import translate_along_yaw\n", - "from d123.common.geometry.vector import Vector2D\n", - "from d123.common.visualization.matplotlib.observation import (\n", - " add_box_detections_to_ax,\n", - " add_default_map_on_ax,\n", - " add_ego_vehicle_to_ax,\n", - " add_traffic_lights_to_ax,\n", - ")\n", - "from d123.dataset.scene.abstract_scene import AbstractScene\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "\n", - "def _plot_scene_on_ax(ax: plt.Axes, scene: AbstractScene, iteration: int = 0, radius: float = 80) -> plt.Axes:\n", - "\n", - " ego_vehicle_state = scene.get_ego_state_at_iteration(iteration)\n", - " box_detections = scene.get_box_detections_at_iteration(iteration)\n", - " traffic_light_detections = scene.get_traffic_light_detections_at_iteration(iteration)\n", - " map_api = scene.map_api\n", - "\n", - " point_2d = ego_vehicle_state.bounding_box.center.state_se2.point_2d\n", - " add_default_map_on_ax(ax, map_api, point_2d, radius=radius, route_lane_group_ids=None)\n", - " # add_traffic_lights_to_ax(ax, traffic_light_detections, map_api)\n", - "\n", - " add_box_detections_to_ax(ax, box_detections)\n", - " add_ego_vehicle_to_ax(ax, ego_vehicle_state)\n", - "\n", - " ax.set_xlim(point_2d.x - radius, point_2d.x + radius)\n", - " ax.set_ylim(point_2d.y - radius, point_2d.y + radius)\n", - "\n", - " ax.set_aspect(\"equal\", adjustable=\"box\")\n", - " return ax\n", - "\n", - "\n", - "iteration = 20\n", - "scene = scenes[20]\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "_plot_scene_on_ax(ax, scene, iteration, 80)\n", - "\n", - "box_detections = scene.get_box_detections_at_iteration(iteration)\n", - "\n", - "\n", - "def _get_dxy(box_detection: BoxDetection) -> Tuple[float, float]:\n", - " \"\"\"Get the change in x and y coordinates from the box detection.\"\"\"\n", - " center = box_detection.center.state_se2\n", - " # endpoint_x = translate_along_yaw(center, Vector2D(abs(box_detection.velocity.x), 0.0))\n", - " # endpoint_y = translate_along_yaw(center, Vector2D(0.0, abs(box_detection.velocity.y)))\n", - " # print(box_detection.velocity.x, box_detection.velocity.y)\n", - "\n", - " endpoint_x = StateSE2(center.x + box_detection.velocity.x, center.y, center.yaw)\n", - " endpoint_y = StateSE2(center.x, center.y + box_detection.velocity.y, center.yaw)\n", - "\n", - " return endpoint_x, endpoint_y\n", - "\n", - "\n", - "for box_detection in box_detections:\n", - " if box_detection.metadata.detection_type in STATIC_DETECTION_TYPES:\n", - " continue\n", - " endpoint_x, endpoint_y = _get_dxy(box_detection)\n", - " ax.annotate(\n", - " \"\",\n", - " 
xytext=(box_detection.center.state_se2.point_2d.x, box_detection.center.state_se2.point_2d.y),\n",
-    "        xy=(endpoint_x.x, endpoint_x.y),\n",
-    "        arrowprops=dict(arrowstyle=\"->\"),\n",
-    "    )\n",
-    "    ax.annotate(\n",
-    "        \"\",\n",
-    "        xytext=(box_detection.center.state_se2.point_2d.x, box_detection.center.state_se2.point_2d.y),\n",
-    "        xy=(endpoint_y.x, endpoint_y.y),\n",
-    "        arrowprops=dict(arrowstyle=\"->\"),\n",
-    "    )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "3",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4",
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "d123",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.11"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/notebooks/scene_sensor_loading.ipynb b/notebooks/scene_sensor_loading.ipynb
deleted file mode 100644
index ed143de1..00000000
--- a/notebooks/scene_sensor_loading.ipynb
+++ /dev/null
@@ -1,123 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import matplotlib.pyplot as plt\n",
-    "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n",
-    "from d123.dataset.scene.scene_filter import SceneFilter\n",
-    "\n",
-    "from d123.common.multithreading.worker_sequential import Sequential\n",
-    "# from d123.common.multithreading.worker_ray import RayDistributed\n",
-    "\n",
-    "\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "1",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import os, psutil\n",
-    "\n",
-    "\n",
-    "def print_memory_usage():\n",
-    "    process = psutil.Process(os.getpid())\n",
-    "    memory_info = process.memory_info()\n",
-    "    print(f\"Memory usage: {memory_info.rss / 1024 ** 2:.2f} MB\")\n",
-    "\n",
-    "\n",
-    "split = \"nuplan_private_test\"\n",
-    "\n",
-    "log_names = [\"2021.07.01.21.22.09_veh-14_00016_00656\"]\n",
-    "scene_tokens = None\n",
-    "log_names = None\n",
-    "\n",
-    "scene_filter = SceneFilter(\n",
-    "    split_names=[split],\n",
-    "    log_names=log_names,\n",
-    "    scene_tokens=scene_tokens,\n",
-    "    duration_s=15.1,\n",
-    "    history_s=1.0,\n",
-    "    timestamp_threshold_s=15.0,\n",
-    ")\n",
-    "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n",
-    "worker = Sequential()\n",
-    "# worker = RayDistributed()\n",
-    "scenes = scene_builder.get_scenes(scene_filter, worker)\n",
-    "\n",
-    "print(\"num scenes found\", len(scenes))"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 
null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/smarty/smart_feature_testing.ipynb b/notebooks/smarty/smart_feature_testing.ipynb deleted file mode 100644 index c134baf9..00000000 --- a/notebooks/smarty/smart_feature_testing.ipynb +++ /dev/null @@ -1,149 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.training.models.sim_agent.smart.smart import SMART\n", - "from d123.training.models.sim_agent.smart.smart_config import SMARTConfig" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "config = SMARTConfig()\n", - "smart_model = SMART(config)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from d123.dataset.scene.arrow_scene import ArrowScene\n", - "from d123.common.visualization.matplotlib.plots import plot_scene_at_iteration\n", - "\n", - "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n", - "from d123.dataset.scene.scene_filter import SceneFilter\n", - "\n", - "from d123.common.multithreading.worker_sequential import Sequential\n", - "\n", - "\n", - "log_names = None\n", - "split = \"nuplan_mini_val\"\n", - "scene_filter = SceneFilter(\n", - " split_names=[split], log_names=log_names, timestamp_threshold_s=8.0, duration_s=8.0, history_s=1.0\n", - ")\n", - "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n", - "worker = Sequential()\n", - "# worker = RayDistributed()\n", - "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "scene = scenes[23]\n", - "plot_scene_at_iteration(scene, iteration=0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.training.feature_builder.smart_feature_builder_ import SMARTFeatureBuilder\n", - "\n", - "feature_builder= SMARTFeatureBuilder()\n", - "\n", - "feature_dict = feature_builder.build_features(scene)\n", - "feature_dict[\"pt_token\"].keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "from matplotlib import pyplot as plt\n", - "\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "\n", - "traj_pos = feature_dict[\"map_save\"][\"traj_pos\"]\n", - "pl_type = feature_dict[\"pt_token\"][\"pl_type\"]\n", - "print(traj_pos.shape)\n", - "for i in range(\n", - " traj_pos.shape[0],\n", - "):\n", - " if pl_type[i] == 1:\n", - " ax.plot(traj_pos[i, :, 0], traj_pos[i, :, 1])\n", - "\n", - "ax.set_title(\"map_save/traj_pos\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "\n", - "# for boundary in boundaries:\n", - "# ax.plot(boundary[:, 0], boundary[:, 1], color=\"blue\", linewidth=1.0, alpha=1.0)\n", - "for map_segment 
in map_segments:\n", - " ax.plot(map_segment[:, 0], map_segment[:, 1], linewidth=1.0, alpha=1.0)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "asim", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.21" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/smarty/smart_rollout.ipynb b/notebooks/smarty/smart_rollout.ipynb deleted file mode 100644 index 3f3aac67..00000000 --- a/notebooks/smarty/smart_rollout.ipynb +++ /dev/null @@ -1,184 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.training.models.sim_agent.smart.smart import SMART\n", - "from d123.training.models.sim_agent.smart.smart_config import SMARTConfig\n", - "\n", - "from pathlib import Path\n", - "from d123.dataset.scene.arrow_scene import ArrowScene\n", - "from d123.common.visualization.matplotlib.plots import plot_scene_at_iteration\n", - "\n", - "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n", - "from d123.dataset.scene.scene_filter import SceneFilter\n", - "\n", - "from d123.common.multithreading.worker_sequential import Sequential" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "log_names = None\n", - "split = \"nuplan_mini_val\"\n", - "scene_filter = SceneFilter(\n", - " split_names=[split],\n", - " log_names=log_names,\n", - " timestamp_threshold_s=8.0,\n", - " duration_s=8.1,\n", - " history_s=1.0,\n", - ")\n", - "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n", - "worker = Sequential()\n", - "# worker = RayDistributed()\n", - "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "scene: ArrowScene = scenes[100]\n", - "plot_scene_at_iteration(scene, iteration=0)\n", - "print(scene.get_number_of_iterations(), scene.get_number_of_history_iterations())" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.training.models.sim_agent.smart.smart import SMART\n", - "from d123.training.models.sim_agent.smart.smart_config import SMARTConfig\n", - "\n", - "\n", - "checkpoint_path = Path(\"/home/daniel/epoch_027.ckpt\")\n", - "config = SMARTConfig(\n", - " hidden_dim=64,\n", - " num_freq_bands=64,\n", - " num_heads=4,\n", - " head_dim=8,\n", - " dropout=0.1,\n", - " hist_drop_prob=0.1,\n", - " num_map_layers=2,\n", - " num_agent_layers=4,\n", - " pl2pl_radius=10,\n", - " pl2a_radius=20,\n", - " a2a_radius=20,\n", - " time_span=20,\n", - " num_historical_steps=11,\n", - " num_future_steps=80,\n", - ")\n", - "\n", - "smart_model = SMART.load_from_checkpoint(checkpoint_path, config=config, map_location=\"cpu\")\n", - "smart_model.eval()\n", - "# print(smart_model.training)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "import torch\n", - "from torch_geometric.data import HeteroData\n", - "from 
d123.training.feature_builder.smart_feature_builder import SMARTFeatureBuilder\n", - "from d123.training.models.sim_agent.smart.datamodules.target_builder import _numpy_dict_to_torch\n", - "\n", - "feature_builder = SMARTFeatureBuilder()\n", - "features = feature_builder.build_features(scene)\n", - "# features[\"agent\"][\"position\"][:, :40] = 0.0\n", - "_numpy_dict_to_torch(features)\n", - "\n", - "\n", - "torch_features = HeteroData(features)\n", - "\n", - "from torch_geometric.loader import DataLoader\n", - "\n", - "# If you have a dataset\n", - "dataset = [torch_features] # List with single sample\n", - "loader = DataLoader(dataset, batch_size=1, shuffle=False)\n", - "with torch.no_grad():\n", - " for batch in loader:\n", - " pred_traj, pred_z, pred_head = smart_model.test_step(batch, 0)\n", - " break\n", - "\n", - "\n", - "# features[\"agent\"][\"valid_mask\"].sum(-1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "array = pred_traj.numpy()\n", - "\n", - "array.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "from matplotlib import pyplot as plt\n", - "\n", - "from d123.common.geometry.transform.se2_array import convert_relative_to_absolute_point_2d_array\n", - "\n", - "\n", - "origin = scene.get_ego_state_at_iteration(0).bounding_box.center.state_se2\n", - "abs_array = convert_relative_to_absolute_point_2d_array(origin, array)\n", - "\n", - "\n", - "for roll_out in range(abs_array.shape[1]):\n", - " # fig, ax = plt.subplots(figsize=(10, 10))\n", - " fig, ax = plot_scene_at_iteration(scene, iteration=0)\n", - " for i in range(abs_array.shape[0]):\n", - " ax.plot(abs_array[i, roll_out, :, 0], abs_array[i, roll_out, :, 1], label=f\"Agent {i}\", zorder=15, linewidth=3, alpha=0.5)\n", - " # ax.set_aspect('equal', adjustable='box')\n", - " plt.show()\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/smarty/smart_testing.ipynb b/notebooks/smarty/smart_testing.ipynb deleted file mode 100644 index 765325f9..00000000 --- a/notebooks/smarty/smart_testing.ipynb +++ /dev/null @@ -1,307 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.training.models.sim_agent.smart.smart import SMART\n", - "from d123.training.models.sim_agent.smart.smart_config import SMARTConfig\n", - "\n", - "from d123.common.visualization.color.color import TAB_10\n", - "\n", - "import torch\n", - "\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "config = SMARTConfig()\n", - "smart_model = SMART(config)\n", - "smart_model" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "import 
pickle\n", - "\n", - "training_path = Path(\"/home/daniel/nuplan_cache/training\")\n", - "pickle_paths = list(training_path.iterdir())\n", - "\n", - "idx = 1\n", - "\n", - "with open(pickle_paths[idx], \"rb\") as f:\n", - " data = pickle.load(f)\n", - "data\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "for key in data.keys():\n", - " print(f\"{key}:\")\n", - " try:\n", - " for part_key, part_data in data[key].items():\n", - " if isinstance(part_data, (torch.Tensor, np.ndarray)):\n", - " print(f\" {part_key}:\")\n", - " print(f\" Tensor: shape: {list(part_data.shape)}, dtype: {part_data.dtype}\")\n", - " else:\n", - " print(f\" {part_key}: {type(part_data)} - {part_data}\")\n", - " print(f\" {type(part_data)} - {part_data}\")\n", - "\n", - " except:\n", - " print(f\" {data[key]}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "\"\"\"\n", - "map_save:\n", - " traj_pos:\n", - " Tensor: shape: [3013, 3, 2], dtype: torch.float32\n", - " traj_theta:\n", - " Tensor: shape: [3013], dtype: torch.float32\n", - "pt_token:\n", - " type:\n", - " Tensor: shape: [3013], dtype: torch.uint8\n", - " pl_type:\n", - " Tensor: shape: [3013], dtype: torch.uint8\n", - " light_type:\n", - " Tensor: shape: [3013], dtype: torch.uint8\n", - " num_nodes: - 3013\n", - " - 3013\n", - "agent:\n", - " num_nodes: - 48\n", - " - 48\n", - " valid_mask:\n", - " Tensor: shape: [48, 91], dtype: torch.bool\n", - " role:\n", - " Tensor: shape: [48, 3], dtype: torch.bool\n", - " id:\n", - " Tensor: shape: [48], dtype: torch.int64\n", - " type:\n", - " Tensor: shape: [48], dtype: torch.uint8\n", - " position:\n", - " Tensor: shape: [48, 91, 3], dtype: torch.float32\n", - " heading:\n", - " Tensor: shape: [48, 91], dtype: torch.float32\n", - " velocity:\n", - " Tensor: shape: [48, 91, 2], dtype: torch.float32\n", - " shape:\n", - " Tensor: shape: [48, 3], dtype: torch.float32\n", - "scenario_id:\n", - " 5e1ba6c841ae6ccd\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "# 1. 
map_save:\n", - "# traj_pos:\n", - "# Tensor: shape: [3013, 3, 2], dtype: torch.float32\n", - "# traj_theta:\n", - "# Tensor: shape: [3013], dtype: torch.float32\n", - "\n", - "\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "\n", - "traj_pos = data['map_save']['traj_pos']\n", - "for i in range(traj_pos.shape[0], ):\n", - " ax.plot(traj_pos[i, :, 0], traj_pos[i, :, 1])\n", - "\n", - "ax.set_title(\"map_save/traj_pos\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "distance = np.linalg.norm(traj_pos[:, :-1] - traj_pos[:, 1:], axis=-1)\n", - "\n", - "# min_x, min_y = np.min(traj_pos[:, :, 0]), np.min(traj_pos[:, :, 1])\n", - "# max_x, max_y = np.max(traj_pos[:, :, 0]), np.max(traj_pos[:, :, 1])\n", - "# print(np.abs(min_y-max_y), np.abs(min_x-max_x))\n", - "\n", - "\n", - "plt.hist(distance)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "# pt_token:\n", - "# type:\n", - "# Tensor: shape: [3013], dtype: torch.uint8\n", - "# pl_type:\n", - "# Tensor: shape: [3013], dtype: torch.uint8\n", - "# light_type:\n", - "# Tensor: shape: [3013], dtype: torch.uint8\n", - "# num_nodes: - 3013\n", - "# - 3013\n", - "\n", - "from d123.common.visualization.matplotlib.utils import add_non_repeating_legend_to_ax\n", - "\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "traj_pos = data[\"map_save\"][\"traj_pos\"]\n", - "type = data[\"pt_token\"][\"type\"]\n", - "for i in range(traj_pos.shape[0]):\n", - " ax.plot(traj_pos[i, :, 0], traj_pos[i, :, 1], color=TAB_10[type[i] % len(TAB_10)].hex, label=f\"type: {type[i]}\")\n", - "ax.set_title(f\"map_save/traj_pos with type {set(type.tolist())}\")\n", - "add_non_repeating_legend_to_ax(ax)\n", - "plt.show()\n", - "\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "traj_pos = data[\"map_save\"][\"traj_pos\"]\n", - "pl_type = data[\"pt_token\"][\"pl_type\"]\n", - "for i in range(traj_pos.shape[0]):\n", - " ax.plot(traj_pos[i, :, 0], traj_pos[i, :, 1], color=TAB_10[pl_type[i] % len(TAB_10)].hex, label=f\"pl_type: {pl_type[i]}\")\n", - "ax.set_title(f\"map_save/traj_pos with pl_type {set(pl_type.tolist())}\")\n", - "add_non_repeating_legend_to_ax(ax)\n", - "plt.show()\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "traj_pos = data[\"map_save\"][\"traj_pos\"]\n", - "light_type = data[\"pt_token\"][\"light_type\"]\n", - "for i in range(traj_pos.shape[0]):\n", - " ax.plot(traj_pos[i, :, 0], traj_pos[i, :, 1], color=TAB_10[light_type[i] % len(TAB_10)].hex, label=f\"light_type: {light_type[i]}\")\n", - "ax.set_title(f\"map_save/traj_pos with light_type {set(light_type.tolist())}\")\n", - "add_non_repeating_legend_to_ax(ax)\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [ - "# agent:\n", - "# num_nodes: - 48\n", - "# - 48\n", - "# valid_mask:\n", - "# Tensor: shape: [48, 91], dtype: torch.bool\n", - "# role:\n", - "# Tensor: shape: [48, 3], dtype: torch.bool\n", - "# id:\n", - "# Tensor: shape: [48], dtype: torch.int64\n", - "# type:\n", - "# Tensor: shape: [48], dtype: torch.uint8\n", - "# position:\n", - "# Tensor: shape: [48, 91, 3], dtype: torch.float32\n", - "# heading:\n", - "# Tensor: shape: [48, 91], dtype: torch.float32\n", - "# velocity:\n", - "# Tensor: shape: [48, 91, 2], dtype: torch.float32\n", - "# shape:\n", - "# Tensor: shape: [48, 3], dtype: 
torch.float32\n", - "\n", - "num_nodes = data[\"agent\"][\"num_nodes\"]\n", - "valid_mask = data[\"agent\"][\"valid_mask\"]\n", - "role = data[\"agent\"][\"role\"].argmax(axis=-1)\n", - "id = data[\"agent\"][\"id\"]\n", - "type = data[\"agent\"][\"type\"]\n", - "position = data[\"agent\"][\"position\"]\n", - "heading = data[\"agent\"][\"heading\"]\n", - "velocity = data[\"agent\"][\"velocity\"]\n", - "shape = data[\"agent\"][\"shape\"]\n", - "\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "for i in range(num_nodes):\n", - " if type[i] == 1:\n", - " continue\n", - "\n", - " position_mask = valid_mask[i]\n", - " ax.plot(\n", - " position[i, position_mask, 0],\n", - " position[i, position_mask, 1],\n", - " # color=TAB_10[type[i] % len(TAB_10)].hex,\n", - " label=f\"type: {type[i]}, id: {id[i]}, role: {role[i]}\",\n", - " )\n", - "\n", - "# ax.legend()\n", - "ax.set_aspect('equal', adjustable='box')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "asim", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.21" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/smarty/smart_tokens.ipynb b/notebooks/smarty/smart_tokens.ipynb deleted file mode 100644 index c2ff3060..00000000 --- a/notebooks/smarty/smart_tokens.ipynb +++ /dev/null @@ -1,172 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.training.models.sim_agent.smart.smart import SMART\n", - "from d123.training.models.sim_agent.smart.smart_config import SMARTConfig" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "config = SMARTConfig()\n", - "SMART(config)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "\n", - "path = \"/home/daniel/d123_workspace/d123/d123/training/models/sim_agent/smart/tokens/cluster_frame_5_2048.pkl\"\n", - "\n", - "with open(path, \"rb\") as f:\n", - " data = pickle.load(f)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "# dict_keys(['token', 'traj', 'token_all'])\n", - "data.keys()\n", - "\n", - "# dict_keys(['veh', 'ped', 'cyc'])\n", - "data[\"token_all\"][\"veh\"].shape\n", - "\n", - "vehicle_tokens = data[\"token_all\"][\"veh\"]\n", - "vehicle_tokens.shape # (2048, 6, 4, 2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "import matplotlib.pyplot as plt\n", - "\n", - "from d123.common.visualization.color.color import TAB_10\n", - "fix, ax = plt.subplots(1,1,figsize=(10, 5))\n", - "\n", - "\n", - "\n", - "for token_idx, token in enumerate(vehicle_tokens[:3]):\n", - " for timestep in token:\n", - " ax.plot(timestep[:, 1], timestep[:, 0], marker='o', markersize=2, linestyle='-', color=TAB_10[token_idx % len(TAB_10)].hex)\n", - "\n", - " \n", 
- "\n", - "\n", - "ax.set_aspect('equal', adjustable='box')\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "\n", - "path = \"/home/daniel/d123_workspace/d123/d123/training/models/sim_agent/smart/tokens/map_traj_token5.pkl\"\n", - "\n", - "with open(path, \"rb\") as f:\n", - " data = pickle.load(f)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "data.keys() # 'traj_src', 'sample_pt'\n", - "\n", - "import numpy as np \n", - "\n", - "data[\"traj_src\"].shape\n", - "\n", - "\n", - "fix, ax = plt.subplots(1, 1, figsize=(10, 5))\n", - "\n", - "\n", - "traj_tokens = data[\"traj_src\"][np.random.choice(len(data[\"traj_src\"]), size=50, replace=False)]\n", - "\n", - "for token_idx, token in enumerate(traj_tokens):\n", - " # for timestep in token:\n", - " ax.plot(\n", - " token[:, 1], token[:, 0], marker=\"o\", markersize=2, linestyle=\"-\", color=TAB_10[token_idx % len(TAB_10)].hex\n", - " )\n", - "ax.set_aspect(\"equal\", adjustable=\"box\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "data[\"traj_src\"].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "asim", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.21" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/tools/merge_videos.ipynb b/notebooks/tools/merge_videos.ipynb deleted file mode 100644 index 257bb8ad..00000000 --- a/notebooks/tools/merge_videos.ipynb +++ /dev/null @@ -1,137 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "import subprocess\n", - "import os\n", - "import tempfile" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "\n", - "\n", - "def merge_mp4_with_ffmpeg(input_files, output_file):\n", - " \"\"\"\n", - " Merge MP4 files using FFmpeg directly.\n", - " Requires FFmpeg to be installed on the system.\n", - " \n", - " Args:\n", - " input_files (list): List of paths to input MP4 files\n", - " output_file (str): Path for the output merged MP4 file\n", - " \"\"\"\n", - " try:\n", - " # Create a temporary file list for FFmpeg\n", - " with tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) as f:\n", - " for file in input_files:\n", - " if os.path.exists(file):\n", - " # Escape single quotes and write to file list\n", - " escaped_file = file.replace(\"'\", \"'\\\"'\\\"'\")\n", - " f.write(f\"file '{escaped_file}'\\n\")\n", - " else:\n", - " print(f\"Warning: File not found: {file}\")\n", - " \n", - " filelist_path = f.name\n", - " \n", - " # FFmpeg command to concatenate videos\n", - " cmd = [\n", - " 'ffmpeg',\n", - " '-f', 'concat',\n", - " '-safe', '0',\n", - " '-i', filelist_path,\n", - " '-c', 'copy', # Copy 
streams without re-encoding (faster)\n", - " '-y', # Overwrite output file if it exists\n", - " output_file\n", - " ]\n", - " \n", - " print(\"Merging videos with FFmpeg...\")\n", - " print(f\"Command: {' '.join(cmd)}\")\n", - " \n", - " # Run FFmpeg\n", - " result = subprocess.run(cmd, capture_output=True, text=True)\n", - " \n", - " # Clean up temporary file\n", - " os.unlink(filelist_path)\n", - " \n", - " if result.returncode == 0:\n", - " print(f\"Successfully merged videos into {output_file}\")\n", - " return True\n", - " else:\n", - " print(f\"FFmpeg error: {result.stderr}\")\n", - " return False\n", - " \n", - " except FileNotFoundError:\n", - " print(\"Error: FFmpeg not found. Please install FFmpeg first.\")\n", - " print(\"Download from: https://ffmpeg.org/download.html\")\n", - " return False\n", - " except Exception as e:\n", - " print(f\"Error during merging: {str(e)}\")\n", - " return False\n", - "\n", - "def check_ffmpeg():\n", - " \"\"\"Check if FFmpeg is available on the system.\"\"\"\n", - " try:\n", - " result = subprocess.run(['ffmpeg', '-version'], capture_output=True)\n", - " return result.returncode == 0\n", - " except FileNotFoundError:\n", - " return False\n", - "\n", - "# List your MP4 files in the order you want them merged\n", - "video_folder = Path(\"/home/daniel/d123_logs_videos/wopd_train\")\n", - "video_files = [str(file) for file in video_folder.glob(\"*.mp4\") if file.is_file()]\n", - "\n", - "\n", - "output_filename = str(video_folder / \"merged_video.mp4\")\n", - "\n", - "# Merge the videos\n", - "success = merge_mp4_with_ffmpeg(video_files, output_filename)\n", - "\n", - "if success:\n", - " print(\"Video merging completed successfully!\")\n", - "else:\n", - " print(\"Video merging failed!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/tools/plot_map_sizes.ipynb b/notebooks/tools/plot_map_sizes.ipynb deleted file mode 100644 index 4bea65c7..00000000 --- a/notebooks/tools/plot_map_sizes.ipynb +++ /dev/null @@ -1,67 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "folder_path = \"/home/daniel/d123_workspace/data/maps\" # Replace with your folder path\n", - "\n", - "# Get all files in the folder (not subfolders)\n", - "files = sorted([f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))])\n", - "sizes = [os.path.getsize(os.path.join(folder_path, f)) / 1024**2 for f in files]\n", - "\n", - "plt.figure(figsize=(10, 6))\n", - "plt.bar(files, sizes)\n", - "plt.ylabel(\"File Size (MB)\")\n", - "plt.title(\"Map Sizes (CARLA/nuPlan)\")\n", - "plt.xticks(rotation=45, ha=\"right\")\n", - "plt.tight_layout()\n", - "plt.show()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - 
"source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/viz/bev_matplotlib_prediction.ipynb b/notebooks/viz/bev_matplotlib_prediction.ipynb deleted file mode 100644 index 041daa72..00000000 --- a/notebooks/viz/bev_matplotlib_prediction.ipynb +++ /dev/null @@ -1,246 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n", - "from d123.dataset.scene.scene_filter import SceneFilter\n", - "\n", - "from d123.common.multithreading.worker_sequential import Sequential\n", - "from d123.common.datatypes.sensor.camera import CameraType" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "# split = \"nuplan_private_test\"\n", - "# log_names = [\"2021.09.29.17.35.58_veh-44_00066_00432\"]\n", - "\n", - "\n", - "# splits = [\"wopd_train\"]\n", - "# splits = [\"carla\"]\n", - "# splits = [\"nuplan_private_test\"]\n", - "splits = [\"av2-sensor-mini_train\"]\n", - "# log_names = None\n", - "\n", - "\n", - "log_names = None\n", - "scene_tokens = None\n", - "\n", - "scene_filter = SceneFilter(\n", - " split_names=splits,\n", - " log_names=log_names,\n", - " scene_tokens=scene_tokens,\n", - " duration_s=8.0,\n", - " history_s=0.0,\n", - " timestamp_threshold_s=4.0,\n", - " shuffle=True,\n", - " # camera_types=[CameraType.CAM_F0],\n", - ")\n", - "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n", - "worker = Sequential()\n", - "# worker = RayDistributed()\n", - "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "\n", - "print(f\"Found {len(scenes)} scenes\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from typing import List, Optional, Tuple\n", - "import matplotlib.pyplot as plt\n", - "import numpy as np\n", - "from d123.common.geometry.base import Point2D\n", - "from d123.common.visualization.color.color import BLACK, DARK_GREY, DARKER_GREY, LIGHT_GREY, NEW_TAB_10, TAB_10\n", - "from d123.common.visualization.color.config import PlotConfig\n", - "from d123.common.visualization.color.default import CENTERLINE_CONFIG, MAP_SURFACE_CONFIG, ROUTE_CONFIG\n", - "from d123.common.visualization.matplotlib.observation import (\n", - " add_box_detections_to_ax,\n", - " add_box_future_detections_to_ax,\n", - " add_default_map_on_ax,\n", - " add_ego_vehicle_to_ax,\n", - " add_traffic_lights_to_ax,\n", - ")\n", - "from d123.common.visualization.matplotlib.utils import add_shapely_linestring_to_ax, add_shapely_polygon_to_ax\n", - "from d123.dataset.maps.abstract_map import AbstractMap\n", - "from d123.dataset.maps.abstract_map_objects import AbstractLane, AbstractLaneGroup\n", - "from d123.dataset.maps.map_datatypes import MapLayer\n", - "from d123.dataset.scene.abstract_scene import AbstractScene\n", - "\n", - "\n", - "\n", - "def _plot_scene_on_ax(ax: plt.Axes, scene: AbstractScene, iteration: int = 0, radius: float = 80) -> plt.Axes:\n", - "\n", - " 
ego_vehicle_state = scene.get_ego_state_at_iteration(iteration)\n", - " box_detections = scene.get_box_detections_at_iteration(iteration)\n", - "\n", - " point_2d = ego_vehicle_state.bounding_box.center.state_se2.point_2d\n", - " # add_debug_map_on_ax(ax, scene.map_api, point_2d, radius=radiuss, route_lane_group_ids=None)\n", - " add_default_map_on_ax(ax, scene.map_api, point_2d, radius=radius, route_lane_group_ids=None)\n", - " # add_traffic_lights_to_ax(ax, traffic_light_detections, scene.map_api)\n", - "\n", - " add_box_future_detections_to_ax(ax, scene, iteration=iteration)\n", - " add_box_detections_to_ax(ax, box_detections)\n", - " add_ego_vehicle_to_ax(ax, ego_vehicle_state)\n", - "\n", - " zoom = 1.0\n", - " ax.set_xlim(point_2d.x - radius * zoom, point_2d.x + radius * zoom)\n", - " ax.set_ylim(point_2d.y - radius * zoom, point_2d.y + radius * zoom)\n", - "\n", - " ax.set_aspect(\"equal\", adjustable=\"box\")\n", - " return ax\n", - "\n", - "\n", - "def plot_scene_at_iteration(\n", - " scene: AbstractScene, iteration: int = 0, radius: float = 80\n", - ") -> Tuple[plt.Figure, plt.Axes]:\n", - "\n", - " size = 10\n", - "\n", - " fig, ax = plt.subplots(figsize=(size, size))\n", - " _plot_scene_on_ax(ax, scene, iteration, radius)\n", - " return fig, ax\n", - "\n", - "\n", - "scene_index = 0\n", - "\n", - "for i, scene in enumerate(scenes):\n", - "\n", - " iteration = 0\n", - " fig, ax = plot_scene_at_iteration(scenes[i], iteration=iteration, radius=60)\n", - " Path(f\"/home/daniel/examples/{splits[0]}/\").mkdir(exist_ok=True, parents=True)\n", - " ax.set_xticks([])\n", - " ax.set_yticks([])\n", - " fig.tight_layout()\n", - " fig.savefig(\n", - " f\"/home/daniel/examples/{splits[0]}/{scene.log_name}_{i}.pdf\",\n", - " dpi=300,\n", - " bbox_inches=\"tight\",\n", - " )\n", - "\n", - "# camera = scenes[scene_index].get_camera_at_iteration(\n", - "# iteration=iteration, camera_type=CameraType.CAM_F0\n", - "# )\n", - "\n", - "# plt.imshow(camera.image, cmap=\"gray\", vmin=0, vmax=255)\n", - "# # fig.savefig(f\"/home/daniel/scene_{scene_index}_iteration_1.pdf\", dpi=300, bbox_inches=\"tight\")\n", - "\n", - "# scenes[scene_index].log_name" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "scene = scenes[scene_index]\n", - "\n", - "\n", - "scene.get_camera_at_iteration(camera_type=CameraType.CAM_F0, iteration=0)\n", - "\n", - "\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.conversion.map.road_edge.road_edge_2d_utils import get_road_edge_linear_rings\n", - "from d123.dataset.maps.gpkg.gpkg_map import GPKGMap\n", - "\n", - "\n", - "map_api: GPKGMap = scenes[scene_index].map_api\n", - "\n", - "drivable_polygons = map_api._gpd_dataframes[MapLayer.LANE]\n", - "\n", - "\n", - "\n", - "linear_rings = get_road_edge_linear_rings(drivable_polygons.geometry.tolist())\n", - "rings_lengths = [ring.length for ring in linear_rings]\n", - "max_length_idx = np.argmax(rings_lengths)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "size = 16\n", - "fig, ax = plt.subplots(figsize=(size, size))\n", - "\n", - "for idx, ring in enumerate(linear_rings):\n", - " if idx == max_length_idx:\n", - " ax.plot(*ring.xy, color=\"black\", linewidth=2, label=\"Longest Road Edge\")\n", - " else:\n", - " ax.plot(*ring.xy)\n", - "\n", - "\n", - "ax.set_aspect(\"equal\", adjustable=\"box\")" - ] - }, - { - "cell_type": "code", - 
"execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/viz/camera_matplotlib.ipynb b/notebooks/viz/camera_matplotlib.ipynb deleted file mode 100644 index bbd6221d..00000000 --- a/notebooks/viz/camera_matplotlib.ipynb +++ /dev/null @@ -1,397 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Tuple\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "from d123.common.multithreading.worker_sequential import Sequential\n", - "\n", - "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n", - "from d123.dataset.scene.scene_filter import SceneFilter\n", - "from d123.dataset.scene.abstract_scene import AbstractScene\n", - "\n", - "from typing import Dict\n", - "from d123.common.datatypes.sensor.camera import CameraType\n", - "from d123.common.visualization.matplotlib.camera import add_camera_ax\n", - "from d123.common.visualization.matplotlib.camera import add_box_detections_to_camera_ax" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "# split = \"nuplan_private_test\"\n", - "# log_names = [\"2021.09.29.17.35.58_veh-44_00066_00432\"]\n", - "\n", - "\n", - "\n", - "\n", - "# splits = [\"carla\"]\n", - "# splits = [\"nuplan_private_test\"]\n", - "# splits = [\"wopd_train\"]\n", - "splits = [\"av2-sensor-mini_train\"]\n", - "# log_names = None\n", - "\n", - "\n", - "\n", - "# splits = [\"nuplan_private_test\"]\n", - "log_names = None\n", - "\n", - "scene_tokens = None\n", - "\n", - "scene_filter = SceneFilter(\n", - " split_names=splits,\n", - " log_names=log_names,\n", - " scene_tokens=scene_tokens,\n", - " duration_s=15,\n", - " history_s=0.0,\n", - " timestamp_threshold_s=15,\n", - " shuffle=False,\n", - " camera_types=[CameraType.CAM_F0],\n", - ")\n", - "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n", - "worker = Sequential()\n", - "# worker = RayDistributed()\n", - "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "\n", - "print(f\"Found {len(scenes)} scenes\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "\n", - "\n", - "def plot_cameras_frame(scene: AbstractScene, iteration: int) -> Tuple[plt.Figure, plt.Axes]:\n", - " \"\"\"\n", - " Plots 8x cameras and birds-eye-view visualization in 3x3 grid\n", - " :param scene: navsim scene dataclass\n", - " :param frame_idx: index of selected frame\n", - " :return: figure and ax object of matplotlib\n", - " \"\"\"\n", - " scale = 2\n", - " fig, ax = plt.subplots(2, 3, figsize=(scale * 11.5, 
scale * 4.3))\n", - "\n", - " camera_ax_assignment: Dict[CameraType, plt.Axes] = {\n", - " CameraType.CAM_L0: ax[0, 0],\n", - " CameraType.CAM_F0: ax[0, 1],\n", - " CameraType.CAM_R0: ax[0, 2],\n", - " CameraType.CAM_L2: ax[1, 0],\n", - " CameraType.CAM_B0: ax[1, 1],\n", - " CameraType.CAM_R2: ax[1, 2],\n", - " }\n", - "\n", - " for camera_type, camera_ax in camera_ax_assignment.items():\n", - " assert camera_type in scene.available_camera_types\n", - " camera = scene.get_camera_at_iteration(iteration, camera_type)\n", - " if camera is not None:\n", - " add_camera_ax(camera_ax, camera)\n", - "\n", - " fig.tight_layout()\n", - " fig.subplots_adjust(wspace=0.01, hspace=0.01, left=0.01, right=0.99, top=0.99, bottom=0.01)\n", - "\n", - " return fig, ax\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "def _add_camera_with_detections(ax: plt.Axes, scene: AbstractScene, iteration: int) -> plt.Axes:\n", - "\n", - " camera_ax_assignment: Dict[CameraType, plt.Axes] = {\n", - " CameraType.CAM_L0: ax[0, 0],\n", - " CameraType.CAM_F0: ax[0, 1],\n", - " CameraType.CAM_R0: ax[0, 2],\n", - " CameraType.CAM_L2: ax[1, 0],\n", - " CameraType.CAM_B0: ax[1, 1],\n", - " CameraType.CAM_R2: ax[1, 2],\n", - " }\n", - "\n", - " box_detections = scene.get_box_detections_at_iteration(iteration)\n", - " ego_state_se3 = scene.get_ego_state_at_iteration(iteration)\n", - " for camera_type, camera_ax in camera_ax_assignment.items():\n", - " if camera_type not in scene.available_camera_types:\n", - " continue\n", - " # assert camera_type in scene.available_camera_types\n", - " camera = scene.get_camera_at_iteration(iteration, camera_type)\n", - " if camera is not None:\n", - " add_box_detections_to_camera_ax(camera_ax, camera, box_detections, ego_state_se3)\n", - "\n", - " return ax\n", - "\n", - "\n", - "def plot_cameras_with_detections(scene: AbstractScene, iteration: int) -> Tuple[plt.Figure, plt.Axes]:\n", - " \"\"\"\n", - " Plots 8x cameras and birds-eye-view visualization in 3x3 grid\n", - " :param scene: navsim scene dataclass\n", - " :param frame_idx: index of selected frame\n", - " :return: figure and ax object of matplotlib\n", - " \"\"\"\n", - " scale = 1\n", - " fig, ax = plt.subplots(2, 3, figsize=(scale * 11.5, scale * 4.3))\n", - " _add_camera_with_detections(ax, scene, iteration)\n", - "\n", - " fig.tight_layout()\n", - " fig.subplots_adjust(wspace=0.01, hspace=0.01, left=0.01, right=0.99, top=0.99, bottom=0.01)\n", - "\n", - "\n", - "plot_cameras_with_detections(scenes[9], iteration=20)\n", - "\n", - "\n", - "# def _add_camera_with_detections(ax: plt.Axes, scene: AbstractScene, iteration: int) -> plt.Axes:\n", - "\n", - "# camera_ax_assignment: Dict[CameraType, plt.Axes] = {CameraType.CAM_F0: ax}\n", - "\n", - "# box_detections = scene.get_box_detections_at_iteration(iteration)\n", - "# ego_state_se3 = scene.get_ego_state_at_iteration(iteration)\n", - "# for camera_type, camera_ax in camera_ax_assignment.items():\n", - "# assert camera_type in scene.available_camera_types\n", - "# camera = scene.get_camera_at_iteration(iteration, camera_type)\n", - "# if camera is not None:\n", - "# add_box_detections_to_camera_ax(camera_ax, camera, box_detections, ego_state_se3)\n", - "\n", - "# return ax\n", - "\n", - "\n", - "# def plot_cameras_with_detections(scene: AbstractScene, iteration: int) -> Tuple[plt.Figure, plt.Axes]:\n", - "# \"\"\"\n", - "# Plots 8x cameras and birds-eye-view visualization in 3x3 grid\n", - "# :param 
scene: navsim scene dataclass\n", - "# :param frame_idx: index of selected frame\n", - "# :return: figure and ax object of matplotlib\n", - "# \"\"\"\n", - "# scale = 2\n", - "# fig, ax = plt.subplots(1, 1, figsize=(scale * 11.5, scale * 4.3))\n", - "# _add_camera_with_detections(ax, scene, iteration)\n", - "\n", - "# fig.tight_layout()\n", - "# fig.subplots_adjust(wspace=0.01, hspace=0.01, left=0.01, right=0.99, top=0.99, bottom=0.01)\n", - "\n", - "\n", - "\n", - "\n", - "\n", - "# def _add_camera_with_detections(ax: plt.Axes, scene: AbstractScene, iteration: int) -> plt.Axes:\n", - "\n", - "# camera_ax_assignment: Dict[CameraType, plt.Axes] = {\n", - "# CameraType.CAM_L1: ax[0],\n", - "# CameraType.CAM_L0: ax[1],\n", - "# CameraType.F0: ax[2],\n", - "# CameraType.CAM_R0: ax[3],\n", - "# CameraType.CAM_R1: ax[4],\n", - "# }\n", - "\n", - "# box_detections = scene.get_box_detections_at_iteration(iteration)\n", - "# ego_state_se3 = scene.get_ego_state_at_iteration(iteration)\n", - "# for camera_type, camera_ax in camera_ax_assignment.items():\n", - "# assert camera_type in scene.available_camera_types\n", - "# camera = scene.get_camera_at_iteration(iteration, camera_type)\n", - "# if camera is not None:\n", - "# add_box_detections_to_camera_ax(camera_ax, camera, box_detections, ego_state_se3)\n", - "\n", - "# return ax\n", - "\n", - "\n", - "# def plot_cameras_with_detections(scene: AbstractScene, iteration: int) -> Tuple[plt.Figure, plt.Axes]:\n", - "# \"\"\"\n", - "# Plots 8x cameras and birds-eye-view visualization in 3x3 grid\n", - "# :param scene: navsim scene dataclass\n", - "# :param frame_idx: index of selected frame\n", - "# :return: figure and ax object of matplotlib\n", - "# \"\"\"\n", - "# scale = 2\n", - "# fig, ax = plt.subplots(1, 5, figsize=(scale * 11.5, scale * 4.3))\n", - "# _add_camera_with_detections(ax, scene, iteration)\n", - "\n", - "# fig.tight_layout()\n", - "# fig.subplots_adjust(wspace=0.01, hspace=0.01, left=0.01, right=0.99, top=0.99, bottom=0.01)\n", - "\n", - "# plot_cameras_with_detections(scenes[0], iteration=0)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "def _add_camera_with_detections(ax: plt.Axes, scene: AbstractScene, iteration: int) -> plt.Axes:\n", - "\n", - " camera_ax_assignment: Dict[CameraType, plt.Axes] = {\n", - " CameraType.CAM_L0: ax[0, 0],\n", - " CameraType.CAM_F0: ax[0, 1],\n", - " CameraType.CAM_R0: ax[0, 2],\n", - " CameraType.CAM_L1: ax[1, 0],\n", - " # CameraType.CAM_B0: ax[1, 1],\n", - " CameraType.CAM_R1: ax[1, 2],\n", - " CameraType.CAM_L2: ax[2, 0],\n", - " CameraType.CAM_R2: ax[2, 2],\n", - " }\n", - "\n", - " box_detections = scene.get_box_detections_at_iteration(iteration)\n", - " ego_state_se3 = scene.get_ego_state_at_iteration(iteration)\n", - " for camera_type, camera_ax in camera_ax_assignment.items():\n", - " if camera_type not in scene.available_camera_types:\n", - " continue\n", - " # assert camera_type in scene.available_camera_types\n", - " camera = scene.get_camera_at_iteration(iteration, camera_type)\n", - " if camera is not None:\n", - " add_box_detections_to_camera_ax(camera_ax, camera, box_detections, ego_state_se3)\n", - "\n", - " return ax\n", - "\n", - "\n", - "def plot_cameras_with_detections(scene: AbstractScene, iteration: int) -> Tuple[plt.Figure, plt.Axes]:\n", - " \"\"\"\n", - " Plots 8x cameras and birds-eye-view visualization in 3x3 grid\n", - " :param scene: navsim scene dataclass\n", - " :param frame_idx: index of 
selected frame\n", - " :return: figure and ax object of matplotlib\n", - " \"\"\"\n", - " scale = 1\n", - " fig, ax = plt.subplots(3, 3, figsize=(scale * 10, scale * 10))\n", - " _add_camera_with_detections(ax, scene, iteration)\n", - "\n", - " fig.tight_layout()\n", - " fig.subplots_adjust(wspace=0.01, hspace=0.01, left=0.01, right=0.99, top=0.99, bottom=0.01)\n", - "\n", - "\n", - "plot_cameras_with_detections(scenes[9], iteration=20)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "scenes[3].log_name" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from typing import Optional, Union\n", - "\n", - "from matplotlib import animation\n", - "from tqdm import tqdm\n", - "\n", - "\n", - "def render_scene_animation(\n", - " scene: AbstractScene,\n", - " output_path: Union[str, Path],\n", - " start_idx: int = 0,\n", - " end_idx: Optional[int] = None,\n", - " step: int = 10,\n", - " fps: float = 20.0,\n", - " dpi: int = 300,\n", - " format: str = \"mp4\",\n", - ") -> None:\n", - " assert format in [\"mp4\", \"gif\"], \"Format must be either 'mp4' or 'gif'.\"\n", - " output_path = Path(output_path)\n", - " output_path.mkdir(parents=True, exist_ok=True)\n", - "\n", - " scene.open()\n", - "\n", - " if end_idx is None:\n", - " end_idx = scene.get_number_of_iterations()\n", - " end_idx = min(end_idx, scene.get_number_of_iterations())\n", - "\n", - " scale = 1\n", - " fig, ax = plt.subplots(1, 1, figsize=(scale * 16, scale * 9))\n", - "\n", - " def update(i):\n", - " ax.clear()\n", - " # for a in ax.flatten():\n", - " # a.clear()\n", - " _add_camera_with_detections(ax, scene, i)\n", - " plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05)\n", - " pbar.update(1)\n", - "\n", - " frames = list(range(start_idx, end_idx, step))\n", - " pbar = tqdm(total=len(frames), desc=f\"Rendering {scene.log_name} as {format}\")\n", - " ani = animation.FuncAnimation(fig, update, frames=frames, repeat=False)\n", - "\n", - " ani.save(output_path / f\"{scene.log_name}_{scene.token}.{format}\", writer=\"ffmpeg\", fps=fps, dpi=dpi)\n", - " plt.close(fig)\n", - " scene.close()\n", - "\n", - "\n", - "# for i in range(10):\n", - " # render_scene_animation(scenes[i], output_path=\"output\", start_idx=0, end_idx=None, step=1, fps=20, dpi=300, format=\"mp4\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/viz/log_rendering.ipynb b/notebooks/viz/log_rendering.ipynb deleted file mode 100644 index fe88e006..00000000 --- a/notebooks/viz/log_rendering.ipynb +++ /dev/null @@ -1,105 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "from d123.dataset.scene.arrow_scene import 
ArrowScene\n", - "from d123.common.visualization.matplotlib.plots import plot_scene_at_iteration\n", - "\n", - "\n", - "\n", - "log_name = \"1005081002024129653_5313_150_5333_150\"\n", - "log_file = Path(f\"/home/daniel/d123_workspace/data/wopd_train/{log_name}.arrow\")\n", - "scene = ArrowScene(log_file)\n", - "\n", - "fig, ax = plot_scene_at_iteration(scene, iteration=10)\n", - "\n", - "ax.get_xlim()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "import traceback\n", - "from d123.common.visualization.matplotlib.plots import render_scene_animation\n", - "\n", - "output_path = Path(\"/home/daniel/d123_logs_videos\")\n", - "# render_scene_as_mp4(scene, output_path, fps=30, end_idx=10000, step=5, dpi=100)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "# Create an mp4 animation with a specific FPS\n", - "import traceback\n", - "from d123.common.visualization.matplotlib.plots import render_scene_animation\n", - "\n", - "split = \"av2-sensor-mini_train\"\n", - "output_path = Path(f\"/home/daniel/d123_logs_videos/{split}\")\n", - "log_path = Path(f\"/home/daniel/d123_workspace/data/{split}\")\n", - "for log_file in log_path.iterdir():\n", - " try:\n", - " scene = ArrowScene(log_file)\n", - " render_scene_animation(scene, output_path, fps=20, end_idx=None, step=1)\n", - " del scene\n", - " except Exception as e:\n", - " traceback.print_exc()\n", - " erroneous_file = output_path / f\"{log_name}.mp4\"\n", - " if erroneous_file.exists():\n", - " erroneous_file.unlink()\n", - " # break\n", - " # break" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123_dev", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/viz/viser_testing_v2_scene.ipynb b/notebooks/viz/viser_testing_v2_scene.ipynb deleted file mode 100644 index 07d91d1d..00000000 --- a/notebooks/viz/viser_testing_v2_scene.ipynb +++ /dev/null @@ -1,121 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.scene.scene_builder import ArrowSceneBuilder\n", - "from d123.dataset.scene.scene_filter import SceneFilter\n", - "\n", - "from d123.common.multithreading.worker_sequential import Sequential\n", - "from d123.common.datatypes.sensor.camera import CameraType" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "import requests\n", - "from PIL import Image\n", - "from io import BytesIO\n", - "\n", - "\n", - "\n", - "# splits = [\"nuplan_private_test\"]\n", - "splits = [\"carla\"]\n", - "# splits = [\"wopd_train\"]\n", - "# splits = [\"av2-sensor-mini_train\"]\n", - "log_names = None\n", - "\n", - "scene_tokens = None\n", - "\n", - "scene_filter = SceneFilter(\n", - " split_names=splits,\n", - " log_names=log_names,\n", - " 
scene_tokens=scene_tokens,\n", - " duration_s=10,\n", - " history_s=0.0,\n", - " timestamp_threshold_s=10,\n", - " shuffle=False,\n", - " camera_types=[CameraType.CAM_F0],\n", - ")\n", - "scene_builder = ArrowSceneBuilder(\"/home/daniel/d123_workspace/data\")\n", - "worker = Sequential()\n", - "# worker = RayDistributed()\n", - "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "\n", - "print(f\"Found {len(scenes)} scenes\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.common.visualization.viser.server import ViserVisualizationServer\n", - "\n", - "\n", - "visualization_server = ViserVisualizationServer(scenes, scene_index=0)" - ] - }, - { - "cell_type": "markdown", - "id": "4", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/waymo_perception/lidar_testing.ipynb b/notebooks/waymo_perception/lidar_testing.ipynb deleted file mode 100644 index 3cd2a77e..00000000 --- a/notebooks/waymo_perception/lidar_testing.ipynb +++ /dev/null @@ -1,301 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from waymo_open_dataset import dataset_pb2\n", - "\n", - "import json\n", - "import os\n", - "\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "from PIL import Image\n", - "from tqdm import tqdm\n", - "from waymo_open_dataset import label_pb2\n", - "from waymo_open_dataset.protos import camera_segmentation_pb2 as cs_pb2\n", - "from waymo_open_dataset.utils import box_utils\n", - "\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "\n", - "import os\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"-1\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "\n", - "\n", - "WOPD_DATA_ROOT = Path(\"/media/nvme1/waymo_perception/training\")\n", - "\n", - "\n", - "tfrecords_file_list = list(WOPD_DATA_ROOT.glob(\"*.tfrecord\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "import io\n", - "from pyquaternion import Quaternion\n", - "\n", - "from d123.common.geometry.base import StateSE3\n", - "from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3\n", - "\n", - "from waymo_open_dataset.utils import frame_utils\n", - "\n", - "\n", - "# Frame attributes:\n", - "# context: \n", - "# timestamp_micros: \n", - "# pose: \n", - "# images: List with 5 images\n", - "# lasers: \n", - "# Length: 5\n", - "# laser_labels: \n", - "# Length: 0\n", - "# projected_lidar_labels: \n", - "# Length: 0\n", - "# camera_labels: \n", - "# 
Length: 0\n", - "# no_label_zones: \n", - "# Length: 0\n", - "# map_features: \n", - "# Length: 0\n", - "# map_pose_offset: \n", - "\n", - "file_idx = 0\n", - "pathname = tfrecords_file_list[file_idx]\n", - "dataset = tf.data.TFRecordDataset(pathname, compression_type=\"\")\n", - "num_frames = sum(1 for _ in dataset)\n", - "\n", - "\n", - "def read_jpg_image(data: bytes) -> np.ndarray:\n", - " \"\"\"Read a JPEG image from bytes and return it as a numpy array.\"\"\"\n", - " image = Image.open(io.BytesIO(data))\n", - " return np.array(image)\n", - "\n", - "\n", - "ego_state_se3s = []\n", - "front_images = []\n", - "dataset = tf.data.TFRecordDataset(pathname, compression_type=\"\")\n", - "\n", - "boxes = []\n", - "\n", - "for frame_idx, data in enumerate(dataset):\n", - "\n", - " frame = dataset_pb2.Frame()\n", - " frame.ParseFromString(data.numpy())\n", - " # print(frame.camera_labels)\n", - " print(\"Frame attributes:\")\n", - " for field in frame.DESCRIPTOR.fields:\n", - " field_name = field.name\n", - " if hasattr(frame, field_name):\n", - " value = getattr(frame, field_name)\n", - " if field_name != \"images\": # Don't print the whole image data\n", - " print(f\" {field_name}: {type(value)}\")\n", - " if hasattr(value, \"__len__\") and not isinstance(value, (str, bytes)):\n", - " print(f\" Length: {len(value)}\")\n", - " else:\n", - " print(f\" {field_name}: List with {len(value)} images\")\n", - "\n", - "\n", - " # # 1. pose\n", - " pose = np.array(frame.pose.transform).reshape(4, 4)\n", - " yaw_pitch_roll = Quaternion(matrix=pose[:3, :3]).yaw_pitch_roll\n", - " ego_state_se3s.append(\n", - " np.array(\n", - " [\n", - " pose[0, 3], # x\n", - " pose[1, 3], # y\n", - " pose[2, 3], # z\n", - " yaw_pitch_roll[2], # yaw\n", - " yaw_pitch_roll[1], # pitch\n", - " yaw_pitch_roll[0], # roll\n", - " ],\n", - " dtype=np.float64,\n", - " )\n", - " )\n", - "\n", - " # # plt.show()\n", - " if frame_idx == 0:\n", - " break\n", - "\n", - "ego_state_se3s = np.array(ego_state_se3s, dtype=np.float64)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "from typing import Dict, List, Tuple\n", - "\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "\n", - "from waymo_open_dataset import dataset_pb2\n", - "from waymo_open_dataset.utils import range_image_utils\n", - "from waymo_open_dataset.utils import transform_utils\n", - "\n", - "RangeImages = Dict[\"dataset_pb2.LaserName.Name\", List[dataset_pb2.MatrixFloat]]\n", - "CameraProjections = Dict[\"dataset_pb2.LaserName.Name\", List[dataset_pb2.MatrixInt32]]\n", - "SegmentationLabels = Dict[\"dataset_pb2.LaserName.Name\", List[dataset_pb2.MatrixInt32]]\n", - "ParsedFrame = Tuple[RangeImages, CameraProjections, SegmentationLabels, dataset_pb2.MatrixFloat]\n", - "\n", - "\n", - "def parse_range_image_and_camera_projection(frame: dataset_pb2.Frame) -> ParsedFrame:\n", - " \"\"\"Parse range images and camera projections given a frame.\n", - "\n", - " Args:\n", - " frame: open dataset frame proto\n", - "\n", - " Returns:\n", - " range_images: A dict of {laser_name,\n", - " [range_image_first_return, range_image_second_return]}.\n", - " camera_projections: A dict of {laser_name,\n", - " [camera_projection_from_first_return,\n", - " camera_projection_from_second_return]}.\n", - " seg_labels: segmentation labels, a dict of {laser_name,\n", - " [seg_label_first_return, seg_label_second_return]}\n", - " range_image_top_pose: range image pixel pose for top lidar.\n", - " \"\"\"\n", 
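- "    # Descriptive note: the decoding below mirrors waymo_open_dataset.utils.frame_utils.\n", - "    # parse_range_image_and_camera_projection: each laser's ZLIB-compressed range-image,\n", - "    # camera-projection, and segmentation protos are decompressed and parsed into\n", - "    # MatrixFloat / MatrixInt32 messages keyed by laser name.\n",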
- " range_images = {}\n", - " camera_projections = {}\n", - " seg_labels = {}\n", - " range_image_top_pose: dataset_pb2.MatrixFloat = dataset_pb2.MatrixFloat()\n", - " for laser in frame.lasers:\n", - " if len(laser.ri_return1.range_image_compressed) > 0: # pylint: disable=g-explicit-length-test\n", - " range_image_str_tensor = tf.io.decode_compressed(laser.ri_return1.range_image_compressed, \"ZLIB\")\n", - " ri = dataset_pb2.MatrixFloat()\n", - " ri.ParseFromString(range_image_str_tensor.numpy())\n", - " range_images[laser.name] = [ri]\n", - "\n", - " if laser.name == dataset_pb2.LaserName.TOP:\n", - " range_image_top_pose_str_tensor = tf.io.decode_compressed(\n", - " laser.ri_return1.range_image_pose_compressed, \"ZLIB\"\n", - " )\n", - " range_image_top_pose = dataset_pb2.MatrixFloat()\n", - " range_image_top_pose.ParseFromString(range_image_top_pose_str_tensor.numpy())\n", - "\n", - " camera_projection_str_tensor = tf.io.decode_compressed(\n", - " laser.ri_return1.camera_projection_compressed, \"ZLIB\"\n", - " )\n", - " cp = dataset_pb2.MatrixInt32()\n", - " cp.ParseFromString(camera_projection_str_tensor.numpy())\n", - " camera_projections[laser.name] = [cp]\n", - "\n", - " if len(laser.ri_return1.segmentation_label_compressed) > 0: # pylint: disable=g-explicit-length-test\n", - " seg_label_str_tensor = tf.io.decode_compressed(laser.ri_return1.segmentation_label_compressed, \"ZLIB\")\n", - " seg_label = dataset_pb2.MatrixInt32()\n", - " seg_label.ParseFromString(seg_label_str_tensor.numpy())\n", - " seg_labels[laser.name] = [seg_label]\n", - " if len(laser.ri_return2.range_image_compressed) > 0: # pylint: disable=g-explicit-length-test\n", - " range_image_str_tensor = tf.io.decode_compressed(laser.ri_return2.range_image_compressed, \"ZLIB\")\n", - " ri = dataset_pb2.MatrixFloat()\n", - " ri.ParseFromString(range_image_str_tensor.numpy())\n", - " range_images[laser.name].append(ri)\n", - "\n", - " camera_projection_str_tensor = tf.io.decode_compressed(\n", - " laser.ri_return2.camera_projection_compressed, \"ZLIB\"\n", - " )\n", - " cp = dataset_pb2.MatrixInt32()\n", - " cp.ParseFromString(camera_projection_str_tensor.numpy())\n", - " camera_projections[laser.name].append(cp)\n", - "\n", - " if len(laser.ri_return2.segmentation_label_compressed) > 0: # pylint: disable=g-explicit-length-test\n", - " seg_label_str_tensor = tf.io.decode_compressed(laser.ri_return2.segmentation_label_compressed, \"ZLIB\")\n", - " seg_label = dataset_pb2.MatrixInt32()\n", - " seg_label.ParseFromString(seg_label_str_tensor.numpy())\n", - " seg_labels[laser.name].append(seg_label)\n", - " return range_images, camera_projections, seg_labels, range_image_top_pose\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "from waymo_open_dataset.utils import frame_utils\n", - "\n", - "dataset = tf.data.TFRecordDataset(tfrecords_file_list[file_idx], compression_type=\"\")\n", - "for data in dataset:\n", - " frame = dataset_pb2.Frame()\n", - " frame.ParseFromString(data.numpy()) # No need for bytearray conversion\n", - " break\n", - "\n", - "(range_images, camera_projections, _, range_image_top_pose) = parse_range_image_and_camera_projection(frame)\n", - "points, cp_points = frame_utils.convert_range_image_to_point_cloud(\n", - " frame=frame,\n", - " range_images=range_images,\n", - " camera_projections=camera_projections,\n", - " range_image_top_pose=range_image_top_pose,\n", - " keep_polar_features=True,\n", - ")\n" - ] - }, - { - 
"cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "pc = points[0]\n", - "\n", - "\n", - "plt.scatter(pc[:, 3], pc[:, 4], s=0.1, c=pc[:, 5], cmap='viridis')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "cp_points[0].shape, points[0].shape" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/waymo_perception/map_testing.ipynb b/notebooks/waymo_perception/map_testing.ipynb deleted file mode 100644 index c29fc212..00000000 --- a/notebooks/waymo_perception/map_testing.ipynb +++ /dev/null @@ -1,725 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from waymo_open_dataset import dataset_pb2\n", - "\n", - "import json\n", - "import os\n", - "\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "from PIL import Image\n", - "from tqdm import tqdm\n", - "from waymo_open_dataset import label_pb2\n", - "from waymo_open_dataset.protos import camera_segmentation_pb2 as cs_pb2\n", - "from waymo_open_dataset.utils import box_utils\n", - "\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "\n", - "import os\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"-1\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "\n", - "\n", - "WOPD_DATA_ROOT = Path(\"/media/nvme1/waymo_perception/training\")\n", - "\n", - "\n", - "tfrecords_file_list = list(WOPD_DATA_ROOT.glob(\"*.tfrecord\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "import io\n", - "from pyquaternion import Quaternion\n", - "\n", - "from d123.common.geometry.base import StateSE3\n", - "from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3\n", - "\n", - "from waymo_open_dataset.utils import frame_utils\n", - "\n", - "\n", - "# Frame attributes:\n", - "# context: \n", - "# timestamp_micros: \n", - "# pose: \n", - "# images: List with 5 images\n", - "# lasers: \n", - "# Length: 5\n", - "# laser_labels: \n", - "# Length: 0\n", - "# projected_lidar_labels: \n", - "# Length: 0\n", - "# camera_labels: \n", - "# Length: 0\n", - "# no_label_zones: \n", - "# Length: 0\n", - "# map_features: \n", - "# Length: 0\n", - "# map_pose_offset: \n", - "\n", - "file_idx = 0\n", - "pathname = tfrecords_file_list[file_idx]\n", - "dataset = tf.data.TFRecordDataset(pathname, compression_type=\"\")\n", - "num_frames = sum(1 for _ in dataset)\n", - "\n", - "\n", - "def read_jpg_image(data: bytes) -> np.ndarray:\n", - " \"\"\"Read a JPEG image from bytes and return it as a numpy array.\"\"\"\n", - " image = Image.open(io.BytesIO(data))\n", - " return np.array(image)\n", - "\n", - "\n", - "ego_state_se3s = []\n", - "front_images = []\n", - "dataset = tf.data.TFRecordDataset(pathname, compression_type=\"\")\n", - "\n", - "boxes = []\n", - "\n", - "for frame_idx, data in enumerate(dataset):\n", - "\n", - " frame = 
dataset_pb2.Frame()\n", - " frame.ParseFromString(data.numpy())\n", - " print(\"Frame attributes:\")\n", - " for field in frame.DESCRIPTOR.fields:\n", - " field_name = field.name\n", - " if hasattr(frame, field_name):\n", - " value = getattr(frame, field_name)\n", - " if field_name != \"images\": # Don't print the whole image data\n", - " print(f\" {field_name}: {type(value)}\")\n", - " if hasattr(value, \"__len__\") and not isinstance(value, (str, bytes)):\n", - " print(f\" Length: {len(value)}\")\n", - " else:\n", - " print(f\" {field_name}: List with {len(value)} images\")\n", - "\n", - " # # plt.show()\n", - " if frame_idx == 0:\n", - " break\n", - "\n", - "ego_state_se3s = np.array(ego_state_se3s, dtype=np.float64)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "frame.context.stats.location" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "from collections import defaultdict\n", - "\n", - "from d123.common.geometry.units import mph_to_mps\n", - "\n", - "\n", - "dataset = tf.data.TFRecordDataset(pathname, compression_type=\"\")\n", - "for frame_idx, data in enumerate(dataset):\n", - " frame = dataset_pb2.Frame()\n", - " frame.ParseFromString(data.numpy())\n", - " break\n", - "\n", - "driveways = {}\n", - "stop_signs = {}\n", - "crosswalks = {}\n", - "road_edges = {}\n", - "road_lines = {}\n", - "\n", - "lanes = {}\n", - "lanes_type = {}\n", - "lanes_speed_limit_mps = {}\n", - "lanes_interpolate = {}\n", - "lanes_successors = defaultdict(list)\n", - "lanes_predecessors = defaultdict(list)\n", - "\n", - "lanes_map_features = {}\n", - "\n", - "for map_feature in frame.map_features:\n", - "\n", - " if map_feature.HasField(\"lane\"):\n", - " lanes[map_feature.id] = np.array([[p.x, p.y, p.z] for p in map_feature.lane.polyline])\n", - " lanes_map_features[map_feature.id] = map_feature\n", - " lanes_type[map_feature.id] = map_feature.lane.type\n", - " lanes_speed_limit_mps[map_feature.id] = mph_to_mps(map_feature.lane.speed_limit_mph)\n", - " for lane_id_ in map_feature.lane.exit_lanes:\n", - " lanes_successors[map_feature.id].append(lane_id_)\n", - " for lane_id_ in map_feature.lane.exit_lanes:\n", - " lanes_predecessors[map_feature.id].append(lane_id_)\n", - "\n", - " elif map_feature.HasField(\"road_line\"):\n", - " road_lines[map_feature.id] = np.array([[p.x, p.y, p.z] for p in map_feature.road_line.polyline])\n", - " elif map_feature.HasField(\"road_edge\"):\n", - " road_edges[map_feature.id] = np.array([[p.x, p.y, p.z] for p in map_feature.road_edge.polyline])\n", - " elif map_feature.HasField(\"stop_sign\"):\n", - " pass\n", - " elif map_feature.HasField(\"crosswalk\"):\n", - " outline = np.array([[p.x, p.y, p.z] for p in map_feature.crosswalk.polygon])\n", - " crosswalks[map_feature.id] = outline\n", - " elif map_feature.HasField(\"speed_bump\"):\n", - " pass\n", - " elif map_feature.HasField(\"driveway\"):\n", - " # print(map_feature.driveway)\n", - " outline = np.array([[p.x, p.y, p.z] for p in map_feature.driveway.polygon])\n", - " driveways[map_feature.id] = outline\n", - "\n", - " # print(f\"Roadline: {map_feature.road_line}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.common.visualization.matplotlib.utils import add_non_repeating_legend_to_ax\n", - "\n", - "\n", - "fig, ax = plt.subplots(figsize=(30, 30))\n", - "\n", - "for 
road_edge in road_edges.values():\n", - " # print(len(driveway))\n", - " ax.plot(road_edge[:, 0], road_edge[:, 1], color=\"blue\", label=\"road_edge\")\n", - "\n", - "for lane in lanes.values():\n", - " # print(len(driveway))\n", - " ax.plot(lane[:, 0], lane[:, 1], color=\"gray\", label=\"lane\")\n", - "\n", - "\n", - "for road_line in road_lines.values():\n", - " # print(len(driveway))\n", - " ax.plot(road_line[:, 0], road_line[:, 1], color=\"orange\", label=\"road_line\")\n", - "\n", - "for driveway in driveways.values():\n", - " # print(len(driveway))\n", - " ax.plot(driveway[:, 0], driveway[:, 1], color=\"green\", label=\"driveway\")\n", - "\n", - "\n", - "for crosswalk in crosswalks.values():\n", - " # print(len(driveway))\n", - " ax.plot(crosswalk[:, 0], crosswalk[:, 1], color=\"violet\", label=\"crosswalk\")\n", - "\n", - "\n", - "add_non_repeating_legend_to_ax(ax)\n", - "\n", - "ax.set_aspect(\"equal\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.dataset.dataset_specific.wopd.wopd_map_utils import extract_lane_boundaries\n", - "\n", - "\n", - "left_boundaries, right_boundaries = extract_lane_boundaries(\n", - " lanes=lanes,\n", - " lanes_successors=lanes_successors,\n", - " lanes_predecessors=lanes_predecessors,\n", - " road_lines=road_lines,\n", - " road_edges=road_edges,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [ - "size = 20\n", - "fig, ax = plt.subplots(figsize=(size, size))\n", - "\n", - "for lane_id, lane_ in lanes.items():\n", - " if lane_id not in left_boundaries or lane_id not in right_boundaries:\n", - " continue\n", - " left_boundary = left_boundaries[lane_id].array\n", - " right_boundary = right_boundaries[lane_id].array\n", - "\n", - " assert len(left_boundary) > 0 and len(right_boundary) > 0\n", - " ax.plot(left_boundary[:, 0], left_boundary[:, 1], color=\"lime\")\n", - " ax.plot(right_boundary[:, 0], right_boundary[:, 1], color=\"red\")\n", - "\n", - " ax.plot(lane_[:, 0], lane_[:, 1], color=\"black\")\n", - "\n", - "\n", - "\n", - "ax.set_aspect(\"equal\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8", - "metadata": {}, - "outputs": [], - "source": [ - "# " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9", - "metadata": {}, - "outputs": [], - "source": [ - "lane_idx = 158\n", - "\n", - "lane = lanes_map_features[lane_idx].lane\n", - "lane_id = lanes_map_features[lane_idx].id\n", - "\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "\n", - "ax.plot(lanes[lane_id][:, 0], lanes[lane_id][:, 1], color=\"black\", label=\"Lane Centerline\", alpha=0.5)\n", - "\n", - "for left_boundary in lane.left_boundaries:\n", - " print(\"left\", left_boundary)\n", - " try:\n", - " boundary = road_lines[left_boundary.boundary_feature_id]\n", - "\n", - " except KeyError:\n", - " print(f\"Boundary feature ID {left_boundary.boundary_feature_id} not found.\")\n", - " boundary = road_edges[left_boundary.boundary_feature_id]\n", - " ax.plot(\n", - " boundary[:, 0],\n", - " boundary[:, 1],\n", - " color=\"lime\",\n", - " linestyle=\"--\",\n", - " label=\"Left Neighbor\",\n", - " )\n", - " ax.plot(\n", - " lanes[lane_id][left_boundary.lane_start_index : left_boundary.lane_end_index, 0],\n", - " lanes[lane_id][left_boundary.lane_start_index : left_boundary.lane_end_index, 1],\n", - " 
color=\"lime\",\n", - " linestyle=\"-\",\n", - " label=\"Left Neighbor\",\n", - " )\n", - "\n", - "\n", - "for right_boundary in lane.right_boundaries:\n", - " print(\"right\", right_boundary)\n", - " try:\n", - " boundary = road_lines[right_boundary.boundary_feature_id]\n", - "\n", - " except KeyError:\n", - " print(f\"Boundary feature ID {right_boundary.boundary_feature_id} not found.\")\n", - " boundary = road_edges[right_boundary.boundary_feature_id]\n", - "\n", - " ax.plot(\n", - " boundary[:, 0],\n", - " boundary[:, 1],\n", - " color=\"red\",\n", - " linestyle=\"--\",\n", - " label=\"Right Neighbor\",\n", - " )\n", - " ax.plot(\n", - " lanes[lane_id][right_boundary.lane_start_index : right_boundary.lane_end_index, 0],\n", - " lanes[lane_id][right_boundary.lane_start_index : right_boundary.lane_end_index, 1],\n", - " color=\"red\",\n", - " linestyle=\"-\",\n", - " label=\"Right Neighbor\",\n", - " )\n", - "\n", - "\n", - "ax.set_aspect(\"equal\")\n", - "# lanes\n", - "\n", - "zoom_out = 10\n", - "lane = lanes[lane_id]\n", - "ax.set_xlim(lane[:, 0].min() - zoom_out, lane[:, 0].max() + zoom_out)\n", - "ax.set_ylim(lane[:, 1].min() - zoom_out, lane[:, 1].max() + zoom_out)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "10", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "11", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.common.geometry.line.polylines import Polyline3D\n", - "import numpy as np\n", - "\n", - "\n", - "polyline_3d = Polyline3D.from_array(lanes[0])\n", - "\n", - "\n", - "# Create left and right boundaries for the lane\n", - "\n", - "\n", - "def create_lane_boundaries(polyline_3d, width=2.0):\n", - " \"\"\"\n", - " Create left and right boundaries for a lane by offsetting perpendicular to the heading.\n", - "\n", - " Args:\n", - " polyline_3d: Polyline3D object representing the centerline\n", - " width: Total width of the lane in meters\n", - "\n", - " Returns:\n", - " left_boundary: numpy array representing the left boundary\n", - " right_boundary: numpy array representing the right boundary\n", - " \"\"\"\n", - " points = polyline_3d.array\n", - " half_width = width / 2.0\n", - "\n", - " # Calculate the direction vectors between consecutive points\n", - " directions = np.diff(points, axis=0)\n", - "\n", - " # Normalize the direction vectors\n", - " directions_norm = np.linalg.norm(directions, axis=1, keepdims=True)\n", - " directions_normalized = directions / directions_norm\n", - "\n", - " # Calculate perpendicular vectors in the xy plane (z remains 0)\n", - " perpendiculars = np.zeros_like(directions)\n", - " perpendiculars[:, 0] = -directions_normalized[:, 1] # -dy\n", - " perpendiculars[:, 1] = directions_normalized[:, 0] # dx\n", - "\n", - " # Create boundaries (need to handle the last point separately)\n", - " left_boundary = points[:-1] + perpendiculars * half_width\n", - " right_boundary = points[:-1] - perpendiculars * half_width\n", - "\n", - " # Handle the last point based on the last direction\n", - " last_perp = perpendiculars[-1]\n", - " left_boundary = np.vstack([left_boundary, points[-1] + last_perp * half_width])\n", - " right_boundary = np.vstack([right_boundary, points[-1] - last_perp * half_width])\n", - "\n", - " return left_boundary, right_boundary\n", - "\n", - "\n", - "# Create the boundaries with a 4m wide lane\n", - "left_boundary, right_boundary = create_lane_boundaries(polyline_3d, width=4.0)\n", - "\n", - "# Plot the centerline and the 
boundaries\n", - "plt.figure(figsize=(12, 8))\n", - "plt.plot(polyline_3d.array[:, 0], polyline_3d.array[:, 1], \"g-\", label=\"Centerline\")\n", - "plt.plot(left_boundary[:, 0], left_boundary[:, 1], \"b-\", label=\"Left Boundary\")\n", - "plt.plot(right_boundary[:, 0], right_boundary[:, 1], \"r-\", label=\"Right Boundary\")\n", - "plt.axis(\"equal\")\n", - "plt.grid(True)\n", - "plt.legend()\n", - "plt.title(\"Lane with Left and Right Boundaries\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "12", - "metadata": {}, - "outputs": [], - "source": [ - "lane_idx = 59\n", - "\n", - "lane = lanes_map_features[lane_idx].lane\n", - "lane_id = lanes_map_features[lane_idx].id\n", - "\n", - "\n", - "# for right_neighbor in lane.right_neighbors:\n", - "# print(right_neighbor)\n", - "\n", - "# lane\n", - "\n", - "import matplotlib.pyplot as plt\n", - "from torch import le\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "\n", - "ax.plot(lanes[lane_id][:, 0], lanes[lane_id][:, 1], color=\"blue\", label=\"Lane Centerline\")\n", - "\n", - "for right_neighbor in lane.right_neighbors:\n", - " # print(\"right\", right_neighbor)\n", - " ax.plot(\n", - " lanes[right_neighbor.feature_id][:, 0],\n", - " lanes[right_neighbor.feature_id][:, 1],\n", - " color=\"red\",\n", - " linestyle=\"--\",\n", - " label=\"Right Neighbor\",\n", - " )\n", - " ax.plot(\n", - " lanes[right_neighbor.feature_id][right_neighbor.neighbor_start_index: right_neighbor.neighbor_end_index, 0],\n", - " lanes[right_neighbor.feature_id][right_neighbor.neighbor_start_index: right_neighbor.neighbor_end_index, 1],\n", - " color=\"red\",\n", - " label=\"Right Neighbor\",\n", - " )\n", - "\n", - "\n", - "for left_neighbor in lane.left_neighbors:\n", - " # print(\"left\", left_neighbor)\n", - " ax.plot(\n", - " lanes[left_neighbor.feature_id][:, 0],\n", - " lanes[left_neighbor.feature_id][:, 1],\n", - " color=\"lime\",\n", - " linestyle=\"--\",\n", - " label=\"Left Neighbor\",\n", - " )\n", - " ax.plot(\n", - " lanes[left_neighbor.feature_id][left_neighbor.neighbor_start_index: left_neighbor.neighbor_end_index, 0],\n", - " lanes[left_neighbor.feature_id][left_neighbor.neighbor_start_index: left_neighbor.neighbor_end_index, 1],\n", - " color=\"lime\",\n", - " label=\"Left Neighbor\",\n", - " )\n", - "\n", - "\n", - "ax.set_aspect(\"equal\")\n", - "# lanes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "13", - "metadata": {}, - "outputs": [], - "source": [ - "lane_idx = 48\n", - "\n", - "lane = lanes_map_features[lane_idx].lane\n", - "lane_id = lanes_map_features[lane_idx].id\n", - "\n", - "\n", - "\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "\n", - "ax.scatter(lanes[lane_id][0, 0], lanes[lane_id][0, 1], color=\"black\", label=\"Lane Centerline\", alpha=0.5)\n", - "ax.scatter(lanes[lane_id][-1, 0], lanes[lane_id][-1, 1], color=\"blue\", label=\"Lane Centerline\", alpha=0.5)\n", - "\n", - "for entry_lane in lane.entry_lanes:\n", - " # print(\"right\", right_neighbor)\n", - " ax.plot(\n", - " lanes[entry_lane][:, 0],\n", - " lanes[entry_lane][:, 1],\n", - " color=\"red\",\n", - " linestyle=\"--\",\n", - " label=\"Right Neighbor\",\n", - " )\n", - "\n", - "\n", - "for exit_lane in lane.exit_lanes:\n", - " # print(\"left\", left_neighbor)\n", - " ax.plot(\n", - " lanes[exit_lane][:, 0],\n", - " lanes[exit_lane][:, 1],\n", - " color=\"lime\",\n", - " linestyle=\"--\",\n", - " label=\"Left Neighbor\",\n", - " )\n", - "\n", - 
"\n", - "ax.set_aspect(\"equal\")\n", - "# lanes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "14", - "metadata": {}, - "outputs": [], - "source": [ - "lane_idx = 196\n", - "\n", - "lane = lanes_map_features[lane_idx].lane\n", - "lane_id = lanes_map_features[lane_idx].id\n", - "\n", - "\n", - "import matplotlib.pyplot as plt\n", - "\n", - "fig, ax = plt.subplots(figsize=(10, 10))\n", - "\n", - "ax.plot(lanes[lane_id][:, 0], lanes[lane_id][:, 1], color=\"black\", label=\"Lane Centerline\", alpha=0.5)\n", - "\n", - "for left_boundary in lane.left_boundaries:\n", - " print(\"left\", left_boundary)\n", - " try:\n", - " boundary = road_lines[left_boundary.boundary_feature_id]\n", - "\n", - " except KeyError:\n", - " print(f\"Boundary feature ID {left_boundary.boundary_feature_id} not found.\")\n", - " boundary = road_edges[left_boundary.boundary_feature_id]\n", - " ax.plot(\n", - " boundary[:, 0],\n", - " boundary[:, 1],\n", - " color=\"lime\",\n", - " linestyle=\"--\",\n", - " label=\"Left Neighbor\",\n", - " )\n", - " ax.plot(\n", - " lanes[lane_id][left_boundary.lane_start_index : left_boundary.lane_end_index, 0],\n", - " lanes[lane_id][left_boundary.lane_start_index : left_boundary.lane_end_index, 1],\n", - " color=\"lime\",\n", - " linestyle=\"-\",\n", - " label=\"Left Neighbor\",\n", - " )\n", - "\n", - "\n", - "for right_boundary in lane.right_boundaries:\n", - " print(\"right\", right_boundary)\n", - " try:\n", - " boundary = road_lines[right_boundary.boundary_feature_id]\n", - "\n", - " except KeyError:\n", - " print(f\"Boundary feature ID {right_boundary.boundary_feature_id} not found.\")\n", - " boundary = road_edges[right_boundary.boundary_feature_id]\n", - "\n", - " ax.plot(\n", - " boundary[:, 0],\n", - " boundary[:, 1],\n", - " color=\"red\",\n", - " linestyle=\"--\",\n", - " label=\"Right Neighbor\",\n", - " )\n", - " ax.plot(\n", - " lanes[lane_id][right_boundary.lane_start_index : right_boundary.lane_end_index, 0],\n", - " lanes[lane_id][right_boundary.lane_start_index : right_boundary.lane_end_index, 1],\n", - " color=\"red\",\n", - " linestyle=\"-\",\n", - " label=\"Right Neighbor\",\n", - " )\n", - "\n", - "\n", - "ax.set_aspect(\"equal\")\n", - "# lanes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "15", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.common.geometry.base import StateSE2\n", - "# from d123.common.geometry.line.polylines import PolylineSE2\n", - "# from d123.common.geometry.transform.tranform_2d import translate_along_yaw\n", - "# from d123.common.geometry.vector import Vector2D\n", - "\n", - "# size = 30\n", - "# fig, ax = plt.subplots(figsize=(size, size))\n", - "\n", - "# lane_id = 114\n", - "# lane_ = lanes[lane_id]\n", - "\n", - "# lane_polyline_se2 = PolylineSE2.from_array(lane_[:, :2])\n", - "# BOUNDARY_STEP_SIZE = 0.5\n", - "\n", - "\n", - "# distances_se2 = np.linspace(\n", - "# 0, lane_polyline_se2.length, int(lane_polyline_se2.length / BOUNDARY_STEP_SIZE) + 1, endpoint=True\n", - "# )\n", - "# lane_query_se2 = lane_polyline_se2.interpolate(distances_se2)\n", - "\n", - "\n", - "# left_boundary = left_boundaries[lane_id].array\n", - "# right_boundary = right_boundaries[lane_id].array\n", - "\n", - "# assert len(left_boundary) > 0 and len(right_boundary) > 0\n", - "# ax.plot(left_boundary[:, 0], left_boundary[:, 1], color=\"lime\")\n", - "# ax.plot(right_boundary[:, 0], right_boundary[:, 1], color=\"red\")\n", - "\n", - "# ax.scatter(lane_query_se2[:, 0], lane_query_se2[:, 1], 
c=lane_query_se2[:, 2] / np.pi, cmap=\"viridis\")\n", - "\n", - "\n", - "# MAX_LANE_WIDTH = 25\n", - "# for state_se2_array in lane_query_se2:\n", - "# for sign in [1.0, -1.0]:\n", - "# perp_start_point = translate_along_yaw(StateSE2.from_array(state_se2_array), Vector2D(0.0, sign * 0.1))\n", - "# perp_end_point = translate_along_yaw(\n", - "# StateSE2.from_array(state_se2_array), Vector2D(0.0, sign * MAX_LANE_WIDTH / 2.0)\n", - "# )\n", - "# ax.plot(\n", - "# [perp_start_point.x, perp_end_point.x],\n", - "# [perp_start_point.y, perp_end_point.y],\n", - "# color=\"lime\" if sign > 0 else \"red\",\n", - "# linestyle=\"--\",\n", - "# alpha=0.5,\n", - "# )\n", - "\n", - "\n", - "# for road_edge in road_edges.values():\n", - "# # print(len(driveway))\n", - "# ax.plot(road_edge[:, 0], road_edge[:, 1], color=\"black\", label=\"road_edge\", linestyle=\"dashdot\")\n", - "\n", - "\n", - "\n", - "# for road_line in road_lines.values():\n", - "# # print(len(driveway))\n", - "# ax.plot(road_line[:, 0], road_line[:, 1], color=\"orange\", label=\"road_line\")\n", - "\n", - "# ax.set_aspect(\"equal\")\n", - "\n", - "# ax.set_xlim(lane_[:, 0].min() - 10, lane_[:, 0].max() + 10)\n", - "# ax.set_ylim(lane_[:, 1].min() - 10, lane_[:, 1].max() + 10)\n", - "\n", - "# lanes_map_features[lane_id]" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/notebooks/waymo_perception/testing.ipynb b/notebooks/waymo_perception/testing.ipynb deleted file mode 100644 index 19745f8f..00000000 --- a/notebooks/waymo_perception/testing.ipynb +++ /dev/null @@ -1,279 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "0", - "metadata": {}, - "outputs": [], - "source": [ - "from waymo_open_dataset import dataset_pb2\n", - "\n", - "import json\n", - "import os\n", - "\n", - "import numpy as np\n", - "import tensorflow as tf\n", - "from PIL import Image\n", - "from tqdm import tqdm\n", - "from waymo_open_dataset import label_pb2\n", - "from waymo_open_dataset.protos import camera_segmentation_pb2 as cs_pb2\n", - "from waymo_open_dataset.utils import box_utils\n", - "\n", - "\n", - "import matplotlib.pyplot as plt\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"-1\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1", - "metadata": {}, - "outputs": [], - "source": [ - "from pathlib import Path\n", - "\n", - "\n", - "WOPD_DATA_ROOT = Path(\"/media/nvme1/waymo_perception/training\")\n", - "\n", - "\n", - "tfrecords_file_list = list(WOPD_DATA_ROOT.glob(\"*.tfrecord\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2", - "metadata": {}, - "outputs": [], - "source": [ - "import io\n", - "from pyquaternion import Quaternion\n", - "\n", - "from d123.common.geometry.base import StateSE3\n", - "from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3\n", - "\n", - "from waymo_open_dataset.utils import frame_utils\n", - "\n", - "\n", - "# Frame attributes:\n", - "# context: \n", - "# timestamp_micros: \n", - "# pose: \n", - "# images: List with 5 images\n", - "# lasers: \n", - "# Length: 5\n", - "# laser_labels: \n", - "# Length: 0\n", - "# projected_lidar_labels: 
\n", - "# Length: 0\n", - "# camera_labels: \n", - "# Length: 0\n", - "# no_label_zones: \n", - "# Length: 0\n", - "# map_features: \n", - "# Length: 0\n", - "# map_pose_offset: \n", - "\n", - "file_idx = 0\n", - "pathname = tfrecords_file_list[file_idx]\n", - "dataset = tf.data.TFRecordDataset(pathname, compression_type=\"\")\n", - "num_frames = sum(1 for _ in dataset)\n", - "\n", - "\n", - "def read_jpg_image(data: bytes) -> np.ndarray:\n", - " \"\"\"Read a JPEG image from bytes and return it as a numpy array.\"\"\"\n", - " image = Image.open(io.BytesIO(data))\n", - " return np.array(image)\n", - "\n", - "dataset = tf.data.TFRecordDataset(pathname, compression_type=\"\")\n", - "\n", - "\n", - "for frame_idx, data in enumerate(dataset):\n", - "\n", - " frame = dataset_pb2.Frame()\n", - " frame.ParseFromString(data.numpy())\n", - " print(frame.context)\n", - "\n", - " # # plt.show()\n", - " if frame_idx == 0:\n", - " break\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3", - "metadata": {}, - "outputs": [], - "source": [ - "for laser_calibration in frame.context.laser_calibrations:\n", - " print(np.array(laser_calibration.extrinsic.transform).reshape(4, 4))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4", - "metadata": {}, - "outputs": [], - "source": [ - "for frame_idx, data in enumerate(dataset):\n", - " frame = dataset_pb2.Frame()\n", - " frame.ParseFromString(data.numpy())\n", - " if frame_idx == 2:\n", - " break\n", - "\n", - "print(\"Ego\")\n", - "ego_transform = np.array(frame.pose.transform).reshape(4, 4)\n", - "print(ego_transform[:3, 3])\n", - "\n", - "# 1 [ 1.5441613 -0.02302364 2.11557864]\n", - "# 2 [1.49672397 0.0954948 2.11616463]\n", - "# 3 [ 1.49442485 -0.09637497 2.11519385]\n", - "# 4 [1.43213651 0.11612398 2.11625087]\n", - "# 5 [ 1.42936162 -0.11545043 2.1150792 ]\n", - "\n", - "\n", - "# frame.map_pose_offset" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5", - "metadata": {}, - "outputs": [], - "source": [ - "plt.figure(figsize=(64, 20))\n", - "\n", - "\n", - "def plot_range_image_helper(data, name, layout, vmin=0, vmax=1, cmap=\"gray\"):\n", - " \"\"\"Plots range image.\n", - "\n", - " Args:\n", - " data: range image data\n", - " name: the image title\n", - " layout: plt layout\n", - " vmin: minimum value of the passed data\n", - " vmax: maximum value of the passed data\n", - " cmap: color map\n", - " \"\"\"\n", - " plt.subplot(*layout)\n", - " plt.imshow(data, cmap=cmap, vmin=vmin, vmax=vmax)\n", - " plt.title(name)\n", - " plt.grid(False)\n", - " plt.axis(\"off\")\n", - "\n", - "\n", - "def get_range_image(laser_name, return_index):\n", - " \"\"\"Returns range image given a laser name and its return index.\"\"\"\n", - " return range_images[laser_name][return_index]\n", - "\n", - "\n", - "# def show_range_image(range_image, layout_index_start=1):\n", - "# \"\"\"Shows range image.\n", - "\n", - "# Args:\n", - "# range_image: the range image data from a given lidar of type MatrixFloat.\n", - "# layout_index_start: layout offset\n", - "# \"\"\"\n", - "# range_image_tensor = tf.convert_to_tensor(range_image.data)\n", - "# range_image_tensor = tf.reshape(range_image_tensor, range_image.shape.dims)\n", - "# lidar_image_mask = tf.greater_equal(range_image_tensor, 0)\n", - "# range_image_tensor = tf.where(lidar_image_mask, range_image_tensor, tf.ones_like(range_image_tensor) * 1e10)\n", - "# range_image_range = range_image_tensor[..., 0]\n", - "# range_image_intensity = 
range_image_tensor[..., 1]\n", - "# range_image_elongation = range_image_tensor[..., 2]\n", - "# plot_range_image_helper(range_image_range.numpy(), \"range\", [8, 1, layout_index_start], vmax=75, cmap=\"gray\")\n", - "# plot_range_image_helper(\n", - "# range_image_intensity.numpy(), \"intensity\", [8, 1, layout_index_start + 1], vmax=1.5, cmap=\"gray\"\n", - "# )\n", - "# plot_range_image_helper(\n", - "# range_image_elongation.numpy(), \"elongation\", [8, 1, layout_index_start + 2], vmax=1.5, cmap=\"gray\"\n", - "# )\n", - "\n", - "\n", - "def show_range_image(range_image, layout_index_start=1):\n", - " \"\"\"Shows range image.\n", - "\n", - " Args:\n", - " range_image: the range image data from a given lidar of type MatrixFloat.\n", - " layout_index_start: layout offset\n", - " \"\"\"\n", - " range_image_tensor = np.array([data for data in range_image.data]).reshape(range_image.shape.dims)\n", - " lidar_image_mask = np.greater_equal(range_image_tensor, 0)\n", - " range_image_tensor = np.where(lidar_image_mask, range_image_tensor, np.ones_like(range_image_tensor) * 1e10)\n", - " range_image_range = range_image_tensor[..., 0]\n", - " range_image_intensity = range_image_tensor[..., 1]\n", - " range_image_elongation = range_image_tensor[..., 2]\n", - " plot_range_image_helper(range_image_range, \"range\", [8, 1, layout_index_start], vmax=75, cmap=\"gray\")\n", - " plot_range_image_helper(range_image_intensity, \"intensity\", [8, 1, layout_index_start + 1], vmax=1.5, cmap=\"gray\")\n", - " plot_range_image_helper(range_image_elongation, \"elongation\", [8, 1, layout_index_start + 2], vmax=1.5, cmap=\"gray\")\n", - "\n", - "\n", - "frame.lasers.sort(key=lambda laser: laser.name)\n", - "show_range_image(get_range_image(open_dataset.LaserName.TOP, 0), 1)\n", - "# show_range_image(get_range_image(open_dataset.LaserName.TOP, 1), 4)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6", - "metadata": {}, - "outputs": [], - "source": [ - "from d123.common.datatypes.time.time_point import TimePoint\n", - "\n", - "\n", - "for frame_idx, data in enumerate(dataset):\n", - " frame = dataset_pb2.Frame()\n", - " frame.ParseFromString(data.numpy())\n", - " if frame_idx == 4:\n", - " break\n", - " break\n", - "\n", - "\n", - "# for calibration in frame.context.camera_calibrations:\n", - "\n", - "frame.timestamp_micros, frame.images[0].pose_timestamp\n", - "# frame.images[0]\n", - "\n", - "frame_timestamp = TimePoint.from_us(frame.timestamp_micros)\n", - "image_timestamp = TimePoint.from_s(frame.images[0].pose_timestamp)\n", - "frame_timestamp.time_s, frame_timestamp.time_s" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "d123", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.11" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/pyproject.toml b/pyproject.toml index d5419010..655a2612 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,58 +12,50 @@ classifiers = [ "Programming Language :: Python :: 3.9", "License :: OSI Approved :: Apache Software License", ] -name = "d123" -version = "v0.0.6" +name = "py123d" +version = "v0.0.7" authors = [{ name = "Daniel Dauner", email = 
"daniel.dauner@gmail.com" }] description = "TODO" readme = "README.md" requires-python = ">=3.9" license = {text = "Apache-2.0"} dependencies = [ - "aioboto3", - "aiofiles", "bokeh", - "casadi", - "control", - "Fiona", "geopandas", - "guppy3", "joblib", "matplotlib", - "nest_asyncio", "numpy", "opencv-python", "pandas", "Pillow", "psutil", "pyarrow", - "pyinstrument", "pyogrio", "pyquaternion", "pytest", "rasterio", "ray", - "retry", "rtree", "scipy", - "selenium", "setuptools", "shapely>=2.0.0", - "SQLAlchemy==1.4.27", - "sympy", - "tornado", "tqdm", - "ujson", "notebook", "pre-commit", - "cachetools", "hydra_colorlog", "hydra-core", "lxml", "trimesh", "viser", + "laspy[lazrs]", + "DracoPy", ] +[project.scripts] +py123d-viser = "py123d.script.run_viser:main" +py123d-conversion = "py123d.script.run_conversion:main" + + [project.optional-dependencies] dev = [ "black", @@ -76,24 +68,54 @@ docs = [ "sphinx-rtd-theme", "sphinx-autobuild", "myst-parser", + "sphinx-copybutton", + "furo", ] nuplan = [ "nuplan-devkit @ git+https://github.com/motional/nuplan-devkit/@nuplan-devkit-v1.2", + "ujson", + "tornado", + "sympy", + "SQLAlchemy==1.4.27", + "selenium", + "nest_asyncio", + "cachetools", + "aioboto3", + "aiofiles", + "casadi", + "control", + "pyinstrument", + "Fiona", + "guppy3", + "retry", +] +nuscenes = [ + "lanelet2", + "nuscenes-devkit==1.2.0", +] +nuscenes_expanded = [ + "nuscenes-devkit==1.2.0", + "pycocotools==2.0.10", + "laspy==2.6.1", + "embreex==2.17.7.post6", + "lanelet2==1.2.2", + "protobuf==4.25.3", + "pycollada==0.9.2", + "vhacdx==0.0.8.post2", + "yourdfpy==0.0.58", ] waymo = [ - "tensorflow==2.11.0", - "waymo-open-dataset-tf-2-11-0", - "intervaltree", + "protobuf==4.21.0", + "tensorflow==2.13.0", + "waymo-open-dataset-tf-2-12-0==1.6.6", ] -av2 = [ - "av2==0.2.1", +ffmpeg = [ + "imageio[ffmpeg]", ] [tool.setuptools.packages.find] -where = ["."] -include = ["d123*"] # Only include d123 package -exclude = ["notebooks*", "tests*", "docs*"] # Explicitly exclude notebooks +where = ["src"] [project.urls] -"Homepage" = "https://github.com/DanielDauner/d123" -"Bug Tracker" = "https://github.com/DanielDauner/d123/issues" +"Homepage" = "https://github.com/DanielDauner/py123d" +"Bug Tracker" = "https://github.com/DanielDauner/py123d/issues" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 4c17c209..00000000 --- a/requirements.txt +++ /dev/null @@ -1,58 +0,0 @@ -# nuplan-devkit @ git+https://github.com/motional/nuplan-devkit/@nuplan-devkit-v1.2 - -# # # nuplan requirements -# aioboto3 -# aiofiles -# bokeh -# casadi -# control -# Fiona -# geopandas -# guppy3 -# joblib -# matplotlib -# nest_asyncio -# numpy -# opencv-python -# pandas -# Pillow -# psutil -# pyarrow -# pyinstrument -# pyogrio -# pyquaternion -# pytest -# rasterio -# ray -# retry -# rtree -# scipy -# selenium -# setuptools -# shapely>=2.0.0 -# SQLAlchemy==1.4.27 -# sympy -# tornado -# tqdm -# ujson -# notebook -# pre-commit -# cachetools - -# # hydra -# hydra_colorlog -# hydra-core - -# # d123 -# lxml -# trimesh -# viser -# gym==0.17.2 -# gymnasium==0.26.3 - -# # torch & lighting -# torch==2.6.0 -# torchvision==0.21.0 -# lightning -# tensorboard -# protobuf==4.25.3 diff --git a/scripts/conversion/av2_sensor_conversion.sh b/scripts/conversion/av2_sensor_conversion.sh new file mode 100644 index 00000000..b386972e --- /dev/null +++ b/scripts/conversion/av2_sensor_conversion.sh @@ -0,0 +1,2 @@ +py123d-conversion datasets=["av2_sensor_dataset"] \ +dataset_paths.av2_data_root="/media/nvme1/argoverse" diff 
diff --git a/scripts/conversion/kitti360_conversion.sh b/scripts/conversion/kitti360_conversion.sh
new file mode 100644
index 00000000..1e939ad5
--- /dev/null
+++ b/scripts/conversion/kitti360_conversion.sh
@@ -0,0 +1,3 @@
+export KITTI360_DATA_ROOT="/home/daniel/kitti_360/KITTI-360"
+
+py123d-conversion datasets=["kitti360_dataset"] map_writer.remap_ids=true
diff --git a/scripts/conversion/nuplan_mini_conversion.sh b/scripts/conversion/nuplan_mini_conversion.sh
new file mode 100644
index 00000000..13ec7a53
--- /dev/null
+++ b/scripts/conversion/nuplan_mini_conversion.sh
@@ -0,0 +1 @@
+py123d-conversion datasets=["nuplan_mini_dataset"]
diff --git a/scripts/conversion/nuscenes_mini_conversion.sh b/scripts/conversion/nuscenes_mini_conversion.sh
new file mode 100644
index 00000000..b9a9a7d1
--- /dev/null
+++ b/scripts/conversion/nuscenes_mini_conversion.sh
@@ -0,0 +1,3 @@
+export NUSCENES_DATA_ROOT="/home/daniel/nuscenes_mini/"
+
+py123d-conversion datasets=["nuscenes_mini_dataset"] map_writer.remap_ids=true
diff --git a/scripts/conversion/pandaset_conversion.sh b/scripts/conversion/pandaset_conversion.sh
new file mode 100644
index 00000000..0131e60f
--- /dev/null
+++ b/scripts/conversion/pandaset_conversion.sh
@@ -0,0 +1 @@
+py123d-conversion datasets=[pandaset_dataset]
diff --git a/scripts/conversion/wopd_conversion.sh b/scripts/conversion/wopd_conversion.sh
new file mode 100644
index 00000000..e4f4b3d4
--- /dev/null
+++ b/scripts/conversion/wopd_conversion.sh
@@ -0,0 +1,6 @@
+py123d-conversion datasets=[wopd_dataset]
+
+
+# pip install protobuf==6.30.2
+# pip install tensorflow==2.13.0
+# pip install waymo-open-dataset-tf-2-12-0==1.6.6
diff --git a/scripts/dataset/run_log_caching.sh b/scripts/dataset/run_log_caching.sh
deleted file mode 100644
index 34ff923c..00000000
--- a/scripts/dataset/run_log_caching.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-
-
-
-python $D123_DEVKIT_ROOT/d123/script/run_dataset_conversion.py \
-experiment_name="caching" \
-worker.threads_per_node=32
-# worker=single_machine_thread_pool
diff --git a/scripts/download/download_kitti_360.sh b/scripts/download/download_kitti_360.sh
new file mode 100644
index 00000000..1cb3e540
--- /dev/null
+++ b/scripts/download/download_kitti_360.sh
@@ -0,0 +1,86 @@
+# 2D data & labels
+# ----------------------------------------------------------------------------------------------------------------------
+
+# Fisheye Images (355G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/a1d81d9f7fc7195c937f9ad12e2a2c66441ecb4e/download_2d_fisheye.zip
+
+# Fisheye Calibration Images (11G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/data_2d_raw/data_fisheye_calibration.zip
+
+
+# Perspective Images for Train & Val (128G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/a1d81d9f7fc7195c937f9ad12e2a2c66441ecb4e/download_2d_perspective.zip
+
+# Test Semantic (1.5G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/data_2d_raw/data_2d_test.zip
+
+# Test NVS 50% Drop (0.3G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/71f967e900f4e7c2e036a542f150effa31909b53/data_2d_nvs_drop50.zip
+
+# Test NVS 90% Drop (0.2G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/71f967e900f4e7c2e036a542f150effa31909b53/data_2d_nvs_drop90.zip
+
+# Test SLAM (14G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/data_2d_raw/data_2d_test_slam.zip
+
+
+# Semantics of Left Perspective Camera (1.8G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ed180d24c0a144f2f1ac71c2c655a3e986517ed8/data_2d_semantics.zip
+
+# Semantics of Right Perspective Camera (1.8G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ed180d24c0a144f2f1ac71c2c655a3e986517ed8/data_2d_semantics_image_01.zip
+
+
+# Confidence of Left Perspective Camera (44G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ed180d24c0a144f2f1ac71c2c655a3e986517ed8/data_2d_confidence.zip
+
+# Confidence of Right Perspective Camera (44G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ed180d24c0a144f2f1ac71c2c655a3e986517ed8/data_2d_confidence_image_01.zip
+
+
+
+# 3D data & labels
+# ----------------------------------------------------------------------------------------------------------------------
+
+# Raw Velodyne Scans (119G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/a1d81d9f7fc7195c937f9ad12e2a2c66441ecb4e/download_3d_velodyne.zip
+
+# Test SLAM (12G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/data_3d_raw/data_3d_test_slam.zip
+
+# Test Completion (35M)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/6489aabd632d115c4280b978b2dcf72cb0142ad9/data_3d_ssc_test.zip
+
+
+# Raw SICK Scans (0.4G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/a1d81d9f7fc7195c937f9ad12e2a2c66441ecb4e/download_3d_sick.zip
+
+
+# Accumulated Point Clouds for Train & Val (12G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/6489aabd632d115c4280b978b2dcf72cb0142ad9/data_3d_semantics.zip
+
+# Test Semantic (1.2G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/6489aabd632d115c4280b978b2dcf72cb0142ad9/data_3d_semantics_test.zip
+
+
+# 3D Bounding Boxes (30M)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ffa164387078f48a20f0188aa31b0384bb19ce60/data_3d_bboxes.zip
+
+
+
+# Calibrations & Poses
+# ----------------------------------------------------------------------------------------------------------------------
+
+# Calibrations (3K)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/384509ed5413ccc81328cf8c55cc6af078b8c444/calibration.zip
+
+
+# Vehicle Poses (8.9M)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/89a6bae3c8a6f789e12de4807fc1e8fdcf182cf4/data_poses.zip
+
+
+# OXTS Sync Measurements (37.3M)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/89a6bae3c8a6f789e12de4807fc1e8fdcf182cf4/data_poses_oxts.zip
+
+# OXTS Raw Measurements (0.4G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/89a6bae3c8a6f789e12de4807fc1e8fdcf182cf4/data_poses_oxts_extract.zip
diff --git a/scripts/install/install_smart.sh b/scripts/install/install_smart.sh
deleted file mode 100644
index 0cc90c0f..00000000
--- a/scripts/install/install_smart.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-wget https://data.pyg.org/whl/torch-2.6.0%2Bcu124/torch_cluster-1.6.3+pt26cu124-cp312-cp312-linux_x86_64.whl
-python3 -m pip install torch_cluster-1.6.3+pt26cu124-cp312-cp312-linux_x86_64.whl
-rm torch_cluster-1.6.3+pt26cu124-cp312-cp312-linux_x86_64.whl
-
-wget https://data.pyg.org/whl/torch-2.6.0%2Bcu124/torch_scatter-2.1.2%2Bpt26cu124-cp312-cp312-linux_x86_64.whl
-python3 -m pip install torch_scatter-2.1.2+pt26cu124-cp312-cp312-linux_x86_64.whl
-rm torch_scatter-2.1.2+pt26cu124-cp312-cp312-linux_x86_64.whl
-
-wget https://data.pyg.org/whl/torch-2.6.0%2Bcu124/torch_sparse-0.6.18+pt26cu124-cp312-cp312-linux_x86_64.whl
-python3 -m pip install torch_sparse-0.6.18+pt26cu124-cp312-cp312-linux_x86_64.whl
-rm torch_sparse-0.6.18+pt26cu124-cp312-cp312-linux_x86_64.whl
-
-wget https://data.pyg.org/whl/torch-2.6.0%2Bcu124/torch_spline_conv-1.2.2%2Bpt26cu124-cp312-cp312-linux_x86_64.whl`
-python3 -m pip install torch_spline_conv-1.2.2+pt26cu124-cp312-cp312-linux_x86_64.whl
-rm torch_spline_conv-1.2.2+pt26cu124-cp312-cp312-linux_x86_64.whl
-
-pip install torch_geometric
diff --git a/scripts/preprocessing/preprocess_smart.sh b/scripts/preprocessing/preprocess_smart.sh
deleted file mode 100644
index c4eb615f..00000000
--- a/scripts/preprocessing/preprocess_smart.sh
+++ /dev/null
@@ -1,17 +0,0 @@
-
-
-CACHE_PATH=/home/daniel/cache_test
-
-
-python $D123_DEVKIT_ROOT/d123/script/run_preprocessing.py \
-experiment_name="smart_preprocessing" \
-scene_filter="nuplan_mini_train" \
-scene_filter.max_num_scenes=1000 \
-cache_path="${CACHE_PATH}/training"
-
-
-python $D123_DEVKIT_ROOT/d123/script/run_preprocessing.py \
-experiment_name="smart_preprocessing" \
-scene_filter="nuplan_mini_val" \
-scene_filter.max_num_scenes=1000 \
-cache_path="${CACHE_PATH}/validation"
diff --git a/scripts/simulation/run_sim_agents.sh b/scripts/simulation/run_sim_agents.sh
deleted file mode 100644
index 8f245e04..00000000
--- a/scripts/simulation/run_sim_agents.sh
+++ /dev/null
@@ -1,7 +0,0 @@
-# nuplan_sim_agent_mini
-
-
-python $D123_DEVKIT_ROOT/d123/script/run_simulation.py \
-scene_filter="nuplan_sim_agent" \
-experiment_name="sim_agent" \
-# worker=single_machine_thread_pool
diff --git a/scripts/training/train_smart.sh b/scripts/training/train_smart.sh
deleted file mode 100644
index 3b9fbedb..00000000
--- a/scripts/training/train_smart.sh
+++ /dev/null
@@ -1,6 +0,0 @@
-
-
-
-python $D123_DEVKIT_ROOT/d123/script/run_training.py \
-experiment_name="smart_forecasting" \
-# worker=single_machine_thread_pool
diff --git a/scripts/viz/run_viser.sh b/scripts/viz/run_viser.sh
index 0dafc0a3..436e7643 100644
--- a/scripts/viz/run_viser.sh
+++ b/scripts/viz/run_viser.sh
@@ -1,6 +1,6 @@
-python $D123_DEVKIT_ROOT/d123/script/run_viser.py \
+py123d-viser \
 scene_filter=log_scenes \
 scene_filter.shuffle=True \
 worker=sequential
diff --git a/src/py123d/__init__.py b/src/py123d/__init__.py
new file mode 100644
index 00000000..6526deb4
--- /dev/null
+++ b/src/py123d/__init__.py
@@ -0,0 +1 @@
+__version__ = "0.0.7"
diff --git a/d123/common/__init__.py b/src/py123d/common/__init__.py
similarity index 100%
rename from d123/common/__init__.py
rename to src/py123d/common/__init__.py
diff --git a/d123/common/multithreading/ray_execution.py b/src/py123d/common/multithreading/ray_execution.py
similarity index 98%
rename from d123/common/multithreading/ray_execution.py
rename to src/py123d/common/multithreading/ray_execution.py
index 4e44c56f..2ca1d472 100644
--- a/d123/common/multithreading/ray_execution.py
+++ b/src/py123d/common/multithreading/ray_execution.py
@@ -10,7 +10,7 @@
 from ray.remote_function import RemoteFunction
 from tqdm import tqdm
 
-from d123.common.multithreading.worker_pool import Task
+from py123d.common.multithreading.worker_pool import Task
 
 
 def _ray_object_iterator(initial_ids: List[ray.ObjectRef]) -> Iterator[Tuple[ray.ObjectRef, Any]]:
diff --git a/d123/common/multithreading/worker_parallel.py b/src/py123d/common/multithreading/worker_parallel.py
similarity index 97%
rename from d123/common/multithreading/worker_parallel.py
rename to src/py123d/common/multithreading/worker_parallel.py
index 737bd486..9183a0fc 100644
--- a/d123/common/multithreading/worker_parallel.py
+++ b/src/py123d/common/multithreading/worker_parallel.py
@@ -6,7 +6,7 @@
 from tqdm import tqdm
 
-from d123.common.multithreading.worker_pool import (
+from py123d.common.multithreading.worker_pool import (
     Task,
     WorkerPool,
     WorkerResources,
diff --git a/d123/common/multithreading/worker_pool.py b/src/py123d/common/multithreading/worker_pool.py
similarity index 100%
rename from d123/common/multithreading/worker_pool.py
rename to src/py123d/common/multithreading/worker_pool.py
diff --git a/d123/common/multithreading/worker_ray.py b/src/py123d/common/multithreading/worker_ray.py
similarity index 97%
rename from d123/common/multithreading/worker_ray.py
rename to src/py123d/common/multithreading/worker_ray.py
index 8bb12b04..48b06f77 100644
--- a/d123/common/multithreading/worker_ray.py
+++ b/src/py123d/common/multithreading/worker_ray.py
@@ -7,8 +7,8 @@
 import ray
 from psutil import cpu_count
 
-from d123.common.multithreading.ray_execution import ray_map
-from d123.common.multithreading.worker_pool import Task, WorkerPool, WorkerResources
+from py123d.common.multithreading.ray_execution import ray_map
+from py123d.common.multithreading.worker_pool import Task, WorkerPool, WorkerResources
 
 logger = logging.getLogger(__name__)
diff --git a/d123/common/multithreading/worker_sequential.py b/src/py123d/common/multithreading/worker_sequential.py
similarity index 96%
rename from d123/common/multithreading/worker_sequential.py
rename to src/py123d/common/multithreading/worker_sequential.py
index e3d436cb..c0106e86 100644
--- a/d123/common/multithreading/worker_sequential.py
+++ b/src/py123d/common/multithreading/worker_sequential.py
@@ -4,7 +4,7 @@
 from tqdm import tqdm
 
-from d123.common.multithreading.worker_pool import (
+from py123d.common.multithreading.worker_pool import (
     Task,
     WorkerPool,
     WorkerResources,
diff --git a/d123/common/multithreading/worker_utils.py b/src/py123d/common/multithreading/worker_utils.py
similarity index 95%
rename from d123/common/multithreading/worker_utils.py
rename to src/py123d/common/multithreading/worker_utils.py
index fbe0a753..ce79d6df 100644
--- a/d123/common/multithreading/worker_utils.py
+++ b/src/py123d/common/multithreading/worker_utils.py
@@ -3,7 +3,7 @@
 import numpy as np
 from psutil import cpu_count
 
-from d123.common.multithreading.worker_pool import Task, WorkerPool
+from py123d.common.multithreading.worker_pool import Task, WorkerPool
 
 
 def chunk_list(input_list: List[Any], num_chunks: Optional[int] = None) -> List[List[Any]]:
diff --git a/d123/common/datatypes/detection/__init__.py b/src/py123d/common/utils/__init__.py
similarity index 100%
rename from d123/common/datatypes/detection/__init__.py
rename to src/py123d/common/utils/__init__.py
diff --git a/src/py123d/common/utils/arrow_helper.py b/src/py123d/common/utils/arrow_helper.py
new file mode 100644
index 00000000..7e1b8e47
--- /dev/null
+++ b/src/py123d/common/utils/arrow_helper.py
@@ -0,0 +1,45 @@
+from functools import lru_cache
+from pathlib import Path
+from typing import Final, Union
+
+import pyarrow as pa
+
+# TODO: Tune Parameters and add to config?
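+# Usage sketch (illustrative only; the file path below is hypothetical):
+#   table = get_lru_cached_arrow_table("/data/logs/some_log.arrow")
+#   timestamps = table.column(0).to_numpy()
+# Repeated calls with the same path hit the LRU cache instead of re-opening a
+# new memory map; see the NOTE in get_lru_cached_arrow_table below.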
+MAX_LRU_CACHED_TABLES: Final[int] = 4096
+
+
+def open_arrow_table(arrow_file_path: Union[str, Path]) -> pa.Table:
+    with pa.memory_map(str(arrow_file_path), "rb") as source:
+        table: pa.Table = pa.ipc.open_file(source).read_all()
+    return table
+
+
+def write_arrow_table(table: pa.Table, arrow_file_path: Union[str, Path]) -> None:
+    # compression: Optional[Literal["lz4", "zstd"]] = "lz4"
+    # codec = pa.Codec("zstd", compression_level=100) if compression is not None else None
+    # options = pa.ipc.IpcWriteOptions(compression=codec)
+    with pa.OSFile(str(arrow_file_path), "wb") as sink:
+        # with pa.ipc.new_file(sink, table.schema, options=options) as writer:
+        with pa.ipc.new_file(sink, table.schema) as writer:
+            writer.write_table(table)
+
+
+@lru_cache(maxsize=MAX_LRU_CACHED_TABLES)
+def get_lru_cached_arrow_table(arrow_file_path: Union[str, Path]) -> pa.Table:
+    """Get a memory-mapped arrow table from the LRU cache or load it from disk.
+
+    :param arrow_file_path: The path to the arrow file.
+    :return: The cached memory-mapped arrow table.
+    """
+
+    # NOTE @DanielDauner: The number of memory maps that a process can have is limited by the
+    # linux kernel parameter /proc/sys/vm/max_map_count (default: 65530 in most distributions).
+    # Thus, we cache memory-mapped arrow tables with an LRU cache to avoid
+    # hitting this limit, specifically since many scenes/routines access the same table.
+    # During cache eviction, the functools implementation calls __del__ on the
+    # evicted cache entry, which closes the memory map, if no other references to the table exist.
+    # Thus it is beneficial to keep track of all references to the table, otherwise the memory map
+    # will not be closed and the limit can still be hit.
+    # Not fully satisfied with this solution. Please reach out if you have a better idea.
+
+    return open_arrow_table(str(arrow_file_path))
diff --git a/src/py123d/common/utils/dependencies.py b/src/py123d/common/utils/dependencies.py
new file mode 100644
index 00000000..38d7a88f
--- /dev/null
+++ b/src/py123d/common/utils/dependencies.py
@@ -0,0 +1,18 @@
+from typing import List, Union
+
+
+def check_dependencies(modules: Union[str, List[str]], optional_name: str) -> None:
+    """Checks if the given modules can be imported, otherwise raises an ImportError with an install hint.
+
+    :param modules: Module name or list of module names to check
+    :param optional_name: Name of the optional feature
+    :raises ImportError: If any of the modules cannot be imported
+    """
+    modules = modules if isinstance(modules, list) else [modules]
+    for module in modules:
+        try:
+            __import__(module)
+        except ImportError:
+            raise ImportError(
+                f"Missing '{module}'. Install with: `pip install py123d[{optional_name}]` or `pip install -e .[{optional_name}]`"
+            )
diff --git a/d123/common/utils/enums.py b/src/py123d/common/utils/enums.py
similarity index 63%
rename from d123/common/utils/enums.py
rename to src/py123d/common/utils/enums.py
index 33300c00..9f7d233e 100644
--- a/d123/common/utils/enums.py
+++ b/src/py123d/common/utils/enums.py
@@ -2,6 +2,8 @@
 from enum import IntEnum
 
+from typing import Union
+
 
 class classproperty(object):
     def __init__(self, f):
@@ -27,3 +29,15 @@ def deserialize(cls, key: str) -> SerialIntEnum:
     def from_int(cls, value: int) -> SerialIntEnum:
         """Get the enum from an int."""
         return cls(value)
+
+    @classmethod
+    def from_arbitrary(cls, value: Union[int, str, SerialIntEnum]) -> SerialIntEnum:
+        """Get the enum from an int, string, or enum instance."""
+        if isinstance(value, cls):
+            return value
+        elif isinstance(value, int):
+            return cls.from_int(value)
+        elif isinstance(value, str):
+            return cls.deserialize(value)
+        else:
+            raise ValueError(f"Invalid value for {cls.__name__}: {value}")
diff --git a/src/py123d/common/utils/mixin.py b/src/py123d/common/utils/mixin.py
new file mode 100644
index 00000000..2935f99f
--- /dev/null
+++ b/src/py123d/common/utils/mixin.py
@@ -0,0 +1,60 @@
+from __future__ import annotations
+
+import numpy as np
+import numpy.typing as npt
+
+# import pyarrow as pa
+
+
+class ArrayMixin:
+    """Mixin class for object entities."""
+
+    @classmethod
+    def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> ArrayMixin:
+        """Create an instance from a NumPy array."""
+        raise NotImplementedError
+
+    @classmethod
+    def from_list(cls, values: list) -> ArrayMixin:
+        """Create an instance from a list of values."""
+        return cls.from_array(np.array(values, dtype=np.float64), copy=False)
+
+    @property
+    def array(self) -> npt.NDArray[np.float64]:
+        """The array representation of the geometric entity."""
+        raise NotImplementedError
+
+    def __array__(self, dtype: npt.DTypeLike = None, copy: bool = False) -> npt.NDArray:
+        array = self.array
+        return array if dtype is None else array.astype(dtype=dtype, copy=copy)
+
+    def __len__(self) -> int:
+        """Return the length of the array."""
+        return len(self.array)
+
+    def __getitem__(self, key):
+        """Allow indexing into the array."""
+        return self.array[key]
+
+    def __eq__(self, other) -> bool:
+        """Equality comparison based on array values."""
+        if isinstance(other, ArrayMixin):
+            return np.array_equal(self.array, other.array)
+        return False
+
+    @property
+    def shape(self) -> tuple:
+        """Return the shape of the array."""
+        return self.array.shape
+
+    def tolist(self) -> list:
+        """Convert the array to a Python list."""
+        return self.array.tolist()
+
+    def copy(self) -> ArrayMixin:
+        """Return a copy of the object with a copied array."""
+        return self.__class__.from_array(self.array, copy=True)
+
+    def __repr__(self) -> str:
+        """String representation of the ArrayMixin instance."""
+        return f"{self.__class__.__name__}(array={self.array})"
diff --git a/d123/common/utils/timer.py b/src/py123d/common/utils/timer.py
similarity index 90%
rename from d123/common/utils/timer.py
rename to src/py123d/common/utils/timer.py
index adcc9752..17558977 100644
--- a/d123/common/utils/timer.py
+++ b/src/py123d/common/utils/timer.py
@@ -55,30 +55,21 @@ def end(self) -> None:
 
         self._time_logs[self._end_key].append(time.perf_counter() - self._start_time)
 
-    def stats(self, verbose: bool = True) -> Optional[pd.DataFrame]:
+    def to_pandas(self) -> Optional[pd.DataFrame]:
         """
         Returns a DataFrame with statistics of the logged times.
-        :param verbose: whether to print the timings, defaults to True
-        :return: pandas dataframe.F
+        :return: pandas dataframe.
         """
         statistics = {}
-
         for key, timings in self._time_logs.items():
-
             timings_array = np.array(timings)
             timings_statistics = {}
-
             for name, function in self._statistic_functions.items():
                 timings_statistics[name] = function(timings_array)
-
             statistics[key] = timings_statistics
-
         dataframe = pd.DataFrame.from_dict(statistics).transpose()
-
-        if verbose:
-            print(dataframe.to_string())
-
         return dataframe
 
     def info(self) -> Dict[str, float]:
@@ -96,3 +87,8 @@ def flush(self) -> None:
         self._time_logs: Dict[str, List[float]] = {}
         self._start_time: Optional[float] = None
         self._iteration_time: Optional[float] = None
+
+    def __str__(self) -> str:
+        """String representation of the Timer."""
+        dataframe = self.to_pandas()
+        return dataframe.to_string() if dataframe is not None else "No timings logged"
diff --git a/src/py123d/common/utils/uuid_utils.py b/src/py123d/common/utils/uuid_utils.py
new file mode 100644
index 00000000..d4d2678a
--- /dev/null
+++ b/src/py123d/common/utils/uuid_utils.py
@@ -0,0 +1,25 @@
+import uuid
+from typing import Final, Optional
+
+# Fixed namespace UUID for all UUIDs generated by 123D, do not change!
+UUID_NAMESPACE_123D: Final[uuid.UUID] = uuid.UUID("123D123D-123D-123D-123D-123D123D123D")
+
+
+def create_deterministic_uuid(split: str, log_name: str, timestamp_us: int, misc: Optional[str] = None) -> uuid.UUID:
+    """Create a universally unique identifier (UUID) based on identifying fields.
+
+    :param split: The data split (in the format {dataset_name}_{train, val, test})
+    :param log_name: The name of the log without file extension
+    :param timestamp_us: The timestamp in microseconds
+    :param misc: Any additional information to include in the UUID, defaults to None
+    :return: The generated deterministic UUID
+    """
+    # https://en.wikipedia.org/wiki/Universally_unique_identifier#Versions_3_and_5_(namespace_name-based)
+
+    # Create a unique string from all identifying fields
+    unique_string = f"{split}:{log_name}:{timestamp_us}"
+    if misc:
+        unique_string += f":{misc}"
+
+    # Generate UUIDv5 (SHA-1 based, deterministic)
+    return uuid.uuid5(UUID_NAMESPACE_123D, unique_string)
diff --git a/d123/common/datatypes/recording/__init__.py b/src/py123d/conversion/__init__.py
similarity index 100%
rename from d123/common/datatypes/recording/__init__.py
rename to src/py123d/conversion/__init__.py
diff --git a/src/py123d/conversion/abstract_dataset_converter.py b/src/py123d/conversion/abstract_dataset_converter.py
new file mode 100644
index 00000000..25052da9
--- /dev/null
+++ b/src/py123d/conversion/abstract_dataset_converter.py
@@ -0,0 +1,40 @@
+import abc
+
+from py123d.conversion.dataset_converter_config import DatasetConverterConfig
+from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter
+from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
+
+
+class AbstractDatasetConverter(abc.ABC):
+    """Abstract base class for dataset converters.
+
+    Subclasses implement all dataset-specific logic for converting one source dataset.
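+
+    A typical driver loop might look as follows (an illustrative sketch; the
+    subclass and writer instances are hypothetical, not part of this module):
+
+        converter = SomeDatasetConverter(config)
+        for map_index in range(converter.get_number_of_maps()):
+            converter.convert_map(map_index, map_writer)
+        for log_index in range(converter.get_number_of_logs()):
+            converter.convert_log(log_index, log_writer)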
+ + """ + + def __init__(self, dataset_converter_config: DatasetConverterConfig) -> None: + self.dataset_converter_config = dataset_converter_config + + @abc.abstractmethod + def get_number_of_maps(self) -> int: + """Returns the number of available raw data maps for conversion.""" + + @abc.abstractmethod + def get_number_of_logs(self) -> int: + """Returns the number of available raw data logs for conversion.""" + + @abc.abstractmethod + def convert_map(self, map_index: int, map_writer: AbstractMapWriter) -> None: + """ + Convert a single map in raw data format to the uniform 123D format. + :param map_index: The index of the map to convert. + :param map_writer: The map writer to use for writing the converted map. + """ + + @abc.abstractmethod + def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: + """ + Convert a single log in raw data format to the uniform 123D format. + :param log_index: The index of the log to convert. + :param log_writer: The log writer to use for writing the converted log. + """ diff --git a/src/py123d/conversion/dataset_converter_config.py b/src/py123d/conversion/dataset_converter_config.py new file mode 100644 index 00000000..f9264cd7 --- /dev/null +++ b/src/py123d/conversion/dataset_converter_config.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Literal + + +@dataclass +class DatasetConverterConfig: + + force_log_conversion: bool = False + force_map_conversion: bool = False + + # Map + include_map: bool = False + + # Ego + include_ego: bool = False + + # Box Detections + include_box_detections: bool = False + include_box_lidar_points: bool = False + + # Traffic Lights + include_traffic_lights: bool = False + + # Pinhole Cameras + include_pinhole_cameras: bool = False + pinhole_camera_store_option: Literal["path", "binary", "mp4"] = "path" + + # Fisheye MEI Cameras + include_fisheye_mei_cameras: bool = False + fisheye_mei_camera_store_option: Literal["path", "binary", "mp4"] = "path" + + # LiDARs + include_lidars: bool = False + lidar_store_option: Literal["path", "path_merged", "binary"] = "path" + + # Scenario tag / Route + # NOTE: These are only supported for nuPlan. Consider removing or expanding support. + include_scenario_tags: bool = False + include_route: bool = False + + def __post_init__(self): + + assert self.pinhole_camera_store_option in [ + "path", + "binary", + "mp4", + ], f"Invalid camera store option, got {self.pinhole_camera_store_option}." + + assert self.lidar_store_option in [ + "path", + "path_merged", + "binary", + ], f"Invalid LiDAR store option, got {self.lidar_store_option}." 
diff --git a/d123/common/datatypes/time/__init__.py b/src/py123d/conversion/datasets/__init__.py
similarity index 100%
rename from d123/common/datatypes/time/__init__.py
rename to src/py123d/conversion/datasets/__init__.py
diff --git a/d123/dataset/dataset_specific/av2/av2_map_conversion.py b/src/py123d/conversion/datasets/av2/av2_map_conversion.py
similarity index 55%
rename from d123/dataset/dataset_specific/av2/av2_map_conversion.py
rename to src/py123d/conversion/datasets/av2/av2_map_conversion.py
index 390cbdb2..a55a9cf4 100644
--- a/d123/dataset/dataset_specific/av2/av2_map_conversion.py
+++ b/src/py123d/conversion/datasets/av2/av2_map_conversion.py
@@ -1,24 +1,31 @@
-import warnings
+import json
 from pathlib import Path
 from typing import Any, Dict, Final, List
 
 import geopandas as gpd
 import numpy as np
 import numpy.typing as npt
-import pandas as pd
 import shapely
 import shapely.geometry as geom
-from flask import json
-
-from d123.common.geometry.base import Point3DIndex
-from d123.common.geometry.line.polylines import Polyline2D, Polyline3D
-from d123.common.geometry.occupancy_map import OccupancyMap2D
-from d123.dataset.conversion.map.road_edge.road_edge_2d_utils import split_line_geometry_by_max_length
-from d123.dataset.conversion.map.road_edge.road_edge_3d_utils import (
-    get_road_edges_3d_from_generic_drivable_area_df,
+
+from py123d.conversion.datasets.av2.utils.av2_constants import AV2_ROAD_LINE_TYPE_MAPPING
+from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
+from py123d.conversion.utils.map_utils.road_edge.road_edge_2d_utils import (
+    get_road_edge_linear_rings,
+    split_line_geometry_by_max_length,
+)
+from py123d.conversion.utils.map_utils.road_edge.road_edge_3d_utils import lift_road_edges_to_3d
+from py123d.datatypes.maps.cache.cache_map_objects import (
+    CacheCrosswalk,
+    CacheGenericDrivable,
+    CacheIntersection,
+    CacheLane,
+    CacheLaneGroup,
+    CacheRoadEdge,
+    CacheRoadLine,
 )
-from d123.dataset.dataset_specific.av2.av2_constants import AV2_ROAD_LINE_TYPE_MAPPING
-from d123.dataset.maps.map_datatypes import MapLayer, RoadEdgeType
+from py123d.datatypes.maps.map_datatypes import RoadEdgeType
+from py123d.geometry import OccupancyMap2D, Point3DIndex, Polyline2D, Polyline3D
 
 LANE_GROUP_MARK_TYPES: List[str] = [
     "DASHED_WHITE",
@@ -30,7 +37,7 @@
 MAX_ROAD_EDGE_LENGTH: Final[float] = 100.0  # TODO: Add to config
 
 
-def convert_av2_map(source_log_path: Path, map_file_path: Path) -> None:
+def convert_av2_map(source_log_path: Path, map_writer: AbstractMapWriter) -> None:
 
     def _extract_polyline(data: List[Dict[str, float]], close: bool = False) -> Polyline3D:
         polyline = np.array([[p["x"], p["y"], p["z"]] for p in data], dtype=np.float64)
@@ -46,7 +53,6 @@ def _extract_polyline(data: List[Dict[str, float]], close: bool = False) -> Poly
         log_map_archive = json.load(f)
 
     drivable_areas: Dict[int, Polyline3D] = {}
-
     for drivable_area_id, drivable_area_dict in log_map_archive["drivable_areas"].items():
         # keys: ["area_boundary", "id"]
         drivable_areas[int(drivable_area_id)] = _extract_polyline(drivable_area_dict["area_boundary"], close=True)
@@ -79,52 +85,16 @@ def _extract_polyline(data: List[Dict[str, float]], close: bool = False) -> Poly
     lane_group_dict = _extract_lane_group_dict(log_map_archive["lane_segments"])
     intersection_dict = _extract_intersection_dict(log_map_archive["lane_segments"], lane_group_dict)
 
-    dataframes: Dict[MapLayer, gpd.GeoDataFrame] = {}
-
-    dataframes[MapLayer.LANE] = get_lane_df(log_map_archive["lane_segments"])
-    dataframes[MapLayer.LANE_GROUP] = get_lane_group_df(lane_group_dict)
-    dataframes[MapLayer.INTERSECTION] = get_intersections_df(intersection_dict)
-    dataframes[MapLayer.CROSSWALK] = get_crosswalk_df(log_map_archive["pedestrian_crossings"])
-    dataframes[MapLayer.GENERIC_DRIVABLE] = get_generic_drivable_df(drivable_areas)
-    dataframes[MapLayer.ROAD_EDGE] = get_road_edge_df(dataframes[MapLayer.GENERIC_DRIVABLE])
-    dataframes[MapLayer.ROAD_LINE] = get_road_line_df(log_map_archive["lane_segments"])
-    # NOTE: AV2 does not provide walkways or carparks, so we create an empty DataFrame.
-    dataframes[MapLayer.WALKWAY] = get_empty_gdf()
-    dataframes[MapLayer.CARPARK] = get_empty_gdf()
-
-    map_file_path.unlink(missing_ok=True)
-    if not map_file_path.parent.exists():
-        map_file_path.parent.mkdir(parents=True, exist_ok=True)
-
-    with warnings.catch_warnings():
-        warnings.filterwarnings("ignore", message="'crs' was not provided")
-        for layer, gdf in dataframes.items():
-            gdf.to_file(map_file_path, layer=layer.serialize(), driver="GPKG", mode="a")
-
-
-def get_empty_gdf() -> gpd.GeoDataFrame:
-    ids = []
-    outlines = []
-    geometries = []
-    data = pd.DataFrame({"id": ids, "outline": outlines})
-    gdf = gpd.GeoDataFrame(data, geometry=geometries)
-    return gdf
-
-
-def get_lane_df(lanes: Dict[int, Any]) -> gpd.GeoDataFrame:
-
-    ids = [int(lane_id) for lane_id in lanes.keys()]
-    lane_types = [0] * len(ids)  # TODO: Add lane types
-    lane_group_ids = []
-    speed_limits_mps = []
-    predecessor_ids = []
-    successor_ids = []
-    left_boundaries = []
-    right_boundaries = []
-    left_lane_ids = []
-    right_lane_ids = []
-    baseline_paths = []
-    geometries = []
+    _write_av2_lanes(log_map_archive["lane_segments"], map_writer)
+    _write_av2_lane_group(lane_group_dict, map_writer)
+    _write_av2_intersections(intersection_dict, map_writer)
+    _write_av2_crosswalks(log_map_archive["pedestrian_crossings"], map_writer)
+    _write_av2_generic_drivable(drivable_areas, map_writer)
+    _write_av2_road_edge(drivable_areas, map_writer)
+    _write_av2_road_lines(log_map_archive["lane_segments"], map_writer)
+
+
+def _write_av2_lanes(lanes: Dict[int, Any], map_writer: AbstractMapWriter) -> None:
 
     def _get_centerline_from_boundaries(
         left_boundary: Polyline3D, right_boundary: Polyline3D, resolution: float = 0.1
@@ -138,206 +108,124 @@ def _get_centerline_from_boundaries(
         return Polyline3D.from_array(np.mean([right_array, left_array], axis=0))
 
     for lane_id, lane_dict in lanes.items():
-        # keys = [
-        #     "id",
-        #     "is_intersection",
-        #     "lane_type",
-        #     "left_lane_boundary",
-        #     "left_lane_mark_type",
-        #     "right_lane_boundary",
-        #     "right_lane_mark_type",
-        #     "successors",
-        #     "predecessors",
-        #     "right_neighbor_id",
-        #     "left_neighbor_id",
-        # ]
 
         lane_centerline = _get_centerline_from_boundaries(
             left_boundary=lane_dict["left_lane_boundary"],
             right_boundary=lane_dict["right_lane_boundary"],
         )
-        lane_speed_limit_mps = None  # TODO: Consider using geo reference to retrieve speed limits.
-        lane_group_ids.append(lane_id)
-        speed_limits_mps.append(lane_speed_limit_mps)
-        predecessor_ids.append(lane_dict["predecessors"])
-        successor_ids.append(lane_dict["successors"])
-        left_boundaries.append(lane_dict["left_lane_boundary"].linestring)
-        right_boundaries.append(lane_dict["right_lane_boundary"].linestring)
-        left_lane_ids.append(lane_dict["left_neighbor_id"])
-        right_lane_ids.append(lane_dict["right_neighbor_id"])
-        baseline_paths.append(lane_centerline.linestring)
-
-        geometry = geom.Polygon(
-            np.vstack(
-                [
-                    lane_dict["left_lane_boundary"].array[:, :2],
-                    lane_dict["right_lane_boundary"].array[:, :2][::-1],
-                ]
+
+        # NOTE @DanielDauner: Some neighbor lane IDs might not be present in the dataset.
+        left_lane_id = lane_dict["left_neighbor_id"] if lane_dict["left_neighbor_id"] in lanes else None
+        right_lane_id = lane_dict["right_neighbor_id"] if lane_dict["right_neighbor_id"] in lanes else None
+
+        map_writer.write_lane(
+            CacheLane(
+                object_id=lane_id,
+                lane_group_id=lane_dict["lane_group_id"],
+                left_boundary=lane_dict["left_lane_boundary"],
+                right_boundary=lane_dict["right_lane_boundary"],
+                centerline=lane_centerline,
+                left_lane_id=left_lane_id,
+                right_lane_id=right_lane_id,
+                predecessor_ids=lane_dict["predecessors"],
+                successor_ids=lane_dict["successors"],
+                speed_limit_mps=None,
+                outline=None,  # Inferred from boundaries
+                geometry=None,
             )
         )
-        geometries.append(geometry)
-
-    data = pd.DataFrame(
-        {
-            "id": ids,
-            "lane_type": lane_types,
-            "lane_group_id": lane_group_ids,
-            "speed_limit_mps": speed_limits_mps,
-            "predecessor_ids": predecessor_ids,
-            "successor_ids": successor_ids,
-            "left_boundary": left_boundaries,
-            "right_boundary": right_boundaries,
-            "left_lane_id": left_lane_ids,
-            "right_lane_id": right_lane_ids,
-            "baseline_path": baseline_paths,
-        }
-    )
-
-    gdf = gpd.GeoDataFrame(data, geometry=geometries)
-    return gdf
 
 
-def get_lane_group_df(lane_group_dict: Dict[int, Any]) -> gpd.GeoDataFrame:
-
-    ids = list(lane_group_dict.keys())
-    lane_ids = []
-    intersection_ids = []
-    predecessor_lane_group_ids = []
-    successor_lane_group_ids = []
-    left_boundaries = []
-    right_boundaries = []
-    geometries = []
+def _write_av2_lane_group(lane_group_dict: Dict[int, Any], map_writer: AbstractMapWriter) -> None:
 
     for lane_group_id, lane_group_values in lane_group_dict.items():
-        lane_ids.append(lane_group_values["lane_ids"])
-        intersection_ids.append(lane_group_values["intersection_id"])
-
-        predecessor_lane_group_ids.append(lane_group_values["predecessor_ids"])
-        successor_lane_group_ids.append(lane_group_values["successor_ids"])
-        left_boundaries.append(lane_group_values["left_boundary"].linestring)
-        right_boundaries.append(lane_group_values["right_boundary"].linestring)
-        geometry = geom.Polygon(
-            np.vstack(
-                [
-                    lane_group_values["left_boundary"].array[:, :2],
-                    lane_group_values["right_boundary"].array[:, :2][::-1],
-                    lane_group_values["left_boundary"].array[0, :2][None, ...],
-                ]
+        map_writer.write_lane_group(
+            CacheLaneGroup(
+                object_id=lane_group_id,
+                lane_ids=lane_group_values["lane_ids"],
+                left_boundary=lane_group_values["left_boundary"],
+                right_boundary=lane_group_values["right_boundary"],
+                intersection_id=lane_group_values["intersection_id"],
+                predecessor_ids=lane_group_values["predecessor_ids"],
+                successor_ids=lane_group_values["successor_ids"],
+                outline=None,
+                geometry=None,
             )
         )
-        geometries.append(geometry)
-
-    data = pd.DataFrame(
-        {
-            "id": ids,
-            "lane_ids": lane_ids,
-            "intersection_id": intersection_ids,
-            "predecessor_ids": predecessor_lane_group_ids,
-            "successor_ids": successor_lane_group_ids,
-            "left_boundary": left_boundaries,
-            "right_boundary": right_boundaries,
-        }
-    )
-    gdf = gpd.GeoDataFrame(data, geometry=geometries)
-    return gdf
-
 
-def get_intersections_df(intersection_dict: Dict[int, Any]) -> gpd.GeoDataFrame:
-    ids = []
-    lane_group_ids = []
-    outlines = []
-    geometries = []
+def _write_av2_intersections(intersection_dict: Dict[int, Any], map_writer: AbstractMapWriter) -> None:
 
     for intersection_id, intersection_values in intersection_dict.items():
-        ids.append(intersection_id)
-        lane_group_ids.append(intersection_values["lane_group_ids"])
-        outlines.append(intersection_values["outline_3d"].linestring)
-        geometries.append(geom.Polygon(intersection_values["outline_3d"].array[:, Point3DIndex.XY]))
-
-    data = pd.DataFrame({"id": ids, "lane_group_ids": lane_group_ids, "outline": outlines})
-    gdf = gpd.GeoDataFrame(data, geometry=geometries)
-    return gdf
-
-
-def get_carpark_df(carparks) -> gpd.GeoDataFrame:
-    ids = list(carparks.keys())
-    outlines = [geom.LineString(outline) for outline in carparks.values()]
-    geometries = [geom.Polygon(outline[..., Point3DIndex.XY]) for outline in carparks.values()]
-
-    data = pd.DataFrame({"id": ids, "outline": outlines})
-    gdf = gpd.GeoDataFrame(data, geometry=geometries)
-    return gdf
-
-
-def get_walkway_df() -> gpd.GeoDataFrame:
-    ids = []
-    geometries = []
-
-    # NOTE: WOPD does not provide walkways, so we create an empty DataFrame.
-    data = pd.DataFrame({"id": ids})
-    gdf = gpd.GeoDataFrame(data, geometry=geometries)
-    return gdf
-
+        map_writer.write_intersection(
+            CacheIntersection(
+                object_id=intersection_id,
+                lane_group_ids=intersection_values["lane_group_ids"],
+                outline=intersection_values["outline_3d"],
+            )
+        )
 
-def get_crosswalk_df(crosswalks: Dict[int, npt.NDArray[np.float64]]) -> gpd.GeoDataFrame:
-    ids = list(crosswalks.keys())
-    outlines = []
-    geometries = []
-    for crosswalk_dict in crosswalks.values():
-        outline = crosswalk_dict["outline"]
-        outlines.append(outline.linestring)
-        geometries.append(geom.Polygon(outline.array[:, Point3DIndex.XY]))
 
-    data = pd.DataFrame({"id": ids, "outline": outlines})
-    gdf = gpd.GeoDataFrame(data, geometry=geometries)
-    return gdf
+def _write_av2_crosswalks(crosswalks: Dict[int, npt.NDArray[np.float64]], map_writer: AbstractMapWriter) -> None:
 
+    for cross_walk_id, crosswalk_dict in crosswalks.items():
+        map_writer.write_crosswalk(
+            CacheCrosswalk(
+                object_id=cross_walk_id,
+                outline=crosswalk_dict["outline"],
+            )
+        )
 
-def get_generic_drivable_df(drivable_areas: Dict[int, Polyline3D]) -> gpd.GeoDataFrame:
-    ids = list(drivable_areas.keys())
-    outlines = [drivable_area.linestring for drivable_area in drivable_areas.values()]
-    geometries = [geom.Polygon(drivable_area.array[:, Point3DIndex.XY]) for drivable_area in drivable_areas.values()]
 
-    data = pd.DataFrame({"id": ids, "outline": outlines})
-    gdf = gpd.GeoDataFrame(data, geometry=geometries)
-    return gdf
+def _write_av2_generic_drivable(drivable_areas: Dict[int, Polyline3D], map_writer: AbstractMapWriter) -> None:
+    for drivable_area_id, drivable_area_outline in drivable_areas.items():
+        map_writer.write_generic_drivable(
+            CacheGenericDrivable(
+                object_id=drivable_area_id,
+                outline=drivable_area_outline,
+            )
+        )
 
 
-def get_road_edge_df(generic_drivable_area_df: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
-    road_edges = get_road_edges_3d_from_generic_drivable_area_df(generic_drivable_area_df)
-    road_edges = split_line_geometry_by_max_length(road_edges, MAX_ROAD_EDGE_LENGTH)
+def _write_av2_road_edge(drivable_areas: Dict[int, Polyline3D], map_writer: AbstractMapWriter) -> None:
 
-    ids = np.arange(len(road_edges), dtype=np.int64).tolist()
-    # TODO @DanielDauner: Figure out if other types should/could be assigned here.
-    road_edge_types = [int(RoadEdgeType.ROAD_EDGE_BOUNDARY)] * len(road_edges)
-    geometries = road_edges
-    return gpd.GeoDataFrame(pd.DataFrame({"id": ids, "road_edge_type": road_edge_types}), geometry=geometries)
+    # NOTE @DanielDauner: We merge all drivable areas in 2D and lift the outlines to 3D.
+    # Currently the method assumes that the drivable areas do not overlap and all road surfaces are included.
 
+    drivable_polygons = [geom.Polygon(drivable_area.array[:, :2]) for drivable_area in drivable_areas.values()]
+    road_edges_2d = get_road_edge_linear_rings(drivable_polygons)
+    non_conflicting_road_edges = lift_road_edges_to_3d(road_edges_2d, list(drivable_areas.values()))
+    non_conflicting_road_edges_linestrings = [polyline.linestring for polyline in non_conflicting_road_edges]
+    road_edges = split_line_geometry_by_max_length(non_conflicting_road_edges_linestrings, MAX_ROAD_EDGE_LENGTH)
 
-def get_road_line_df(lanes: Dict[int, Any]) -> gpd.GeoDataFrame:
+    for idx, road_edge in enumerate(road_edges):
 
-    # TODO @DanielDauner: Allow lanes to reference road line dataframe.
-    ids = []
-    road_lines_type = []
-    geometries = []
+        # TODO @DanielDauner: Figure out if other road edge types should/could be assigned here.
+        map_writer.write_road_edge(
+            CacheRoadEdge(
+                object_id=idx,
+                road_edge_type=RoadEdgeType.ROAD_EDGE_BOUNDARY,
+                polyline=Polyline3D.from_linestring(road_edge),
+            )
+        )
 
-    running_id = 0
+
+def _write_av2_road_lines(lanes: Dict[int, Any], map_writer: AbstractMapWriter) -> None:
+
+    running_road_line_id = 0
     for lane in lanes.values():
         for side in ["left", "right"]:
 
-            # NOTE: We currently ignore lane markings that are NONE in the AV2 dataset.
-            # TODO: Review if the road line system should be changed in the future.
+            # NOTE @DanielDauner: We currently ignore lane markings that are NONE in the AV2 dataset.
             if lane[f"{side}_lane_mark_type"] == "NONE":
                 continue
 
-            ids.append(running_id)
-            road_lines_type.append(AV2_ROAD_LINE_TYPE_MAPPING[lane[f"{side}_lane_mark_type"]])
-            geometries.append(lane[f"{side}_lane_boundary"].linestring)
-            running_id += 1
+            map_writer.write_road_line(
+                CacheRoadLine(
+                    object_id=running_road_line_id,
+                    road_line_type=AV2_ROAD_LINE_TYPE_MAPPING[lane[f"{side}_lane_mark_type"]],
+                    polyline=lane[f"{side}_lane_boundary"],
+                )
+            )
 
-    data = pd.DataFrame({"id": ids, "road_line_type": road_lines_type})
-    gdf = gpd.GeoDataFrame(data, geometry=geometries)
-    return gdf
+            running_road_line_id += 1
 
 
 def _extract_lane_group_dict(lanes: Dict[int, Any]) -> gpd.GeoDataFrame:
@@ -365,6 +253,7 @@ def _get_lane_group_ids_of_lanes_ids(lane_ids: List[str]) -> List[int]:
         predecessor_lanes = []
         for lane_id in lane_group_set:
             lane_dict = lanes[str(lane_id)]
+            lane_dict["lane_group_id"] = lane_group_id  # Assign lane to lane group.
             successor_lanes.extend(lane_dict["successors"])
             predecessor_lanes.extend(lane_dict["predecessors"])
diff --git a/src/py123d/conversion/datasets/av2/av2_sensor_converter.py b/src/py123d/conversion/datasets/av2/av2_sensor_converter.py
new file mode 100644
index 00000000..1e5a192a
--- /dev/null
+++ b/src/py123d/conversion/datasets/av2/av2_sensor_converter.py
@@ -0,0 +1,398 @@
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple, Union
+
+import numpy as np
+import pandas as pd
+
+from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter
+from py123d.conversion.dataset_converter_config import DatasetConverterConfig
+from py123d.conversion.datasets.av2.av2_map_conversion import convert_av2_map
+from py123d.conversion.datasets.av2.utils.av2_constants import AV2_CAMERA_TYPE_MAPPING, AV2_SENSOR_SPLITS
+from py123d.conversion.datasets.av2.utils.av2_helper import (
+    build_sensor_dataframe,
+    build_synchronization_dataframe,
+    find_closest_target_fpath,
+    get_slice_with_timestamp_ns,
+)
+from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, CameraData, LiDARData
+from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
+from py123d.conversion.registry.box_detection_label_registry import AV2SensorBoxDetectionLabel
+from py123d.conversion.registry.lidar_index_registry import AVSensorLiDARIndex
+from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper
+from py123d.datatypes.maps.map_metadata import MapMetadata
+from py123d.datatypes.scene.scene_metadata import LogMetadata
+from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType
+from py123d.datatypes.sensors.pinhole_camera import (
+    PinholeCameraMetadata,
+    PinholeCameraType,
+    PinholeDistortion,
+    PinholeIntrinsics,
+)
+from py123d.datatypes.time.time_point import TimePoint
+from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3
+from py123d.datatypes.vehicle_state.vehicle_parameters import (
+    get_av2_ford_fusion_hybrid_parameters,
+    rear_axle_se3_to_center_se3,
+)
+from py123d.geometry import BoundingBoxSE3Index, StateSE3, Vector3D, Vector3DIndex
+from py123d.geometry.bounding_box import BoundingBoxSE3
+from py123d.geometry.transform.transform_se3 import convert_relative_to_absolute_se3_array
+
+
+class AV2SensorConverter(AbstractDatasetConverter):
+    def __init__(
+        self,
+        splits: List[str],
+        av2_data_root: Union[Path, str],
+        dataset_converter_config: DatasetConverterConfig,
+    ) -> None:
+        super().__init__(dataset_converter_config)
+        assert av2_data_root is not None, "The variable `av2_data_root` must be provided."
+        for split in splits:
+            assert (
+                split in AV2_SENSOR_SPLITS
+            ), f"Split {split} is not available. Available splits: {AV2_SENSOR_SPLITS}"
+
+        self._splits: List[str] = splits
+        self._av2_data_root: Path = Path(av2_data_root)
+        self._log_paths_and_split: List[Tuple[Path, str]] = self._collect_log_paths()
+
+    def _collect_log_paths(self) -> List[Tuple[Path, str]]:
+        log_paths_and_split: List[Tuple[Path, str]] = []
+
+        for split in self._splits:
+            dataset_name = split.split("_")[0]
+            split_type = split.split("_")[-1]
+            assert split_type in ["train", "val", "test"]
+
+            if "av2-sensor" == dataset_name:
+                log_folder = self._av2_data_root / "sensor" / split_type
+            else:
+                raise ValueError(f"Unknown dataset name {dataset_name} in split {split}.")
+
+            log_paths_and_split.extend([(log_path, split) for log_path in log_folder.iterdir()])
+
+        return log_paths_and_split
+
+    def get_number_of_maps(self) -> int:
+        """Inherited, see superclass."""
+        return len(self._log_paths_and_split)
+
+    def get_number_of_logs(self) -> int:
+        """Inherited, see superclass."""
+        return len(self._log_paths_and_split)
+
+    def convert_map(self, map_index: int, map_writer: AbstractMapWriter) -> None:
+        """Inherited, see superclass."""
+
+        source_log_path, split = self._log_paths_and_split[map_index]
+
+        # 1. Initialize map metadata
+        map_metadata = _get_av2_sensor_map_metadata(split, source_log_path)
+
+        # 2. Prepare map writer
+        map_needs_writing = map_writer.reset(self.dataset_converter_config, map_metadata)
+
+        # 3. Process source map data
+        if map_needs_writing:
+            convert_av2_map(source_log_path, map_writer)
+
+        # 4. Finalize map writing
+        map_writer.close()
+
+    def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None:
+        """Inherited, see superclass."""
+
+        source_log_path, split = self._log_paths_and_split[log_index]
+
+        # 1. Initialize Metadata
+        map_metadata = _get_av2_sensor_map_metadata(split, source_log_path)
+        log_metadata = LogMetadata(
+            dataset="av2-sensor",
+            split=split,
+            log_name=source_log_path.name,
+            location=map_metadata.location,
+            timestep_seconds=0.1,
+            box_detection_label_class=AV2SensorBoxDetectionLabel,
+            vehicle_parameters=get_av2_ford_fusion_hybrid_parameters(),
+            pinhole_camera_metadata=_get_av2_pinhole_camera_metadata(source_log_path, self.dataset_converter_config),
+            lidar_metadata=_get_av2_lidar_metadata(source_log_path, self.dataset_converter_config),
+            map_metadata=map_metadata,
+        )
+
+        # 2. Prepare log writer
+        log_needs_writing = log_writer.reset(self.dataset_converter_config, log_metadata)
+
+        # 3. Process source log data
+        if log_needs_writing:
+
+            sensor_df = build_sensor_dataframe(source_log_path)
+            synchronization_df = build_synchronization_dataframe(sensor_df)
+
+            lidar_sensor = sensor_df.xs(key="lidar", level=2)
+            lidar_timestamps_ns = np.sort([int(idx_tuple[2]) for idx_tuple in lidar_sensor.index])
+
+            annotations_df = (
+                pd.read_feather(source_log_path / "annotations.feather")
+                if (source_log_path / "annotations.feather").exists()
+                else None
+            )
+            city_se3_egovehicle_df = pd.read_feather(source_log_path / "city_SE3_egovehicle.feather")
+            egovehicle_se3_sensor_df = pd.read_feather(
+                source_log_path / "calibration" / "egovehicle_SE3_sensor.feather"
+            )
+
+            for lidar_timestamp_ns in lidar_timestamps_ns:
+                ego_state = _extract_av2_sensor_ego_state(city_se3_egovehicle_df, lidar_timestamp_ns)
+                log_writer.write(
+                    timestamp=TimePoint.from_ns(int(lidar_timestamp_ns)),
+                    ego_state=ego_state,
+                    box_detections=_extract_av2_sensor_box_detections(annotations_df, lidar_timestamp_ns, ego_state),
+                    pinhole_cameras=_extract_av2_sensor_pinhole_cameras(
+                        lidar_timestamp_ns,
+                        egovehicle_se3_sensor_df,
+                        synchronization_df,
+                        source_log_path,
+                        self.dataset_converter_config,
+                    ),
+                    lidars=_extract_av2_sensor_lidars(
+                        source_log_path,
+                        lidar_timestamp_ns,
+                        self.dataset_converter_config,
+                    ),
+                )
+
+        # 4. Finalize log writing
+        log_writer.close()
+
+
+def _get_av2_sensor_map_metadata(split: str, source_log_path: Path) -> MapMetadata:
+    # NOTE: We need to get the city name from the map folder.
+    # see: https://github.com/argoverse/av2-api/blob/main/src/av2/datasets/sensor/av2_sensor_dataloader.py#L163
+    map_folder = source_log_path / "map"
+    log_map_archive_path = next(map_folder.glob("log_map_archive_*.json"))
+    location = log_map_archive_path.name.split("____")[1].split("_")[0]
+    return MapMetadata(
+        dataset="av2-sensor",
+        split=split,
+        log_name=source_log_path.name,
+        location=location,
+        map_has_z=True,
+        map_is_local=True,
+    )
+
+
+def _get_av2_pinhole_camera_metadata(
+    source_log_path: Path, dataset_converter_config: DatasetConverterConfig
+) -> Dict[PinholeCameraType, PinholeCameraMetadata]:
+
+    pinhole_camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {}
+    if dataset_converter_config.include_pinhole_cameras:
+        intrinsics_file = source_log_path / "calibration" / "intrinsics.feather"
+        intrinsics_df = pd.read_feather(intrinsics_file)
+        for _, row in intrinsics_df.iterrows():
+            row = row.to_dict()
+            camera_type = AV2_CAMERA_TYPE_MAPPING[row["sensor_name"]]
+            pinhole_camera_metadata[camera_type] = PinholeCameraMetadata(
+                camera_type=camera_type,
+                width=row["width_px"],
+                height=row["height_px"],
+                intrinsics=PinholeIntrinsics(fx=row["fx_px"], fy=row["fy_px"], cx=row["cx_px"], cy=row["cy_px"]),
+                distortion=PinholeDistortion(k1=row["k1"], k2=row["k2"], p1=0.0, p2=0.0, k3=row["k3"]),
+            )
+    return pinhole_camera_metadata
+
+
+def _get_av2_lidar_metadata(
+    source_log_path: Path, dataset_converter_config: DatasetConverterConfig
+) -> Dict[LiDARType, LiDARMetadata]:
+
+    metadata: Dict[LiDARType, LiDARMetadata] = {}
+
+    if dataset_converter_config.include_lidars:
+
+        # Load calibration feather file
+        calibration_file = source_log_path / "calibration" / "egovehicle_SE3_sensor.feather"
+        calibration_df = pd.read_feather(calibration_file)
+
+        # NOTE: AV2 has two stacked lidars: up_lidar and down_lidar.
+        # We store these as separate LiDARType entries.
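+        # The raw sweep feathers interleave both units; load_av2_sensor_lidar_pcs_from_file
+        # in av2_sensor_io.py splits a sweep on laser_number into LIDAR_TOP and
+        # LIDAR_DOWN point clouds matching the two metadata entries below.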
+
+        # top lidar:
+        metadata[LiDARType.LIDAR_TOP] = LiDARMetadata(
+            lidar_type=LiDARType.LIDAR_TOP,
+            lidar_index=AVSensorLiDARIndex,
+            extrinsic=_row_dict_to_state_se3(
+                calibration_df[calibration_df["sensor_name"] == "up_lidar"].iloc[0].to_dict()
+            ),
+        )
+        # down lidar:
+        metadata[LiDARType.LIDAR_DOWN] = LiDARMetadata(
+            lidar_type=LiDARType.LIDAR_DOWN,
+            lidar_index=AVSensorLiDARIndex,
+            extrinsic=_row_dict_to_state_se3(
+                calibration_df[calibration_df["sensor_name"] == "down_lidar"].iloc[0].to_dict()
+            ),
+        )
+    return metadata
+
+
+def _extract_av2_sensor_box_detections(
+    annotations_df: Optional[pd.DataFrame],
+    lidar_timestamp_ns: int,
+    ego_state_se3: EgoStateSE3,
+) -> BoxDetectionWrapper:
+
+    # TODO: Extract velocity from annotations_df if available.
+
+    if annotations_df is None:
+        return BoxDetectionWrapper(box_detections=[])
+
+    annotations_slice = get_slice_with_timestamp_ns(annotations_df, lidar_timestamp_ns)
+    num_detections = len(annotations_slice)
+
+    detections_state = np.zeros((num_detections, len(BoundingBoxSE3Index)), dtype=np.float64)
+    detections_velocity = np.zeros((num_detections, len(Vector3DIndex)), dtype=np.float64)
+    detections_token: List[str] = annotations_slice["track_uuid"].tolist()
+    detections_labels: List[AV2SensorBoxDetectionLabel] = []
+
+    for detection_idx, (_, row) in enumerate(annotations_slice.iterrows()):
+        row = row.to_dict()
+
+        detections_state[detection_idx, BoundingBoxSE3Index.XYZ] = [row["tx_m"], row["ty_m"], row["tz_m"]]
+        detections_state[detection_idx, BoundingBoxSE3Index.QUATERNION] = [row["qw"], row["qx"], row["qy"], row["qz"]]
+        detections_state[detection_idx, BoundingBoxSE3Index.EXTENT] = [row["length_m"], row["width_m"], row["height_m"]]
+
+        detections_label = AV2SensorBoxDetectionLabel.deserialize(row["category"])
+        detections_labels.append(detections_label)
+
+    detections_state[:, BoundingBoxSE3Index.STATE_SE3] = convert_relative_to_absolute_se3_array(
+        origin=ego_state_se3.rear_axle_se3,
+        se3_array=detections_state[:, BoundingBoxSE3Index.STATE_SE3],
+    )
+
+    box_detections: List[BoxDetectionSE3] = []
+    for detection_idx in range(num_detections):
+        box_detections.append(
+            BoxDetectionSE3(
+                metadata=BoxDetectionMetadata(
+                    label=detections_labels[detection_idx],
+                    timepoint=None,
+                    track_token=detections_token[detection_idx],
+                    confidence=None,
+                ),
+                bounding_box_se3=BoundingBoxSE3.from_array(detections_state[detection_idx]),
+                velocity=Vector3D.from_array(detections_velocity[detection_idx]),
+            )
+        )
+
+    return BoxDetectionWrapper(box_detections=box_detections)
+
+
+def _extract_av2_sensor_ego_state(city_se3_egovehicle_df: pd.DataFrame, lidar_timestamp_ns: int) -> EgoStateSE3:
+    ego_state_slice = get_slice_with_timestamp_ns(city_se3_egovehicle_df, lidar_timestamp_ns)
+    assert (
+        len(ego_state_slice) == 1
+    ), f"Expected exactly one ego state for timestamp {lidar_timestamp_ns}, got {len(ego_state_slice)}."
+
+    ego_pose_dict = ego_state_slice.iloc[0].to_dict()
+    rear_axle_pose = _row_dict_to_state_se3(ego_pose_dict)
+
+    vehicle_parameters = get_av2_ford_fusion_hybrid_parameters()
+    center = rear_axle_se3_to_center_se3(rear_axle_se3=rear_axle_pose, vehicle_parameters=vehicle_parameters)
+
+    # TODO: Add script to calculate the dynamic state from log sequence.
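+    # One possible (hypothetical, not yet implemented) approach: finite-difference
+    # consecutive rear-axle poses, v[t] ~= (p[t+1] - p[t]) / dt, rotated into the
+    # ego frame; until then, the dynamic state is zero-filled below.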
+    dynamic_state = DynamicStateSE3(
+        velocity=Vector3D(x=0.0, y=0.0, z=0.0),
+        acceleration=Vector3D(x=0.0, y=0.0, z=0.0),
+        angular_velocity=Vector3D(x=0.0, y=0.0, z=0.0),
+    )
+
+    return EgoStateSE3(
+        center_se3=center,
+        dynamic_state_se3=dynamic_state,
+        vehicle_parameters=vehicle_parameters,
+        timepoint=None,
+    )
+
+
+def _extract_av2_sensor_pinhole_cameras(
+    lidar_timestamp_ns: int,
+    egovehicle_se3_sensor_df: pd.DataFrame,
+    synchronization_df: pd.DataFrame,
+    source_log_path: Path,
+    dataset_converter_config: DatasetConverterConfig,
+) -> List[CameraData]:
+
+    camera_data_list: List[CameraData] = []
+    split = source_log_path.parent.name
+    log_id = source_log_path.name
+
+    if dataset_converter_config.include_pinhole_cameras:
+        av2_sensor_data_root = source_log_path.parent.parent
+
+        for _, row in egovehicle_se3_sensor_df.iterrows():
+            row = row.to_dict()
+            if row["sensor_name"] not in AV2_CAMERA_TYPE_MAPPING:
+                continue
+
+            pinhole_camera_name = row["sensor_name"]
+            pinhole_camera_type = AV2_CAMERA_TYPE_MAPPING[pinhole_camera_name]
+
+            relative_image_path = find_closest_target_fpath(
+                split=split,
+                log_id=log_id,
+                src_sensor_name="lidar",
+                src_timestamp_ns=lidar_timestamp_ns,
+                target_sensor_name=pinhole_camera_name,
+                synchronization_df=synchronization_df,
+            )
+            if relative_image_path is not None:
+                absolute_image_path = av2_sensor_data_root / relative_image_path
+                assert absolute_image_path.exists()
+
+                camera_data = CameraData(
+                    camera_type=pinhole_camera_type,
+                    extrinsic=_row_dict_to_state_se3(row),
+                    dataset_root=av2_sensor_data_root,
+                    relative_path=relative_image_path,
+                )
+                camera_data_list.append(camera_data)
+
+    return camera_data_list
+
+
+def _extract_av2_sensor_lidars(
+    source_log_path: Path, lidar_timestamp_ns: int, dataset_converter_config: DatasetConverterConfig
+) -> List[LiDARData]:
+    lidars: List[LiDARData] = []
+    if dataset_converter_config.include_lidars:
+        av2_sensor_data_root = source_log_path.parent.parent
+        split_type = source_log_path.parent.name
+        log_name = source_log_path.name
+
+        relative_feather_path = f"{split_type}/{log_name}/sensors/lidar/{lidar_timestamp_ns}.feather"
+        lidar_feather_path = av2_sensor_data_root / relative_feather_path
+        assert lidar_feather_path.exists(), f"LiDAR feather file not found: {lidar_feather_path}"
+
+        lidar_data = LiDARData(
+            lidar_type=LiDARType.LIDAR_MERGED,
+            dataset_root=av2_sensor_data_root,
+            relative_path=relative_feather_path,
+        )
+        lidars.append(lidar_data)
+
+    return lidars
+
+
+def _row_dict_to_state_se3(row_dict: Dict[str, float]) -> StateSE3:
+    """Helper function to convert a row dictionary to a StateSE3 object."""
+    return StateSE3(
+        x=row_dict["tx_m"],
+        y=row_dict["ty_m"],
+        z=row_dict["tz_m"],
+        qw=row_dict["qw"],
+        qx=row_dict["qx"],
+        qy=row_dict["qy"],
+        qz=row_dict["qz"],
+    )
diff --git a/src/py123d/conversion/datasets/av2/av2_sensor_io.py b/src/py123d/conversion/datasets/av2/av2_sensor_io.py
new file mode 100644
index 00000000..81a3de3a
--- /dev/null
+++ b/src/py123d/conversion/datasets/av2/av2_sensor_io.py
@@ -0,0 +1,22 @@
+from pathlib import Path
+from typing import Dict, Union
+
+import numpy as np
+import pandas as pd
+
+from py123d.datatypes.sensors.lidar import LiDARType
+
+
+def load_av2_sensor_lidar_pcs_from_file(feather_path: Union[Path, str]) -> Dict[LiDARType, np.ndarray]:
+    # NOTE: The AV2 dataset stores both top and down LiDAR data in the same feather file.
+    # We need to separate them based on the laser_number field.
+ # See here: https://github.com/argoverse/av2-api/issues/77#issuecomment-1178040867 + all_lidar_df = pd.read_feather(feather_path, columns=["x", "y", "z", "intensity", "laser_number"]) + + lidar_down = all_lidar_df[all_lidar_df["laser_number"] >= 32] + lidar_top = all_lidar_df[all_lidar_df["laser_number"] < 32] + + lidar_down = lidar_down.drop(columns=["laser_number"]) + lidar_top = lidar_top.drop(columns=["laser_number"]) + + return {LiDARType.LIDAR_DOWN: lidar_down.to_numpy(), LiDARType.LIDAR_TOP: lidar_top.to_numpy()} diff --git a/src/py123d/conversion/datasets/av2/utils/av2_constants.py b/src/py123d/conversion/datasets/av2/utils/av2_constants.py new file mode 100644 index 00000000..30b59fa2 --- /dev/null +++ b/src/py123d/conversion/datasets/av2/utils/av2_constants.py @@ -0,0 +1,44 @@ +from typing import Dict, Final, Set + +from py123d.datatypes.maps.map_datatypes import RoadLineType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType + +AV2_SENSOR_SPLITS: Set[str] = {"av2-sensor_train", "av2-sensor_val", "av2-sensor_test"} + + +AV2_CAMERA_TYPE_MAPPING: Dict[str, PinholeCameraType] = { + "ring_front_center": PinholeCameraType.PCAM_F0, + "ring_front_left": PinholeCameraType.PCAM_L0, + "ring_front_right": PinholeCameraType.PCAM_R0, + "ring_side_left": PinholeCameraType.PCAM_L1, + "ring_side_right": PinholeCameraType.PCAM_R1, + "ring_rear_left": PinholeCameraType.PCAM_L2, + "ring_rear_right": PinholeCameraType.PCAM_R2, + "stereo_front_left": PinholeCameraType.PCAM_STEREO_L, + "stereo_front_right": PinholeCameraType.PCAM_STEREO_R, +} + +# AV2_LIDAR_TYPES: Dict[str, str] = { + + +AV2_ROAD_LINE_TYPE_MAPPING: Dict[str, RoadLineType] = { + "NONE": RoadLineType.NONE, + "UNKNOWN": RoadLineType.UNKNOWN, + "DASH_SOLID_YELLOW": RoadLineType.DASH_SOLID_YELLOW, + "DASH_SOLID_WHITE": RoadLineType.DASH_SOLID_WHITE, + "DASHED_WHITE": RoadLineType.DASHED_WHITE, + "DASHED_YELLOW": RoadLineType.DASHED_YELLOW, + "DOUBLE_SOLID_YELLOW": RoadLineType.DOUBLE_SOLID_YELLOW, + "DOUBLE_SOLID_WHITE": RoadLineType.DOUBLE_SOLID_WHITE, + "DOUBLE_DASH_YELLOW": RoadLineType.DOUBLE_DASH_YELLOW, + "DOUBLE_DASH_WHITE": RoadLineType.DOUBLE_DASH_WHITE, + "SOLID_YELLOW": RoadLineType.SOLID_YELLOW, + "SOLID_WHITE": RoadLineType.SOLID_WHITE, + "SOLID_DASH_WHITE": RoadLineType.SOLID_DASH_WHITE, + "SOLID_DASH_YELLOW": RoadLineType.SOLID_DASH_YELLOW, + "SOLID_BLUE": RoadLineType.SOLID_BLUE, +} + + +AV2_SENSOR_CAM_SHUTTER_INTERVAL_MS: Final[float] = 50.0 +AV2_SENSOR_LIDAR_SWEEP_INTERVAL_W_BUFFER_NS: Final[float] = 102000000.0 diff --git a/d123/dataset/dataset_specific/av2/av2_helper.py b/src/py123d/conversion/datasets/av2/utils/av2_helper.py similarity index 88% rename from d123/dataset/dataset_specific/av2/av2_helper.py rename to src/py123d/conversion/datasets/av2/utils/av2_helper.py index 8ad881cc..cd0c1f62 100644 --- a/d123/dataset/dataset_specific/av2/av2_helper.py +++ b/src/py123d/conversion/datasets/av2/utils/av2_helper.py @@ -1,25 +1,13 @@ from pathlib import Path -from typing import Final, List, Literal, Optional +from typing import List, Literal, Optional import pandas as pd -from d123.dataset.dataset_specific.av2.av2_constants import AV2_CAMERA_TYPE_MAPPING - -AV2_SENSOR_CAM_SHUTTER_INTERVAL_MS: Final[float] = 50.0 -AV2_SENSOR_LIDAR_SWEEP_INTERVAL_W_BUFFER_NS: Final[float] = 102000000.0 - - -AV2_SENSOR_CAMERA_NAMES = [ - "ring_front_center", - "ring_front_left", - "ring_front_right", - "ring_rear_left", - "ring_rear_right", - "ring_side_left", - "ring_side_right", - "stereo_front_left", - 
"stereo_front_right", -] +from py123d.conversion.datasets.av2.utils.av2_constants import ( + AV2_CAMERA_TYPE_MAPPING, + AV2_SENSOR_CAM_SHUTTER_INTERVAL_MS, + AV2_SENSOR_LIDAR_SWEEP_INTERVAL_W_BUFFER_NS, +) def get_dataframe_from_file(file_path: Path) -> pd.DataFrame: @@ -40,15 +28,15 @@ def get_slice_with_timestamp_ns(dataframe: pd.DataFrame, timestamp_ns: int): return dataframe[dataframe["timestamp_ns"] == timestamp_ns] -def build_sensor_dataframe(raw_log_path: Path) -> pd.DataFrame: +def build_sensor_dataframe(source_log_path: Path) -> pd.DataFrame: # https://github.com/argoverse/av2-api/blob/main/src/av2/datasets/sensor/sensor_dataloader.py#L209 - split = raw_log_path.parent.name - log_id = raw_log_path.name + split = source_log_path.parent.name + log_id = source_log_path.name - lidar_path = raw_log_path / "sensors" / "lidar" - cameras_path = raw_log_path / "sensors" / "cameras" + lidar_path = source_log_path / "sensors" / "lidar" + cameras_path = source_log_path / "sensors" / "cameras" # Find all the lidar records and timestamps from file names. lidar_records = populate_sensor_records(lidar_path, split, log_id) @@ -56,7 +44,7 @@ def build_sensor_dataframe(raw_log_path: Path) -> pd.DataFrame: # Find all the camera records and timestamps from file names. camera_records = [] for camera_folder in cameras_path.iterdir(): - assert camera_folder.name in AV2_SENSOR_CAMERA_NAMES + assert camera_folder.name in AV2_CAMERA_TYPE_MAPPING.keys() camera_record = populate_sensor_records(camera_folder, split, log_id) camera_records.append(camera_record) diff --git a/d123/common/datatypes/vehicle_state/__init__.py b/src/py123d/conversion/datasets/kitti360/__init__.py similarity index 100% rename from d123/common/datatypes/vehicle_state/__init__.py rename to src/py123d/conversion/datasets/kitti360/__init__.py diff --git a/src/py123d/conversion/datasets/kitti360/kitti360_converter.py b/src/py123d/conversion/datasets/kitti360/kitti360_converter.py new file mode 100644 index 00000000..22d84229 --- /dev/null +++ b/src/py123d/conversion/datasets/kitti360/kitti360_converter.py @@ -0,0 +1,793 @@ +import datetime +import logging +import pickle +import re +import xml.etree.ElementTree as ET +from collections import defaultdict +from pathlib import Path +from typing import Any, Dict, Final, List, Optional, Tuple, Union + +import numpy as np +import yaml + +from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter +from py123d.conversion.dataset_converter_config import DatasetConverterConfig +from py123d.conversion.datasets.kitti360.kitti360_map_conversion import convert_kitti360_map_with_writer +from py123d.conversion.datasets.kitti360.utils.kitti360_helper import ( + KITTI3602NUPLAN_IMU_CALIBRATION, + KITTI360Bbox3D, + get_kitti360_lidar_extrinsic, +) +from py123d.conversion.datasets.kitti360.utils.kitti360_labels import ( + BBOX_LABLES_TO_DETECTION_NAME_DICT, + KIITI360_DETECTION_NAME_DICT, + kittiId2label, +) +from py123d.conversion.datasets.kitti360.utils.preprocess_detection import process_detection +from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, CameraData, LiDARData +from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from py123d.conversion.registry.box_detection_label_registry import KITTI360BoxDetectionLabel +from py123d.conversion.registry.lidar_index_registry import Kitti360LiDARIndex +from py123d.datatypes.detections.box_detections import ( + BoxDetectionMetadata, + BoxDetectionSE3, + BoxDetectionWrapper, +) +from 
py123d.datatypes.maps.map_metadata import MapMetadata +from py123d.datatypes.scene.scene_metadata import LogMetadata +from py123d.datatypes.sensors.fisheye_mei_camera import ( + FisheyeMEICameraMetadata, + FisheyeMEICameraType, + FisheyeMEIDistortion, + FisheyeMEIProjection, +) +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import ( + PinholeCameraMetadata, + PinholeCameraType, + PinholeDistortion, + PinholeIntrinsics, +) +from py123d.datatypes.time.time_point import TimePoint +from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 +from py123d.datatypes.vehicle_state.vehicle_parameters import ( + get_kitti360_vw_passat_parameters, + rear_axle_se3_to_center_se3, +) +from py123d.geometry import BoundingBoxSE3, Quaternion, StateSE3, Vector3D +from py123d.geometry.transform.transform_se3 import convert_se3_array_between_origins, translate_se3_along_body_frame + +KITTI360_DT: Final[float] = 0.1 + +KITTI360_PINHOLE_CAMERA_TYPES = { + PinholeCameraType.PCAM_STEREO_L: "image_00", + PinholeCameraType.PCAM_STEREO_R: "image_01", +} + +KITTI360_FISHEYE_MEI_CAMERA_TYPES = { + FisheyeMEICameraType.FCAM_L: "image_02", + FisheyeMEICameraType.FCAM_R: "image_03", +} + +KITTI360_SPLITS: List[str] = ["kitti360_train", "kitti360_val", "kitti360_test"] +KITTI360_ALL_SEQUENCES: Final[List[str]] = [ + "2013_05_28_drive_0000_sync", + "2013_05_28_drive_0002_sync", + "2013_05_28_drive_0003_sync", + "2013_05_28_drive_0004_sync", + "2013_05_28_drive_0005_sync", + "2013_05_28_drive_0006_sync", + "2013_05_28_drive_0007_sync", + "2013_05_28_drive_0008_sync", + "2013_05_28_drive_0009_sync", + "2013_05_28_drive_0010_sync", + "2013_05_28_drive_0018_sync", +] + +DIR_ROOT = "root" +DIR_2D_RAW = "data_2d_raw" +DIR_2D_SMT = "data_2d_semantics" +DIR_3D_RAW = "data_3d_raw" +DIR_3D_SMT = "data_3d_semantics" +DIR_3D_BBOX = "data_3d_bboxes" +DIR_POSES = "data_poses" +DIR_CALIB = "calibration" + + +def _get_kitti360_paths_from_root(kitti_data_root: Path) -> Dict[str, Path]: + return { + DIR_ROOT: kitti_data_root, + DIR_2D_RAW: kitti_data_root / DIR_2D_RAW, + DIR_2D_SMT: kitti_data_root / DIR_2D_SMT, + DIR_3D_RAW: kitti_data_root / DIR_3D_RAW, + DIR_3D_SMT: kitti_data_root / DIR_3D_SMT, + DIR_3D_BBOX: kitti_data_root / DIR_3D_BBOX, + DIR_POSES: kitti_data_root / DIR_POSES, + DIR_CALIB: kitti_data_root / DIR_CALIB, + } + + +def _get_kitti360_required_modality_roots(kitti360_folders: Dict[str, Path]) -> Dict[str, Path]: + return { + DIR_2D_RAW: kitti360_folders[DIR_2D_RAW], + DIR_3D_RAW: kitti360_folders[DIR_3D_RAW], + DIR_POSES: kitti360_folders[DIR_POSES], + DIR_3D_BBOX: kitti360_folders[DIR_3D_BBOX] / "train", + } + + +class Kitti360Converter(AbstractDatasetConverter): + def __init__( + self, + splits: List[str], + kitti360_data_root: Union[Path, str], + detection_cache_root: Union[Path, str], + detection_radius: float, + dataset_converter_config: DatasetConverterConfig, + train_sequences: List[str], + val_sequences: List[str], + test_sequences: List[str], + ) -> None: + assert kitti360_data_root is not None, "The variable `kitti360_data_root` must be provided." + super().__init__(dataset_converter_config) + for split in splits: + assert split in KITTI360_SPLITS, f"Split {split} is not available. 
Available splits: {KITTI360_SPLITS}"
+
+        self._splits: List[str] = splits
+        self._kitti360_data_root: Path = Path(kitti360_data_root)
+        self._kitti360_folders: Dict[str, Path] = _get_kitti360_paths_from_root(self._kitti360_data_root)
+
+        # NOTE: We preprocess detections into a cache directory to speed up repeated conversions.
+        # The bounding boxes are preprocessed into a per-frame format based on the ego distance and
+        # on visibility in the lidar point cloud.
+        self._detection_cache_root: Path = Path(detection_cache_root)
+        self._detection_radius: float = detection_radius
+
+        self._train_sequences: List[str] = train_sequences
+        self._val_sequences: List[str] = val_sequences
+        self._test_sequences: List[str] = test_sequences
+
+        self._log_names_and_split: List[Tuple[str, str]] = self._collect_valid_logs()
+        self._total_maps = len(self._log_names_and_split)  # Each log has its own map
+        self._total_logs = len(self._log_names_and_split)
+
+    def _collect_valid_logs(self) -> List[Tuple[str, str]]:
+        """Helper function to collect valid KITTI-360 sequences ("logs") from the dataset root.
+
+        :raises FileNotFoundError: If required modality roots are missing
+        :return: A list of tuples containing the log name and split name
+        """
+
+        def _has_modality(seq_name: str, modality_name: str, root: Path) -> bool:
+            if modality_name == DIR_3D_BBOX:
+                # expected: data_3d_bboxes/train/<seq_name>.xml
+                xml_path = root / f"{seq_name}.xml"
+                return xml_path.exists()
+            else:
+                return (root / seq_name).exists()
+
+        required_modality_roots = _get_kitti360_required_modality_roots(self._kitti360_folders)
+        missing_roots = [str(p) for p in required_modality_roots.values() if not p.exists()]
+        if missing_roots:
+            raise FileNotFoundError(f"KITTI-360 required roots missing: {missing_roots}")
+
+        # Find all sequences in the 2D raw data directory, and assign each to its split
+        split_sequence_candidates: Dict[str, List[str]] = defaultdict(list)
+        for sequence_path in required_modality_roots[DIR_2D_RAW].iterdir():
+            if sequence_path.is_dir() and sequence_path.name.endswith("_sync"):
+                seq_name = sequence_path.name
+                if seq_name in self._train_sequences:
+                    split_sequence_candidates["kitti360_train"].append(seq_name)
+                elif seq_name in self._val_sequences:
+                    split_sequence_candidates["kitti360_val"].append(seq_name)
+                elif seq_name in self._test_sequences:
+                    split_sequence_candidates["kitti360_test"].append(seq_name)
+
+        # Iterate all candidates, check that all modalities are available, and add to a flat list
+        log_paths_and_split: List[Tuple[str, str]] = []
+        for split, sequence_names in split_sequence_candidates.items():
+            if split not in self._splits:
+                continue
+            for sequence_name in sequence_names:
+                missing_modalities = [
+                    modality_name
+                    for modality_name, root in required_modality_roots.items()
+                    if not _has_modality(sequence_name, modality_name, root)
+                ]
+                if len(missing_modalities) == 0:
+                    log_paths_and_split.append((sequence_name, split))
+                else:
+                    logging.info(
+                        f"Sequence '{sequence_name}' skipped: missing modalities {missing_modalities}. 
" + f"Root: {self._kitti360_data_root}" + ) + + logging.info(f"Valid sequences found: {len(log_paths_and_split)}") + return log_paths_and_split + + def get_number_of_maps(self) -> int: + """Returns the number of available raw data maps for conversion.""" + return self._total_maps + + def get_number_of_logs(self) -> int: + """Returns the number of available raw data logs for conversion.""" + return self._total_logs + + def convert_map(self, map_index: int, map_writer: AbstractMapWriter) -> None: + """ + Convert a single map in raw data format to the uniform 123D format. + :param map_index: The index of the map to convert. + :param map_writer: The map writer to use for writing the converted map. + """ + log_name, split = self._log_names_and_split[map_index] + map_metadata = _get_kitti360_map_metadata(split, log_name) + map_needs_writing = map_writer.reset(self.dataset_converter_config, map_metadata) + if map_needs_writing: + convert_kitti360_map_with_writer(log_name, map_writer) + map_writer.close() + + def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: + """ + Convert a single log in raw data format to the uniform 123D format. + :param log_index: The index of the log to convert. + :param log_writer: The log writer to use for writing the converted log. + """ + log_name, split = self._log_names_and_split[log_index] + + # Create log metadata + log_metadata = LogMetadata( + dataset="kitti360", + split=split, + log_name=log_name, + location=log_name, + timestep_seconds=KITTI360_DT, + box_detection_label_class=KITTI360BoxDetectionLabel, + vehicle_parameters=get_kitti360_vw_passat_parameters(), + pinhole_camera_metadata=_get_kitti360_pinhole_camera_metadata( + self._kitti360_folders, + self.dataset_converter_config, + ), + fisheye_mei_camera_metadata=_get_kitti360_fisheye_mei_camera_metadata( + self._kitti360_folders, + self.dataset_converter_config, + ), + lidar_metadata=_get_kitti360_lidar_metadata( + self._kitti360_folders, + self.dataset_converter_config, + ), + map_metadata=_get_kitti360_map_metadata(split, log_name), + ) + + log_needs_writing = log_writer.reset(self.dataset_converter_config, log_metadata) + + if log_needs_writing: + ts_list: List[TimePoint] = _read_timestamps(log_name, self._kitti360_folders) + ego_state_all, valid_timestamp = _extract_ego_state_all(log_name, self._kitti360_folders) + ego_states_xyz = np.array([ego_state.center.array[:3] for ego_state in ego_state_all], dtype=np.float64) + box_detection_wrapper_all = _extract_kitti360_box_detections_all( + log_name, + len(ts_list), + ego_states_xyz, + valid_timestamp, + self._kitti360_folders, + self._detection_cache_root, + self._detection_radius, + ) + camera_calibration = _load_kitti_360_calibration(self._kitti360_data_root) + logging.info(f"Number of valid timestamps with ego states: {len(valid_timestamp)}") + + for idx in range(len(valid_timestamp)): + valid_idx = valid_timestamp[idx] + + pinhole_cameras = _extract_kitti360_pinhole_cameras( + log_name, + valid_idx, + camera_calibration, + self._kitti360_folders, + self.dataset_converter_config, + ) + fisheye_cameras = _extract_kitti360_fisheye_mei_cameras( + log_name, + valid_idx, + camera_calibration, + self._kitti360_folders, + self.dataset_converter_config, + ) + lidars = _extract_kitti360_lidar( + log_name, + valid_idx, + self._kitti360_folders, + self.dataset_converter_config, + ) + + log_writer.write( + timestamp=ts_list[valid_idx], + ego_state=ego_state_all[idx], + box_detections=box_detection_wrapper_all[valid_idx], + 
pinhole_cameras=pinhole_cameras, + fisheye_mei_cameras=fisheye_cameras, + lidars=lidars, + ) + + log_writer.close() + + +def _get_kitti360_pinhole_camera_metadata( + kitti360_folders: Dict[str, Path], + dataset_converter_config: DatasetConverterConfig, +) -> Dict[PinholeCameraType, PinholeCameraMetadata]: + + pinhole_cam_metadatas: Dict[PinholeCameraType, PinholeCameraMetadata] = {} + if dataset_converter_config.include_pinhole_cameras: + persp = kitti360_folders[DIR_CALIB] / "perspective.txt" + assert persp.exists() + persp_result = {"image_00": {}, "image_01": {}} + + with open(persp, "r") as f: + lines = [ln.strip() for ln in f if ln.strip()] + for ln in lines: + key, value = ln.split(" ", 1) + cam_id = key.split("_")[-1][:2] + if key.startswith("P_rect_"): + persp_result[f"image_{cam_id}"]["intrinsic"] = _read_projection_matrix(ln) + elif key.startswith("S_rect_"): + persp_result[f"image_{cam_id}"]["wh"] = [int(round(float(x))) for x in value.split()] + elif key.startswith("D_"): + persp_result[f"image_{cam_id}"]["distortion"] = [float(x) for x in value.split()] + + for pcam_type, pcam_name in KITTI360_PINHOLE_CAMERA_TYPES.items(): + pinhole_cam_metadatas[pcam_type] = PinholeCameraMetadata( + camera_type=pcam_type, + width=persp_result[pcam_name]["wh"][0], + height=persp_result[pcam_name]["wh"][1], + intrinsics=PinholeIntrinsics.from_camera_matrix(np.array(persp_result[pcam_name]["intrinsic"])), + distortion=PinholeDistortion.from_array(np.array(persp_result[pcam_name]["distortion"])), + ) + + return pinhole_cam_metadatas + + +def _get_kitti360_fisheye_mei_camera_metadata( + kitti360_folders: Dict[str, Path], + dataset_converter_config: DatasetConverterConfig, +) -> Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata]: + fisheye_cam_metadatas: Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata] = {} + if dataset_converter_config.include_fisheye_mei_cameras: + + fisheye_camera02_path = kitti360_folders[DIR_CALIB] / "image_02.yaml" + fisheye_camera03_path = kitti360_folders[DIR_CALIB] / "image_03.yaml" + + assert fisheye_camera02_path.exists() and fisheye_camera03_path.exists() + fisheye02 = _readYAMLFile(fisheye_camera02_path) + fisheye03 = _readYAMLFile(fisheye_camera03_path) + fisheye_result = {"image_02": fisheye02, "image_03": fisheye03} + + for fcam_type, fcam_name in KITTI360_FISHEYE_MEI_CAMERA_TYPES.items(): + + distortion_params = fisheye_result[fcam_name]["distortion_parameters"] + distortion = FisheyeMEIDistortion( + k1=distortion_params["k1"], + k2=distortion_params["k2"], + p1=distortion_params["p1"], + p2=distortion_params["p2"], + ) + + projection_params = fisheye_result[fcam_name]["projection_parameters"] + projection = FisheyeMEIProjection( + gamma1=projection_params["gamma1"], + gamma2=projection_params["gamma2"], + u0=projection_params["u0"], + v0=projection_params["v0"], + ) + + fisheye_cam_metadatas[fcam_type] = FisheyeMEICameraMetadata( + camera_type=fcam_type, + width=fisheye_result[fcam_name]["image_width"], + height=fisheye_result[fcam_name]["image_height"], + mirror_parameter=float(fisheye_result[fcam_name]["mirror_parameters"]["xi"]), + distortion=distortion, + projection=projection, + ) + + return fisheye_cam_metadatas + + +def _get_kitti360_map_metadata(split: str, log_name: str) -> MapMetadata: + return MapMetadata( + dataset="kitti360", + split=split, + log_name=log_name, + location=log_name, + map_has_z=True, + map_is_local=True, + ) + + +def _read_projection_matrix(p_line: str) -> np.ndarray: + parts = p_line.split(" ", 1) + if len(parts) != 2: + raise 
ValueError(f"Bad projection line: {p_line}") + vals = [float(x) for x in parts[1].strip().split()] + P = np.array(vals, dtype=np.float64).reshape(3, 4) + K = P[:, :3] + return K + + +def _readYAMLFile(fileName: Path) -> Dict[str, Any]: + """make OpenCV YAML file compatible with python""" + ret = {} + skip_lines = 1 # Skip the first line which says "%YAML:1.0". Or replace it with "%YAML 1.0" + with open(fileName) as fin: + for i in range(skip_lines): + fin.readline() + yamlFileOut = fin.read() + myRe = re.compile(r":([^ ])") # Add space after ":", if it doesn't exist. Python yaml requirement + yamlFileOut = myRe.sub(r": \1", yamlFileOut) + ret = yaml.safe_load(yamlFileOut) + return ret + + +def _get_kitti360_lidar_metadata( + kitti360_folders: Dict[str, Path], + dataset_converter_config: DatasetConverterConfig, +) -> Dict[LiDARType, LiDARMetadata]: + metadata: Dict[LiDARType, LiDARMetadata] = {} + if dataset_converter_config.include_lidars: + extrinsic = get_kitti360_lidar_extrinsic(kitti360_folders[DIR_CALIB]) + extrinsic_state_se3 = StateSE3.from_transformation_matrix(extrinsic) + extrinsic_state_se3 = _extrinsic_from_imu_to_rear_axle(extrinsic_state_se3) + metadata[LiDARType.LIDAR_TOP] = LiDARMetadata( + lidar_type=LiDARType.LIDAR_TOP, + lidar_index=Kitti360LiDARIndex, + extrinsic=extrinsic_state_se3, + ) + return metadata + + +def _read_timestamps(log_name: str, kitti360_folders: Dict[str, Path]) -> Optional[List[TimePoint]]: + """ + Read KITTI-360 timestamps for the given sequence and return Unix epoch timestamps. + """ + ts_files = [ + kitti360_folders[DIR_3D_RAW] / log_name / "velodyne_points" / "timestamps.txt", + kitti360_folders[DIR_2D_RAW] / log_name / "image_00" / "timestamps.txt", + kitti360_folders[DIR_2D_RAW] / log_name / "image_01" / "timestamps.txt", + ] + + if log_name == "2013_05_28_drive_0002_sync": + ts_files = ts_files[1:] + + for ts_file in ts_files: + if ts_file.exists(): + tps: List[TimePoint] = [] + with open(ts_file, "r") as f: + for line in f: + s = line.strip() + if not s: + continue + dt_str, ns_str = s.split(".") + dt_obj = datetime.datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S") + dt_obj = dt_obj.replace(tzinfo=datetime.timezone.utc) + unix_epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc) + total_seconds = (dt_obj - unix_epoch).total_seconds() + ns_value = int(ns_str) + us_from_ns = ns_value // 1000 + total_us = int(total_seconds * 1_000_000) + us_from_ns + tps.append(TimePoint.from_us(total_us)) + return tps + return None + + +def _extract_ego_state_all(log_name: str, kitti360_folders: Dict[str, Path]) -> Tuple[List[EgoStateSE3], List[int]]: + + ego_state_all: List[List[float]] = [] + pose_file = kitti360_folders[DIR_POSES] / log_name / "poses.txt" + if not pose_file.exists(): + raise FileNotFoundError(f"Pose file not found: {pose_file}") + poses = np.loadtxt(pose_file) + poses_time = poses[:, 0].astype(np.int32) + valid_timestamp: List[int] = list(poses_time) + oxts_path = kitti360_folders[DIR_POSES] / log_name / "oxts" / "data" + + for idx in range(len(valid_timestamp)): + oxts_path_file = oxts_path / f"{int(valid_timestamp[idx]):010d}.txt" + oxts_data = np.loadtxt(oxts_path_file) + + vehicle_parameters = get_kitti360_vw_passat_parameters() + + pos = idx + if log_name == "2013_05_28_drive_0004_sync" and pos == 0: + pos = 1 + + # NOTE you can use oxts_data[3:6] as roll, pitch, yaw for simplicity + # roll, pitch, yaw = oxts_data[3:6] + r00, r01, r02 = poses[pos, 1:4] + r10, r11, r12 = poses[pos, 5:8] + r20, r21, r22 = poses[pos, 9:12] + 
R_mat = np.array([[r00, r01, r02], [r10, r11, r12], [r20, r21, r22]], dtype=np.float64) + R_mat_cali = R_mat @ KITTI3602NUPLAN_IMU_CALIBRATION[:3, :3] + + ego_quaternion = Quaternion.from_rotation_matrix(R_mat_cali) + imu_pose = StateSE3( + x=poses[pos, 4], + y=poses[pos, 8], + z=poses[pos, 12], + qw=ego_quaternion.qw, + qx=ego_quaternion.qx, + qy=ego_quaternion.qy, + qz=ego_quaternion.qz, + ) + + rear_axle_pose = translate_se3_along_body_frame( + imu_pose, + Vector3D(0.05, -0.32, 0.0), + ) + + center = rear_axle_se3_to_center_se3(rear_axle_se3=rear_axle_pose, vehicle_parameters=vehicle_parameters) + dynamic_state = DynamicStateSE3( + velocity=Vector3D( + x=oxts_data[8], + y=oxts_data[9], + z=oxts_data[10], + ), + acceleration=Vector3D( + x=oxts_data[14], + y=oxts_data[15], + z=oxts_data[16], + ), + angular_velocity=Vector3D( + x=oxts_data[20], + y=oxts_data[21], + z=oxts_data[22], + ), + ) + ego_state_all.append( + EgoStateSE3( + center_se3=center, + dynamic_state_se3=dynamic_state, + vehicle_parameters=vehicle_parameters, + timepoint=None, + ) + ) + return ego_state_all, valid_timestamp + + +def _extract_kitti360_box_detections_all( + log_name: str, + ts_len: int, + ego_states_xyz: np.ndarray, + valid_timestamp: List[int], + kitti360_folders: Dict[str, Path], + detection_cache_root: Path, + detection_radius: float, +) -> List[BoxDetectionWrapper]: + + detections_states: List[List[List[float]]] = [[] for _ in range(ts_len)] + detections_velocity: List[List[List[float]]] = [[] for _ in range(ts_len)] + detections_tokens: List[List[str]] = [[] for _ in range(ts_len)] + detections_labels: List[List[int]] = [[] for _ in range(ts_len)] + + if log_name == "2013_05_28_drive_0004_sync": + bbox_3d_path = kitti360_folders[DIR_3D_BBOX] / "train_full" / f"{log_name}.xml" + else: + bbox_3d_path = kitti360_folders[DIR_3D_BBOX] / "train" / f"{log_name}.xml" + if not bbox_3d_path.exists(): + raise FileNotFoundError(f"BBox 3D file not found: {bbox_3d_path}") + + tree = ET.parse(bbox_3d_path) + root = tree.getroot() + + detection_preprocess_path = detection_cache_root / f"{log_name}_detection_preprocessed.pkl" + if not detection_preprocess_path.exists(): + process_detection( + kitti360_data_root=kitti360_folders[DIR_ROOT], + log_name=log_name, + radius_m=detection_radius, + output_dir=detection_cache_root, + ) + with open(detection_preprocess_path, "rb") as f: + detection_preprocess_result = pickle.load(f) + static_records_dict = { + record_item["global_id"]: record_item for record_item in detection_preprocess_result["static"] + } + logging.info(f"Loaded detection preprocess data from {detection_preprocess_path}") + + dynamic_objs: Dict[int, List[KITTI360Bbox3D]] = defaultdict(list) + + for child in root: + if child.find("semanticId") is not None: + semanticIdKITTI = int(child.find("semanticId").text) + name = kittiId2label[semanticIdKITTI].name + else: + label = child.find("label").text + name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(label, "unknown") + if child.find("transform") is None or name not in KIITI360_DETECTION_NAME_DICT.keys(): + continue + obj = KITTI360Bbox3D() + obj.parseBbox(child) + + # static object + if obj.timestamp == -1: + if detection_preprocess_result is None: + obj.filter_by_radius(ego_states_xyz, valid_timestamp, radius=50.0) + else: + obj.load_detection_preprocess(static_records_dict) + for record in obj.valid_frames["records"]: + frame = record["timestamp"] + detections_states[frame].append(obj.get_state_array()) + detections_velocity[frame].append(np.array([0.0, 0.0, 0.0])) + 
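+                # static objects are assumed stationary, hence the zero velocity above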
detections_tokens[frame].append(str(obj.globalID))
+                detections_labels[frame].append(KIITI360_DETECTION_NAME_DICT[obj.name])
+        else:
+            global_ID = obj.globalID
+            dynamic_objs[global_ID].append(obj)
+
+    # dynamic objects
+    for global_id, obj_list in dynamic_objs.items():
+        obj_list.sort(key=lambda obj: obj.timestamp)
+        num_frames = len(obj_list)
+
+        positions = [obj.get_state_array()[:3] for obj in obj_list]
+        timestamps = [int(obj.timestamp) for obj in obj_list]
+
+        velocities = []
+
+        # central differences over neighboring frames
+        for i in range(1, num_frames - 1):
+            dt_frames = timestamps[i + 1] - timestamps[i - 1]
+            if dt_frames > 0:
+                dt = dt_frames * KITTI360_DT
+                vel = (positions[i + 1] - positions[i - 1]) / dt
+                vel = KITTI3602NUPLAN_IMU_CALIBRATION[:3, :3] @ obj_list[i].Rm.T @ vel
+            else:
+                vel = np.zeros(3)
+            velocities.append(vel)
+
+        if num_frames > 2:
+            # pad the first and last frame with the nearest computed velocity
+            velocities.insert(0, velocities[0])
+            velocities.append(velocities[-1])
+        elif num_frames == 2:
+            # central differences need at least three frames; fall back to zero velocity
+            velocities = [np.zeros(3), np.zeros(3)]
+        elif num_frames == 1:
+            velocities.append(np.zeros(3))
+
+        for obj, vel in zip(obj_list, velocities):
+            frame = obj.timestamp
+            detections_states[frame].append(obj.get_state_array())
+            detections_velocity[frame].append(vel)
+            detections_tokens[frame].append(str(obj.globalID))
+            detections_labels[frame].append(KIITI360_DETECTION_NAME_DICT[obj.name])
+
+    box_detection_wrapper_all: List[BoxDetectionWrapper] = []
+    for frame in range(ts_len):
+        box_detections: List[BoxDetectionSE3] = []
+        for state, velocity, token, detection_label in zip(
+            detections_states[frame],
+            detections_velocity[frame],
+            detections_tokens[frame],
+            detections_labels[frame],
+        ):
+            if state is None:
+                break
+            detection_metadata = BoxDetectionMetadata(
+                label=detection_label,
+                timepoint=None,
+                track_token=token,
+                confidence=None,
+            )
+            bounding_box_se3 = BoundingBoxSE3.from_array(state)
+            velocity_vector = Vector3D.from_array(velocity)
+            box_detection = BoxDetectionSE3(
+                metadata=detection_metadata,
+                bounding_box_se3=bounding_box_se3,
+                velocity=velocity_vector,
+            )
+            box_detections.append(box_detection)
+        box_detection_wrapper_all.append(BoxDetectionWrapper(box_detections=box_detections))
+    return box_detection_wrapper_all
+
+
+def _extract_kitti360_lidar(
+    log_name: str,
+    idx: int,
+    kitti360_folders: Dict[str, Path],
+    data_converter_config: DatasetConverterConfig,
+) -> List[LiDARData]:
+
+    lidars: List[LiDARData] = []
+    if data_converter_config.include_lidars:
+        # NOTE: special case for sequence 2013_05_28_drive_0002_sync, which has no lidar data before frame 4391
+        if log_name == "2013_05_28_drive_0002_sync" and idx <= 4390:
+            return lidars
+
+        lidar_full_path = kitti360_folders[DIR_3D_RAW] / log_name / "velodyne_points" / "data" / f"{idx:010d}.bin"
+        if lidar_full_path.exists():
+            lidars.append(
+                LiDARData(
+                    lidar_type=LiDARType.LIDAR_TOP,
+                    timestamp=None,
+                    iteration=idx,
+                    dataset_root=kitti360_folders[DIR_ROOT],
+                    relative_path=lidar_full_path.relative_to(kitti360_folders[DIR_ROOT]),
+                )
+            )
+        else:
+            raise FileNotFoundError(f"LiDAR file not found: {lidar_full_path}")
+
+    return lidars
+
+
+def _extract_kitti360_pinhole_cameras(
+    log_name: str,
+    idx: int,
+    camera_calibration: Dict[str, StateSE3],
+    kitti360_folders: Dict[str, Path],
+    data_converter_config: DatasetConverterConfig,
+) -> List[CameraData]:
+
+    pinhole_camera_data_list: List[CameraData] = []
+    if data_converter_config.include_pinhole_cameras:
+        for camera_type, cam_dir_name in KITTI360_PINHOLE_CAMERA_TYPES.items():
+            img_path_png = kitti360_folders[DIR_2D_RAW] / log_name / cam_dir_name / "data_rect" / f"{idx:010d}.png"
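+            # "data_rect" contains the rectified perspective images, named by 10-digit frame index.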
camera_extrinsic = camera_calibration[cam_dir_name] + if img_path_png.exists(): + pinhole_camera_data_list.append( + CameraData( + camera_type=camera_type, + extrinsic=camera_extrinsic, + dataset_root=kitti360_folders[DIR_ROOT], + relative_path=img_path_png.relative_to(kitti360_folders[DIR_ROOT]), + ) + ) + + return pinhole_camera_data_list + + +def _extract_kitti360_fisheye_mei_cameras( + log_name: str, + idx: int, + camera_calibration: Dict[str, StateSE3], + kitti360_folders: Dict[str, Path], + data_converter_config: DatasetConverterConfig, +) -> List[CameraData]: + + fisheye_camera_data_list: List[CameraData] = [] + if data_converter_config.include_fisheye_mei_cameras: + for camera_type, cam_dir_name in KITTI360_FISHEYE_MEI_CAMERA_TYPES.items(): + img_path_png = kitti360_folders[DIR_2D_RAW] / log_name / cam_dir_name / "data_rgb" / f"{idx:010d}.png" + camera_extrinsic = camera_calibration[cam_dir_name] + if img_path_png.exists(): + fisheye_camera_data_list.append( + CameraData( + camera_type=camera_type, + extrinsic=camera_extrinsic, + dataset_root=kitti360_folders[DIR_ROOT], + relative_path=img_path_png.relative_to(kitti360_folders[DIR_ROOT]), + ) + ) + return fisheye_camera_data_list + + +def _load_kitti_360_calibration(kitti_360_data_root: Path) -> Dict[str, StateSE3]: + calib_file = kitti_360_data_root / DIR_CALIB / "calib_cam_to_pose.txt" + if not calib_file.exists(): + raise FileNotFoundError(f"Calibration file not found: {calib_file}") + + lastrow = np.array([0, 0, 0, 1]).reshape(1, 4) + calib_dict: Dict[str, StateSE3] = {} + with open(calib_file, "r") as f: + for line in f: + parts = line.strip().split() + key = parts[0][:-1] + values = list(map(float, parts[1:])) + matrix = np.array(values).reshape(3, 4) + cam2pose = np.concatenate((matrix, lastrow)) + cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose + camera_extrinsic = StateSE3.from_transformation_matrix(cam2pose) + camera_extrinsic = _extrinsic_from_imu_to_rear_axle(camera_extrinsic) + calib_dict[key] = camera_extrinsic + return calib_dict + + +def _extrinsic_from_imu_to_rear_axle(extrinsic: StateSE3) -> StateSE3: + imu_se3 = StateSE3(x=-0.05, y=0.32, z=0.0, qw=1.0, qx=0.0, qy=0.0, qz=0.0) + rear_axle_se3 = StateSE3(x=0.0, y=0.0, z=0.0, qw=1.0, qx=0.0, qy=0.0, qz=0.0) + return StateSE3.from_array(convert_se3_array_between_origins(imu_se3, rear_axle_se3, extrinsic.array)) diff --git a/src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py b/src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py new file mode 100644 index 00000000..847250eb --- /dev/null +++ b/src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py @@ -0,0 +1,116 @@ +import os +import xml.etree.ElementTree as ET +from pathlib import Path +from typing import List + +import numpy as np +import shapely.geometry as geom + +from py123d.conversion.datasets.kitti360.utils.kitti360_helper import KITTI360_MAP_Bbox3D +from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from py123d.conversion.utils.map_utils.road_edge.road_edge_2d_utils import ( + get_road_edge_linear_rings, + split_line_geometry_by_max_length, +) +from py123d.conversion.utils.map_utils.road_edge.road_edge_3d_utils import lift_road_edges_to_3d +from py123d.datatypes.maps.cache.cache_map_objects import ( + CacheCarpark, + CacheGenericDrivable, + CacheRoadEdge, + CacheWalkway, +) +from py123d.datatypes.maps.map_datatypes import RoadEdgeType +from py123d.geometry.polyline import Polyline3D + +MAX_ROAD_EDGE_LENGTH = 100.0 # meters, used to 
split overly long road edges into shorter segments
+
+KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"])
+
+DIR_3D_BBOX = "data_3d_bboxes"
+
+PATH_3D_BBOX_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_BBOX
+
+KITTI360_MAP_BBOX = [
+    "road",
+    "sidewalk",
+    # "railtrack",
+    # "ground",
+    "driveway",
+]
+
+
+def convert_kitti360_map_with_writer(log_name: str, map_writer: AbstractMapWriter) -> None:
+    """
+    Convert KITTI-360 map data using the provided map writer.
+    This function extracts map data from KITTI-360 XML files and writes them using the map writer interface.
+
+    :param log_name: The name of the log to convert
+    :param map_writer: The map writer to use for writing the converted map
+    """
+    xml_path = PATH_3D_BBOX_ROOT / "train_full" / f"{log_name}.xml"
+    if not xml_path.exists():
+        xml_path = PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml"
+
+    if not xml_path.exists():
+        raise FileNotFoundError(f"BBox 3D file not found: {xml_path}")
+
+    tree = ET.parse(xml_path)
+    root = tree.getroot()
+    objs: List[KITTI360_MAP_Bbox3D] = []
+
+    for child in root:
+        label = child.find("label").text
+        if child.find("transform") is None or label not in KITTI360_MAP_BBOX:
+            continue
+        obj = KITTI360_MAP_Bbox3D()
+        obj.parseBbox(child)
+        objs.append(obj)
+
+    # 1. Write roads, sidewalks, driveways, and collect road geometries
+    road_outlines_3d: List[Polyline3D] = []
+    for obj in objs:
+        if obj.label == "road":
+            map_writer.write_generic_drivable(
+                CacheGenericDrivable(
+                    object_id=obj.id,
+                    outline=obj.vertices,
+                    geometry=geom.Polygon(obj.vertices.array[:, :3]),
+                )
+            )
+            # close the outline ring by appending the first vertex
+            road_outline_array = np.concatenate([obj.vertices.array[:, :3], obj.vertices.array[0:1, :3]])
+            road_outlines_3d.append(Polyline3D.from_array(road_outline_array))
+        elif obj.label == "sidewalk":
+            map_writer.write_walkway(
+                CacheWalkway(
+                    object_id=obj.id,
+                    outline=obj.vertices,
+                    geometry=geom.Polygon(obj.vertices.array[:, :3]),
+                )
+            )
+        elif obj.label == "driveway":
+            map_writer.write_carpark(
+                CacheCarpark(
+                    object_id=obj.id,
+                    outline=obj.vertices,
+                    geometry=geom.Polygon(obj.vertices.array[:, :3]),
+                )
+            )
+
+    # 2. Use road geometries to create road edges
+
+    # NOTE @DanielDauner: We merge all drivable areas in 2D and lift the outlines to 3D.
+    # Currently the method assumes that the drivable areas do not overlap and all road surfaces are included.
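+    # The steps below: (1) project the collected road outlines to 2D and merge them into
+    # boundary rings, (2) lift the rings back to 3D using the original 3D outlines, and
+    # (3) split the resulting edges into pieces of at most MAX_ROAD_EDGE_LENGTH meters.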
+ road_polygons_2d = [geom.Polygon(road_outline.array[:, :2]) for road_outline in road_outlines_3d] + road_edges_2d = get_road_edge_linear_rings(road_polygons_2d) + road_edges_3d = lift_road_edges_to_3d(road_edges_2d, road_outlines_3d) + road_edges_linestrings_3d = [polyline.linestring for polyline in road_edges_3d] + road_edges_linestrings_3d = split_line_geometry_by_max_length(road_edges_linestrings_3d, MAX_ROAD_EDGE_LENGTH) + + for idx in range(len(road_edges_linestrings_3d)): + map_writer.write_road_edge( + CacheRoadEdge( + object_id=idx, + road_edge_type=RoadEdgeType.ROAD_EDGE_BOUNDARY, + polyline=Polyline3D.from_linestring(road_edges_linestrings_3d[idx]), + ) + ) diff --git a/src/py123d/conversion/datasets/kitti360/kitti360_sensor_io.py b/src/py123d/conversion/datasets/kitti360/kitti360_sensor_io.py new file mode 100644 index 00000000..e58b165d --- /dev/null +++ b/src/py123d/conversion/datasets/kitti360/kitti360_sensor_io.py @@ -0,0 +1,29 @@ +import logging +from pathlib import Path +from typing import Dict + +import numpy as np + +from py123d.conversion.registry.lidar_index_registry import Kitti360LiDARIndex +from py123d.datatypes.scene.scene_metadata import LogMetadata +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.geometry.se import StateSE3 +from py123d.geometry.transform.transform_se3 import convert_points_3d_array_between_origins + + +def load_kitti360_lidar_pcs_from_file(filepath: Path, log_metadata: LogMetadata) -> Dict[LiDARType, np.ndarray]: + if not filepath.exists(): + logging.warning(f"LiDAR file does not exist: {filepath}. Returning empty point cloud.") + return {LiDARType.LIDAR_TOP: np.zeros((1, len(Kitti360LiDARIndex)), dtype=np.float32)} + + lidar_extrinsic = log_metadata.lidar_metadata[LiDARType.LIDAR_TOP].extrinsic + lidar_pc = np.fromfile(filepath, dtype=np.float32) + lidar_pc = np.reshape(lidar_pc, [-1, len(Kitti360LiDARIndex)]) + + lidar_pc[..., Kitti360LiDARIndex.XYZ] = convert_points_3d_array_between_origins( + from_origin=lidar_extrinsic, + to_origin=StateSE3(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0), + points_3d_array=lidar_pc[..., Kitti360LiDARIndex.XYZ], + ) + + return {LiDARType.LIDAR_TOP: lidar_pc} diff --git a/d123/common/geometry/__init__.py b/src/py123d/conversion/datasets/kitti360/utils/__init__.py similarity index 100% rename from d123/common/geometry/__init__.py rename to src/py123d/conversion/datasets/kitti360/utils/__init__.py diff --git a/src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py b/src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py new file mode 100644 index 00000000..fa3afa77 --- /dev/null +++ b/src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py @@ -0,0 +1,261 @@ +import copy +from pathlib import Path +from typing import Any, Dict, List, Tuple + +import numpy as np +from scipy.linalg import polar + +from py123d.conversion.datasets.kitti360.utils.kitti360_labels import BBOX_LABLES_TO_DETECTION_NAME_DICT, kittiId2label +from py123d.geometry import BoundingBoxSE3, EulerAngles, Polyline3D, StateSE3 + +# KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"]) +# DIR_CALIB = "calibration" +# PATH_CALIB_ROOT: Path = KITTI360_DATA_ROOT / DIR_CALIB + +KITTI3602NUPLAN_IMU_CALIBRATION = np.array( + [ + [1, 0, 0, 0], + [0, -1, 0, 0], + [0, 0, -1, 0], + [0, 0, 0, 1], + ], + dtype=np.float64, +) +MAX_N = 1000 + + +def local2global(semanticId: int, instanceId: int) -> int: + globalId = semanticId * MAX_N + instanceId + if isinstance(globalId, np.ndarray): + return globalId.astype(np.int32) + 
else:
+        return int(globalId)
+
+
+def global2local(globalId: int) -> Tuple[int, int]:
+    semanticId = globalId // MAX_N
+    instanceId = globalId % MAX_N
+    if isinstance(globalId, np.ndarray):
+        return semanticId.astype(np.int32), instanceId.astype(np.int32)
+    else:
+        return int(semanticId), int(instanceId)
+
+
+class KITTI360Bbox3D:
+
+    # global ID (only used for sequence 0004)
+    dynamic_global_id = 2000000
+    static_global_id = 1000000
+
+    # Constructor
+    def __init__(self):
+
+        # the ID of the corresponding object
+        self.semanticId = -1
+        self.instanceId = -1
+        self.annotationId = -1
+        self.globalID = -1
+
+        # the window that contains the bbox
+        self.start_frame = -1
+        self.end_frame = -1
+
+        # timestamp of the bbox (-1 if static)
+        self.timestamp = -1
+
+        # name
+        self.name = ""
+
+        # label
+        self.label = ""
+
+    def parseBbox(self, child):
+        self.timestamp = int(child.find("timestamp").text)
+
+        self.annotationId = int(child.find("index").text) + 1
+
+        self.label = child.find("label").text
+
+        if child.find("semanticId") is None:
+            self.name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(self.label, "unknown")
+            self.is_dynamic = int(child.find("dynamic").text)
+            if self.is_dynamic != 0:
+                dynamicSeq = int(child.find("dynamicSeq").text)
+                self.globalID = KITTI360Bbox3D.dynamic_global_id + dynamicSeq
+            else:
+                self.globalID = KITTI360Bbox3D.static_global_id
+                KITTI360Bbox3D.static_global_id += 1
+        else:
+            self.start_frame = int(child.find("start_frame").text)
+            self.end_frame = int(child.find("end_frame").text)
+
+            semanticIdKITTI = int(child.find("semanticId").text)
+            self.semanticId = kittiId2label[semanticIdKITTI].id
+            self.instanceId = int(child.find("instanceId").text)
+            self.name = kittiId2label[semanticIdKITTI].name
+
+            self.globalID = local2global(self.semanticId, self.instanceId)
+
+        self.valid_frames = {"global_id": self.globalID, "records": []}
+
+        self.parseVertices(child)
+        self.parse_scale_rotation()
+
+    def parseVertices(self, child):
+        transform = parseOpencvMatrix(child.find("transform"))
+        R = transform[:3, :3]
+        T = transform[:3, 3]
+        vertices = parseOpencvMatrix(child.find("vertices"))
+        self.vertices_template = copy.deepcopy(vertices)
+
+        vertices = np.matmul(R, vertices.transpose()).transpose() + T
+        self.vertices = vertices
+
+        self.R = R
+        self.T = T
+
+    def parse_scale_rotation(self):
+        # polar decomposition splits the transform into a rotation and a scale matrix
+        Rm, Sm = polar(self.R)
+        if np.linalg.det(Rm) < 0:
+            Rm[0] = -Rm[0]
+        scale = np.diag(Sm)
+        euler_angles = EulerAngles.from_rotation_matrix(Rm)
+        yaw, pitch, roll = euler_angles.yaw, euler_angles.pitch, euler_angles.roll
+        obj_quaternion = euler_angles.quaternion
+
+        self.Rm = np.array(Rm)
+        self.Sm = np.array(Sm)
+        self.scale = scale
+        self.yaw = yaw
+        self.pitch = pitch
+        self.roll = roll
+        self.qw = obj_quaternion.qw
+        self.qx = obj_quaternion.qx
+        self.qy = obj_quaternion.qy
+        self.qz = obj_quaternion.qz
+
+    def get_state_array(self) -> np.ndarray:
+        center = StateSE3(
+            x=self.T[0],
+            y=self.T[1],
+            z=self.T[2],
+            qw=self.qw,
+            qx=self.qx,
+            qy=self.qy,
+            qz=self.qz,
+        )
+        scale = self.scale
+        bounding_box_se3 = BoundingBoxSE3(center, scale[0], scale[1], scale[2])
+
+        return bounding_box_se3.array
+
+    def filter_by_radius(self, ego_state_xyz: np.ndarray, valid_timestamp: List[int], radius: float = 50.0) -> None:
+        """First stage of detection preprocessing: filter detections by distance to the ego vehicle."""
+        d = np.linalg.norm(ego_state_xyz - self.T[None, :], 
axis=1) + idxs = np.where(d <= radius)[0] + for idx in idxs: + self.valid_frames["records"].append( + { + "timestamp": valid_timestamp[idx], + "points_in_box": None, + } + ) + + def box_visible_in_point_cloud(self, points: np.ndarray) -> Tuple[bool, int]: + """points: (N,3) , box: (8,3)""" + box = self.vertices.copy() + # avoid calculating ground point cloud + z_offset = 0.1 + box[:, 2] += z_offset + O, A, B, C = box[0], box[1], box[2], box[5] + OA = A - O + OB = B - O + OC = C - O + POA, POB, POC = (points @ OA[..., None])[:, 0], (points @ OB[..., None])[:, 0], (points @ OC[..., None])[:, 0] + mask = ( + (np.dot(O, OA) < POA) + & (POA < np.dot(A, OA)) + & (np.dot(O, OB) < POB) + & (POB < np.dot(B, OB)) + & (np.dot(O, OC) < POC) + & (POC < np.dot(C, OC)) + ) + + points_in_box = np.sum(mask) + visible = True if points_in_box > 40 else False + return visible, points_in_box + + def load_detection_preprocess(self, records_dict: Dict[int, Any]): + if self.globalID in records_dict: + self.valid_frames["records"] = records_dict[self.globalID]["records"] + + +class KITTI360_MAP_Bbox3D: + def __init__(self): + self.id = -1 + self.label = " " + + self.vertices: Polyline3D = None + self.R = None + self.T = None + + def parseVertices_plane(self, child): + transform = parseOpencvMatrix(child.find("transform")) + R = transform[:3, :3] + T = transform[:3, 3] + if child.find("transform_plane").find("rows").text == "0": + vertices = parseOpencvMatrix(child.find("vertices")) + else: + vertices = parseOpencvMatrix(child.find("vertices_plane")) + + vertices = np.matmul(R, vertices.transpose()).transpose() + T + self.vertices = Polyline3D.from_array(vertices) + + self.R = R + self.T = T + + def parseBbox(self, child): + self.id = int(child.find("index").text) + self.label = child.find("label").text + self.parseVertices_plane(child) + + +def parseOpencvMatrix(node): + rows = int(node.find("rows").text) + cols = int(node.find("cols").text) + data = node.find("data").text.split(" ") + + mat = [] + for d in data: + d = d.replace("\n", "") + if len(d) < 1: + continue + mat.append(float(d)) + mat = np.reshape(mat, [rows, cols]) + return mat + + +def get_kitti360_lidar_extrinsic(kitti360_calibration_root: Path) -> np.ndarray: + cam2pose_txt = kitti360_calibration_root / "calib_cam_to_pose.txt" + if not cam2pose_txt.exists(): + raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}") + + cam2velo_txt = kitti360_calibration_root / "calib_cam_to_velo.txt" + if not cam2velo_txt.exists(): + raise FileNotFoundError(f"calib_cam_to_velo.txt file not found: {cam2velo_txt}") + + lastrow = np.array([0, 0, 0, 1]).reshape(1, 4) + + with open(cam2pose_txt, "r") as f: + image_00 = next(f) + values = list(map(float, image_00.strip().split()[1:])) + matrix = np.array(values).reshape(3, 4) + cam2pose = np.concatenate((matrix, lastrow)) + cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose + + cam2velo = np.concatenate((np.loadtxt(cam2velo_txt).reshape(3, 4), lastrow)) + extrinsic = cam2pose @ np.linalg.inv(cam2velo) + + return extrinsic diff --git a/src/py123d/conversion/datasets/kitti360/utils/kitti360_labels.py b/src/py123d/conversion/datasets/kitti360/utils/kitti360_labels.py new file mode 100644 index 00000000..a40cffca --- /dev/null +++ b/src/py123d/conversion/datasets/kitti360/utils/kitti360_labels.py @@ -0,0 +1,208 @@ +#!/usr/bin/python +# +# KITTI-360 labels +# + +from collections import namedtuple + +from py123d.conversion.registry.box_detection_label_registry import KITTI360BoxDetectionLabel + +# 
-------------------------------------------------------------------------------- +# Definitions +# -------------------------------------------------------------------------------- + +# a label and all meta information +Label = namedtuple( + "Label", + [ + "name", # The identifier of this label, e.g. 'car', 'person', ... . + # We use them to uniquely name a class + "id", # An integer ID that is associated with this label. + # The IDs are used to represent the label in ground truth images + # An ID of -1 means that this label does not have an ID and thus + # is ignored when creating ground truth images (e.g. license plate). + # Do not modify these IDs, since exactly these IDs are expected by the + # evaluation server. + "kittiId", # An integer ID that is associated with this label for KITTI-360 + # NOT FOR RELEASING + "trainId", # Feel free to modify these IDs as suitable for your method. Then create + # ground truth images with train IDs, using the tools provided in the + # 'preparation' folder. However, make sure to validate or submit results + # to our evaluation server using the regular IDs above! + # For trainIds, multiple labels might have the same ID. Then, these labels + # are mapped to the same class in the ground truth images. For the inverse + # mapping, we use the label that is defined first in the list below. + # For example, mapping all void-type classes to the same ID in training, + # might make sense for some approaches. + # Max value is 255! + "category", # The name of the category that this label belongs to + "categoryId", # The ID of this category. Used to create ground truth images + # on category level. + "hasInstances", # Whether this label distinguishes between single instances or not + "ignoreInEval", # Whether pixels having this class as ground truth label are ignored + # during evaluations or not + "ignoreInInst", # Whether pixels having this class as ground truth label are ignored + # during evaluations of instance segmentation or not + "color", # The color of this label + ], +) + + +# -------------------------------------------------------------------------------- +# A list of all labels +# -------------------------------------------------------------------------------- + +# Please adapt the train IDs as appropriate for your approach. +# Note that you might want to ignore labels with ID 255 during training. +# Further note that the current train IDs are only a suggestion. You can use whatever you like. +# Make sure to provide your results using the original IDs and not the training IDs. +# Note that many IDs are ignored in evaluation and thus you never need to predict these! 
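+# Illustrative lookups (values taken from the table below), e.g.:
+#   kittiId2label[13].name  -> "car"
+#   name2label["car"].color -> (0, 0, 142)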
+ +labels = [ + # name id kittiId, trainId category catId hasInstances ignoreInEval ignoreInInst color + Label("unlabeled", 0, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("ego vehicle", 1, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("rectification border", 2, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("out of roi", 3, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("static", 4, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("dynamic", 5, -1, 255, "void", 0, False, True, True, (111, 74, 0)), + Label("ground", 6, -1, 255, "void", 0, False, True, True, (81, 0, 81)), + Label("road", 7, 1, 0, "flat", 1, False, False, False, (128, 64, 128)), + Label("sidewalk", 8, 3, 1, "flat", 1, False, False, False, (244, 35, 232)), + Label("parking", 9, 2, 255, "flat", 1, False, True, True, (250, 170, 160)), + Label("rail track", 10, 10, 255, "flat", 1, False, True, True, (230, 150, 140)), + Label("building", 11, 11, 2, "construction", 2, True, False, False, (70, 70, 70)), + Label("wall", 12, 7, 3, "construction", 2, False, False, False, (102, 102, 156)), + Label("fence", 13, 8, 4, "construction", 2, False, False, False, (190, 153, 153)), + Label("guard rail", 14, 30, 255, "construction", 2, False, True, True, (180, 165, 180)), + Label("bridge", 15, 31, 255, "construction", 2, False, True, True, (150, 100, 100)), + Label("tunnel", 16, 32, 255, "construction", 2, False, True, True, (150, 120, 90)), + Label("pole", 17, 21, 5, "object", 3, True, False, True, (153, 153, 153)), + Label("polegroup", 18, -1, 255, "object", 3, False, True, True, (153, 153, 153)), + Label("traffic light", 19, 23, 6, "object", 3, True, False, True, (250, 170, 30)), + Label("traffic sign", 20, 24, 7, "object", 3, True, False, True, (220, 220, 0)), + Label("vegetation", 21, 5, 8, "nature", 4, False, False, False, (107, 142, 35)), + Label("terrain", 22, 4, 9, "nature", 4, False, False, False, (152, 251, 152)), + Label("sky", 23, 9, 10, "sky", 5, False, False, False, (70, 130, 180)), + Label("person", 24, 19, 11, "human", 6, True, False, False, (220, 20, 60)), + Label("rider", 25, 20, 12, "human", 6, True, False, False, (255, 0, 0)), + Label("car", 26, 13, 13, "vehicle", 7, True, False, False, (0, 0, 142)), + Label("truck", 27, 14, 14, "vehicle", 7, True, False, False, (0, 0, 70)), + Label("bus", 28, 34, 15, "vehicle", 7, True, False, False, (0, 60, 100)), + Label("caravan", 29, 16, 255, "vehicle", 7, True, True, True, (0, 0, 90)), + Label("trailer", 30, 15, 255, "vehicle", 7, True, True, True, (0, 0, 110)), + Label("train", 31, 33, 16, "vehicle", 7, True, False, False, (0, 80, 100)), + Label("motorcycle", 32, 17, 17, "vehicle", 7, True, False, False, (0, 0, 230)), + Label("bicycle", 33, 18, 18, "vehicle", 7, True, False, False, (119, 11, 32)), + Label("garage", 34, 12, 2, "construction", 2, True, True, True, (64, 128, 128)), + Label("gate", 35, 6, 4, "construction", 2, False, True, True, (190, 153, 153)), + Label("stop", 36, 29, 255, "construction", 2, True, True, True, (150, 120, 90)), + Label("smallpole", 37, 22, 5, "object", 3, True, True, True, (153, 153, 153)), + Label("lamp", 38, 25, 255, "object", 3, True, True, True, (0, 64, 64)), + Label("trash bin", 39, 26, 255, "object", 3, True, True, True, (0, 128, 192)), + Label("vending machine", 40, 27, 255, "object", 3, True, True, True, (128, 64, 0)), + Label("box", 41, 28, 255, "object", 3, True, True, True, (64, 64, 128)), + Label("unknown construction", 42, 35, 255, "void", 0, False, True, True, (102, 0, 0)), + 
Label("unknown vehicle", 43, 36, 255, "void", 0, False, True, True, (51, 0, 51)), + Label("unknown object", 44, 37, 255, "void", 0, False, True, True, (32, 32, 32)), + Label("license plate", -1, -1, -1, "vehicle", 7, False, True, True, (0, 0, 142)), +] + +# -------------------------------------------------------------------------------- +# Create dictionaries for a fast lookup +# -------------------------------------------------------------------------------- + +# Please refer to the main method below for example usages! + +# name to label object +name2label = {label.name: label for label in labels} +# id to label object +id2label = {label.id: label for label in labels} +# trainId to label object +trainId2label = {label.trainId: label for label in reversed(labels)} +# KITTI-360 ID to cityscapes ID +kittiId2label = {label.kittiId: label for label in labels} +# category to list of label objects +category2labels = {} +for label in labels: + category = label.category + if category in category2labels: + category2labels[category].append(label) + else: + category2labels[category] = [label] + +# -------------------------------------------------------------------------------- +# Assure single instance name +# -------------------------------------------------------------------------------- + + +# returns the label name that describes a single instance (if possible) +# e.g. input | output +# ---------------------- +# car | car +# cargroup | car +# foo | None +# foogroup | None +# skygroup | None +def assureSingleInstanceName(name): + # if the name is known, it is not a group + if name in name2label: + return name + # test if the name actually denotes a group + if not name.endswith("group"): + return None + # remove group + name = name[: -len("group")] + # test if the new name exists + if name not in name2label: + return None + # test if the new name denotes a label that actually has instances + if not name2label[name].hasInstances: + return None + # all good then + return name + + +BBOX_LABLES_TO_DETECTION_NAME_DICT = { + "car": "car", + "truck": "truck", + "bicycle": "bicycle", + "trafficLight": "traffic light", + "trailer": "trailer", + "bus": "bus", + "pedestrian": "person", + "motorcycle": "motorcycle", + "stop": "stop", + "trafficSign": "traffic sign", + "rider": "rider", + "caravan": "caravan", + "box": "box", + "lamp": "lamp", + "pole": "pole", + "smallpole": "smallpole", + "train": "train", + "trashBin": "trash bin", + "vendingMachine": "vending machine", +} + +KIITI360_DETECTION_NAME_DICT = { + "bicycle": KITTI360BoxDetectionLabel.BICYCLE, + "box": KITTI360BoxDetectionLabel.BOX, + "bus": KITTI360BoxDetectionLabel.BUS, + "car": KITTI360BoxDetectionLabel.CAR, + "caravan": KITTI360BoxDetectionLabel.CARAVAN, + "lamp": KITTI360BoxDetectionLabel.LAMP, + "motorcycle": KITTI360BoxDetectionLabel.MOTORCYCLE, + "person": KITTI360BoxDetectionLabel.PERSON, + "pole": KITTI360BoxDetectionLabel.POLE, + "rider": KITTI360BoxDetectionLabel.RIDER, + "smallpole": KITTI360BoxDetectionLabel.SMALLPOLE, + "stop": KITTI360BoxDetectionLabel.STOP, + "traffic light": KITTI360BoxDetectionLabel.TRAFFIC_LIGHT, + "traffic sign": KITTI360BoxDetectionLabel.TRAFFIC_SIGN, + "trailer": KITTI360BoxDetectionLabel.TRAILER, + "train": KITTI360BoxDetectionLabel.TRAIN, + "trash bin": KITTI360BoxDetectionLabel.TRASH_BIN, + "truck": KITTI360BoxDetectionLabel.TRUCK, + "vending machine": KITTI360BoxDetectionLabel.VENDING_MACHINE, +} diff --git a/src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py 
b/src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py new file mode 100644 index 00000000..1c2beeca --- /dev/null +++ b/src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py @@ -0,0 +1,227 @@ +""" +This script precomputes static detection records for KITTI-360: + - Stage 1: radius filtering using ego positions (from poses.txt). + - Stage 2: LiDAR visibility check to fill per-frame point counts. +It writes a pickle containing, for each static object, all feasible frames and +their point counts to avoid recomputation in later pipelines. +We have precomputed and saved these pickles for all training logs; you can either +download them or run this script to generate them. +""" + +from __future__ import annotations + +import concurrent.futures +import logging +import os +import pickle +import xml.etree.ElementTree as ET +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import numpy as np +import numpy.typing as npt + +from py123d.conversion.datasets.kitti360.utils.kitti360_helper import ( + KITTI3602NUPLAN_IMU_CALIBRATION, + KITTI360Bbox3D, + get_kitti360_lidar_extrinsic, +) +from py123d.conversion.datasets.kitti360.utils.kitti360_labels import ( + BBOX_LABLES_TO_DETECTION_NAME_DICT, + KIITI360_DETECTION_NAME_DICT, + kittiId2label, +) + + +def _bbox_xml_path(kitti360_dataset_root: Path, log_name: str) -> Path: + if log_name == "2013_05_28_drive_0004_sync": + return kitti360_dataset_root / "data_3d_bboxes" / "train_full" / f"{log_name}.xml" + return kitti360_dataset_root / "data_3d_bboxes" / "train" / f"{log_name}.xml" + + +def _lidar_frame_path(kitti360_dataset_root: Path, log_name: str, frame_idx: int) -> Path: + return kitti360_dataset_root / "data_3d_raw" / log_name / "velodyne_points" / "data" / f"{frame_idx:010d}.bin" + + +def _load_lidar_xyz(filepath: Path) -> np.ndarray: + """Load one LiDAR frame and return Nx3 xyz.""" + arr = np.fromfile(filepath, dtype=np.float32) + return arr.reshape(-1, 4)[:, :3] + + +def _collect_static_objects(kitti360_dataset_root: Path, log_name: str) -> List[KITTI360Bbox3D]: + """Parse XML and collect static objects with valid class names.""" + xml_path = _bbox_xml_path(kitti360_dataset_root, log_name) + if not xml_path.exists(): + raise FileNotFoundError(f"BBox 3D file not found: {xml_path}") + tree = ET.parse(xml_path) + root = tree.getroot() + + static_objs: List[KITTI360Bbox3D] = [] + + for child in root: + if child.find("semanticId") is not None: + semanticIdKITTI = int(child.find("semanticId").text) + name = kittiId2label[semanticIdKITTI].name + else: + label = child.find("label").text + name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(label, "unknown") + timestamp = int(child.find("timestamp").text) # -1 for static objects + if child.find("transform") is None or name not in KIITI360_DETECTION_NAME_DICT.keys() or timestamp != -1: + continue + obj = KITTI360Bbox3D() + obj.parseBbox(child) + static_objs.append(obj) + return static_objs + + +def _collect_ego_states(kitti360_data_root: Path, log_name: str) -> Tuple[npt.NDArray[np.float64], list[int]]: + """Load ego states from poses.txt.""" + + pose_file = kitti360_data_root / "data_poses" / log_name / "poses.txt" + if not pose_file.exists():
+ raise FileNotFoundError(f"Pose file not found: {pose_file}") + + poses = np.loadtxt(pose_file) + poses_time = poses[:, 0].astype(np.int32) + valid_timestamp: List[int] = list(poses_time) + + ego_states = [] + for pos in range(len(valid_timestamp)): + state_item = np.eye(4) + r00, r01, r02 = poses[pos, 1:4] + r10, r11, r12 = poses[pos, 5:8] + r20, r21, r22 = poses[pos, 9:12] + R_mat = np.array([[r00, r01, r02], [r10, r11, r12], [r20, r21, r22]], dtype=np.float64) + R_mat_cali = R_mat @ KITTI3602NUPLAN_IMU_CALIBRATION[:3, :3] + ego_state_xyz = np.array( + [ + poses[pos, 4], + poses[pos, 8], + poses[pos, 12], + ] + ) + + state_item[:3, :3] = R_mat_cali + state_item[:3, 3] = ego_state_xyz + ego_states.append(state_item) + + # [N,4,4] + return np.array(ego_states), valid_timestamp + + +def process_detection( + kitti360_data_root: Path, + log_name: str, + radius_m: float = 60.0, + output_dir: Optional[Path] = None, +) -> None: + """ + Precompute detection filtering for static objects: + 1) filter by ego-centered radius over all frames. + 2) filter by LiDAR point cloud visibility. + Save per-frame detections to a pickle to avoid recomputation. + """ + + lidar_dir = kitti360_data_root / "data_3d_raw" / log_name / "velodyne_points" / "data" + if not lidar_dir.exists(): + raise FileNotFoundError(f"LiDAR data folder not found: {lidar_dir}") + ts_len = len(list(lidar_dir.glob("*.bin"))) + logging.info(f"[preprocess] {log_name}: found {ts_len} lidar frames") + + # 1) Parse objects from XML + static_objs: List[KITTI360Bbox3D] = _collect_static_objects(kitti360_data_root, log_name) + logging.info(f"[preprocess] {log_name}: static objects = {len(static_objs)}") + + # 2) Filter static objs by ego-centered radius + ego_states, valid_timestamp = _collect_ego_states(kitti360_data_root, log_name) + logging.info(f"[preprocess] {log_name}: ego states = {len(ego_states)}") + for obj in static_objs: + obj.filter_by_radius(ego_states[:, :3, 3], valid_timestamp, radius_m) + + # 3) Filter static objs by LiDAR point cloud visibility + lidar_extrinsic = get_kitti360_lidar_extrinsic(kitti360_data_root / "calibration") + + def process_one_frame(time_idx: int) -> None: + valid_time_idx = valid_timestamp[time_idx] + logging.info(f"[preprocess] {log_name}: t={valid_time_idx}") + lidar_path = _lidar_frame_path(kitti360_data_root, log_name, valid_time_idx) + if not lidar_path.exists(): + logging.warning(f"[preprocess] {log_name}: LiDAR frame not found: {lidar_path}") + return + + lidar_xyz = _load_lidar_xyz(lidar_path) + + # lidar frame to imu frame + lidar_h = np.concatenate((lidar_xyz, np.ones((lidar_xyz.shape[0], 1), dtype=lidar_xyz.dtype)), axis=1) + lidar_in_imu = lidar_h @ lidar_extrinsic.T + lidar_in_imu = lidar_in_imu[:, :3] + + # imu frame to world frame + lidar_in_world = lidar_in_imu @ ego_states[time_idx][:3, :3].T + ego_states[time_idx][:3, 3] + + for obj in static_objs: + if not any(record["timestamp"] == valid_time_idx for record in obj.valid_frames["records"]): + continue + visible, points_in_box = obj.box_visible_in_point_cloud(lidar_in_world) + if not visible: + obj.valid_frames["records"] = [ + record for record in obj.valid_frames["records"] if record["timestamp"] != valid_time_idx + ] + else: + for record in obj.valid_frames["records"]: + if record["timestamp"] == valid_time_idx: + record["points_in_box"] = points_in_box + break + + max_workers = (os.cpu_count() or 1) * 2 # os.cpu_count() may return None + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + list(executor.map(process_one_frame,
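+ # NOTE: threads (rather than processes) are used deliberately here: the numpy-heavy + # visibility checks largely release the GIL, and each worker mutates the shared + # static_objs records in place, which would not propagate across process boundaries. +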
range(len(valid_timestamp)))) + + # 4) Save pickle + static_records: List[Dict[str, Any]] = [] + for obj in static_objs: + static_records.append(obj.valid_frames) + + if output_dir is None: + output_dir = kitti360_data_root / "data_3d_bboxes" / "preprocess" + output_dir.mkdir(parents=True, exist_ok=True) + out_path = output_dir / f"{log_name}_detection_preprocessed.pkl" + + payload = { + "log_name": log_name, + "static": static_records, + } + with open(out_path, "wb") as f: + pickle.dump(payload, f) + logging.info(f"[preprocess] saved: {out_path}") + + +if __name__ == "__main__": + import argparse + + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser(description="Precompute KITTI-360 detection filters") + parser.add_argument("--kitti360_data_root", type=Path, default=".", help="KITTI-360 data root directory") + parser.add_argument("--log_name", default="2013_05_28_drive_0000_sync") + parser.add_argument("--radius", type=float, default=60.0) + parser.add_argument("--out", type=Path, default="detection_preprocess", help="output directory for pkl") + args = parser.parse_args() + + process_detection( + kitti360_data_root=args.kitti360_data_root, + log_name=args.log_name, + radius_m=args.radius, + output_dir=args.out, + ) diff --git a/d123/common/geometry/bounding_box/__init__.py b/src/py123d/conversion/datasets/nuplan/__init__.py similarity index 100% rename from d123/common/geometry/bounding_box/__init__.py rename to src/py123d/conversion/datasets/nuplan/__init__.py diff --git a/src/py123d/conversion/datasets/nuplan/nuplan_converter.py b/src/py123d/conversion/datasets/nuplan/nuplan_converter.py new file mode 100644 index 00000000..36fa3a25 --- /dev/null +++ b/src/py123d/conversion/datasets/nuplan/nuplan_converter.py @@ -0,0 +1,447 @@ +import pickle +from pathlib import Path +from typing import Dict, Final, List, Tuple, Union + +import numpy as np +import yaml + +import py123d.conversion.datasets.nuplan.utils as nuplan_utils +from py123d.common.utils.dependencies import check_dependencies +from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter +from py123d.conversion.dataset_converter_config import DatasetConverterConfig +from py123d.conversion.datasets.nuplan.nuplan_map_conversion import write_nuplan_map +from py123d.conversion.datasets.nuplan.utils.nuplan_constants import ( + NUPLAN_DATA_SPLITS, + NUPLAN_DEFAULT_DT, + NUPLAN_LIDAR_DICT, + NUPLAN_MAP_LOCATIONS, + NUPLAN_ROLLING_SHUTTER_S, + NUPLAN_TRAFFIC_STATUS_DICT, +) +from py123d.conversion.datasets.nuplan.utils.nuplan_sql_helper import ( + get_box_detections_for_lidarpc_token_from_db, + get_nearest_ego_pose_for_timestamp_from_db, +) +from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, CameraData, LiDARData +from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from py123d.conversion.registry.box_detection_label_registry import NuPlanBoxDetectionLabel +from py123d.conversion.registry.lidar_index_registry import NuPlanLiDARIndex +from py123d.datatypes.detections.box_detections import BoxDetectionSE3, BoxDetectionWrapper +from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetection, TrafficLightDetectionWrapper +from py123d.datatypes.maps.map_metadata import MapMetadata +from py123d.datatypes.scene.scene_metadata import LogMetadata +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import ( + PinholeCameraMetadata, + PinholeCameraType, +
PinholeDistortion, + PinholeIntrinsics, +) +from py123d.datatypes.time.time_point import TimePoint +from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 +from py123d.datatypes.vehicle_state.vehicle_parameters import ( + get_nuplan_chrysler_pacifica_parameters, + rear_axle_se3_to_center_se3, +) +from py123d.geometry import StateSE3, Vector3D + +check_dependencies(["nuplan", "sqlalchemy"], "nuplan") +from nuplan.database.nuplan_db.nuplan_scenario_queries import get_cameras, get_images_from_lidar_tokens +from nuplan.database.nuplan_db_orm.lidar_pc import LidarPc +from nuplan.database.nuplan_db_orm.nuplandb import NuPlanDB +from nuplan.planning.simulation.observation.observation_type import CameraChannel + +# NOTE: Leaving this constant here, to avoid having a nuplan dependency in nuplan_constants.py +NUPLAN_CAMERA_MAPPING = { + PinholeCameraType.PCAM_F0: CameraChannel.CAM_F0, + PinholeCameraType.PCAM_B0: CameraChannel.CAM_B0, + PinholeCameraType.PCAM_L0: CameraChannel.CAM_L0, + PinholeCameraType.PCAM_L1: CameraChannel.CAM_L1, + PinholeCameraType.PCAM_L2: CameraChannel.CAM_L2, + PinholeCameraType.PCAM_R0: CameraChannel.CAM_R0, + PinholeCameraType.PCAM_R1: CameraChannel.CAM_R1, + PinholeCameraType.PCAM_R2: CameraChannel.CAM_R2, +} + +TARGET_DT: Final[float] = 0.1 # TODO: make configurable + + +def create_splits_logs() -> Dict[str, List[str]]: + # NOTE: nuPlan stores the training and validation log names in log_splits.yaml + yaml_filepath = Path(nuplan_utils.__path__[0]) / "log_splits.yaml" + with open(yaml_filepath, "r") as stream: + splits = yaml.safe_load(stream) + + return splits["log_splits"] + + +class NuPlanConverter(AbstractDatasetConverter): + def __init__( + self, + splits: List[str], + nuplan_data_root: Union[Path, str], + nuplan_maps_root: Union[Path, str], + nuplan_sensor_root: Union[Path, str], + dataset_converter_config: DatasetConverterConfig, + ) -> None: + super().__init__(dataset_converter_config) + assert nuplan_data_root is not None, "The variable `nuplan_data_root` must be provided." + assert nuplan_maps_root is not None, "The variable `nuplan_maps_root` must be provided." + assert nuplan_sensor_root is not None, "The variable `nuplan_sensor_root` must be provided." + for split in splits: + assert ( + split in NUPLAN_DATA_SPLITS + ), f"Split {split} is not available. Available splits: {NUPLAN_DATA_SPLITS}" + + self._splits: List[str] = splits + self._nuplan_data_root: Path = Path(nuplan_data_root) + self._nuplan_maps_root: Path = Path(nuplan_maps_root) + self._nuplan_sensor_root: Path = Path(nuplan_sensor_root) + + self._split_log_path_pairs: List[Tuple[str, List[Path]]] = self._collect_split_log_path_pairs() + + def _collect_split_log_path_pairs(self) -> List[Tuple[str, List[Path]]]: + # NOTE: the nuplan mini folder has an internal train, val, test structure, all stored in "mini". + # The complete dataset is saved in the "trainval" folder (train and val), or in the "test" folder (for test). + # Thus, we need to filter the logs in a split based on the internal nuPlan configuration.
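+ # For example, a log listed under "val" in log_splits.yaml lives in the "trainval" + # folder on disk, but is only emitted for the "nuplan_val" split.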
+ split_log_path_pairs: List[Tuple[str, List[Path]]] = [] + log_names_per_split = create_splits_logs() + + for split in self._splits: + split_type = split.split("_")[-1] + assert split_type in ["train", "val", "test"] + + if split in ["nuplan_train", "nuplan_val"]: + nuplan_split_folder = self._nuplan_data_root / "nuplan-v1.1" / "splits" / "trainval" + elif split in ["nuplan_test"]: + nuplan_split_folder = self._nuplan_data_root / "nuplan-v1.1" / "splits" / "test" + elif split in ["nuplan-mini_train", "nuplan-mini_val", "nuplan-mini_test"]: + nuplan_split_folder = self._nuplan_data_root / "nuplan-v1.1" / "splits" / "mini" + elif split == "nuplan-private_test": + # TODO: Remove private split + nuplan_split_folder = self._nuplan_data_root / "nuplan-v1.1" / "splits" / "private_test" + + all_log_files_in_path = [log_file for log_file in nuplan_split_folder.glob("*.db")] + + # TODO: Remove private split + if split == "nuplan-private_test": + valid_log_names = [str(log_file.stem) for log_file in all_log_files_in_path] + else: + all_log_names = set([str(log_file.stem) for log_file in all_log_files_in_path]) + log_names_in_split = set(log_names_per_split[split_type]) + valid_log_names = list(all_log_names & log_names_in_split) + + for log_name in valid_log_names: + log_path = nuplan_split_folder / f"{log_name}.db" + split_log_path_pairs.append((split, log_path)) + + return split_log_path_pairs + + def get_number_of_maps(self) -> int: + """Inherited, see superclass.""" + return len(NUPLAN_MAP_LOCATIONS) + + def get_number_of_logs(self) -> int: + """Inherited, see superclass.""" + return len(self._split_log_path_pairs) + + def convert_map(self, map_index: int, map_writer: AbstractMapWriter) -> None: + """Inherited, see superclass.""" + location = NUPLAN_MAP_LOCATIONS[map_index] + + # Dummy log metadata for map writing, TODO: Consider using MapMetadata instead? + map_metadata = _get_nuplan_map_metadata(location) + map_needs_writing = map_writer.reset(self.dataset_converter_config, map_metadata) + if map_needs_writing: + write_nuplan_map( + location=location, + nuplan_maps_root=self._nuplan_maps_root, + map_writer=map_writer, + ) + + map_writer.close() + + def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: + """Inherited, see superclass.""" + + split, source_log_path = self._split_log_path_pairs[log_index] + + nuplan_log_db = NuPlanDB(self._nuplan_data_root, str(source_log_path), None) + + log_name = nuplan_log_db.log_name + + # 1. Initialize log metadata + log_metadata = LogMetadata( + dataset="nuplan", + split=split, + log_name=log_name, + location=nuplan_log_db.log.map_version, + timestep_seconds=TARGET_DT, + vehicle_parameters=get_nuplan_chrysler_pacifica_parameters(), + box_detection_label_class=NuPlanBoxDetectionLabel, + pinhole_camera_metadata=_get_nuplan_camera_metadata( + source_log_path, self._nuplan_sensor_root, self.dataset_converter_config + ), + lidar_metadata=_get_nuplan_lidar_metadata( + self._nuplan_sensor_root, log_name, self.dataset_converter_config + ), + map_metadata=_get_nuplan_map_metadata(nuplan_log_db.log.map_version), + ) + + # 2.
Prepare log writer + log_needs_writing = log_writer.reset(self.dataset_converter_config, log_metadata) + + if log_needs_writing: + step_interval: int = int(TARGET_DT / NUPLAN_DEFAULT_DT) + for nuplan_lidar_pc in nuplan_log_db.lidar_pc[::step_interval]: + + lidar_pc_token: str = nuplan_lidar_pc.token + log_writer.write( + timestamp=TimePoint.from_us(nuplan_lidar_pc.timestamp), + ego_state=_extract_nuplan_ego_state(nuplan_lidar_pc), + box_detections=_extract_nuplan_box_detections(nuplan_lidar_pc, source_log_path), + traffic_lights=_extract_nuplan_traffic_lights(nuplan_log_db, lidar_pc_token), + pinhole_cameras=_extract_nuplan_cameras( + nuplan_log_db=nuplan_log_db, + nuplan_lidar_pc=nuplan_lidar_pc, + source_log_path=source_log_path, + nuplan_sensor_root=self._nuplan_sensor_root, + dataset_converter_config=self.dataset_converter_config, + ), + lidars=_extract_nuplan_lidars( + nuplan_lidar_pc=nuplan_lidar_pc, + nuplan_sensor_root=self._nuplan_sensor_root, + dataset_converter_config=self.dataset_converter_config, + ), + scenario_tags=_extract_nuplan_scenario_tag(nuplan_log_db, lidar_pc_token), + route_lane_group_ids=_extract_nuplan_route_lane_group_ids(nuplan_lidar_pc), + ) + del nuplan_lidar_pc + + log_writer.close() + + # NOTE: The nuPlanDB class has several internal references, which makes memory management tricky. + # We need to ensure all references are released properly; a plain del is not always sufficient. + nuplan_log_db.detach_tables() + nuplan_log_db.remove_ref() + del nuplan_log_db + + +def _get_nuplan_map_metadata(location: str) -> MapMetadata: + return MapMetadata( + dataset="nuplan", + split=None, + log_name=None, + location=location, + map_has_z=False, + map_is_local=False, + ) + + +def _get_nuplan_camera_metadata( + source_log_path: Path, + nuplan_sensor_root: Path, + dataset_converter_config: DatasetConverterConfig, +) -> Dict[PinholeCameraType, PinholeCameraMetadata]: + + def _get_camera_metadata(camera_type: PinholeCameraType) -> PinholeCameraMetadata: + cam = list(get_cameras(source_log_path, [str(NUPLAN_CAMERA_MAPPING[camera_type].value)]))[0] + + intrinsics_camera_matrix = np.array(pickle.loads(cam.intrinsic), dtype=np.float64) # array of shape (3, 3) + intrinsic = PinholeIntrinsics.from_camera_matrix(intrinsics_camera_matrix) + + distortion_array = np.array(pickle.loads(cam.distortion), dtype=np.float64) # array of shape (5,) + distortion = PinholeDistortion.from_array(distortion_array, copy=False) + + return PinholeCameraMetadata( + camera_type=camera_type, + width=cam.width, + height=cam.height, + intrinsics=intrinsic, + distortion=distortion, + ) + + camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {} + if dataset_converter_config.include_pinhole_cameras: + log_name = source_log_path.stem + for camera_type, nuplan_camera_type in NUPLAN_CAMERA_MAPPING.items(): + camera_folder = nuplan_sensor_root / log_name / f"{nuplan_camera_type.value}" + if camera_folder.exists() and camera_folder.is_dir(): + camera_metadata[camera_type] = _get_camera_metadata(camera_type) + + return camera_metadata + + +def _get_nuplan_lidar_metadata( + nuplan_sensor_root: Path, + log_name: str, + dataset_converter_config: DatasetConverterConfig, +) -> Dict[LiDARType, LiDARMetadata]: + + metadata: Dict[LiDARType, LiDARMetadata] = {} + log_lidar_folder = nuplan_sensor_root / log_name / "MergedPointCloud" + + # NOTE: We first need to check if the LiDAR folder exists, as not all logs have LiDAR data + if log_lidar_folder.exists() and log_lidar_folder.is_dir() and
dataset_converter_config.include_lidars: + for lidar_type in NUPLAN_LIDAR_DICT.values(): + metadata[lidar_type] = LiDARMetadata( + lidar_type=lidar_type, + lidar_index=NuPlanLiDARIndex, + extrinsic=None, # NOTE: LiDAR extrinsics are unknown + ) + return metadata + + +def _extract_nuplan_ego_state(nuplan_lidar_pc: LidarPc) -> EgoStateSE3: + + vehicle_parameters = get_nuplan_chrysler_pacifica_parameters() + rear_axle_pose = StateSE3( + x=nuplan_lidar_pc.ego_pose.x, + y=nuplan_lidar_pc.ego_pose.y, + z=nuplan_lidar_pc.ego_pose.z, + qw=nuplan_lidar_pc.ego_pose.qw, + qx=nuplan_lidar_pc.ego_pose.qx, + qy=nuplan_lidar_pc.ego_pose.qy, + qz=nuplan_lidar_pc.ego_pose.qz, + ) + center = rear_axle_se3_to_center_se3(rear_axle_se3=rear_axle_pose, vehicle_parameters=vehicle_parameters) + dynamic_state = DynamicStateSE3( + velocity=Vector3D( + x=nuplan_lidar_pc.ego_pose.vx, + y=nuplan_lidar_pc.ego_pose.vy, + z=nuplan_lidar_pc.ego_pose.vz, + ), + acceleration=Vector3D( + x=nuplan_lidar_pc.ego_pose.acceleration_x, + y=nuplan_lidar_pc.ego_pose.acceleration_y, + z=nuplan_lidar_pc.ego_pose.acceleration_z, + ), + angular_velocity=Vector3D( + x=nuplan_lidar_pc.ego_pose.angular_rate_x, + y=nuplan_lidar_pc.ego_pose.angular_rate_y, + z=nuplan_lidar_pc.ego_pose.angular_rate_z, + ), + ) + return EgoStateSE3( + center_se3=center, + dynamic_state_se3=dynamic_state, + vehicle_parameters=vehicle_parameters, + timepoint=None, # NOTE: Timepoint is not needed during writing, set to None + ) + + +def _extract_nuplan_box_detections(lidar_pc: LidarPc, source_log_path: Path) -> BoxDetectionWrapper: + box_detections: List[BoxDetectionSE3] = list( + get_box_detections_for_lidarpc_token_from_db(source_log_path, lidar_pc.token) + ) + return BoxDetectionWrapper(box_detections=box_detections) + + +def _extract_nuplan_traffic_lights(log_db: NuPlanDB, lidar_pc_token: str) -> TrafficLightDetectionWrapper: + traffic_lights_detections: List[TrafficLightDetection] = [ + TrafficLightDetection( + timepoint=None, # NOTE: Timepoint is not needed during writing, set to None + lane_id=int(traffic_light.lane_connector_id), + status=NUPLAN_TRAFFIC_STATUS_DICT[traffic_light.status], + ) + for traffic_light in log_db.traffic_light_status.select_many(lidar_pc_token=lidar_pc_token) + ] + return TrafficLightDetectionWrapper(traffic_light_detections=traffic_lights_detections) + + +def _extract_nuplan_cameras( + nuplan_log_db: NuPlanDB, + nuplan_lidar_pc: LidarPc, + source_log_path: Path, + nuplan_sensor_root: Path, + dataset_converter_config: DatasetConverterConfig, +) -> List[CameraData]: + + camera_data_list: List[CameraData] = [] + + if dataset_converter_config.include_pinhole_cameras: + log_cam_infos = {camera.token: camera for camera in nuplan_log_db.log.cameras} + for camera_type, camera_channel in NUPLAN_CAMERA_MAPPING.items(): + image_class = list( + get_images_from_lidar_tokens(source_log_path, [nuplan_lidar_pc.token], [str(camera_channel.value)]) + ) + + if len(image_class) != 0: + image = image_class[0] + filename_jpg = nuplan_sensor_root / image.filename_jpg + if filename_jpg.exists() and filename_jpg.is_file(): + + # NOTE: This part was adapted from the MTGS code. + # In MTGS, a slower method is used to find the nearest ego pose. + # The code below uses a direct SQL query to find the nearest ego pose in a given window.
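+ # The image timestamp is additionally offset by NUPLAN_ROLLING_SHUTTER_S (1/60 s, + # see nuplan_constants.py) before the lookup, presumably to account for the + # rolling-shutter delay of the camera readout.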
+ # https://github.com/OpenDriveLab/MTGS/blob/main/nuplan_scripts/utils/nuplan_utils_custom.py#L117 + + # Query nearest ego pose for the image timestamp + timestamp = image.timestamp + NUPLAN_ROLLING_SHUTTER_S.time_us + nearest_ego_pose = get_nearest_ego_pose_for_timestamp_from_db( + source_log_path, + timestamp, + [nuplan_lidar_pc.token], + ) + + # Compute camera to ego transformation, given the nearest ego pose + img_e2g = nearest_ego_pose.transformation_matrix + g2e = nuplan_lidar_pc.ego_pose.trans_matrix_inv + img_e2e = g2e @ img_e2g + cam_info = log_cam_infos[image.camera_token] + c2img_e = cam_info.trans_matrix + c2e = img_e2e @ c2img_e + extrinsic = StateSE3.from_transformation_matrix(c2e) + + # Append the camera sample + camera_data_list.append( + CameraData( + camera_type=camera_type, + extrinsic=extrinsic, + dataset_root=nuplan_sensor_root, + relative_path=filename_jpg.relative_to(nuplan_sensor_root), + ) + ) + + return camera_data_list + + +def _extract_nuplan_lidars( + nuplan_lidar_pc: LidarPc, + nuplan_sensor_root: Path, + dataset_converter_config: DatasetConverterConfig, +) -> List[LiDARData]: + + lidars: List[LiDARData] = [] + if dataset_converter_config.include_lidars: + + lidar_full_path = nuplan_sensor_root / nuplan_lidar_pc.filename + if lidar_full_path.exists() and lidar_full_path.is_file(): + lidars.append( + LiDARData( + lidar_type=LiDARType.LIDAR_MERGED, + dataset_root=nuplan_sensor_root, + relative_path=nuplan_lidar_pc.filename, + ) + ) + + return lidars + + +def _extract_nuplan_scenario_tag(nuplan_log_db: NuPlanDB, lidar_pc_token: str) -> List[str]: + scenario_tags = [ + scenario_tag.type for scenario_tag in nuplan_log_db.scenario_tag.select_many(lidar_pc_token=lidar_pc_token) + ] + if len(scenario_tags) == 0: + scenario_tags = ["unknown"] + return scenario_tags + + +def _extract_nuplan_route_lane_group_ids(nuplan_lidar_pc: LidarPc) -> List[int]: + return [ + int(roadblock_id) + for roadblock_id in str(nuplan_lidar_pc.scene.roadblock_ids).split(" ") + if len(roadblock_id) > 0 + ] diff --git a/src/py123d/conversion/datasets/nuplan/nuplan_map_conversion.py b/src/py123d/conversion/datasets/nuplan/nuplan_map_conversion.py new file mode 100644 index 00000000..b8b010cb --- /dev/null +++ b/src/py123d/conversion/datasets/nuplan/nuplan_map_conversion.py @@ -0,0 +1,402 @@ +import warnings +from pathlib import Path +from typing import Dict, Final + +import geopandas as gpd +import numpy as np +import pyogrio +from shapely import LineString + +from py123d.conversion.datasets.nuplan.utils.nuplan_constants import ( + NUPLAN_MAP_GPKG_LAYERS, + NUPLAN_MAP_LOCATION_FILES, + NUPLAN_ROAD_LINE_CONVERSION, +) +from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from py123d.conversion.utils.map_utils.road_edge.road_edge_2d_utils import ( + get_road_edge_linear_rings, + split_line_geometry_by_max_length, +) +from py123d.datatypes.maps.cache.cache_map_objects import ( + CacheCarpark, + CacheCrosswalk, + CacheGenericDrivable, + CacheIntersection, + CacheLane, + CacheLaneGroup, + CacheRoadEdge, + CacheRoadLine, + CacheWalkway, +) +from py123d.datatypes.maps.gpkg.gpkg_utils import get_all_rows_with_value, get_row_with_value +from py123d.datatypes.maps.map_datatypes import RoadEdgeType +from py123d.geometry.polyline import Polyline2D, Polyline3D + +MAX_ROAD_EDGE_LENGTH: Final[float] = 100.0 # meters, used to split very long road edges. TODO: add to config?
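+ +# NOTE (illustrative sketch, not the actual implementation): the imported +# split_line_geometry_by_max_length is assumed to chunk each ring into pieces of +# at most MAX_ROAD_EDGE_LENGTH meters, roughly like the following, e.g. with +# shapely's substring helper: +# +# import math +# from shapely.ops import substring +# +# def _split_by_max_length_sketch(line: LineString, max_len: float) -> list: +#     n_chunks = max(1, math.ceil(line.length / max_len)) +#     step = line.length / n_chunks +#     return [substring(line, i * step, (i + 1) * step) for i in range(n_chunks)]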
+ + +def write_nuplan_map(nuplan_maps_root: Path, location: str, map_writer: AbstractMapWriter) -> None: + assert location in NUPLAN_MAP_LOCATION_FILES.keys(), f"Map name {location} is not supported." + source_map_path = nuplan_maps_root / NUPLAN_MAP_LOCATION_FILES[location] + assert source_map_path.exists(), f"Map file {source_map_path} does not exist." + nuplan_gdf = _load_nuplan_gdf(source_map_path) + _write_nuplan_lanes(nuplan_gdf, map_writer) + _write_nuplan_lane_connectors(nuplan_gdf, map_writer) + _write_nuplan_lane_groups(nuplan_gdf, map_writer) + _write_nuplan_lane_connector_groups(nuplan_gdf, map_writer) + _write_nuplan_intersections(nuplan_gdf, map_writer) + _write_nuplan_crosswalks(nuplan_gdf, map_writer) + _write_nuplan_walkways(nuplan_gdf, map_writer) + _write_nuplan_carparks(nuplan_gdf, map_writer) + _write_nuplan_generic_drivables(nuplan_gdf, map_writer) + _write_nuplan_road_edges(nuplan_gdf, map_writer) + _write_nuplan_road_lines(nuplan_gdf, map_writer) + del nuplan_gdf + + +def _write_nuplan_lanes(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_writer: AbstractMapWriter) -> None: + + # NOTE: drops: lane_index (?), creator_id, name (?), road_type_fid (?), lane_type_fid (?), width (?), + # left_offset (?), right_offset (?), min_speed (?), max_speed (?), stops, left_has_reflectors (?), + # right_has_reflectors (?), from_edge_fid, to_edge_fid + + all_ids = nuplan_gdf["lanes_polygons"].lane_fid.to_list() + all_lane_group_ids = nuplan_gdf["lanes_polygons"].lane_group_fid.to_list() + all_speed_limits_mps = nuplan_gdf["lanes_polygons"].speed_limit_mps.to_list() + all_geometries = nuplan_gdf["lanes_polygons"].geometry.to_list() + + for idx, lane_id in enumerate(all_ids): + + # 1. predecessor_ids, successor_ids + predecessor_ids = get_all_rows_with_value( + nuplan_gdf["lane_connectors"], + "entry_lane_fid", + lane_id, + )["fid"].tolist() + successor_ids = get_all_rows_with_value( + nuplan_gdf["lane_connectors"], + "exit_lane_fid", + lane_id, + )["fid"].tolist() + + # 2. left_boundary, right_boundary + lane_series = get_row_with_value(nuplan_gdf["lanes_polygons"], "fid", str(lane_id)) + left_boundary_fid = lane_series["left_boundary_fid"] + left_boundary = get_row_with_value(nuplan_gdf["boundaries"], "fid", str(left_boundary_fid))["geometry"] + + right_boundary_fid = lane_series["right_boundary_fid"] + right_boundary = get_row_with_value(nuplan_gdf["boundaries"], "fid", str(right_boundary_fid))["geometry"] + + # 3. left_lane_id, right_lane_id + lane_index = lane_series["lane_index"] + all_group_lanes = get_all_rows_with_value( + nuplan_gdf["lanes_polygons"], "lane_group_fid", lane_series["lane_group_fid"] + ) + left_lane_id = all_group_lanes[all_group_lanes["lane_index"] == int(lane_index) - 1]["fid"] + right_lane_id = all_group_lanes[all_group_lanes["lane_index"] == int(lane_index) + 1]["fid"] + left_lane_id = left_lane_id.item() if not left_lane_id.empty else None + right_lane_id = right_lane_id.item() if not right_lane_id.empty else None + + # 4. centerline (aka. baseline_path) + centerline = get_row_with_value(nuplan_gdf["baseline_paths"], "lane_fid", float(lane_id))["geometry"] + + # Ensure the left/right boundaries are aligned with the baseline path direction.
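+ # (Boundary polylines in the source gpkg are not guaranteed to run in the + # centerline's direction; align_boundary_direction at the bottom of this file + # flips them when needed.)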
+ left_boundary = align_boundary_direction(centerline, left_boundary) + right_boundary = align_boundary_direction(centerline, right_boundary) + + map_writer.write_lane( + CacheLane( + object_id=lane_id, + lane_group_id=all_lane_group_ids[idx], + left_boundary=Polyline3D.from_linestring(left_boundary), + right_boundary=Polyline3D.from_linestring(right_boundary), + centerline=Polyline3D.from_linestring(centerline), + left_lane_id=left_lane_id, + right_lane_id=right_lane_id, + predecessor_ids=predecessor_ids, + successor_ids=successor_ids, + speed_limit_mps=all_speed_limits_mps[idx], + outline=None, + geometry=all_geometries[idx], + ) + ) + + +def _write_nuplan_lane_connectors(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_writer: AbstractMapWriter) -> None: + + # NOTE: drops: exit_lane_group_fid, entry_lane_group_fid, to_edge_fid, + # turn_type_fid (?), bulb_fids (?), traffic_light_stop_line_fids (?), overlap (?), creator_id + # left_has_reflectors (?), right_has_reflectors (?) + all_ids = nuplan_gdf["lane_connectors"].fid.to_list() + all_lane_group_ids = nuplan_gdf["lane_connectors"].lane_group_connector_fid.to_list() + all_speed_limits_mps = nuplan_gdf["lane_connectors"].speed_limit_mps.to_list() + + for idx, lane_id in enumerate(all_ids): + + # 1. predecessor_ids, successor_ids + lane_connector_row = get_row_with_value(nuplan_gdf["lane_connectors"], "fid", str(lane_id)) + predecessor_ids = [lane_connector_row["entry_lane_fid"]] + successor_ids = [lane_connector_row["exit_lane_fid"]] + + # 2. left_boundaries, right_boundaries + lane_connector_polygons_row = get_row_with_value( + nuplan_gdf["gen_lane_connectors_scaled_width_polygons"], "lane_connector_fid", str(lane_id) + ) + left_boundary_fid = lane_connector_polygons_row["left_boundary_fid"] + left_boundary = get_row_with_value(nuplan_gdf["boundaries"], "fid", str(left_boundary_fid))["geometry"] + + right_boundary_fid = lane_connector_polygons_row["right_boundary_fid"] + right_boundary = get_row_with_value(nuplan_gdf["boundaries"], "fid", str(right_boundary_fid))["geometry"] + + # 3. baseline_paths + centerline = get_row_with_value(nuplan_gdf["baseline_paths"], "lane_connector_fid", float(lane_id))["geometry"] + + left_boundary = align_boundary_direction(centerline, left_boundary) + right_boundary = align_boundary_direction(centerline, right_boundary) + + # # 4. geometries + # geometries.append(lane_connector_polygons_row.geometry) + + map_writer.write_lane( + CacheLane( + object_id=lane_id, + lane_group_id=all_lane_group_ids[idx], + left_boundary=Polyline3D.from_linestring(left_boundary), + right_boundary=Polyline3D.from_linestring(right_boundary), + centerline=Polyline3D.from_linestring(centerline), + left_lane_id=None, + right_lane_id=None, + predecessor_ids=predecessor_ids, + successor_ids=successor_ids, + speed_limit_mps=all_speed_limits_mps[idx], + outline=None, + geometry=lane_connector_polygons_row.geometry, + ) + ) + + +def _write_nuplan_lane_groups(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_writer: AbstractMapWriter) -> None: + # NOTE: drops: creator_id, from_edge_fid, to_edge_fid + ids = nuplan_gdf["lane_groups_polygons"].fid.to_list() + # all_geometries = nuplan_gdf["lane_groups_polygons"].geometry.to_list() + + for lane_group_id in ids: + + # 1. lane_ids + lane_ids = get_all_rows_with_value( + nuplan_gdf["lanes_polygons"], + "lane_group_fid", + lane_group_id, + )["fid"].tolist() + + # 2. 
predecessor_lane_group_ids, successor_lane_group_ids + predecessor_lane_group_ids = get_all_rows_with_value( + nuplan_gdf["lane_group_connectors"], + "to_lane_group_fid", + lane_group_id, + )["fid"].tolist() + successor_lane_group_ids = get_all_rows_with_value( + nuplan_gdf["lane_group_connectors"], + "from_lane_group_fid", + lane_group_id, + )["fid"].tolist() + + # 3. left_boundaries, right_boundaries + lane_group_row = get_row_with_value(nuplan_gdf["lane_groups_polygons"], "fid", str(lane_group_id)) + left_boundary_fid = lane_group_row["left_boundary_fid"] + left_boundary = get_row_with_value(nuplan_gdf["boundaries"], "fid", str(left_boundary_fid))["geometry"] + + right_boundary_fid = lane_group_row["right_boundary_fid"] + right_boundary = get_row_with_value(nuplan_gdf["boundaries"], "fid", str(right_boundary_fid))["geometry"] + + # Flip the boundaries to align with the first lane's baseline path direction. + repr_centerline = get_row_with_value(nuplan_gdf["baseline_paths"], "lane_fid", float(lane_ids[0]))["geometry"] + + left_boundary = align_boundary_direction(repr_centerline, left_boundary) + right_boundary = align_boundary_direction(repr_centerline, right_boundary) + + map_writer.write_lane_group( + CacheLaneGroup( + object_id=lane_group_id, + lane_ids=lane_ids, + left_boundary=Polyline3D.from_linestring(left_boundary), + right_boundary=Polyline3D.from_linestring(right_boundary), + intersection_id=None, + predecessor_ids=predecessor_lane_group_ids, + successor_ids=successor_lane_group_ids, + outline=None, + geometry=lane_group_row.geometry, + ) + ) + + +def _write_nuplan_lane_connector_groups(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_writer: AbstractMapWriter) -> None: + # NOTE: drops: creator_id, from_edge_fid, to_edge_fid, intersection_fid + ids = nuplan_gdf["lane_group_connectors"].fid.to_list() + all_intersection_ids = nuplan_gdf["lane_group_connectors"].intersection_fid.to_list() + # all_geometries = nuplan_gdf["lane_group_connectors"].geometry.to_list() + + for idx, lane_group_connector_id in enumerate(ids): + + # 1. lane_ids + lane_ids = get_all_rows_with_value( + nuplan_gdf["lane_connectors"], "lane_group_connector_fid", lane_group_connector_id + )["fid"].tolist() + + # 2. predecessor_lane_group_ids, successor_lane_group_ids + lane_group_connector_row = get_row_with_value( + nuplan_gdf["lane_group_connectors"], "fid", lane_group_connector_id + ) + predecessor_lane_group_ids = [str(lane_group_connector_row["from_lane_group_fid"])] + successor_lane_group_ids = [str(lane_group_connector_row["to_lane_group_fid"])] + + # 3. 
left_boundaries, right_boundaries + left_boundary_fid = lane_group_connector_row["left_boundary_fid"] + left_boundary = get_row_with_value(nuplan_gdf["boundaries"], "fid", str(left_boundary_fid))["geometry"] + right_boundary_fid = lane_group_connector_row["right_boundary_fid"] + right_boundary = get_row_with_value(nuplan_gdf["boundaries"], "fid", str(right_boundary_fid))["geometry"] + + map_writer.write_lane_group( + CacheLaneGroup( + object_id=lane_group_connector_id, + lane_ids=lane_ids, + left_boundary=Polyline3D.from_linestring(left_boundary), + right_boundary=Polyline3D.from_linestring(right_boundary), + intersection_id=all_intersection_ids[idx], + predecessor_ids=predecessor_lane_group_ids, + successor_ids=successor_lane_group_ids, + outline=None, + geometry=lane_group_connector_row.geometry, + ) + ) + + +def _write_nuplan_intersections(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_writer: AbstractMapWriter) -> None: + # NOTE: drops: creator_id, intersection_type_fid (?), is_mini (?) + all_ids = nuplan_gdf["intersections"].fid.to_list() + all_geometries = nuplan_gdf["intersections"].geometry.to_list() + for idx, intersection_id in enumerate(all_ids): + lane_group_connector_ids = get_all_rows_with_value( + nuplan_gdf["lane_group_connectors"], "intersection_fid", str(intersection_id) + )["fid"].tolist() + + map_writer.write_intersection( + CacheIntersection( + object_id=intersection_id, + lane_group_ids=lane_group_connector_ids, + geometry=all_geometries[idx], + ) + ) + + +def _write_nuplan_crosswalks(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_writer: AbstractMapWriter) -> None: + # NOTE: drops: creator_id, intersection_fids, lane_fids, is_marked (?) + for id, geometry in zip(nuplan_gdf["crosswalks"].fid.to_list(), nuplan_gdf["crosswalks"].geometry.to_list()): + map_writer.write_crosswalk(CacheCrosswalk(object_id=id, geometry=geometry)) + + +def _write_nuplan_walkways(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_writer: AbstractMapWriter) -> None: + # NOTE: drops: creator_id + for id, geometry in zip(nuplan_gdf["walkways"].fid.to_list(), nuplan_gdf["walkways"].geometry.to_list()): + map_writer.write_walkway(CacheWalkway(object_id=id, geometry=geometry)) + + +def _write_nuplan_carparks(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_writer: AbstractMapWriter) -> None: + # NOTE: drops: creator_id + for id, geometry in zip(nuplan_gdf["carpark_areas"].fid.to_list(), nuplan_gdf["carpark_areas"].geometry.to_list()): + map_writer.write_carpark(CacheCarpark(object_id=id, geometry=geometry)) + + +def _write_nuplan_generic_drivables(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_writer: AbstractMapWriter) -> None: + # NOTE: drops: creator_id + for id, geometry in zip( + nuplan_gdf["generic_drivable_areas"].fid.to_list(), nuplan_gdf["generic_drivable_areas"].geometry.to_list() + ): + map_writer.write_generic_drivable(CacheGenericDrivable(object_id=id, geometry=geometry)) + + +def _write_nuplan_road_edges(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_writer: AbstractMapWriter) -> None: + drivable_polygons = ( + nuplan_gdf["intersections"].geometry.to_list() + + nuplan_gdf["lane_groups_polygons"].geometry.to_list() + + nuplan_gdf["carpark_areas"].geometry.to_list() + + nuplan_gdf["generic_drivable_areas"].geometry.to_list() + ) + road_edge_linear_rings = get_road_edge_linear_rings(drivable_polygons) + road_edges = split_line_geometry_by_max_length(road_edge_linear_rings, MAX_ROAD_EDGE_LENGTH) + + for idx in range(len(road_edges)): + map_writer.write_road_edge( + CacheRoadEdge( + object_id=idx, + 
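# NOTE: road edges are synthesized from the drivable-area outlines above, so + # these object ids are running indices rather than map fids. +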
road_edge_type=RoadEdgeType.ROAD_EDGE_BOUNDARY, + polyline=Polyline2D.from_linestring(road_edges[idx]), + ) + ) + + +def _write_nuplan_road_lines(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_writer: AbstractMapWriter) -> None: + boundaries = nuplan_gdf["boundaries"].geometry.to_list() + fids = nuplan_gdf["boundaries"].fid.to_list() + boundary_types = nuplan_gdf["boundaries"].boundary_type_fid.to_list() + + for idx in range(len(boundary_types)): + map_writer.write_road_line( + CacheRoadLine( + object_id=fids[idx], + road_line_type=NUPLAN_ROAD_LINE_CONVERSION[boundary_types[idx]], + polyline=Polyline2D.from_linestring(boundaries[idx]), + ) + ) + + +def _load_nuplan_gdf(map_file_path: Path) -> Dict[str, gpd.GeoDataFrame]: + + # The projected coordinate system depends on which UTM zone the mapped location is in. + map_meta = gpd.read_file(map_file_path, layer="meta", engine="pyogrio") + projection_system = map_meta[map_meta["key"] == "projectedCoordSystem"]["value"].iloc[0] + + nuplan_gdf: Dict[str, gpd.GeoDataFrame] = {} + for layer_name in NUPLAN_MAP_GPKG_LAYERS: + with warnings.catch_warnings(): + # Suppress the warnings from the GPKG operations below so that they don't spam the training logs. + warnings.filterwarnings("ignore") + + gdf_in_pixel_coords = pyogrio.read_dataframe(map_file_path, layer=layer_name, fid_as_index=True) + gdf_in_utm_coords = gdf_in_pixel_coords.to_crs(projection_system) + # gdf_in_utm_coords = gdf_in_pixel_coords + + # For backwards compatibility, cast the index to string datatype. + # and mirror it to the "fid" column. + gdf_in_utm_coords.index = gdf_in_utm_coords.index.map(str) + gdf_in_utm_coords["fid"] = gdf_in_utm_coords.index + + nuplan_gdf[layer_name] = gdf_in_utm_coords + + return nuplan_gdf + + +def _flip_linestring(linestring: LineString) -> LineString: + # TODO: move somewhere more appropriate or implement in Polyline2D, PolylineSE2, etc. + return LineString(linestring.coords[::-1]) + + +def lines_same_direction(centerline: LineString, boundary: LineString) -> bool: + # TODO: refactor helper function. + center_start = np.array(centerline.coords[0]) + center_end = np.array(centerline.coords[-1]) + boundary_start = np.array(boundary.coords[0]) + boundary_end = np.array(boundary.coords[-1]) + + # Distance from centerline start to boundary start + centerline end to boundary end + same_dir_dist = np.linalg.norm(center_start - boundary_start) + np.linalg.norm(center_end - boundary_end) + opposite_dir_dist = np.linalg.norm(center_start - boundary_end) + np.linalg.norm(center_end - boundary_start) + + return same_dir_dist <= opposite_dir_dist + + +def align_boundary_direction(centerline: LineString, boundary: LineString) -> LineString: + # TODO: refactor helper function. 
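+ # Reverse the boundary when the crossed endpoint pairing is closer than the + # direct one, as decided by lines_same_direction above.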
+ if not lines_same_direction(centerline, boundary): + return _flip_linestring(boundary) + return boundary diff --git a/src/py123d/conversion/datasets/nuplan/nuplan_sensor_io.py b/src/py123d/conversion/datasets/nuplan/nuplan_sensor_io.py new file mode 100644 index 00000000..8c2506f0 --- /dev/null +++ b/src/py123d/conversion/datasets/nuplan/nuplan_sensor_io.py @@ -0,0 +1,29 @@ +import io +from pathlib import Path +from typing import Dict + +import numpy as np + +from py123d.common.utils.dependencies import check_dependencies +from py123d.conversion.datasets.nuplan.utils.nuplan_constants import NUPLAN_LIDAR_DICT +from py123d.conversion.registry.lidar_index_registry import NuPlanLiDARIndex +from py123d.datatypes.sensors.lidar import LiDARType + +check_dependencies(["nuplan"], "nuplan") +from nuplan.database.utils.pointclouds.lidar import LidarPointCloud + + +def load_nuplan_lidar_pcs_from_file(pcd_path: Path) -> Dict[LiDARType, np.ndarray]: + assert pcd_path.exists(), f"LiDAR file not found: {pcd_path}" + with open(pcd_path, "rb") as fp: + buffer = io.BytesIO(fp.read()) + + merged_lidar_pc = LidarPointCloud.from_buffer(buffer, "pcd").points + + lidar_pcs_dict: Dict[LiDARType, np.ndarray] = {} + for lidar_id, lidar_type in NUPLAN_LIDAR_DICT.items(): + mask = merged_lidar_pc[-1, :] == lidar_id + lidar_pc = merged_lidar_pc[: len(NuPlanLiDARIndex), mask].T.astype(np.float32) + lidar_pcs_dict[lidar_type] = lidar_pc + + return lidar_pcs_dict diff --git a/d123/common/geometry/line/__init__.py b/src/py123d/conversion/datasets/nuplan/utils/__init__.py similarity index 100% rename from d123/common/geometry/line/__init__.py rename to src/py123d/conversion/datasets/nuplan/utils/__init__.py diff --git a/d123/dataset/dataset_specific/nuplan/utils/log_splits.yaml b/src/py123d/conversion/datasets/nuplan/utils/log_splits.yaml similarity index 100% rename from d123/dataset/dataset_specific/nuplan/utils/log_splits.yaml rename to src/py123d/conversion/datasets/nuplan/utils/log_splits.yaml diff --git a/src/py123d/conversion/datasets/nuplan/utils/nuplan_constants.py b/src/py123d/conversion/datasets/nuplan/utils/nuplan_constants.py new file mode 100644 index 00000000..d904a698 --- /dev/null +++ b/src/py123d/conversion/datasets/nuplan/utils/nuplan_constants.py @@ -0,0 +1,88 @@ +from typing import Dict, Final, List, Set + +from py123d.conversion.registry.box_detection_label_registry import NuPlanBoxDetectionLabel +from py123d.datatypes.detections.traffic_light_detections import TrafficLightStatus +from py123d.datatypes.maps.map_datatypes import RoadLineType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.time.time_point import TimePoint + +NUPLAN_DEFAULT_DT: Final[float] = 0.05 + +NUPLAN_TRAFFIC_STATUS_DICT: Final[Dict[str, TrafficLightStatus]] = { + "green": TrafficLightStatus.GREEN, + "red": TrafficLightStatus.RED, + "unknown": TrafficLightStatus.UNKNOWN, +} + + +NUPLAN_DETECTION_NAME_DICT = { + "vehicle": NuPlanBoxDetectionLabel.VEHICLE, + "bicycle": NuPlanBoxDetectionLabel.BICYCLE, + "pedestrian": NuPlanBoxDetectionLabel.PEDESTRIAN, + "traffic_cone": NuPlanBoxDetectionLabel.TRAFFIC_CONE, + "barrier": NuPlanBoxDetectionLabel.BARRIER, + "czone_sign": NuPlanBoxDetectionLabel.CZONE_SIGN, + "generic_object": NuPlanBoxDetectionLabel.GENERIC_OBJECT, +} + +# https://github.com/motional/nuplan-devkit/blob/e9241677997dd86bfc0bcd44817ab04fe631405b/nuplan/database/nuplan_db_orm/utils.py#L1129-L1135 +NUPLAN_LIDAR_DICT = { + 0: LiDARType.LIDAR_TOP, + 1: LiDARType.LIDAR_SIDE_RIGHT, + 2: 
LiDARType.LIDAR_SIDE_LEFT, + 3: LiDARType.LIDAR_BACK, + 4: LiDARType.LIDAR_FRONT, +} + +NUPLAN_DATA_SPLITS: Set[str] = { + "nuplan_train", + "nuplan_val", + "nuplan_test", + "nuplan-mini_train", + "nuplan-mini_val", + "nuplan-mini_test", + "nuplan-private_test", # TODO: remove, not publicly available +} + +NUPLAN_MAP_LOCATIONS: List[str] = [ + "sg-one-north", + "us-ma-boston", + "us-nv-las-vegas-strip", + "us-pa-pittsburgh-hazelwood", +] + +NUPLAN_MAP_LOCATION_FILES: Dict[str, str] = { + "sg-one-north": "sg-one-north/9.17.1964/map.gpkg", + "us-ma-boston": "us-ma-boston/9.12.1817/map.gpkg", + "us-nv-las-vegas-strip": "us-nv-las-vegas-strip/9.15.1915/map.gpkg", + "us-pa-pittsburgh-hazelwood": "us-pa-pittsburgh-hazelwood/9.17.1937/map.gpkg", +} + + +NUPLAN_MAP_GPKG_LAYERS: Set[str] = { + "baseline_paths", + "carpark_areas", + "generic_drivable_areas", + "dubins_nodes", + "lane_connectors", + "intersections", + "boundaries", + "crosswalks", + "lanes_polygons", + "lane_group_connectors", + "lane_groups_polygons", + "road_segments", + "stop_polygons", + "traffic_lights", + "walkways", + "gen_lane_connectors_scaled_width_polygons", +} + +NUPLAN_ROAD_LINE_CONVERSION = { + 0: RoadLineType.DASHED_WHITE, + 2: RoadLineType.SOLID_WHITE, + 3: RoadLineType.UNKNOWN, +} + + +NUPLAN_ROLLING_SHUTTER_S: Final[TimePoint] = TimePoint.from_s(1 / 60) diff --git a/src/py123d/conversion/datasets/nuplan/utils/nuplan_sql_helper.py b/src/py123d/conversion/datasets/nuplan/utils/nuplan_sql_helper.py new file mode 100644 index 00000000..866246ba --- /dev/null +++ b/src/py123d/conversion/datasets/nuplan/utils/nuplan_sql_helper.py @@ -0,0 +1,128 @@ +from typing import List + +from py123d.common.utils.dependencies import check_dependencies +from py123d.conversion.datasets.nuplan.utils.nuplan_constants import NUPLAN_DETECTION_NAME_DICT +from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3 +from py123d.geometry import BoundingBoxSE3, EulerAngles, StateSE3, Vector3D +from py123d.geometry.utils.constants import DEFAULT_PITCH, DEFAULT_ROLL + +check_dependencies(modules=["nuplan"], optional_name="nuplan") +from nuplan.database.nuplan_db.query_session import execute_many, execute_one + + +def get_box_detections_for_lidarpc_token_from_db(log_file: str, token: str) -> List[BoxDetectionSE3]: + + query = """ + SELECT c.name AS category_name, + lb.x, + lb.y, + lb.z, + lb.yaw, + lb.width, + lb.length, + lb.height, + lb.vx, + lb.vy, + lb.vz, + lb.token, + lb.track_token, + lp.timestamp + FROM lidar_box AS lb + INNER JOIN track AS t + ON t.token = lb.track_token + INNER JOIN category AS c + ON c.token = t.category_token + INNER JOIN lidar_pc AS lp + ON lp.token = lb.lidar_pc_token + WHERE lp.token = ? 
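+ -- The token parameter is bound as a BLOB below (via bytearray.fromhex), since + -- nuPlan stores tokens in binary form rather than as hex strings.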
+ """ + + box_detections: List[BoxDetectionSE3] = [] + + for row in execute_many(query, (bytearray.fromhex(token),), log_file): + quaternion = EulerAngles(roll=DEFAULT_ROLL, pitch=DEFAULT_PITCH, yaw=row["yaw"]).quaternion + bounding_box = BoundingBoxSE3( + center=StateSE3( + x=row["x"], + y=row["y"], + z=row["z"], + qw=quaternion.qw, + qx=quaternion.qx, + qy=quaternion.qy, + qz=quaternion.qz, + ), + length=row["length"], + width=row["width"], + height=row["height"], + ) + box_detection = BoxDetectionSE3( + metadata=BoxDetectionMetadata( + label=NUPLAN_DETECTION_NAME_DICT[row["category_name"]], + track_token=row["track_token"].hex(), + ), + bounding_box_se3=bounding_box, + velocity=Vector3D(x=row["vx"], y=row["vy"], z=row["vz"]), + ) + box_detections.append(box_detection) + + return box_detections + + +def get_ego_pose_for_timestamp_from_db(log_file: str, timestamp: int) -> StateSE3: + + query = """ + SELECT ep.x, + ep.y, + ep.z, + ep.qw, + ep.qx, + ep.qy, + ep.qz, + ep.timestamp, + ep.vx, + ep.vy, + ep.acceleration_x, + ep.acceleration_y + FROM ego_pose AS ep + ORDER BY ABS(ep.timestamp - ?) + LIMIT 1 + """ + + row = execute_one(query, (timestamp,), log_file) + if row is None: + return None + + return StateSE3(x=row["x"], y=row["y"], z=row["z"], qw=row["qw"], qx=row["qx"], qy=row["qy"], qz=row["qz"]) + + +def get_nearest_ego_pose_for_timestamp_from_db( + log_file: str, + timestamp: int, + tokens: List[str], + lookahead_window_us: int = 50000, + lookback_window_us: int = 50000, +) -> StateSE3: + + query = f""" + SELECT ep.x, + ep.y, + ep.z, + ep.qw, + ep.qx, + ep.qy, + ep.qz + FROM ego_pose AS ep + INNER JOIN lidar_pc AS lpc + ON ep.timestamp <= lpc.timestamp + ? + AND ep.timestamp >= lpc.timestamp - ? + WHERE lpc.token IN ({('?,'*len(tokens))[:-1]}) + ORDER BY ABS(ep.timestamp - ?) 
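+ -- i.e. restrict candidates to ego poses within the lookback/lookahead window + -- around the given lidar_pc timestamps, then take the pose closest to the + -- target timestamp.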
+ LIMIT 1 + """ # noqa: E226 + + args = [lookahead_window_us, lookback_window_us] + args += [bytearray.fromhex(t) for t in tokens] + args += [timestamp] + + for row in execute_many(query, args, log_file): + return StateSE3(x=row["x"], y=row["y"], z=row["z"], qw=row["qw"], qx=row["qx"], qy=row["qy"], qz=row["qz"]) diff --git a/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py b/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py new file mode 100644 index 00000000..7cc39d34 --- /dev/null +++ b/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py @@ -0,0 +1,449 @@ +import gc +from pathlib import Path +from typing import Any, Dict, List, Union + +import numpy as np +from pyquaternion import Quaternion + +from py123d.common.utils.dependencies import check_dependencies +from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter +from py123d.conversion.dataset_converter_config import DatasetConverterConfig +from py123d.conversion.datasets.nuscenes.nuscenes_map_conversion import NUSCENES_MAPS, write_nuscenes_map +from py123d.conversion.datasets.nuscenes.utils.nuscenes_constants import ( + NUSCENES_CAMERA_TYPES, + NUSCENES_DATA_SPLITS, + NUSCENES_DETECTION_NAME_DICT, + NUSCENES_DT, +) +from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, CameraData, LiDARData +from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from py123d.conversion.registry.box_detection_label_registry import NuScenesBoxDetectionLabel +from py123d.conversion.registry.lidar_index_registry import NuScenesLiDARIndex +from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper +from py123d.datatypes.maps.map_metadata import MapMetadata +from py123d.datatypes.scene.scene_metadata import LogMetadata +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import ( + PinholeCameraMetadata, + PinholeCameraType, + PinholeDistortion, + PinholeIntrinsics, +) +from py123d.datatypes.time.time_point import TimePoint +from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 +from py123d.datatypes.vehicle_state.vehicle_parameters import get_nuscenes_renault_zoe_parameters +from py123d.geometry import BoundingBoxSE3, StateSE3 +from py123d.geometry.vector import Vector3D + +check_dependencies(["nuscenes"], "nuscenes") +from nuscenes import NuScenes +from nuscenes.can_bus.can_bus_api import NuScenesCanBus +from nuscenes.utils.data_classes import Box +from nuscenes.utils.splits import create_splits_scenes + + +class NuScenesConverter(AbstractDatasetConverter): + def __init__( + self, + splits: List[str], + nuscenes_data_root: Union[Path, str], + nuscenes_map_root: Union[Path, str], + nuscenes_lanelet2_root: Union[Path, str], + use_lanelet2: bool, + dataset_converter_config: DatasetConverterConfig, + version: str = "v1.0-mini", + ) -> None: + super().__init__(dataset_converter_config) + + assert nuscenes_data_root is not None, "The variable `nuscenes_data_root` must be provided." + assert nuscenes_map_root is not None, "The variable `nuscenes_map_root` must be provided." + for split in splits: + assert ( + split in NUSCENES_DATA_SPLITS + ), f"Split {split} is not available. 
Available splits: {NUSCENES_DATA_SPLITS}" + + if dataset_converter_config.include_lidars: + assert dataset_converter_config.lidar_store_option in ["path", "binary"], ( + f"Invalid lidar_store_option: {dataset_converter_config.lidar_store_option}. " + f"Supported options are 'path' and 'binary'." + ) + + self._splits: List[str] = splits + + self._nuscenes_data_root: Path = Path(nuscenes_data_root) + self._nuscenes_map_root: Path = Path(nuscenes_map_root) + self._nuscenes_lanelet2_root: Path = Path(nuscenes_lanelet2_root) + + self._use_lanelet2 = use_lanelet2 + self._version = version + self._scene_tokens_per_split: Dict[str, List[str]] = self._collect_scene_tokens() + + def _collect_scene_tokens(self) -> Dict[str, List[str]]: + + scene_tokens_per_split: Dict[str, List[str]] = {} + nusc = NuScenes(version=self._version, dataroot=str(self._nuscenes_data_root), verbose=False) + + nuscenes_split_name_mapping = { + "nuscenes_train": "train", + "nuscenes_val": "val", + "nuscenes_test": "test", + "nuscenes-mini_train": "mini_train", + "nuscenes-mini_val": "mini_val", + } + + scene_splits = create_splits_scenes() + available_scenes = [scene for scene in nusc.scene] + + for split in self._splits: + nuscenes_split = nuscenes_split_name_mapping[split] + scene_names = scene_splits.get(nuscenes_split, []) + + # get token + scene_tokens = [scene["token"] for scene in available_scenes if scene["name"] in scene_names] + scene_tokens_per_split[split] = scene_tokens + + return scene_tokens_per_split + + def get_number_of_maps(self) -> int: + """Inherited, see superclass.""" + return len(NUSCENES_MAPS) + + def get_number_of_logs(self) -> int: + """Inherited, see superclass.""" + return sum(len(scene_tokens) for scene_tokens in self._scene_tokens_per_split.values()) + + def convert_map(self, map_index: int, map_writer: AbstractMapWriter) -> None: + """Inherited, see superclass.""" + map_name = NUSCENES_MAPS[map_index] + + map_metadata = _get_nuscenes_map_metadata(map_name) + map_needs_writing = map_writer.reset(self.dataset_converter_config, map_metadata) + + if map_needs_writing: + write_nuscenes_map( + nuscenes_maps_root=self._nuscenes_map_root, + location=map_name, + map_writer=map_writer, + use_lanelet2=self._use_lanelet2, + lanelet2_root=Path(self._nuscenes_lanelet2_root), + ) + + map_writer.close() + + def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: + """Inherited, see superclass.""" + # Find the scene token for the given log index + all_scene_tokens = [] + for split, scene_tokens in self._scene_tokens_per_split.items(): + all_scene_tokens.extend([(split, token) for token in scene_tokens]) + + if log_index >= len(all_scene_tokens): + raise ValueError(f"Log index {log_index} is out of range. Total logs: {len(all_scene_tokens)}") + + split, scene_token = all_scene_tokens[log_index] + + nusc = NuScenes(version=self._version, dataroot=str(self._nuscenes_data_root), verbose=False) + scene = nusc.get("scene", scene_token) + log_record = nusc.get("log", scene["log_token"]) + + # 1. 
Initialize log metadata + log_metadata = LogMetadata( + dataset="nuscenes", + split=split, + log_name=scene["name"], + location=log_record["location"], + timestep_seconds=NUSCENES_DT, + vehicle_parameters=get_nuscenes_renault_zoe_parameters(), + box_detection_label_class=NuScenesBoxDetectionLabel, + pinhole_camera_metadata=_get_nuscenes_pinhole_camera_metadata(nusc, scene, self.dataset_converter_config), + lidar_metadata=_get_nuscenes_lidar_metadata(nusc, scene, self.dataset_converter_config), + map_metadata=_get_nuscenes_map_metadata(log_record["location"]), + ) + + # 2. Prepare log writer + log_needs_writing = log_writer.reset(self.dataset_converter_config, log_metadata) + + if log_needs_writing: + can_bus = NuScenesCanBus(dataroot=str(self._nuscenes_data_root)) + + step_interval = max(1, int(NUSCENES_DT / NUSCENES_DT)) + sample_count = 0 + + # Traverse all samples in the scene + sample_token = scene["first_sample_token"] + while sample_token: + if sample_count % step_interval == 0: + sample = nusc.get("sample", sample_token) + + log_writer.write( + timestamp=TimePoint.from_us(sample["timestamp"]), + ego_state=_extract_nuscenes_ego_state(nusc, sample, can_bus), + box_detections=_extract_nuscenes_box_detections(nusc, sample), + pinhole_cameras=_extract_nuscenes_cameras( + nusc=nusc, + sample=sample, + nuscenes_data_root=self._nuscenes_data_root, + dataset_converter_config=self.dataset_converter_config, + ), + lidars=_extract_nuscenes_lidars( + nusc=nusc, + sample=sample, + nuscenes_data_root=self._nuscenes_data_root, + dataset_converter_config=self.dataset_converter_config, + ), + ) + + sample_token = sample["next"] + sample_count += 1 + + log_writer.close() + del nusc + gc.collect() + + +def _get_nuscenes_pinhole_camera_metadata( + nusc: NuScenes, + scene: Dict[str, Any], + dataset_converter_config: DatasetConverterConfig, +) -> Dict[PinholeCameraType, PinholeCameraMetadata]: + camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {} + + if dataset_converter_config.include_pinhole_cameras: + first_sample_token = scene["first_sample_token"] + first_sample = nusc.get("sample", first_sample_token) + + for camera_type, camera_channel in NUSCENES_CAMERA_TYPES.items(): + cam_token = first_sample["data"][camera_channel] + cam_data = nusc.get("sample_data", cam_token) + calib = nusc.get("calibrated_sensor", cam_data["calibrated_sensor_token"]) + + intrinsic_matrix = np.array(calib["camera_intrinsic"]) + intrinsic = PinholeIntrinsics.from_camera_matrix(intrinsic_matrix) + distortion = PinholeDistortion.from_array(np.zeros(5), copy=False) + + camera_metadata[camera_type] = PinholeCameraMetadata( + camera_type=camera_type, + width=cam_data["width"], + height=cam_data["height"], + intrinsics=intrinsic, + distortion=distortion, + ) + + return camera_metadata + + +def _get_nuscenes_lidar_metadata( + nusc: NuScenes, + scene: Dict[str, Any], + dataset_converter_config: DatasetConverterConfig, +) -> Dict[LiDARType, LiDARMetadata]: + metadata: Dict[LiDARType, LiDARMetadata] = {} + + if dataset_converter_config.include_lidars: + first_sample_token = scene["first_sample_token"] + first_sample = nusc.get("sample", first_sample_token) + lidar_token = first_sample["data"]["LIDAR_TOP"] + lidar_data = nusc.get("sample_data", lidar_token) + calib = nusc.get("calibrated_sensor", lidar_data["calibrated_sensor_token"]) + + translation = np.array(calib["translation"]) + rotation = Quaternion(calib["rotation"]).rotation_matrix + extrinsic = np.eye(4) + extrinsic[:3, :3] = rotation + extrinsic[:3, 3] = 
translation + extrinsic = StateSE3.from_transformation_matrix(extrinsic) + + metadata[LiDARType.LIDAR_TOP] = LiDARMetadata( + lidar_type=LiDARType.LIDAR_TOP, + lidar_index=NuScenesLiDARIndex, + extrinsic=extrinsic, + ) + + return metadata + + +def _get_nuscenes_map_metadata(location): + return MapMetadata( + dataset="nuscenes", + split=None, + log_name=None, + location=location, + map_has_z=False, + map_is_local=False, + ) + + +def _extract_nuscenes_ego_state(nusc, sample, can_bus) -> EgoStateSE3: + lidar_data = nusc.get("sample_data", sample["data"]["LIDAR_TOP"]) + ego_pose = nusc.get("ego_pose", lidar_data["ego_pose_token"]) + + quat = Quaternion(ego_pose["rotation"]) + + vehicle_parameters = get_nuscenes_renault_zoe_parameters() + imu_pose = StateSE3( + x=ego_pose["translation"][0], + y=ego_pose["translation"][1], + z=ego_pose["translation"][2], + qw=quat.w, + qx=quat.x, + qy=quat.y, + qz=quat.z, + ) + + scene_name = nusc.get("scene", sample["scene_token"])["name"] + + try: + pose_msgs = can_bus.get_messages(scene_name, "pose") + except Exception: + pose_msgs = [] + + if pose_msgs: + closest_msg = None + min_time_diff = float("inf") + for msg in pose_msgs: + time_diff = abs(msg["utime"] - sample["timestamp"]) + if time_diff < min_time_diff: + min_time_diff = time_diff + closest_msg = msg + + if closest_msg and min_time_diff < 500000: + velocity = [*closest_msg["vel"]] + acceleration = [*closest_msg["accel"]] + angular_velocity = [*closest_msg["rotation_rate"]] + else: + velocity = acceleration = angular_velocity = [0.0, 0.0, 0.0] + else: + velocity = acceleration = angular_velocity = [0.0, 0.0, 0.0] + + dynamic_state = DynamicStateSE3( + velocity=Vector3D(*velocity), + acceleration=Vector3D(*acceleration), + angular_velocity=Vector3D(*angular_velocity), + ) + + # return EgoStateSE3( + # center_se3=pose, + # dynamic_state_se3=dynamic_state, + # vehicle_parameters=vehicle_parameters, + # timepoint=TimePoint.from_us(sample["timestamp"]), + # ) + return EgoStateSE3.from_rear_axle( + rear_axle_se3=imu_pose, + dynamic_state_se3=dynamic_state, + vehicle_parameters=vehicle_parameters, + time_point=TimePoint.from_us(sample["timestamp"]), + ) + + +def _extract_nuscenes_box_detections(nusc: NuScenes, sample: Dict[str, Any]) -> BoxDetectionWrapper: + box_detections: List[BoxDetectionSE3] = [] + + for ann_token in sample["anns"]: + ann = nusc.get("sample_annotation", ann_token) + box = Box(ann["translation"], ann["size"], Quaternion(ann["rotation"])) + + box_quat = box.orientation + euler_angles = box_quat.yaw_pitch_roll # (yaw, pitch, roll) + + # Create StateSE3 for box center and orientation + center_quat = box.orientation + center = StateSE3( + box.center[0], + box.center[1], + box.center[2], + center_quat.w, + center_quat.x, + center_quat.y, + center_quat.z, + ) + bounding_box = BoundingBoxSE3(center, box.wlh[1], box.wlh[0], box.wlh[2]) + # Get detection type + category = ann["category_name"] + label = NUSCENES_DETECTION_NAME_DICT[category] + + # Get velocity if available + velocity = nusc.box_velocity(ann_token) + velocity_3d = Vector3D(x=velocity[0], y=velocity[1], z=velocity[2] if len(velocity) > 2 else 0.0) + + metadata = BoxDetectionMetadata( + label=label, + track_token=ann["instance_token"], + timepoint=TimePoint.from_us(sample["timestamp"]), + confidence=1.0, # nuScenes annotations are ground truth + num_lidar_points=ann.get("num_lidar_pts", 0), + ) + + box_detection = BoxDetectionSE3( + metadata=metadata, + bounding_box_se3=bounding_box, + velocity=velocity_3d, + ) + 
box_detections.append(box_detection) + + return BoxDetectionWrapper(box_detections=box_detections) + + +def _extract_nuscenes_cameras( + nusc: NuScenes, + sample: Dict[str, Any], + nuscenes_data_root: Path, + dataset_converter_config: DatasetConverterConfig, +) -> List[CameraData]: + camera_data_list: List[CameraData] = [] + + if dataset_converter_config.include_pinhole_cameras: + for camera_type, camera_channel in NUSCENES_CAMERA_TYPES.items(): + cam_token = sample["data"][camera_channel] + cam_data = nusc.get("sample_data", cam_token) + + # Check timestamp synchronization (within 100ms) + if abs(cam_data["timestamp"] - sample["timestamp"]) > 100000: + continue + + calib = nusc.get("calibrated_sensor", cam_data["calibrated_sensor_token"]) + + translation = np.array(calib["translation"]) + rotation = Quaternion(calib["rotation"]).rotation_matrix + extrinsic_matrix = np.eye(4) + extrinsic_matrix[:3, :3] = rotation + extrinsic_matrix[:3, 3] = translation + extrinsic = StateSE3.from_transformation_matrix(extrinsic_matrix) + + cam_path = nuscenes_data_root / str(cam_data["filename"]) + + if cam_path.exists() and cam_path.is_file(): + # camera_dict[camera_type] = (camera_data, extrinsic) + camera_data_list.append( + CameraData( + camera_type=camera_type, + extrinsic=extrinsic, + relative_path=cam_path.relative_to(nuscenes_data_root), + dataset_root=nuscenes_data_root, + ) + ) + + return camera_data_list + + +def _extract_nuscenes_lidars( + nusc: NuScenes, + sample: Dict[str, Any], + nuscenes_data_root: Path, + dataset_converter_config: DatasetConverterConfig, +) -> List[LiDARData]: + lidars: List[LiDARData] = [] + + if dataset_converter_config.include_lidars: + lidar_token = sample["data"]["LIDAR_TOP"] + lidar_data = nusc.get("sample_data", lidar_token) + absolute_lidar_path = nuscenes_data_root / lidar_data["filename"] + + if absolute_lidar_path.exists() and absolute_lidar_path.is_file(): + lidar = LiDARData( + lidar_type=LiDARType.LIDAR_TOP, + relative_path=absolute_lidar_path.relative_to(nuscenes_data_root), + dataset_root=nuscenes_data_root, + iteration=lidar_data.get("iteration"), + ) + lidars.append(lidar) + return lidars diff --git a/src/py123d/conversion/datasets/nuscenes/nuscenes_map_conversion.py b/src/py123d/conversion/datasets/nuscenes/nuscenes_map_conversion.py new file mode 100644 index 00000000..39ae313a --- /dev/null +++ b/src/py123d/conversion/datasets/nuscenes/nuscenes_map_conversion.py @@ -0,0 +1,498 @@ +from collections import defaultdict +from pathlib import Path +from typing import Dict, Final, List, Optional + +import numpy as np +from shapely.geometry import LineString, Polygon + +from py123d.common.utils.dependencies import check_dependencies +from py123d.conversion.datasets.nuscenes.utils.nuscenes_constants import NUSCENES_MAPS +from py123d.conversion.datasets.nuscenes.utils.nuscenes_map_utils import ( + extract_lane_and_boundaries, + extract_nuscenes_centerline, + order_lanes_left_to_right, +) +from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from py123d.conversion.utils.map_utils.road_edge.road_edge_2d_utils import ( + get_road_edge_linear_rings, + split_line_geometry_by_max_length, + split_polygon_by_grid, +) +from py123d.datatypes.maps.cache.cache_map_objects import ( + CacheCarpark, + CacheCrosswalk, + CacheGenericDrivable, + CacheIntersection, + CacheLane, + CacheLaneGroup, + CacheRoadEdge, + CacheRoadLine, + CacheWalkway, +) +from py123d.datatypes.maps.map_datatypes import RoadEdgeType, RoadLineType +from py123d.geometry import 
OccupancyMap2D, Polyline2D, Polyline3D
+from py123d.geometry.utils.polyline_utils import offset_points_perpendicular
+
+check_dependencies(["nuscenes"], optional_name="nuscenes")
+from nuscenes.map_expansion.map_api import NuScenesMap
+
+MAX_ROAD_EDGE_LENGTH: Final[float] = 100.0  # [m]
+MAX_LANE_WIDTH: Final[float] = 4.0  # [m]
+MIN_LANE_WIDTH: Final[float] = 1.0  # [m]
+
+
+def write_nuscenes_map(
+    nuscenes_maps_root: Path,
+    location: str,
+    map_writer: AbstractMapWriter,
+    use_lanelet2: bool,
+    lanelet2_root: Optional[Path] = None,
+) -> None:
+    """Converts the nuScenes map types to the 123D format, and sends elements to the map writer.
+    FIXME @DanielDauner: Currently, the Lanelet2 format is not supported for nuScenes.
+
+    :param nuscenes_maps_root: Path to the nuScenes maps root directory
+    :param location: Name of the specific map location to convert
+    :param map_writer: Map writer instance to write the converted elements
+    :param use_lanelet2: Flag indicating whether to use the Lanelet2 format
+    :param lanelet2_root: Path to the Lanelet2 root directory, defaults to None
+    """
+
+    assert location in NUSCENES_MAPS, f"Map name {location} is not supported."
+    nuscenes_map = NuScenesMap(dataroot=str(nuscenes_maps_root), map_name=location)
+
+    # 1. extract road edges (used later to determine lane connector widths)
+    road_edges = _extract_nuscenes_road_edges(nuscenes_map)
+
+    # 2. extract lanes
+    lanes = _extract_nuscenes_lanes(nuscenes_map)
+
+    # 3. extract lane connectors (i.e. lanes on intersections)
+    lane_connectors = _extract_nuscenes_lane_connectors(nuscenes_map, road_edges)
+
+    # 4. extract intersections (and store the lane-connector to intersection assignment for lane groups)
+    intersection_assignment = _write_nuscenes_intersections(nuscenes_map, lane_connectors, map_writer)
+
+    # 5. extract lane groups
+    lane_groups = _extract_nuscenes_lane_groups(nuscenes_map, lanes, lane_connectors, intersection_assignment)
+
+    # Write remaining map elements
+    _write_nuscenes_crosswalks(nuscenes_map, map_writer)
+    _write_nuscenes_walkways(nuscenes_map, map_writer)
+    _write_nuscenes_carparks(nuscenes_map, map_writer)
+    _write_nuscenes_generic_drivables(nuscenes_map, map_writer)
+    _write_nuscenes_stop_lines(nuscenes_map, map_writer)
+    _write_nuscenes_road_lines(nuscenes_map, map_writer)
+
+    for lane in lanes + lane_connectors:
+        map_writer.write_lane(lane)
+
+    for road_edge in road_edges:
+        map_writer.write_road_edge(road_edge)
+
+    for lane_group in lane_groups:
+        map_writer.write_lane_group(lane_group)
+
+
+def _extract_nuscenes_lanes(nuscenes_map: NuScenesMap) -> List[CacheLane]:
+    """Helper function to extract lanes from a nuScenes map."""
+
+    # NOTE: nuScenes does not explicitly provide lane groups and does not assign lanes to roadblocks.
+    # Therefore, we query the roadblocks given the midpoint of the centerline to assign lanes to a road block.
+    # Unlike road segments, road blocks outline a lane group going in the same direction.
+    # In case a roadblock cannot be assigned, e.g. because the lane is not located within any roadblock, or the
+    # roadblock data is invalid [1], we assign a new lane group with only this lane. 
+ # [1] https://github.com/nutonomy/nuscenes-devkit/issues/862 + + road_blocks_invalid = nuscenes_map.map_name in ["singapore-queenstown", "singapore-hollandvillage"] + + road_block_dict: Dict[str, Polygon] = {} + if not road_blocks_invalid: + road_block_dict: Dict[str, Polygon] = { + road_block["token"]: nuscenes_map.extract_polygon(road_block["polygon_token"]) + for road_block in nuscenes_map.road_block + } + + road_block_map = OccupancyMap2D.from_dict(road_block_dict) + lanes: List[CacheLane] = [] + for lane_record in nuscenes_map.lane: + token = lane_record["token"] + + # 1. Extract centerline and boundaries + centerline, left_boundary, right_boundary = extract_lane_and_boundaries(nuscenes_map, lane_record) + + if left_boundary is None or right_boundary is None: + continue # skip lanes without valid boundaries + + # 2. Query road block for lane group assignment + lane_group_id: str = token # default to self, override if road block found + if not road_blocks_invalid: + query_point = centerline.interpolate(0.5, normalized=True).shapely_point + intersecting_roadblock = road_block_map.query_nearest(query_point, max_distance=0.1, all_matches=False) + + # NOTE: if a lane cannot be assigned to a road block, we assume a new lane group with only this lane. + # The lane group id is set to be the same as the lane id in this case. + if len(intersecting_roadblock) > 0: + lane_group_id = road_block_map.ids[intersecting_roadblock[0]] + + # Get topology + incoming = nuscenes_map.get_incoming_lane_ids(token) + outgoing = nuscenes_map.get_outgoing_lane_ids(token) + + lanes.append( + CacheLane( + object_id=token, + lane_group_id=lane_group_id, + left_boundary=left_boundary, + right_boundary=right_boundary, + centerline=centerline, + left_lane_id=None, + right_lane_id=None, + predecessor_ids=incoming, + successor_ids=outgoing, + speed_limit_mps=None, + outline=None, + geometry=None, + ) + ) + + return lanes + + +def _extract_nuscenes_lane_connectors(nuscenes_map: NuScenesMap, road_edges: List[CacheRoadEdge]) -> List[CacheLane]: + """Helper function to extract lane connectors from a nuScenes map.""" + + # TODO @DanielDauner: consider using connected lanes to estimate the lane width + + road_edge_map = OccupancyMap2D(geometries=[road_edge.shapely_linestring for road_edge in road_edges]) + + lane_connectors: List[CacheLane] = [] + for lane_record in nuscenes_map.lane_connector: + lane_connector_token: str = lane_record["token"] + + centerline = extract_nuscenes_centerline(nuscenes_map, lane_record) + + _, nearest_edge_distances = road_edge_map.query_nearest( + centerline.linestring, return_distance=True, all_matches=False + ) + road_edge_distance = nearest_edge_distances[0] if nearest_edge_distances else float("inf") + + lane_half_width = np.clip(road_edge_distance, MIN_LANE_WIDTH / 2.0, MAX_LANE_WIDTH / 2.0) + + left_pts = offset_points_perpendicular(centerline.array, offset=lane_half_width) + right_pts = offset_points_perpendicular(centerline.array, offset=-lane_half_width) + + predecessor_ids = nuscenes_map.get_incoming_lane_ids(lane_connector_token) + successor_ids = nuscenes_map.get_outgoing_lane_ids(lane_connector_token) + + lane_group_id = lane_connector_token + + lane_connectors.append( + CacheLane( + object_id=lane_connector_token, + lane_group_id=lane_group_id, + left_boundary=Polyline2D.from_array(left_pts), + right_boundary=Polyline2D.from_array(right_pts), + centerline=centerline, + left_lane_id=None, # Not directly available in nuscenes + right_lane_id=None, # Not directly available in nuscenes 
+                predecessor_ids=predecessor_ids,
+                successor_ids=successor_ids,
+                speed_limit_mps=None,  # Default value
+                outline=None,
+                geometry=None,
+            )
+        )
+
+    return lane_connectors
+
+
+def _extract_nuscenes_lane_groups(
+    nuscenes_map: NuScenesMap,
+    lanes: List[CacheLane],
+    lane_connectors: List[CacheLane],
+    intersection_assignment: Dict[str, int],
+) -> List[CacheLaneGroup]:
+    """Helper function to extract lane groups from a nuScenes map."""
+
+    lane_groups = []
+    lanes_dict = {lane.object_id: lane for lane in lanes + lane_connectors}
+
+    # 1. Gather all lane group ids that were previously assigned to the lanes (either roadblocks or the lanes themselves)
+    lane_group_lane_dict: Dict[str, List[str]] = defaultdict(list)
+    for lane in lanes + lane_connectors:
+        lane_group_lane_dict[lane.lane_group_id].append(lane.object_id)
+
+    for lane_group_id, lane_ids in lane_group_lane_dict.items():
+
+        if len(lane_ids) > 1:
+            lane_centerlines: List[Polyline2D] = [lanes_dict[lane_id].centerline for lane_id in lane_ids]
+            ordered_lane_indices = order_lanes_left_to_right(lane_centerlines)
+            left_boundary = lanes_dict[lane_ids[ordered_lane_indices[0]]].left_boundary
+            right_boundary = lanes_dict[lane_ids[ordered_lane_indices[-1]]].right_boundary
+
+        else:
+            lane_id = lane_ids[0]
+            lane = lanes_dict[lane_id]
+            left_boundary = lane.left_boundary
+            right_boundary = lane.right_boundary
+
+        # 2. For each lane group, gather predecessor and successor lane groups
+        predecessor_ids = set()
+        successor_ids = set()
+        for lane_id in lane_ids:
+            lane = lanes_dict[lane_id]
+            if lane is None:
+                continue
+            for pred_id in lane.predecessor_ids:
+                pred_lane = lanes_dict.get(pred_id)
+                if pred_lane is not None:
+                    predecessor_ids.add(pred_lane.lane_group_id)
+            for succ_id in lane.successor_ids:
+                succ_lane = lanes_dict.get(succ_id)
+                if succ_lane is not None:
+                    successor_ids.add(succ_lane.lane_group_id)
+
+        intersection_ids = set(
+            [int(intersection_assignment[lane_id]) for lane_id in lane_ids if lane_id in intersection_assignment]
+        )
+        assert len(intersection_ids) <= 1, "A lane group cannot belong to multiple intersections."
+        intersection_id = None if len(intersection_ids) == 0 else intersection_ids.pop()
+
+        lane_groups.append(
+            CacheLaneGroup(
+                object_id=lane_group_id,
+                lane_ids=lane_ids,
+                left_boundary=left_boundary,
+                right_boundary=right_boundary,
+                intersection_id=intersection_id,
+                predecessor_ids=list(predecessor_ids),
+                successor_ids=list(successor_ids),
+                outline=None,
+                geometry=None,
+            )
+        )
+
+    return lane_groups
+
+
+def _write_nuscenes_intersections(
+    nuscenes_map: NuScenesMap, lane_connectors: List[CacheLane], map_writer: AbstractMapWriter
+) -> Dict[str, int]:
+    """Write intersection data to map_writer and return the lane-connector to intersection assignment."""
+
+    intersection_assignment: Dict[str, int] = {}
+
+    # 1. Extract intersections and corresponding polygons
+    intersection_polygons = []
+    for road_segment in nuscenes_map.road_segment:
+        if road_segment["is_intersection"]:
+            if "polygon_token" in road_segment:
+                polygon = nuscenes_map.extract_polygon(road_segment["polygon_token"])
+                intersection_polygons.append(polygon)
+
+    # 2. 
Find lane connectors within each intersection polygon + lane_connector_center_point_dict = { + lane_connector.object_id: lane_connector.centerline.interpolate(0.5, normalized=True).shapely_point + for lane_connector in lane_connectors + } + centerpoint_map = OccupancyMap2D.from_dict(lane_connector_center_point_dict) + for idx, intersection_polygon in enumerate(intersection_polygons): + intersecting_lane_connector_ids = centerpoint_map.intersects(intersection_polygon) + for lane_connector_id in intersecting_lane_connector_ids: + intersection_assignment[lane_connector_id] = idx + + map_writer.write_intersection( + CacheIntersection( + object_id=idx, + lane_group_ids=intersecting_lane_connector_ids, + outline=None, + geometry=intersection_polygon, + ) + ) + + return intersection_assignment + + +def _write_nuscenes_crosswalks(nuscenes_map: NuScenesMap, map_writer: AbstractMapWriter) -> None: + """Write crosswalk data to map_writer.""" + + crosswalk_polygons = [] + for crossing in nuscenes_map.ped_crossing: + if "polygon_token" in crossing: + polygon = nuscenes_map.extract_polygon(crossing["polygon_token"]) + crosswalk_polygons.append(polygon) + + for idx, polygon in enumerate(crosswalk_polygons): + map_writer.write_crosswalk( + CacheCrosswalk( + object_id=idx, + geometry=polygon, + ) + ) + + +def _write_nuscenes_walkways(nuscenes_map: NuScenesMap, map_writer: AbstractMapWriter) -> None: + """Write walkway data to map_writer.""" + walkway_polygons = [] + for walkway_record in nuscenes_map.walkway: + if "polygon_token" in walkway_record: + polygon = nuscenes_map.extract_polygon(walkway_record["polygon_token"]) + walkway_polygons.append(polygon) + + for idx, polygon in enumerate(walkway_polygons): + map_writer.write_walkway( + CacheWalkway( + object_id=idx, + geometry=polygon, + ) + ) + + +def _write_nuscenes_carparks(nuscenes_map: NuScenesMap, map_writer: AbstractMapWriter) -> None: + """Write carpark data to map_writer.""" + carpark_polygons = [] + for carpark_record in nuscenes_map.carpark_area: + if "polygon_token" in carpark_record: + polygon = nuscenes_map.extract_polygon(carpark_record["polygon_token"]) + carpark_polygons.append(polygon) + + for idx, polygon in enumerate(carpark_polygons): + map_writer.write_carpark( + CacheCarpark( + object_id=idx, + geometry=polygon, + ) + ) + + +def _write_nuscenes_generic_drivables(nuscenes_map: NuScenesMap, map_writer: AbstractMapWriter) -> None: + """Write generic drivable area data to map_writer.""" + cell_size = 10.0 + drivable_polygons = [] + for drivable_area_record in nuscenes_map.drivable_area: + drivable_area = nuscenes_map.get("drivable_area", drivable_area_record["token"]) + for polygon_token in drivable_area["polygon_tokens"]: + polygon = nuscenes_map.extract_polygon(polygon_token) + + split_polygons = split_polygon_by_grid(polygon, cell_size=cell_size) + drivable_polygons.extend(split_polygons) + # drivable_polygons.append(polygon) + + for idx, geometry in enumerate(drivable_polygons): + map_writer.write_generic_drivable(CacheGenericDrivable(object_id=idx, geometry=geometry)) + + +def _write_nuscenes_stop_lines(nuscenes_map: NuScenesMap, map_writer: AbstractMapWriter) -> None: + """Write stop line data to map_writer.""" + # FIXME: Add stop lines. 
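+    # NOTE (editorial, assumption): like the ped_crossing and walkway layers above, nuScenes
+    # stop_line records appear to carry a "polygon_token"; the disabled sketch below follows
+    # that pattern and is kept as a starting point for the FIXME.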
+ # stop_lines = nuscenes_map.stop_line + # for stop_line in stop_lines: + # token = stop_line["token"] + # try: + # if "polygon_token" in stop_line: + # polygon = nuscenes_map.extract_polygon(stop_line["polygon_token"]) + # else: + # continue + # if not polygon.is_valid: + # continue + # except Exception: + # traceback.print_exc() + # continue + + # # Note: Stop lines are written as generic drivable for compatibility + # map_writer.write_generic_drivable( + # CacheGenericDrivable( + # object_id=token, + # geometry=polygon, + # ) + # ) + + +def _write_nuscenes_road_lines(nuscenes_map: NuScenesMap, map_writer: AbstractMapWriter) -> None: + """Write road line data (dividers) to map_writer.""" + # Process road dividers + road_dividers = nuscenes_map.road_divider + running_idx = 0 + for divider in road_dividers: + line = nuscenes_map.extract_line(divider["line_token"]) + + # Determine line type + line_type = _get_road_line_type(divider["line_token"], nuscenes_map) + + map_writer.write_road_line( + CacheRoadLine( + object_id=running_idx, + road_line_type=line_type, + polyline=Polyline3D(LineString(line.coords)), + ) + ) + running_idx += 1 + + # Process lane dividers + lane_dividers = nuscenes_map.lane_divider + for divider in lane_dividers: + line = nuscenes_map.extract_line(divider["line_token"]) + line_type = _get_road_line_type(divider["line_token"], nuscenes_map) + + map_writer.write_road_line( + CacheRoadLine( + object_id=running_idx, + road_line_type=line_type, + polyline=Polyline3D(LineString(line.coords)), + ) + ) + running_idx += 1 + + +def _extract_nuscenes_road_edges(nuscenes_map: NuScenesMap) -> List[CacheRoadEdge]: + """Helper function to extract road edges from a nuScenes map.""" + drivable_polygons = [] + for drivable_area_record in nuscenes_map.drivable_area: + drivable_area = nuscenes_map.get("drivable_area", drivable_area_record["token"]) + for polygon_token in drivable_area["polygon_tokens"]: + polygon = nuscenes_map.extract_polygon(polygon_token) + drivable_polygons.append(polygon) + + road_edge_linear_rings = get_road_edge_linear_rings(drivable_polygons) + road_edges_linestrings = split_line_geometry_by_max_length(road_edge_linear_rings, MAX_ROAD_EDGE_LENGTH) + + road_edges_cache: List[CacheRoadEdge] = [] + for idx in range(len(road_edges_linestrings)): + road_edges_cache.append( + CacheRoadEdge( + object_id=idx, + road_edge_type=RoadEdgeType.ROAD_EDGE_BOUNDARY, + polyline=Polyline2D.from_linestring(road_edges_linestrings[idx]), + ) + ) + + return road_edges_cache + + +def _get_road_line_type(line_token: str, nuscenes_map: NuScenesMap) -> RoadLineType: + """Map nuscenes line type to RoadLineType.""" + + # FIXME @DanielDauner: Store token to type mapping. Creating mapping for every call is not ideal. 
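+    # One way to address the FIXME above (illustrative sketch, not part of the original code):
+    # build the token-to-segment-type mapping once per map and cache it at module level, e.g.
+    #
+    #     _LINE_TOKEN_TYPE_CACHE: Dict[str, Dict[str, str]] = {}
+    #
+    #     def _line_token_types(nuscenes_map: NuScenesMap) -> Dict[str, str]:
+    #         if nuscenes_map.map_name not in _LINE_TOKEN_TYPE_CACHE:
+    #             _LINE_TOKEN_TYPE_CACHE[nuscenes_map.map_name] = ...  # the lane scan below, run once
+    #         return _LINE_TOKEN_TYPE_CACHE[nuscenes_map.map_name]
+    #
+    # so repeated calls avoid rescanning every lane record.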
+ nuscenes_to_road_line_type = { + "SINGLE_SOLID_WHITE": RoadLineType.SOLID_WHITE, + "DOUBLE_DASHED_WHITE": RoadLineType.DOUBLE_DASH_WHITE, + "SINGLE_SOLID_YELLOW": RoadLineType.SOLID_YELLOW, + } + + line_token_to_type = {} + for lane_record in nuscenes_map.lane: + for seg in lane_record.get("left_lane_divider_segments", []): + token = seg.get("line_token") + seg_type = seg.get("segment_type") + if token and seg_type: + line_token_to_type[token] = seg_type + + for seg in lane_record.get("right_lane_divider_segments", []): + token = seg.get("line_token") + seg_type = seg.get("segment_type") + if token and seg_type: + line_token_to_type[token] = seg_type + + nuscenes_type = line_token_to_type.get(line_token, "UNKNOWN") + return nuscenes_to_road_line_type.get(nuscenes_type, RoadLineType.UNKNOWN) diff --git a/src/py123d/conversion/datasets/nuscenes/nuscenes_sensor_io.py b/src/py123d/conversion/datasets/nuscenes/nuscenes_sensor_io.py new file mode 100644 index 00000000..e09caae6 --- /dev/null +++ b/src/py123d/conversion/datasets/nuscenes/nuscenes_sensor_io.py @@ -0,0 +1,23 @@ +from pathlib import Path +from typing import Dict + +import numpy as np + +from py123d.conversion.registry.lidar_index_registry import NuScenesLiDARIndex +from py123d.datatypes.scene.scene_metadata import LogMetadata +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.geometry.se import StateSE3 +from py123d.geometry.transform.transform_se3 import convert_points_3d_array_between_origins + + +def load_nuscenes_lidar_pcs_from_file(pcd_path: Path, log_metadata: LogMetadata) -> Dict[LiDARType, np.ndarray]: + lidar_pc = np.fromfile(pcd_path, dtype=np.float32).reshape(-1, len(NuScenesLiDARIndex)) + + # convert lidar to ego frame + lidar_extrinsic = log_metadata.lidar_metadata[LiDARType.LIDAR_TOP].extrinsic + lidar_pc[..., NuScenesLiDARIndex.XYZ] = convert_points_3d_array_between_origins( + from_origin=lidar_extrinsic, + to_origin=StateSE3(0, 0, 0, 1.0, 0, 0, 0), + points_3d_array=lidar_pc[..., NuScenesLiDARIndex.XYZ], + ) + return {LiDARType.LIDAR_TOP: lidar_pc} diff --git a/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py b/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py new file mode 100644 index 00000000..5cafb870 --- /dev/null +++ b/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py @@ -0,0 +1,58 @@ +from typing import Final, List + +from py123d.conversion.registry.box_detection_label_registry import NuScenesBoxDetectionLabel +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType + +NUSCENES_MAPS: List[str] = ["boston-seaport", "singapore-hollandvillage", "singapore-onenorth", "singapore-queenstown"] + +NUSCENES_DATA_SPLITS: Final[List[str]] = [ + "nuscenes_train", + "nuscenes_val", + "nuscenes_test", + "nuscenes-mini_train", + "nuscenes-mini_val", +] + +TARGET_DT: Final[float] = 0.1 +NUSCENES_DT: Final[float] = 0.5 +SORT_BY_TIMESTAMP: Final[bool] = True +NUSCENES_DETECTION_NAME_DICT = { + # Vehicles (4+ wheels) + "vehicle.car": NuScenesBoxDetectionLabel.VEHICLE_CAR, + "vehicle.truck": NuScenesBoxDetectionLabel.VEHICLE_TRUCK, + "vehicle.bus.bendy": NuScenesBoxDetectionLabel.VEHICLE_BUS_BENDY, + "vehicle.bus.rigid": NuScenesBoxDetectionLabel.VEHICLE_BUS_RIGID, + "vehicle.construction": NuScenesBoxDetectionLabel.VEHICLE_CONSTRUCTION, + "vehicle.emergency.ambulance": NuScenesBoxDetectionLabel.VEHICLE_EMERGENCY_AMBULANCE, + "vehicle.emergency.police": NuScenesBoxDetectionLabel.VEHICLE_EMERGENCY_POLICE, + "vehicle.trailer": 
NuScenesBoxDetectionLabel.VEHICLE_TRAILER,
+    # Bicycles / Motorcycles
+    "vehicle.bicycle": NuScenesBoxDetectionLabel.VEHICLE_BICYCLE,
+    "vehicle.motorcycle": NuScenesBoxDetectionLabel.VEHICLE_MOTORCYCLE,
+    # Pedestrians (all subtypes)
+    "human.pedestrian.adult": NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_ADULT,
+    "human.pedestrian.child": NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_CHILD,
+    "human.pedestrian.construction_worker": NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_CONSTRUCTION_WORKER,
+    "human.pedestrian.personal_mobility": NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_PERSONAL_MOBILITY,
+    "human.pedestrian.police_officer": NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_POLICE_OFFICER,
+    "human.pedestrian.stroller": NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_STROLLER,
+    "human.pedestrian.wheelchair": NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_WHEELCHAIR,
+    # Traffic cone / barrier
+    "movable_object.trafficcone": NuScenesBoxDetectionLabel.MOVABLE_OBJECT_TRAFFICCONE,
+    "movable_object.barrier": NuScenesBoxDetectionLabel.MOVABLE_OBJECT_BARRIER,
+    # Generic objects
+    "movable_object.pushable_pullable": NuScenesBoxDetectionLabel.MOVABLE_OBJECT_PUSHABLE_PULLABLE,
+    "movable_object.debris": NuScenesBoxDetectionLabel.MOVABLE_OBJECT_DEBRIS,
+    "static_object.bicycle_rack": NuScenesBoxDetectionLabel.STATIC_OBJECT_BICYCLE_RACK,
+    "animal": NuScenesBoxDetectionLabel.ANIMAL,
+}
+
+
+NUSCENES_CAMERA_TYPES = {
+    PinholeCameraType.PCAM_F0: "CAM_FRONT",
+    PinholeCameraType.PCAM_B0: "CAM_BACK",
+    PinholeCameraType.PCAM_L0: "CAM_FRONT_LEFT",
+    PinholeCameraType.PCAM_L1: "CAM_BACK_LEFT",
+    PinholeCameraType.PCAM_R0: "CAM_FRONT_RIGHT",
+    PinholeCameraType.PCAM_R1: "CAM_BACK_RIGHT",
+}
diff --git a/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_map_utils.py b/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_map_utils.py
new file mode 100644
index 00000000..a512dc18
--- /dev/null
+++ b/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_map_utils.py
@@ -0,0 +1,197 @@
+from typing import Dict, List, Optional, Tuple
+
+import numpy as np
+from scipy.spatial.distance import cdist
+
+from py123d.common.utils.dependencies import check_dependencies
+from py123d.geometry.polyline import Polyline2D
+
+check_dependencies(["nuscenes"], optional_name="nuscenes")
+from nuscenes.map_expansion import arcline_path_utils
+from nuscenes.map_expansion.map_api import NuScenesMap
+
+
+def extract_lane_and_boundaries(
+    nuscenes_map: NuScenesMap, lane_record: Dict
+) -> Tuple[Polyline2D, Optional[Polyline2D], Optional[Polyline2D]]:
+    """Extracts the centerline and left / right boundary from a nuScenes lane.
+
+    :param nuscenes_map: nuScenes map API instance
+    :param lane_record: lane record dictionary
+    :return: centerline, left boundary (optional), right boundary (optional)
+    """
+
+    # NOTE @DanielDauner: Code adapted from trajdata, Apache 2.0 License. Thank you!
+    # https://github.com/NVlabs/trajdata/blob/main/src/trajdata/dataset_specific/nusc/nusc_utils.py#L281
+
+    # Getting the bounding polygon vertices.
+    lane_polygon_obj = nuscenes_map.get("polygon", lane_record["polygon_token"])
+    polygon_nodes = [nuscenes_map.get("node", node_token) for node_token in lane_polygon_obj["exterior_node_tokens"]]
+    polygon_outline: np.ndarray = np.array([(node["x"], node["y"]) for node in polygon_nodes])
+
+    # Getting the lane center's points.
+    centerline = extract_nuscenes_centerline(nuscenes_map, lane_record)
+    centerline_array = centerline.array
+
+    # Computing the closest lane center point to each bounding polygon vertex. 
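+    # (cdist returns a (num_polygon_vertices, num_centerline_points) distance matrix, so the
+    # argmin over axis=1 below yields, for each polygon vertex, the index of its nearest
+    # centerline point.)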
+    closest_midlane_pt: np.ndarray = np.argmin(cdist(polygon_outline, centerline_array), axis=1)
+    # Computing the local direction of the lane at each lane center point.
+    direction_vectors: np.ndarray = np.diff(
+        centerline_array,
+        axis=0,
+        prepend=centerline_array[[0]] - (centerline_array[[1]] - centerline_array[[0]]),
+    )
+
+    # Selecting the direction vectors at the closest lane center point per polygon vertex.
+    local_dir_vecs: np.ndarray = direction_vectors[closest_midlane_pt]
+    # Calculating the vectors from the closest lane center point per polygon vertex to the polygon vertex.
+    origin_to_polygon_vecs: np.ndarray = polygon_outline - centerline_array[closest_midlane_pt]
+
+    # Computing the perpendicular dot product.
+    # See https://www.xarg.org/book/linear-algebra/2d-perp-product/
+    # If perp_dot_product < 0, then the associated polygon vertex is
+    # on the right edge of the lane.
+    perp_dot_product: np.ndarray = (
+        local_dir_vecs[:, 0] * origin_to_polygon_vecs[:, 1] - local_dir_vecs[:, 1] * origin_to_polygon_vecs[:, 0]
+    )
+
+    # Determining which indices are on the right of the lane center.
+    on_right: np.ndarray = perp_dot_product < 0
+    # Determining the boundary between the left/right polygon vertices
+    # (they will be together in blocks due to the ordering of the polygon vertices).
+    idx_changes: int = np.where(np.roll(on_right, 1) < on_right)[0].item()
+
+    if idx_changes > 0:
+        # If the block of left/right points spreads across the bounds of the array,
+        # roll it until the boundary between left/right points is at index 0.
+        # This is important so that the following index selection orders points
+        # without jumps.
+        polygon_outline = np.roll(polygon_outline, shift=-idx_changes, axis=0)
+        on_right = np.roll(on_right, shift=-idx_changes)
+
+    left_polyline_array: np.ndarray = polygon_outline[~on_right]
+    right_polyline_array: np.ndarray = polygon_outline[on_right]
+
+    # Final ordering check, ensuring that left_pts and right_pts can be combined
+    # into a polygon without the endpoints intersecting.
+    # Reversing the one lane edge that does not match the ordering of the midline.
+    if endpoints_intersect(left_polyline_array, right_polyline_array):
+        if not order_matches(left_polyline_array, centerline_array):
+            left_polyline_array = left_polyline_array[::-1]
+        else:
+            right_polyline_array = right_polyline_array[::-1]
+
+    left_boundary = Polyline2D.from_array(left_polyline_array) if len(left_polyline_array) > 1 else None
+    right_boundary = Polyline2D.from_array(right_polyline_array) if len(right_polyline_array) > 1 else None
+    return centerline, left_boundary, right_boundary
+
+
+def extract_nuscenes_centerline(nuscenes_map: NuScenesMap, lane_record: Dict) -> Polyline2D:
+    """Extract the centerline of a nuScenes lane.
+
+    :param nuscenes_map: nuScenes map API instance
+    :param lane_record: lane record dictionary
+    :return: centerline 2D polyline
+    """
+
+    # NOTE @DanielDauner: Code adapted from trajdata, Apache 2.0 License. Thank you!
+    # https://github.com/NVlabs/trajdata/blob/main/src/trajdata/dataset_specific/nusc/nusc_utils.py#L262
+
+    # Getting the lane center's points.
+    curr_lane = nuscenes_map.arcline_path_3.get(lane_record["token"], [])
+    centerline_array: np.ndarray = np.array(arcline_path_utils.discretize_lane(curr_lane, resolution_meters=0.25))[
+        :, :2
+    ]
+
+    # For some reason, nuScenes duplicates a few entries
+    # (likely how they're building their arcline representation).
+    # We delete those duplicate entries here. 
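+    # (With prepend=0, the first row of np.diff equals the first point itself, so index 0 is
+    # only flagged if the lane happens to start at the exact origin; every other flagged index
+    # is a point that repeats its predecessor within 1e-10 m.)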
+ duplicate_check: np.ndarray = np.where( + np.linalg.norm(np.diff(centerline_array, axis=0, prepend=0), axis=1) < 1e-10 + )[0] + if duplicate_check.size > 0: + centerline_array = np.delete(centerline_array, duplicate_check, axis=0) + + return Polyline2D.from_array(centerline_array) + + +def endpoints_intersect(left_edge: np.ndarray, right_edge: np.ndarray) -> bool: + """Check if the line segment connecting the endpoints of left_edge intersects + with the line segment connecting the endpoints of right_edge. + + Forms two segments: (left_edge[-1], left_edge[0]) and (right_edge[-1], right_edge[0]), + then tests if they intersect using the counter-clockwise (CCW) orientation test. + """ + + # NOTE @DanielDauner: Code adapted from trajdata, Apache 2.0 License. Thank you! + # https://github.com/NVlabs/trajdata/blob/main/src/trajdata/utils/map_utils.py#L177 + + def ccw(A, B, C): + return (C[1] - A[1]) * (B[0] - A[0]) > (B[1] - A[1]) * (C[0] - A[0]) + + A, B = left_edge[-1], right_edge[-1] + C, D = right_edge[0], left_edge[0] + return ccw(A, C, D) != ccw(B, C, D) and ccw(A, B, C) != ccw(A, B, D) + + +def order_matches(pts: np.ndarray, ref: np.ndarray) -> bool: + """Check if two polylines have the same ordering direction, by comparing + the distance of their start and end points to the start point of the reference polyline. + """ + + # NOTE @DanielDauner: Code adapted from trajdata, Apache 2.0 License. Thank you! + # https://github.com/NVlabs/trajdata/blob/main/src/trajdata/utils/map_utils.py#L162 + + return np.linalg.norm(pts[0] - ref[0]) <= np.linalg.norm(pts[-1] - ref[0]) + + +def order_lanes_left_to_right(polylines: List[Polyline2D]) -> List[int]: + """ + Order lanes from left to right based on their position. + + :param polylines: List of polylines representing lanes + :return: List of indices representing the order of lanes from left to right + """ + if len(polylines) == 0: + return [] + + # Step 1: Compute the average direction vector across all lanes + all_directions = [] + for polyline in polylines: + + polyline_array = polyline.array + if len(polyline_array) < 2: + continue + start = np.array(polyline_array[0]) + end = np.array(polyline_array[-1]) + direction = end - start + all_directions.append(direction) + + avg_direction = np.mean(all_directions, axis=0) + avg_direction = avg_direction / np.linalg.norm(avg_direction) + + # Step 2: Compute perpendicular vector (left direction) + # Rotate 90 degrees counter-clockwise: (x, y) -> (-y, x) + left_vector = np.array([-avg_direction[1], avg_direction[0]]) + + # Step 3: For each lane, use midpoint of start and end, project onto left vector + lane_positions = [] + for i, polyline in enumerate(polylines): + if len(polyline) == 0: + lane_positions.append((i, 0)) + continue + + start = np.array(polyline[0]) + end = np.array(polyline[-1]) + # Use midpoint of start and end + midpoint = (start + end) / 2 + + # Project midpoint onto the left vector + position = np.dot(midpoint, left_vector) + lane_positions.append((i, position)) + + # Step 4: Sort by position (higher values are more to the left) + lane_positions.sort(key=lambda x: x[1], reverse=True) + + # Return ordered indices + return [idx for idx, _ in lane_positions] diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_converter.py b/src/py123d/conversion/datasets/pandaset/pandaset_converter.py new file mode 100644 index 00000000..008ab4e0 --- /dev/null +++ b/src/py123d/conversion/datasets/pandaset/pandaset_converter.py @@ -0,0 +1,383 @@ +from pathlib import Path +from typing import 
Dict, List, Tuple, Union
+
+import numpy as np
+import pandas as pd
+
+from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter
+from py123d.conversion.dataset_converter_config import DatasetConverterConfig
+from py123d.conversion.datasets.pandaset.utils.pandaset_constants import (
+    PANDASET_BOX_DETECTION_FROM_STR,
+    PANDASET_CAMERA_DISTORTIONS,
+    PANDASET_CAMERA_MAPPING,
+    PANDASET_LIDAR_EXTRINSICS,
+    PANDASET_LIDAR_MAPPING,
+    PANDASET_LOG_NAMES,
+    PANDASET_SPLITS,
+)
+from py123d.conversion.datasets.pandaset.utils.pandaset_utlis import (
+    main_lidar_to_rear_axle,
+    pandaset_pose_dict_to_state_se3,
+    read_json,
+    read_pkl_gz,
+    rotate_pandaset_pose_to_iso_coordinates,
+)
+from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, CameraData, LiDARData
+from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
+from py123d.conversion.registry.box_detection_label_registry import PandasetBoxDetectionLabel
+from py123d.conversion.registry.lidar_index_registry import PandasetLiDARIndex
+from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper
+from py123d.datatypes.scene.scene_metadata import LogMetadata
+from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCameraMetadata, PinholeCameraType, PinholeIntrinsics
+from py123d.datatypes.time.time_point import TimePoint
+from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3
+from py123d.datatypes.vehicle_state.vehicle_parameters import (
+    get_pandaset_chrysler_pacifica_parameters,
+    rear_axle_se3_to_center_se3,
+)
+from py123d.geometry import BoundingBoxSE3, BoundingBoxSE3Index, EulerAnglesIndex, StateSE3, Vector3D
+from py123d.geometry.transform.transform_se3 import convert_absolute_to_relative_se3_array
+from py123d.geometry.utils.constants import DEFAULT_PITCH, DEFAULT_ROLL
+from py123d.geometry.utils.rotation_utils import get_quaternion_array_from_euler_array
+
+
+class PandasetConverter(AbstractDatasetConverter):
+    def __init__(
+        self,
+        splits: List[str],
+        pandaset_data_root: Union[Path, str],
+        dataset_converter_config: DatasetConverterConfig,
+        train_log_names: List[str],
+        val_log_names: List[str],
+        test_log_names: List[str],
+    ) -> None:
+        super().__init__(dataset_converter_config)
+        for split in splits:
+            assert split in PANDASET_SPLITS, f"Split {split} is not available. Available splits: {PANDASET_SPLITS}"
+        assert pandaset_data_root is not None, "The variable `pandaset_data_root` must be provided."
+
+        self._splits: List[str] = splits
+        self._pandaset_data_root: Path = Path(pandaset_data_root)
+
+        self._train_log_names: List[str] = train_log_names
+        self._val_log_names: List[str] = val_log_names
+        self._test_log_names: List[str] = test_log_names
+        self._log_paths_and_split: List[Tuple[Path, str]] = self._collect_log_paths()
+
+    def _collect_log_paths(self) -> List[Tuple[Path, str]]:
+        log_paths_and_split: List[Tuple[Path, str]] = []
+
+        for log_folder in self._pandaset_data_root.iterdir():
+            if not log_folder.is_dir():
+                continue
+
+            log_name = log_folder.name
+            assert log_name in PANDASET_LOG_NAMES, f"Log name {log_name} is not recognized." 
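+            # NOTE: logs whose configured split is not contained in `splits` fall through the
+            # chain below and are skipped silently.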
+ if (log_name in self._train_log_names) and ("pandaset_train" in self._splits): + log_paths_and_split.append((log_folder, "pandaset_train")) + elif (log_name in self._val_log_names) and ("pandaset_val" in self._splits): + log_paths_and_split.append((log_folder, "pandaset_val")) + elif (log_name in self._test_log_names) and ("pandaset_test" in self._splits): + log_paths_and_split.append((log_folder, "pandaset_test")) + + return log_paths_and_split + + def get_number_of_maps(self) -> int: + """Inherited, see superclass.""" + return 0 # NOTE: Pandaset does not have maps. + + def get_number_of_logs(self) -> int: + """Inherited, see superclass.""" + return len(self._log_paths_and_split) + + def convert_map(self, map_index: int, map_writer: AbstractMapWriter) -> None: + """Inherited, see superclass.""" + return None # NOTE: Pandaset does not have maps. + + def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: + """Inherited, see superclass.""" + + source_log_path, split = self._log_paths_and_split[log_index] + + # 1. Initialize Metadata + log_metadata = LogMetadata( + dataset="pandaset", + split=split, + log_name=source_log_path.name, + location=None, # TODO: Add location information. + timestep_seconds=0.1, + vehicle_parameters=get_pandaset_chrysler_pacifica_parameters(), + box_detection_label_class=PandasetBoxDetectionLabel, + pinhole_camera_metadata=_get_pandaset_camera_metadata(source_log_path, self.dataset_converter_config), + lidar_metadata=_get_pandaset_lidar_metadata(source_log_path, self.dataset_converter_config), + map_metadata=None, # NOTE: Pandaset does not have maps. + ) + + # 2. Prepare log writer + log_needs_writing = log_writer.reset(self.dataset_converter_config, log_metadata) + + # 3. Process source log data + if log_needs_writing: + + # Read files from pandaset + timesteps = read_json(source_log_path / "meta" / "timestamps.json") + gps: List[Dict[str, float]] = read_json(source_log_path / "meta" / "gps.json") + lidar_poses: List[Dict[str, Dict[str, float]]] = read_json(source_log_path / "lidar" / "poses.json") + camera_poses: Dict[str, List[Dict[str, Dict[str, float]]]] = { + camera_name: read_json(source_log_path / "camera" / camera_name / "poses.json") + for camera_name in PANDASET_CAMERA_MAPPING.keys() + } + + # Write data to log writer + for iteration, timestep_s in enumerate(timesteps): + + ego_state = _extract_pandaset_sensor_ego_state(gps[iteration], lidar_poses[iteration]) + log_writer.write( + timestamp=TimePoint.from_s(timestep_s), + ego_state=ego_state, + box_detections=_extract_pandaset_box_detections(source_log_path, iteration, ego_state), + pinhole_cameras=_extract_pandaset_sensor_camera( + source_log_path, + iteration, + ego_state, + camera_poses, + self.dataset_converter_config, + ), + lidars=_extract_pandaset_lidar( + source_log_path, + iteration, + ego_state, + self.dataset_converter_config, + ), + ) + + # 4. Finalize log writing + log_writer.close() + + +def _get_pandaset_camera_metadata( + source_log_path: Path, dataset_config: DatasetConverterConfig +) -> Dict[PinholeCameraType, PinholeCameraMetadata]: + + camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {} + + if dataset_config.include_pinhole_cameras: + all_cameras_folder = source_log_path / "camera" + for camera_folder in all_cameras_folder.iterdir(): + camera_name = camera_folder.name + + assert camera_name in PANDASET_CAMERA_MAPPING.keys(), f"Camera name {camera_name} is not recognized." 
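+            # NOTE (assumption): all PandaSet cameras record at 1920x1080, which is why the
+            # resolution is hard-coded below instead of being read from the images.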
+            camera_type = PANDASET_CAMERA_MAPPING[camera_name]
+
+            intrinsics_file = camera_folder / "intrinsics.json"
+            assert intrinsics_file.exists(), f"Camera intrinsics file {intrinsics_file} does not exist."
+            intrinsics_data = read_json(intrinsics_file)
+
+            camera_metadata[camera_type] = PinholeCameraMetadata(
+                camera_type=camera_type,
+                width=1920,
+                height=1080,
+                intrinsics=PinholeIntrinsics(
+                    fx=intrinsics_data["fx"],
+                    fy=intrinsics_data["fy"],
+                    cx=intrinsics_data["cx"],
+                    cy=intrinsics_data["cy"],
+                ),
+                distortion=PANDASET_CAMERA_DISTORTIONS[camera_name],
+            )
+
+    return camera_metadata
+
+
+def _get_pandaset_lidar_metadata(
+    log_path: Path, dataset_config: DatasetConverterConfig
+) -> Dict[LiDARType, LiDARMetadata]:
+    lidar_metadata: Dict[LiDARType, LiDARMetadata] = {}
+
+    if dataset_config.include_lidars:
+        for lidar_name, lidar_type in PANDASET_LIDAR_MAPPING.items():
+            lidar_metadata[lidar_type] = LiDARMetadata(
+                lidar_type=lidar_type,
+                lidar_index=PandasetLiDARIndex,
+                extrinsic=PANDASET_LIDAR_EXTRINSICS[
+                    lidar_name
+                ],  # TODO: These extrinsics are incorrect, and need to be transformed correctly.
+            )
+
+    return lidar_metadata
+
+
+def _extract_pandaset_sensor_ego_state(gps: Dict[str, float], lidar_pose: Dict[str, Dict[str, float]]) -> EgoStateSE3:
+
+    rear_axle_pose = main_lidar_to_rear_axle(pandaset_pose_dict_to_state_se3(lidar_pose))
+
+    vehicle_parameters = get_pandaset_chrysler_pacifica_parameters()
+    center = rear_axle_se3_to_center_se3(rear_axle_se3=rear_axle_pose, vehicle_parameters=vehicle_parameters)
+
+    # TODO: Add script to calculate the dynamic state from log sequence.
+    dynamic_state = DynamicStateSE3(
+        # velocity=Vector3D(x=gps["xvel"], y=gps["yvel"], z=0.0),
+        velocity=Vector3D(x=0.0, y=0.0, z=0.0),
+        acceleration=Vector3D(x=0.0, y=0.0, z=0.0),
+        angular_velocity=Vector3D(x=0.0, y=0.0, z=0.0),
+    )
+
+    return EgoStateSE3(
+        center_se3=center,
+        dynamic_state_se3=dynamic_state,
+        vehicle_parameters=vehicle_parameters,
+        timepoint=None,
+    )
+
+
+def _extract_pandaset_box_detections(
+    source_log_path: Path, iteration: int, ego_state_se3: EgoStateSE3
+) -> BoxDetectionWrapper:
+
+    # NOTE: The following cuboid annotation fields are not stored in 123D:
+    # - stationary
+    # - camera_used
+    # - attributes.object_motion
+    # - cuboids.sibling_id
+    # - cuboids.sensor_id
+    # - attributes.pedestrian_behavior
+    # - attributes.pedestrian_age
+    # - attributes.rider_status
+    # https://github.com/scaleapi/pandaset-devkit/blob/master/README.md?plain=1#L288
+
+    iteration_str = f"{iteration:02d}"
+    cuboids_file = source_log_path / "annotations" / "cuboids" / f"{iteration_str}.pkl.gz"
+
+    if not cuboids_file.exists():
+        return BoxDetectionWrapper(box_detections=[])
+
+    cuboid_df: pd.DataFrame = read_pkl_gz(cuboids_file)
+
+    # Read cuboid data
+    box_label_names = list(cuboid_df["label"])
+    box_uuids = list(cuboid_df["uuid"])
+    num_boxes = len(box_uuids)
+
+    box_position_x = np.array(cuboid_df["position.x"], dtype=np.float64)
+    box_position_y = np.array(cuboid_df["position.y"], dtype=np.float64)
+    box_position_z = np.array(cuboid_df["position.z"], dtype=np.float64)
+    box_points = np.stack([box_position_x, box_position_y, box_position_z], axis=-1)
+    box_yaws = np.array(cuboid_df["yaw"], dtype=np.float64)
+
+    # NOTE: Rather strange format to have dimensions.x as width, dimensions.y as length
+    box_widths = np.array(cuboid_df["dimensions.x"], dtype=np.float64)
+    box_lengths = np.array(cuboid_df["dimensions.y"], dtype=np.float64)
+    box_heights = np.array(cuboid_df["dimensions.z"], 
dtype=np.float64) + + # Create se3 array for boxes (i.e. convert rotation to quaternion) + box_euler_angles_array = np.zeros((num_boxes, len(EulerAnglesIndex)), dtype=np.float64) + box_euler_angles_array[..., EulerAnglesIndex.ROLL] = DEFAULT_ROLL + box_euler_angles_array[..., EulerAnglesIndex.PITCH] = DEFAULT_PITCH + box_euler_angles_array[..., EulerAnglesIndex.YAW] = box_yaws + + box_se3_array = np.zeros((num_boxes, len(BoundingBoxSE3Index)), dtype=np.float64) + box_se3_array[:, BoundingBoxSE3Index.XYZ] = box_points + box_se3_array[:, BoundingBoxSE3Index.QUATERNION] = get_quaternion_array_from_euler_array(box_euler_angles_array) + box_se3_array[:, BoundingBoxSE3Index.EXTENT] = np.stack([box_lengths, box_widths, box_heights], axis=-1) + + # NOTE: Pandaset annotates moving bounding boxes twice (for synchronization reasons), + # if they are in the overlap area between the top 360° lidar and the front-facing lidar (and moving). + # The value in `cuboids.sensor_id` is either + # - `0` (mechanical 360° LiDAR) + # - `1` (front-facing LiDAR). + # - All other cuboids have value `-1`. + + # To avoid duplicate bounding boxes, we only keep boxes from the front lidar (sensor_id == 1), if they do not + # have a sibling box in the top lidar (sensor_id == 0). Otherwise, all boxes with sensor_id == {-1, 0} are kept. + # https://github.com/scaleapi/pandaset-devkit/blob/master/python/pandaset/annotations.py#L166 + # https://github.com/scaleapi/pandaset-devkit/issues/26 + + top_lidar_uuids = set(cuboid_df[cuboid_df["cuboids.sensor_id"] == 0]["uuid"]) + sensor_ids = cuboid_df["cuboids.sensor_id"].to_list() + sibling_ids = cuboid_df["cuboids.sibling_id"].to_list() + + # Fill bounding box detections and return + box_detections: List[BoxDetectionSE3] = [] + for box_idx in range(num_boxes): + + # Skip duplicate box detections from front lidar if sibling exists in top lidar + if sensor_ids[box_idx] == 1 and sibling_ids[box_idx] in top_lidar_uuids: + continue + + pandaset_box_detection_label = PANDASET_BOX_DETECTION_FROM_STR[box_label_names[box_idx]] + + # Convert coordinates to ISO 8855 + # NOTE: This would be faster over a batch operation. + box_se3_array[box_idx, BoundingBoxSE3Index.STATE_SE3] = rotate_pandaset_pose_to_iso_coordinates( + StateSE3.from_array(box_se3_array[box_idx, BoundingBoxSE3Index.STATE_SE3], copy=False) + ).array + + box_detection_se3 = BoxDetectionSE3( + metadata=BoxDetectionMetadata( + label=pandaset_box_detection_label, + track_token=box_uuids[box_idx], + ), + bounding_box_se3=BoundingBoxSE3.from_array(box_se3_array[box_idx]), + velocity=Vector3D(0.0, 0.0, 0.0), # TODO: Add velocity + ) + box_detections.append(box_detection_se3) + + return BoxDetectionWrapper(box_detections=box_detections) + + +def _extract_pandaset_sensor_camera( + source_log_path: Path, + iteration: int, + ego_state_se3: EgoStateSE3, + camera_poses: Dict[str, List[Dict[str, Dict[str, float]]]], + dataset_converter_config: DatasetConverterConfig, +) -> List[CameraData]: + camera_data_list: List[CameraData] = [] + iteration_str = f"{iteration:02d}" + + if dataset_converter_config.include_pinhole_cameras: + + for camera_name, camera_type in PANDASET_CAMERA_MAPPING.items(): + + image_abs_path = source_log_path / f"camera/{camera_name}/{iteration_str}.jpg" + assert image_abs_path.exists(), f"Camera image file {str(image_abs_path)} does not exist." 
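+                # The devkit stores camera poses in the world frame; converting them relative
+                # to the ego rear axle below yields a rear-axle-relative camera extrinsic.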
+ + camera_pose_dict = camera_poses[camera_name][iteration] + camera_extrinsic = pandaset_pose_dict_to_state_se3(camera_pose_dict) + + camera_extrinsic = StateSE3.from_array( + convert_absolute_to_relative_se3_array(ego_state_se3.rear_axle_se3, camera_extrinsic.array), copy=True + ) + camera_data_list.append( + CameraData( + camera_type=camera_type, + extrinsic=camera_extrinsic, + dataset_root=source_log_path.parent, + relative_path=image_abs_path.relative_to(source_log_path.parent), + ) + ) + + return camera_data_list + + +def _extract_pandaset_lidar( + source_log_path: Path, iteration: int, ego_state_se3: EgoStateSE3, dataset_converter_config: DatasetConverterConfig +) -> List[LiDARData]: + + lidars: List[LiDARData] = [] + if dataset_converter_config.include_lidars: + iteration_str = f"{iteration:02d}" + lidar_absolute_path = source_log_path / "lidar" / f"{iteration_str}.pkl.gz" + assert lidar_absolute_path.exists(), f"LiDAR file {str(lidar_absolute_path)} does not exist." + lidars.append( + LiDARData( + lidar_type=LiDARType.LIDAR_MERGED, + timestamp=None, + iteration=iteration, + dataset_root=source_log_path.parent, + relative_path=str(lidar_absolute_path.relative_to(source_log_path.parent)), + ) + ) + + return lidars diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py b/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py new file mode 100644 index 00000000..14f1f236 --- /dev/null +++ b/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py @@ -0,0 +1,54 @@ +from pathlib import Path +from typing import Dict, Optional, Union + +import numpy as np +import pandas as pd + +from py123d.conversion.datasets.pandaset.utils.pandaset_utlis import ( + main_lidar_to_rear_axle, + pandaset_pose_dict_to_state_se3, + read_json, + read_pkl_gz, +) +from py123d.conversion.registry.lidar_index_registry import PandasetLiDARIndex +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.geometry.transform.transform_se3 import convert_absolute_to_relative_points_3d_array + + +def load_pandaset_global_lidar_pc_from_path(pkl_gz_path: Union[Path, str]) -> Dict[LiDARType, np.ndarray]: + # NOTE: The Pandaset dataset stores both front and top LiDAR data in the same gzip-pickle file. + # We need to separate them based on the laser_number field. 
+    # See here: https://github.com/scaleapi/pandaset-devkit/blob/master/python/pandaset/sensors.py#L160
+
+    all_lidar_df = read_pkl_gz(pkl_gz_path)
+    top_lidar_df: pd.DataFrame = all_lidar_df[all_lidar_df["d"] == 0]
+    front_lidar_df: pd.DataFrame = all_lidar_df[all_lidar_df["d"] == 1]
+
+    # Remove the 't' (timestamp) and 'd' (laser id) columns
+    top_lidar_df = top_lidar_df.drop(columns=["t", "d"])
+    front_lidar_df = front_lidar_df.drop(columns=["t", "d"])
+
+    return {LiDARType.LIDAR_TOP: top_lidar_df.to_numpy(), LiDARType.LIDAR_FRONT: front_lidar_df.to_numpy()}
+
+
+def load_pandaset_lidars_pcs_from_file(
+    pkl_gz_path: Union[Path, str],
+    iteration: Optional[int],
+) -> Dict[LiDARType, np.ndarray]:
+
+    pkl_gz_path = Path(pkl_gz_path)
+    assert pkl_gz_path.exists(), f"Pandaset LiDAR file not found: {pkl_gz_path}"
+
+    lidar_pc_dict = load_pandaset_global_lidar_pc_from_path(pkl_gz_path)
+
+    ego_pose = main_lidar_to_rear_axle(
+        pandaset_pose_dict_to_state_se3(read_json(pkl_gz_path.parent / "poses.json")[iteration])
+    )
+
+    for lidar_type in lidar_pc_dict.keys():
+        lidar_pc_dict[lidar_type][..., PandasetLiDARIndex.XYZ] = convert_absolute_to_relative_points_3d_array(
+            ego_pose,
+            lidar_pc_dict[lidar_type][..., PandasetLiDARIndex.XYZ],
+        )
+
+    return lidar_pc_dict
diff --git a/src/py123d/conversion/datasets/pandaset/utils/pandaset_constants.py b/src/py123d/conversion/datasets/pandaset/utils/pandaset_constants.py
new file mode 100644
index 00000000..8428aadd
--- /dev/null
+++ b/src/py123d/conversion/datasets/pandaset/utils/pandaset_constants.py
@@ -0,0 +1,250 @@
+from typing import Dict, List
+
+from py123d.conversion.registry.box_detection_label_registry import PandasetBoxDetectionLabel
+from py123d.datatypes.sensors.lidar import LiDARType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType, PinholeDistortion, PinholeIntrinsics
+from py123d.geometry import StateSE3
+
+PANDASET_SPLITS: List[str] = ["pandaset_train", "pandaset_val", "pandaset_test"]
+
+PANDASET_CAMERA_MAPPING: Dict[str, PinholeCameraType] = {
+    "front_camera": PinholeCameraType.PCAM_F0,
+    "back_camera": PinholeCameraType.PCAM_B0,
+    "front_left_camera": PinholeCameraType.PCAM_L0,
+    "front_right_camera": PinholeCameraType.PCAM_R0,
+    "left_camera": PinholeCameraType.PCAM_L1,
+    "right_camera": PinholeCameraType.PCAM_R1,
+}
+
+PANDASET_LIDAR_MAPPING: Dict[str, LiDARType] = {"main_pandar64": LiDARType.LIDAR_TOP, "front_gt": LiDARType.LIDAR_FRONT}
+
+
+PANDASET_BOX_DETECTION_FROM_STR: Dict[str, PandasetBoxDetectionLabel] = {
+    "Animals - Bird": PandasetBoxDetectionLabel.ANIMALS_BIRD,
+    "Animals - Other": PandasetBoxDetectionLabel.ANIMALS_OTHER,
+    "Bicycle": PandasetBoxDetectionLabel.BICYCLE,
+    "Bus": PandasetBoxDetectionLabel.BUS,
+    "Car": PandasetBoxDetectionLabel.CAR,
+    "Cones": PandasetBoxDetectionLabel.CONES,
+    "Construction Signs": PandasetBoxDetectionLabel.CONSTRUCTION_SIGNS,
+    "Emergency Vehicle": PandasetBoxDetectionLabel.EMERGENCY_VEHICLE,
+    "Medium-sized Truck": PandasetBoxDetectionLabel.MEDIUM_SIZED_TRUCK,
+    "Motorcycle": PandasetBoxDetectionLabel.MOTORCYCLE,
+    "Motorized Scooter": PandasetBoxDetectionLabel.MOTORIZED_SCOOTER,
+    "Other Vehicle - Construction Vehicle": PandasetBoxDetectionLabel.OTHER_VEHICLE_CONSTRUCTION_VEHICLE,
+    "Other Vehicle - Pedicab": PandasetBoxDetectionLabel.OTHER_VEHICLE_PEDICAB,
+    "Other Vehicle - Uncommon": PandasetBoxDetectionLabel.OTHER_VEHICLE_UNCOMMON,
+    "Pedestrian": PandasetBoxDetectionLabel.PEDESTRIAN,
+    "Pedestrian with Object": PandasetBoxDetectionLabel.PEDESTRIAN_WITH_OBJECT,
+    "Personal 
Mobility Device": PandasetBoxDetectionLabel.PERSONAL_MOBILITY_DEVICE, + "Pickup Truck": PandasetBoxDetectionLabel.PICKUP_TRUCK, + "Pylons": PandasetBoxDetectionLabel.PYLONS, + "Road Barriers": PandasetBoxDetectionLabel.ROAD_BARRIERS, + "Rolling Containers": PandasetBoxDetectionLabel.ROLLING_CONTAINERS, + "Semi-truck": PandasetBoxDetectionLabel.SEMI_TRUCK, + "Signs": PandasetBoxDetectionLabel.SIGNS, + "Temporary Construction Barriers": PandasetBoxDetectionLabel.TEMPORARY_CONSTRUCTION_BARRIERS, + "Towed Object": PandasetBoxDetectionLabel.TOWED_OBJECT, + "Train": PandasetBoxDetectionLabel.TRAIN, + "Tram / Subway": PandasetBoxDetectionLabel.TRAM_SUBWAY, +} + + +# https://github.com/scaleapi/pandaset-devkit/blob/master/docs/static_extrinsic_calibration.yaml +PANDASET_LIDAR_EXTRINSICS: Dict[str, StateSE3] = { + "front_gt": StateSE3( + x=-0.000451117754, + y=-0.605646431446, + z=-0.301525235176, + qw=0.021475754959146356, + qx=-0.002060907279494794, + qy=0.01134678181520767, + qz=0.9997028534282365, + ), + "main_pandar64": StateSE3(x=0.0, y=0.0, z=0.0, qw=1.0, qx=0.0, qy=0.0, qz=0.0), +} + +# https://github.com/scaleapi/pandaset-devkit/blob/master/docs/static_extrinsic_calibration.yaml +PANDASET_CAMERA_EXTRINSICS: Dict[str, StateSE3] = { + "back_camera": StateSE3( + x=-0.0004217634029916384, + y=-0.21683144949675118, + z=-1.0553445472201475, + qw=0.713789231075861, + qx=0.7003585531940812, + qy=-0.001595758695393934, + qz=-0.0005330311533742299, + ), + "front_camera": StateSE3( + x=0.0002585796504896516, + y=-0.03907777167811011, + z=-0.0440125762408362, + qw=0.016213200031258722, + qx=0.0030578899383849464, + qy=0.7114721800418571, + qz=-0.7025205466606356, + ), + "front_left_camera": StateSE3( + x=-0.25842240863267835, + y=-0.3070654284505582, + z=-0.9244245686318884, + qw=0.33540022607039827, + qx=0.3277491469609924, + qy=-0.6283486651480494, + qz=0.6206973014480826, + ), + "front_right_camera": StateSE3( + x=0.2546935700219631, + y=-0.24929449717803095, + z=-0.8686597280810242, + qw=0.3537633879725252, + qx=0.34931795852655334, + qy=0.6120314641083645, + qz=-0.6150170047424814, + ), + "left_camera": StateSE3( + x=0.23864835336611942, + y=-0.2801448284013492, + z=-0.5376795959387791, + qw=0.5050391917998245, + qx=0.49253073152800625, + qy=-0.4989265501075421, + qz=0.503409565706149, + ), + "right_camera": StateSE3( + x=-0.23097163411257893, + y=-0.30843497058841024, + z=-0.6850441215571058, + qw=0.5087448402081216, + qx=0.4947520981649951, + qy=0.4977829953071897, + qz=-0.49860920419297333, + ), +} + +# https://github.com/scaleapi/pandaset-devkit/blob/master/docs/static_extrinsic_calibration.yaml +PANDASET_CAMERA_INTRINSICS: Dict[str, PinholeIntrinsics] = { + "back_camera": PinholeIntrinsics(fx=933.4667, fy=934.6754, cx=896.4692, cy=507.3557), + "front_camera": PinholeIntrinsics(fx=1970.0131, fy=1970.0091, cx=970.0002, cy=483.2988), + "front_left_camera": PinholeIntrinsics(fx=929.8429, fy=930.0592, cx=972.1794, cy=508.0057), + "front_right_camera": PinholeIntrinsics(fx=930.0407, fy=930.0324, cx=965.0525, cy=463.4161), + "left_camera": PinholeIntrinsics(fx=930.4514, fy=930.0891, cx=991.6883, cy=541.6057), + "right_camera": PinholeIntrinsics(fx=922.5465, fy=922.4229, cx=945.057, cy=517.575), +} + +# https://github.com/scaleapi/pandaset-devkit/blob/master/docs/static_extrinsic_calibration.yaml +PANDASET_CAMERA_DISTORTIONS: Dict[str, PinholeDistortion] = { + "back_camera": PinholeDistortion.from_list([-0.1619, 0.0113, -0.00028815, -7.9827e-05, 0.0067]), + "front_camera": 
PinholeDistortion.from_list([-0.5894, 0.66, 0.0011, -0.001, -1.0088]), + "front_left_camera": PinholeDistortion.from_list([-0.165, 0.0099, -0.00075376, 5.3699e-05, 0.01]), + "front_right_camera": PinholeDistortion.from_list([-0.1614, -0.0027, -0.00029662, -0.00028927, 0.0181]), + "left_camera": PinholeDistortion.from_list([-0.1582, -0.0266, -0.00015221, 0.00059011, 0.0449]), + "right_camera": PinholeDistortion.from_list([-0.1648, 0.0191, 0.0027, -8.5282e-07, -9.6983e-05]), +} + + +PANDASET_LOG_NAMES: List[str] = [ + "001", + "002", + "003", + "004", + "005", + "006", + "008", + "011", + "012", + "013", + "014", + "015", + "016", + "017", + "018", + "019", + "020", + "021", + "023", + "024", + "027", + "028", + "029", + "030", + "032", + "033", + "034", + "035", + "037", + "038", + "039", + "040", + "041", + "042", + "043", + "044", + "045", + "046", + "047", + "048", + "050", + "051", + "052", + "053", + "054", + "055", + "056", + "057", + "058", + "059", + "062", + "063", + "064", + "065", + "066", + "067", + "068", + "069", + "070", + "071", + "072", + "073", + "074", + "077", + "078", + "079", + "080", + "084", + "085", + "086", + "088", + "089", + "090", + "091", + "092", + "093", + "094", + "095", + "097", + "098", + "099", + "100", + "101", + "102", + "103", + "104", + "105", + "106", + "109", + "110", + "112", + "113", + "115", + "116", + "117", + "119", + "120", + "122", + "123", + "124", + "139", + "149", + "158", +] diff --git a/src/py123d/conversion/datasets/pandaset/utils/pandaset_utlis.py b/src/py123d/conversion/datasets/pandaset/utils/pandaset_utlis.py new file mode 100644 index 00000000..68575e7e --- /dev/null +++ b/src/py123d/conversion/datasets/pandaset/utils/pandaset_utlis.py @@ -0,0 +1,84 @@ +import gzip +import json +import pickle +from pathlib import Path +from typing import Dict + +import numpy as np + +from py123d.geometry import StateSE3, Vector3D +from py123d.geometry.transform.transform_se3 import translate_se3_along_body_frame + + +def read_json(json_file: Path): + """Helper function to read a json file as dict.""" + with open(json_file, "r") as f: + json_data = json.load(f) + return json_data + + +def read_pkl_gz(pkl_gz_file: Path): + """Helper function to read a pkl.gz file as dict.""" + with gzip.open(pkl_gz_file, "rb") as f: + pkl_data = pickle.load(f) + return pkl_data + + +def pandaset_pose_dict_to_state_se3(pose_dict: Dict[str, Dict[str, float]]) -> StateSE3: + """Helper function for pandaset to convert a pose dict to StateSE3. + + :param pose_dict: The input pose dict. + :return: The converted StateSE3. + """ + return StateSE3( + x=pose_dict["position"]["x"], + y=pose_dict["position"]["y"], + z=pose_dict["position"]["z"], + qw=pose_dict["heading"]["w"], + qx=pose_dict["heading"]["x"], + qy=pose_dict["heading"]["y"], + qz=pose_dict["heading"]["z"], + ) + + +def rotate_pandaset_pose_to_iso_coordinates(pose: StateSE3) -> StateSE3: + """Helper function for pandaset to rotate a pose to ISO coordinate system (x: forward, y: left, z: up). + + NOTE: Pandaset uses a different coordinate system (x: right, y: forward, z: up). + [1] https://arxiv.org/pdf/2112.12610.pdf + + :param pose: The input pose. + :return: The rotated pose. 
+ """ + F = np.array( + [ + [0.0, 1.0, 0.0], # new X = old Y (forward) + [-1.0, 0.0, 0.0], # new Y = old -X (left) + [0.0, 0.0, 1.0], # new Z = old Z (up) + ], + dtype=np.float64, + ).T + transformation_matrix = pose.transformation_matrix.copy() + transformation_matrix[0:3, 0:3] = transformation_matrix[0:3, 0:3] @ F + + return StateSE3.from_transformation_matrix(transformation_matrix) + + +def main_lidar_to_rear_axle(pose: StateSE3) -> StateSE3: + + F = np.array( + [ + [0.0, 1.0, 0.0], # new X = old Y (forward) + [-1.0, 0.0, 0.0], # new Y = old X (left) + [0.0, 0.0, 1.0], # new Z = old Z (up) + ], + dtype=np.float64, + ).T + transformation_matrix = pose.transformation_matrix.copy() + transformation_matrix[0:3, 0:3] = transformation_matrix[0:3, 0:3] @ F + + rotated_pose = StateSE3.from_transformation_matrix(transformation_matrix) + + imu_pose = translate_se3_along_body_frame(rotated_pose, vector_3d=Vector3D(x=-0.840, y=0.0, z=0.0)) + + return imu_pose diff --git a/d123/common/geometry/transform/__init__.py b/src/py123d/conversion/datasets/wopd/__init__.py similarity index 100% rename from d123/common/geometry/transform/__init__.py rename to src/py123d/conversion/datasets/wopd/__init__.py diff --git a/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py b/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py new file mode 100644 index 00000000..efa577d6 --- /dev/null +++ b/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py @@ -0,0 +1,68 @@ +from typing import Dict, List + +from py123d.conversion.registry.box_detection_label_registry import WOPDBoxDetectionLabel +from py123d.datatypes.maps.map_datatypes import LaneType, RoadEdgeType, RoadLineType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType + +WOPD_AVAILABLE_SPLITS: List[str] = [ + "wopd_train", + "wopd_val", + "wopd_test", +] + +# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/label.proto#L63 +WOPD_DETECTION_NAME_DICT: Dict[int, WOPDBoxDetectionLabel] = { + 0: WOPDBoxDetectionLabel.TYPE_UNKNOWN, + 1: WOPDBoxDetectionLabel.TYPE_VEHICLE, + 2: WOPDBoxDetectionLabel.TYPE_PEDESTRIAN, + 3: WOPDBoxDetectionLabel.TYPE_SIGN, + 4: WOPDBoxDetectionLabel.TYPE_CYCLIST, +} + +# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L50 +WOPD_CAMERA_TYPES: Dict[int, PinholeCameraType] = { + 1: PinholeCameraType.PCAM_F0, # front_camera + 2: PinholeCameraType.PCAM_L0, # front_left_camera + 3: PinholeCameraType.PCAM_R0, # front_right_camera + 4: PinholeCameraType.PCAM_L1, # left_camera + 5: PinholeCameraType.PCAM_R1, # right_camera +} + +# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L66 +WOPD_LIDAR_TYPES: Dict[int, LiDARType] = { + 0: LiDARType.LIDAR_UNKNOWN, # UNKNOWN + 1: LiDARType.LIDAR_TOP, # TOP + 2: LiDARType.LIDAR_FRONT, # FRONT + 3: LiDARType.LIDAR_SIDE_LEFT, # SIDE_LEFT + 4: LiDARType.LIDAR_SIDE_RIGHT, # SIDE_RIGHT + 5: LiDARType.LIDAR_BACK, # REAR +} + +# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/protos/map.proto#L206 +WAYMO_ROAD_LINE_TYPE_CONVERSION: Dict[int, RoadLineType] = { + 0: RoadLineType.UNKNOWN, # UNKNOWN + 1: RoadLineType.DASHED_WHITE, # BROKEN_SINGLE_WHITE + 2: RoadLineType.SOLID_WHITE, # SOLID_SINGLE_WHITE + 3: RoadLineType.DOUBLE_SOLID_WHITE, # SOLID_DOUBLE_WHITE + 4: RoadLineType.DASHED_YELLOW, # BROKEN_SINGLE_YELLOW + 5: 
diff --git a/d123/common/geometry/transform/__init__.py b/src/py123d/conversion/datasets/wopd/__init__.py
similarity index 100%
rename from d123/common/geometry/transform/__init__.py
rename to src/py123d/conversion/datasets/wopd/__init__.py
diff --git a/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py b/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py
new file mode 100644
index 00000000..efa577d6
--- /dev/null
+++ b/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py
@@ -0,0 +1,68 @@
+from typing import Dict, List
+
+from py123d.conversion.registry.box_detection_label_registry import WOPDBoxDetectionLabel
+from py123d.datatypes.maps.map_datatypes import LaneType, RoadEdgeType, RoadLineType
+from py123d.datatypes.sensors.lidar import LiDARType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType
+
+WOPD_AVAILABLE_SPLITS: List[str] = [
+    "wopd_train",
+    "wopd_val",
+    "wopd_test",
+]
+
+# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/label.proto#L63
+WOPD_DETECTION_NAME_DICT: Dict[int, WOPDBoxDetectionLabel] = {
+    0: WOPDBoxDetectionLabel.TYPE_UNKNOWN,
+    1: WOPDBoxDetectionLabel.TYPE_VEHICLE,
+    2: WOPDBoxDetectionLabel.TYPE_PEDESTRIAN,
+    3: WOPDBoxDetectionLabel.TYPE_SIGN,
+    4: WOPDBoxDetectionLabel.TYPE_CYCLIST,
+}
+
+# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L50
+WOPD_CAMERA_TYPES: Dict[int, PinholeCameraType] = {
+    1: PinholeCameraType.PCAM_F0,  # front_camera
+    2: PinholeCameraType.PCAM_L0,  # front_left_camera
+    3: PinholeCameraType.PCAM_R0,  # front_right_camera
+    4: PinholeCameraType.PCAM_L1,  # left_camera
+    5: PinholeCameraType.PCAM_R1,  # right_camera
+}
+
+# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L66
+WOPD_LIDAR_TYPES: Dict[int, LiDARType] = {
+    0: LiDARType.LIDAR_UNKNOWN,  # UNKNOWN
+    1: LiDARType.LIDAR_TOP,  # TOP
+    2: LiDARType.LIDAR_FRONT,  # FRONT
+    3: LiDARType.LIDAR_SIDE_LEFT,  # SIDE_LEFT
+    4: LiDARType.LIDAR_SIDE_RIGHT,  # SIDE_RIGHT
+    5: LiDARType.LIDAR_BACK,  # REAR
+}
+
+# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/protos/map.proto#L206
+WAYMO_ROAD_LINE_TYPE_CONVERSION: Dict[int, RoadLineType] = {
+    0: RoadLineType.UNKNOWN,  # UNKNOWN
+    1: RoadLineType.DASHED_WHITE,  # BROKEN_SINGLE_WHITE
+    2: RoadLineType.SOLID_WHITE,  # SOLID_SINGLE_WHITE
+    3: RoadLineType.DOUBLE_SOLID_WHITE,  # SOLID_DOUBLE_WHITE
+    4: RoadLineType.DASHED_YELLOW,  # BROKEN_SINGLE_YELLOW
+    5: RoadLineType.DOUBLE_DASH_YELLOW,  # BROKEN_DOUBLE_YELLOW
+    6: RoadLineType.SOLID_YELLOW,  # SOLID_SINGLE_YELLOW
+    7: RoadLineType.DOUBLE_SOLID_YELLOW,  # SOLID_DOUBLE_YELLOW
+    8: RoadLineType.DOUBLE_DASH_YELLOW,  # PASSING_DOUBLE_YELLOW
+}
+
+# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/protos/map.proto#L186
+WAYMO_ROAD_EDGE_TYPE_CONVERSION: Dict[int, RoadEdgeType] = {
+    0: RoadEdgeType.UNKNOWN,
+    1: RoadEdgeType.ROAD_EDGE_BOUNDARY,
+    2: RoadEdgeType.ROAD_EDGE_MEDIAN,
+}
+
+# https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/protos/map.proto#L147
+WAYMO_LANE_TYPE_CONVERSION: Dict[int, LaneType] = {
+    0: LaneType.UNDEFINED,
+    1: LaneType.FREEWAY,
+    2: LaneType.SURFACE_STREET,
+    3: LaneType.BIKE_LANE,
+}
diff --git a/d123/dataset/dataset_specific/wopd/wopd_utils.py b/src/py123d/conversion/datasets/wopd/utils/wopd_utils.py
similarity index 96%
rename from d123/dataset/dataset_specific/wopd/wopd_utils.py
rename to src/py123d/conversion/datasets/wopd/utils/wopd_utils.py
index c9c302ef..e81597bf 100644
--- a/d123/dataset/dataset_specific/wopd/wopd_utils.py
+++ b/src/py123d/conversion/datasets/wopd/utils/wopd_utils.py
@@ -1,5 +1,8 @@
 from typing import Dict, List, Tuple
 
+from py123d.common.utils.dependencies import check_dependencies
+
+check_dependencies(modules=["tensorflow", "waymo_open_dataset"], optional_name="waymo")
 import tensorflow as tf
 from waymo_open_dataset import dataset_pb2
diff --git a/d123/dataset/dataset_specific/wopd/waymo_map_utils/womp_boundary_utils.py b/src/py123d/conversion/datasets/wopd/waymo_map_utils/womp_boundary_utils.py
similarity index 75%
rename from d123/dataset/dataset_specific/wopd/waymo_map_utils/womp_boundary_utils.py
rename to src/py123d/conversion/datasets/wopd/waymo_map_utils/womp_boundary_utils.py
index a2ba1ee2..e9e6afad 100644
--- a/d123/dataset/dataset_specific/wopd/waymo_map_utils/womp_boundary_utils.py
+++ b/src/py123d/conversion/datasets/wopd/waymo_map_utils/womp_boundary_utils.py
@@ -2,15 +2,13 @@
 from typing import Dict, List, Optional, Tuple
 
 import numpy as np
-import numpy.typing as npt
 import shapely.geometry as geom
 
-from d123.common.geometry.base import Point3D, StateSE2
-from d123.common.geometry.line.polylines import Polyline3D, PolylineSE2
-from d123.common.geometry.occupancy_map import OccupancyMap2D
-from d123.common.geometry.transform.tranform_2d import translate_along_yaw
-from d123.common.geometry.utils import normalize_angle
-from d123.common.geometry.vector import Vector2D
+from py123d.datatypes.maps.abstract_map_objects import AbstractRoadEdge, AbstractRoadLine
+from py123d.datatypes.maps.map_datatypes import LaneType
+from py123d.geometry import OccupancyMap2D, Point3D, Polyline3D, PolylineSE2, StateSE2, Vector2D
+from py123d.geometry.transform.transform_se2 import translate_se2_along_body_frame
+from py123d.geometry.utils.rotation_utils import normalize_angle
 
 MAX_LANE_WIDTH = 25.0  # meters
 MIN_LANE_WIDTH = 2.0
@@ -37,8 +35,33 @@ def get_polyline_from_token(polyline_dict: Dict[str, Dict[int, Polyline3D]], tok
     return polyline_dict[line_type][line_id]
 
 
+@dataclass
+class WaymoLaneData:
+    """Helper class to store lane data."""
+
+    # Regular lane features
+    # https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/protos/map.proto#L142
+    object_id: int
+    centerline: Polyline3D
+    predecessor_ids: List[int]
+    successor_ids: List[int]
+    speed_limit_mps: Optional[float]
+    lane_type: LaneType
+
+    # Waymo allows multiple left/right 
neighbors + # https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/protos/map.proto#L111 + left_neighbors: List[Dict[str, int]] + right_neighbors: List[Dict[str, int]] + + # To be filled + left_boundary: Optional[Polyline3D] = None + right_boundary: Optional[Polyline3D] = None + + @dataclass class PerpendicularHit: + """Helper class to store perpendicular hit data.""" + distance_along_perp_2d: float hit_point_3d: Point3D hit_polyline_token: str @@ -67,10 +90,10 @@ def _collect_perpendicular_hits( assert sign in [1.0, -1.0], "Sign must be either 1.0 (left) or -1.0 (right)" # perp_start_point = translate_along_yaw(lane_query_se2, Vector2D(0.0, sign * PERP_START_OFFSET)) perp_start_point = lane_query_se2 - perp_end_point = translate_along_yaw(lane_query_se2, Vector2D(0.0, sign * MAX_LANE_WIDTH / 2.0)) + perp_end_point = translate_se2_along_body_frame(lane_query_se2, Vector2D(0.0, sign * MAX_LANE_WIDTH / 2.0)) perp_linestring = geom.LineString([[perp_start_point.x, perp_start_point.y], [perp_end_point.x, perp_end_point.y]]) - lane_linestring = occupancy_2d.geometries[occupancy_2d.token_to_idx[lane_token]] + lane_linestring = occupancy_2d.geometries[occupancy_2d.id_to_idx[lane_token]] # 1. find intersecting lines, compute 3D distance intersecting_tokens = occupancy_2d.intersects(perp_linestring) @@ -78,7 +101,7 @@ def _collect_perpendicular_hits( perpendicular_hits: List[PerpendicularHit] = [] for intersecting_token in intersecting_tokens: intersecting_polyline_3d = get_polyline_from_token(polyline_dict, intersecting_token) - intersecting_linestring = occupancy_2d.geometries[occupancy_2d.token_to_idx[intersecting_token]] + intersecting_linestring = occupancy_2d.geometries[occupancy_2d.id_to_idx[intersecting_token]] centerline_hit_crossing: bool = ( lane_linestring.intersects(intersecting_linestring) if intersecting_token.startswith("lane_") else False ) @@ -134,7 +157,7 @@ def _filter_perpendicular_hits( continue # 2. filter hits that are too close and not with the road edge (e.g. close lane lines) - if hit.distance_along_perp_2d < MIN_HIT_DISTANCE and hit.hit_polyline_type != "roadedge": + if hit.distance_along_perp_2d < MIN_HIT_DISTANCE and hit.hit_polyline_type != "road-edge": continue filtered_hits.append(hit) @@ -145,28 +168,31 @@ def _filter_perpendicular_hits( return filtered_hits -def extract_lane_boundaries( - lanes: Dict[int, npt.NDArray[np.float64]], - lanes_successors: Dict[int, List[int]], - lanes_predecessors: Dict[int, List[int]], - road_lines: Dict[int, npt.NDArray[np.float64]], - road_edges: Dict[int, npt.NDArray[np.float64]], +def fill_lane_boundaries( + lane_data_dict: Dict[int, WaymoLaneData], + road_lines: List[AbstractRoadLine], + road_edges: List[AbstractRoadEdge], ) -> Tuple[Dict[str, Polyline3D], Dict[str, Polyline3D]]: - polyline_dict: Dict[str, Dict[int, Polyline3D]] = {"lane": {}, "roadline": {}, "roadedge": {}} + """Welcome to insanity. 
+
+    :param lane_data_dict: Dict mapping lane id to the WaymoLaneData helper class
+    :param road_lines: List of AbstractRoadLine objects
+    :param road_edges: List of AbstractRoadEdge objects
+    :return: Tuple of left and right lane boundaries as 3D polylines
+    """
+
+    polyline_dict: Dict[str, Dict[int, Polyline3D]] = {"lane": {}, "road-line": {}, "road-edge": {}}
     lane_polyline_se2_dict: Dict[int, PolylineSE2] = {}
-    for lane_id, lane_polyline in lanes.items():
-        if lane_polyline.ndim == 2 and lane_polyline.shape[1] == 3 and len(lane_polyline) > 0:
-            polyline_dict["lane"][lane_id] = Polyline3D.from_array(lane_polyline)
-            lane_polyline_se2_dict[f"lane_{lane_id}"] = polyline_dict["lane"][lane_id].polyline_se2
+    for lane_id, lane in lane_data_dict.items():
+        polyline_dict["lane"][lane_id] = lane.centerline
+        lane_polyline_se2_dict[f"lane_{lane_id}"] = lane.centerline.polyline_se2
 
-    # for road_line_id, road_line_polyline in road_lines.items():
-    #     if road_line_polyline.ndim == 2 and road_line_polyline.shape[1] == 3 and len(road_line_polyline) > 0:
-    #         polyline_dict["roadline"][road_line_id] = Polyline3D.from_array(road_line_polyline)
+    # for road_line in road_lines:
+    #     polyline_dict["road-line"][road_line.object_id] = road_line.polyline_3d
 
-    for road_edge_id, road_edge_polyline in road_edges.items():
-        if road_edge_polyline.ndim == 2 and road_edge_polyline.shape[1] == 3 and len(road_edge_polyline) > 0:
-            polyline_dict["roadedge"][road_edge_id] = Polyline3D.from_array(road_edge_polyline)
+    for road_edge in road_edges:
+        polyline_dict["road-edge"][road_edge.object_id] = road_edge.polyline_3d
 
     geometries = []
     tokens = []
@@ -221,23 +247,30 @@
             first_hit = perpendicular_hits[0]
 
             # 1.1. If the first hit is a road edge, use it as the boundary point
-            if first_hit.hit_polyline_type == "roadedge":
+            if first_hit.hit_polyline_type == "road-edge":
                 boundary_point_3d = first_hit.hit_point_3d
-            elif first_hit.hit_polyline_type == "roadline":
+            elif first_hit.hit_polyline_type == "road-line":
                 boundary_point_3d = first_hit.hit_point_3d
             elif first_hit.hit_polyline_type == "lane":
                 for hit in perpendicular_hits:
-                    if hit.hit_polyline_type == "roadedge":
+                    if hit.hit_polyline_type == "road-edge":
                         continue
                     if hit.hit_polyline_type == "lane":
                         has_same_predecessor = (
-                            len(set(lanes_predecessors[hit.hit_polyline_id]) & set(lanes_predecessors[lane_id]))
+                            len(
+                                set(lane_data_dict[hit.hit_polyline_id].predecessor_ids)
+                                & set(lane_data_dict[lane_id].predecessor_ids)
+                            )
                             > 0
                         )
                         has_same_successor = (
-                            len(set(lanes_successors[hit.hit_polyline_id]) & set(lanes_successors[lane_id])) > 0
+                            len(
+                                set(lane_data_dict[hit.hit_polyline_id].successor_ids)
+                                & set(lane_data_dict[lane_id].successor_ids)
+                            )
+                            > 0
                         )
                         heading_min = np.pi / 8.0
                         invalid_heading_error = heading_min < abs(hit.heading_error) < (np.pi - heading_min)
@@ -264,7 +299,9 @@ def _get_default_boundary_point_3d(
             lane_query_se2: StateSE2, lane_query_3d: Point3D, sign: float
         ) -> Point3D:
             perp_boundary_distance = DEFAULT_LANE_WIDTH / 2.0
-            boundary_point_se2 = translate_along_yaw(lane_query_se2, Vector2D(0.0, sign * perp_boundary_distance))
+            boundary_point_se2 = translate_se2_along_body_frame(
+                lane_query_se2, Vector2D(0.0, sign * perp_boundary_distance)
+            )
             return Point3D(boundary_point_se2.x, boundary_point_se2.y, lane_query_3d.z)
 
         if no_boundary_ratio > 0.8:
@@ -285,11 +322,11 @@
         if len(final_boundary_points_3d) > 1:
             if sign == 1.0:
-                left_boundaries[lane_id] = Polyline3D.from_array(
+                lane_data_dict[lane_id].left_boundary = Polyline3D.from_array(
                     np.array(final_boundary_points_3d, dtype=np.float64)
                 )
             else:
-                right_boundaries[lane_id] = Polyline3D.from_array(
+                lane_data_dict[lane_id].right_boundary = Polyline3D.from_array(
                     np.array(final_boundary_points_3d, dtype=np.float64)
                 )
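The boundary search in `womp_boundary_utils.py` above casts a perpendicular probe from each centerline pose out to half of `MAX_LANE_WIDTH` and keeps the nearest crossing with a boundary polyline. A minimal standalone sketch of that idea with shapely (illustrative names and data, not the library API):

```python
import math

import shapely.geometry as geom

MAX_LANE_WIDTH = 25.0  # meters, same constant as above


def nearest_side_hit(x: float, y: float, yaw: float, boundaries: dict, sign: float = 1.0):
    """Probe sideways from a centerline pose and return (distance, token, point) of the closest hit."""
    # Body-frame +y is 90 degrees counter-clockwise from the heading; sign flips left/right.
    angle = yaw + sign * math.pi / 2.0
    end = (x + math.cos(angle) * MAX_LANE_WIDTH / 2.0, y + math.sin(angle) * MAX_LANE_WIDTH / 2.0)
    probe = geom.LineString([(x, y), end])
    origin = geom.Point(x, y)

    hits = []
    for token, line in boundaries.items():
        crossing = probe.intersection(line)
        if crossing.is_empty:
            continue
        # A crossing may be a point or a multi-part geometry; keep the closest part.
        parts = getattr(crossing, "geoms", [crossing])
        point = min(parts, key=origin.distance)
        hits.append((origin.distance(point), token, point))
    return min(hits, default=None)


boundaries = {"road-edge_1": geom.LineString([(-5.0, 2.0), (10.0, 2.0)])}
print(nearest_side_hit(0.0, 0.0, 0.0, boundaries))  # left probe hits the edge ~2 m away
```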
diff --git a/src/py123d/conversion/datasets/wopd/waymo_map_utils/wopd_map_utils.py b/src/py123d/conversion/datasets/wopd/waymo_map_utils/wopd_map_utils.py
new file mode 100644
index 00000000..741d9de7
--- /dev/null
+++ b/src/py123d/conversion/datasets/wopd/waymo_map_utils/wopd_map_utils.py
@@ -0,0 +1,234 @@
+from typing import Dict, List, Optional
+
+import numpy as np
+
+from py123d.common.utils.dependencies import check_dependencies
+from py123d.conversion.datasets.wopd.utils.wopd_constants import (
+    WAYMO_LANE_TYPE_CONVERSION,
+    WAYMO_ROAD_EDGE_TYPE_CONVERSION,
+    WAYMO_ROAD_LINE_TYPE_CONVERSION,
+)
+from py123d.conversion.datasets.wopd.waymo_map_utils.womp_boundary_utils import WaymoLaneData, fill_lane_boundaries
+from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
+from py123d.datatypes.maps.abstract_map_objects import AbstractLane, AbstractRoadEdge, AbstractRoadLine
+from py123d.datatypes.maps.cache.cache_map_objects import (
+    CacheCarpark,
+    CacheCrosswalk,
+    CacheLane,
+    CacheLaneGroup,
+    CacheRoadEdge,
+    CacheRoadLine,
+)
+from py123d.datatypes.maps.map_datatypes import LaneType, RoadEdgeType, RoadLineType
+from py123d.geometry import Polyline3D
+from py123d.geometry.utils.units import mph_to_mps
+
+check_dependencies(modules=["waymo_open_dataset"], optional_name="waymo")
+from waymo_open_dataset import dataset_pb2
+
+# TODO:
+# - Implement stop signs
+# - Implement speed bumps
+# - Implement driveways with a different semantic type if needed
+# - Implement intersections and lane group logic
+
+
+def convert_wopd_map(frame: dataset_pb2.Frame, map_writer: AbstractMapWriter) -> None:
+
+    # We first extract all road lines, road edges, and lanes, and write them to the map writer.
+    # NOTE: Road lines and edges are needed to extract the lane boundaries.
+    road_lines = _write_and_get_waymo_road_lines(frame, map_writer)
+    road_edges = _write_and_get_waymo_road_edges(frame, map_writer)
+    lanes = _write_and_get_waymo_lanes(frame, road_lines, road_edges, map_writer)
+
+    # Write lane groups based on the extracted lanes
+    _write_waymo_lane_groups(lanes, map_writer)
+
+    # Write miscellaneous surfaces (carparks, crosswalks, stop zones, etc.) directly from the Waymo frame proto
+    _write_waymo_misc_surfaces(frame, map_writer)
+
+
+def _write_and_get_waymo_road_lines(frame: dataset_pb2.Frame, map_writer: AbstractMapWriter) -> List[AbstractRoadLine]:
+    """Helper function to extract road lines from a Waymo frame proto."""
+
+    road_lines: List[AbstractRoadLine] = []
+    for map_feature in frame.map_features:
+        if map_feature.HasField("road_line"):
+            polyline = _extract_polyline_waymo_proto(map_feature.road_line)
+            if polyline is not None:
+                road_line_type = WAYMO_ROAD_LINE_TYPE_CONVERSION.get(map_feature.road_line.type, RoadLineType.UNKNOWN)
+                road_lines.append(
+                    CacheRoadLine(
+                        object_id=map_feature.id,
+                        road_line_type=road_line_type,
+                        polyline=polyline,
+                    )
+                )
+
+    for road_line in road_lines:
+        map_writer.write_road_line(road_line)
+
+    return road_lines
+
+
+def _write_and_get_waymo_road_edges(frame: dataset_pb2.Frame, map_writer: AbstractMapWriter) -> List[AbstractRoadEdge]:
+    """Helper function to extract road edges from a Waymo frame proto."""
+
+    road_edges: List[AbstractRoadEdge] = []
+    for map_feature in frame.map_features:
+        if map_feature.HasField("road_edge"):
+            polyline = _extract_polyline_waymo_proto(map_feature.road_edge)
+            if polyline is not None:
+                road_edge_type = WAYMO_ROAD_EDGE_TYPE_CONVERSION.get(map_feature.road_edge.type, RoadEdgeType.UNKNOWN)
+                road_edges.append(
+                    CacheRoadEdge(
+                        object_id=map_feature.id,
+                        road_edge_type=road_edge_type,
+                        polyline=polyline,
+                    )
+                )
+
+    for road_edge in road_edges:
+        map_writer.write_road_edge(road_edge)
+
+    return road_edges
+
+
+def _write_and_get_waymo_lanes(
+    frame: dataset_pb2.Frame,
+    road_lines: List[AbstractRoadLine],
+    road_edges: List[AbstractRoadEdge],
+    map_writer: AbstractMapWriter,
+) -> List[AbstractLane]:
+
+    # 1. Load lane data from Waymo frame proto
+    lane_data_dict: Dict[int, WaymoLaneData] = {}
+    for map_feature in frame.map_features:
+        if map_feature.HasField("lane"):
+            centerline = _extract_polyline_waymo_proto(map_feature.lane)
+
+            # In case of an invalid lane, skip it
+            if centerline is None:
+                continue
+
+            speed_limit_mps = mph_to_mps(map_feature.lane.speed_limit_mph)
+            speed_limit_mps = speed_limit_mps if speed_limit_mps > 0.0 else None
+
+            lane_data_dict[map_feature.id] = WaymoLaneData(
+                object_id=map_feature.id,
+                centerline=centerline,
+                predecessor_ids=[int(lane_id_) for lane_id_ in map_feature.lane.entry_lanes],
+                successor_ids=[int(lane_id_) for lane_id_ in map_feature.lane.exit_lanes],
+                speed_limit_mps=speed_limit_mps,
+                lane_type=WAYMO_LANE_TYPE_CONVERSION.get(map_feature.lane.type, LaneType.UNDEFINED),
+                left_neighbors=_extract_lane_neighbors(map_feature.lane.left_neighbors),
+                right_neighbors=_extract_lane_neighbors(map_feature.lane.right_neighbors),
+            )
+
+    # 2. Process lane data to fill in left/right boundaries
+    fill_lane_boundaries(lane_data_dict, road_lines, road_edges)
+
+    def _get_majority_neighbor(neighbors: List[Dict[str, int]]) -> Optional[int]:
+        if len(neighbors) == 0:
+            return None
+        length = {
+            neighbor["lane_id"]: neighbor["self_end_index"] - neighbor["self_start_index"] for neighbor in neighbors
+        }
+        return max(length, key=length.get)
+
+    lanes: List[AbstractLane] = []
+    for lane_data in lane_data_dict.values():
+
+        # Skip lanes without boundaries
+        if lane_data.left_boundary is None or lane_data.right_boundary is None:
+            continue
+
+        lanes.append(
+            CacheLane(
+                object_id=lane_data.object_id,
+                lane_group_id=lane_data.object_id,
+                left_boundary=lane_data.left_boundary,
+                right_boundary=lane_data.right_boundary,
+                centerline=lane_data.centerline,
+                left_lane_id=_get_majority_neighbor(lane_data.left_neighbors),
+                right_lane_id=_get_majority_neighbor(lane_data.right_neighbors),
+                predecessor_ids=lane_data.predecessor_ids,
+                successor_ids=lane_data.successor_ids,
+                speed_limit_mps=lane_data.speed_limit_mps,
+            )
+        )
+
+    for lane in lanes:
+        map_writer.write_lane(lane)
+
+    return lanes
+
+
+def _write_waymo_lane_groups(lanes: List[AbstractLane], map_writer: AbstractMapWriter) -> None:
+
+    # NOTE: WOPD does not provide lane groups, so we create a lane group for each lane.
+    for lane in lanes:
+        map_writer.write_lane_group(
+            CacheLaneGroup(
+                object_id=lane.object_id,
+                lane_ids=[lane.object_id],
+                left_boundary=lane.left_boundary,
+                right_boundary=lane.right_boundary,
+                intersection_id=None,
+                predecessor_ids=lane.predecessor_ids,
+                successor_ids=lane.successor_ids,
+                outline=lane.outline_3d,
+            )
+        )
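The `_get_majority_neighbor` helper above resolves Waymo's multiple left/right neighbors to a single lane id by keeping the neighbor with the longest index overlap along the current centerline. A worked example with the same dict layout that `_extract_lane_neighbors` produces (ids and indices are made up):

```python
left_neighbors = [
    {"lane_id": 17, "self_start_index": 0, "self_end_index": 12},   # overlaps 12 centerline points
    {"lane_id": 23, "self_start_index": 12, "self_end_index": 40},  # overlaps 28 -> majority neighbor
]

overlap = {n["lane_id"]: n["self_end_index"] - n["self_start_index"] for n in left_neighbors}
assert max(overlap, key=overlap.get) == 23
```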
+
+
+def _write_waymo_misc_surfaces(frame: dataset_pb2.Frame, map_writer: AbstractMapWriter) -> None:
+
+    for map_feature in frame.map_features:
+        if map_feature.HasField("driveway"):
+            # NOTE: We currently classify driveways as carparks.
+            outline = _extract_outline_from_waymo_proto(map_feature.driveway)
+            if outline is not None:
+                map_writer.write_carpark(CacheCarpark(object_id=map_feature.id, outline=outline))
+        elif map_feature.HasField("crosswalk"):
+            outline = _extract_outline_from_waymo_proto(map_feature.crosswalk)
+            if outline is not None:
+                map_writer.write_crosswalk(CacheCrosswalk(object_id=map_feature.id, outline=outline))
+
+        elif map_feature.HasField("stop_sign"):
+            pass  # TODO: Implement stop signs
+        elif map_feature.HasField("speed_bump"):
+            pass  # TODO: Implement speed bumps
+
+
+def _extract_polyline_waymo_proto(data) -> Optional[Polyline3D]:
+    polyline: Optional[Polyline3D] = None
+    polyline_array = np.array([[p.x, p.y, p.z] for p in data.polyline], dtype=np.float64)
+    if polyline_array.ndim == 2 and polyline_array.shape[1] == 3 and len(polyline_array) >= 2:
+        # NOTE: A valid polyline must have at least 2 points, be 3D, and be non-empty
+        polyline = Polyline3D.from_array(polyline_array)
+    return polyline
+
+
+def _extract_outline_from_waymo_proto(data) -> Optional[Polyline3D]:
+    outline: Optional[Polyline3D] = None
+    outline_array = np.array([[p.x, p.y, p.z] for p in data.polygon], dtype=np.float64)
+    if outline_array.ndim == 2 and outline_array.shape[0] >= 3 and outline_array.shape[1] == 3:
+        # NOTE: A valid polygon outline must have at least 3 points, be 3D, and be non-empty
+        outline = Polyline3D.from_array(outline_array)
+    return outline
+
+
+def _extract_lane_neighbors(data) -> List[Dict[str, int]]:
+    neighbors = []
+    for neighbor in data:
+        neighbors.append(
+            {
+                "lane_id": neighbor.feature_id,
+                "self_start_index": neighbor.self_start_index,
+                "self_end_index": neighbor.self_end_index,
+                "neighbor_start_index": neighbor.neighbor_start_index,
+                "neighbor_end_index": neighbor.neighbor_end_index,
+            }
+        )
+    return neighbors
diff --git a/src/py123d/conversion/datasets/wopd/waymo_sensor_io.py b/src/py123d/conversion/datasets/wopd/waymo_sensor_io.py
new file mode 100644
index 00000000..cca4bc53
--- /dev/null
+++ b/src/py123d/conversion/datasets/wopd/waymo_sensor_io.py
@@ -0,0 +1,70 @@
+from pathlib import Path
+from typing import Dict, Optional
+
+import numpy as np
+
+from py123d.common.utils.dependencies import check_dependencies
+from py123d.conversion.datasets.wopd.utils.wopd_constants import WOPD_CAMERA_TYPES, WOPD_LIDAR_TYPES
+from py123d.conversion.datasets.wopd.utils.wopd_utils import parse_range_image_and_camera_projection
+from py123d.datatypes.sensors.lidar import LiDARType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType
+
+check_dependencies(modules=["tensorflow", "waymo_open_dataset"], optional_name="waymo")
+import tensorflow as tf
+from waymo_open_dataset import dataset_pb2
+from waymo_open_dataset.utils import frame_utils
+
+
+def _get_frame_at_iteration(filepath: Path, iteration: int) -> Optional[dataset_pb2.Frame]:
+    """Helper function to load a Waymo Frame at a specific iteration from a TFRecord file."""
+    dataset = tf.data.TFRecordDataset(str(filepath), compression_type="")
+
+    frame: Optional[dataset_pb2.Frame] = None
+    for i, data in enumerate(dataset):
+        if i == iteration:
+            frame = dataset_pb2.Frame()
+            frame.ParseFromString(data.numpy())
+            break
+    return frame
+
+
+def load_jpeg_binary_from_tf_record_file(
+    tf_record_path: Path,
+    iteration: int,
+    pinhole_camera_type: PinholeCameraType,
+) -> Optional[bytes]:
+    frame = _get_frame_at_iteration(tf_record_path, iteration)
+    assert frame is not None, f"Frame at iteration {iteration} not found in Waymo file: 
{tf_record_path}" + + jpeg_binary: Optional[bytes] = None + for image_proto in frame.images: + camera_type = WOPD_CAMERA_TYPES[image_proto.name] + if camera_type == pinhole_camera_type: + jpeg_binary = image_proto.image + break + return jpeg_binary + + +def load_wopd_lidar_pcs_from_file( + tf_record_path: Path, index: int, keep_polar_features: bool = False +) -> Dict[LiDARType, np.ndarray]: + + frame = _get_frame_at_iteration(tf_record_path, index) + assert frame is not None, f"Frame at iteration {index} not found in Waymo file: {tf_record_path}" + + (range_images, camera_projections, _, range_image_top_pose) = parse_range_image_and_camera_projection(frame) + + points, cp_points = frame_utils.convert_range_image_to_point_cloud( + frame=frame, + range_images=range_images, + camera_projections=camera_projections, + range_image_top_pose=range_image_top_pose, + keep_polar_features=keep_polar_features, + ) + + lidar_pcs_dict: Dict[LiDARType, np.ndarray] = {} + for lidar_idx, frame_lidar in enumerate(frame.lasers): + lidar_type = WOPD_LIDAR_TYPES[frame_lidar.name] + lidar_pcs_dict[lidar_type] = np.array(points[lidar_idx], dtype=np.float32) + + return lidar_pcs_dict diff --git a/src/py123d/conversion/datasets/wopd/wopd_converter.py b/src/py123d/conversion/datasets/wopd/wopd_converter.py new file mode 100644 index 00000000..c5d7a411 --- /dev/null +++ b/src/py123d/conversion/datasets/wopd/wopd_converter.py @@ -0,0 +1,441 @@ +import logging +import os +import traceback +from pathlib import Path +from typing import Dict, List, Optional, Tuple, Union + +import numpy as np +import numpy.typing as npt + +from py123d.common.utils.dependencies import check_dependencies +from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter +from py123d.conversion.dataset_converter_config import DatasetConverterConfig +from py123d.conversion.datasets.wopd.utils.wopd_constants import ( + WOPD_AVAILABLE_SPLITS, + WOPD_CAMERA_TYPES, + WOPD_DETECTION_NAME_DICT, + WOPD_LIDAR_TYPES, +) +from py123d.conversion.datasets.wopd.waymo_map_utils.wopd_map_utils import convert_wopd_map +from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, CameraData, LiDARData +from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from py123d.conversion.registry.box_detection_label_registry import WOPDBoxDetectionLabel +from py123d.conversion.registry.lidar_index_registry import DefaultLiDARIndex, WOPDLiDARIndex +from py123d.conversion.utils.sensor_utils.camera_conventions import CameraConvention, convert_camera_convention +from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper +from py123d.datatypes.maps.map_metadata import MapMetadata +from py123d.datatypes.scene.scene_metadata import LogMetadata +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import ( + PinholeCameraMetadata, + PinholeCameraType, + PinholeDistortion, + PinholeIntrinsics, +) +from py123d.datatypes.time.time_point import TimePoint +from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 +from py123d.datatypes.vehicle_state.vehicle_parameters import get_wopd_chrysler_pacifica_parameters +from py123d.geometry import ( + BoundingBoxSE3, + BoundingBoxSE3Index, + EulerAngles, + EulerAnglesIndex, + StateSE3, + StateSE3Index, + Vector3D, + Vector3DIndex, +) +from py123d.geometry.transform.transform_se3 import convert_relative_to_absolute_se3_array +from 
py123d.geometry.utils.constants import DEFAULT_PITCH, DEFAULT_ROLL
+from py123d.geometry.utils.rotation_utils import (
+    get_euler_array_from_quaternion_array,
+    get_quaternion_array_from_euler_array,
+)
+
+check_dependencies(modules=["tensorflow", "waymo_open_dataset"], optional_name="waymo")
+import tensorflow as tf
+from waymo_open_dataset import dataset_pb2
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
+tf.config.set_visible_devices(tf.config.list_physical_devices("CPU"))
+
+logger = logging.getLogger(__name__)
+
+
+class WOPDConverter(AbstractDatasetConverter):
+    def __init__(
+        self,
+        splits: List[str],
+        wopd_data_root: Union[Path, str],
+        zero_roll_pitch: bool,
+        keep_polar_features: bool,
+        add_map_pose_offset: bool,
+        dataset_converter_config: DatasetConverterConfig,
+    ) -> None:
+        super().__init__(dataset_converter_config)
+        for split in splits:
+            assert (
+                split in WOPD_AVAILABLE_SPLITS
+            ), f"Split {split} is not available. Available splits: {WOPD_AVAILABLE_SPLITS}"
+
+        self._splits: List[str] = splits
+        self._wopd_data_root: Path = Path(wopd_data_root)
+        self._zero_roll_pitch: bool = zero_roll_pitch
+        self._keep_polar_features: bool = keep_polar_features
+        self._add_map_pose_offset: bool = add_map_pose_offset  # TODO: Implement this feature
+
+        self._split_tf_record_pairs: List[Tuple[str, Path]] = self._collect_split_tf_record_pairs()
+
+    def _collect_split_tf_record_pairs(self) -> List[Tuple[str, Path]]:
+        """Helper to collect pairs of split name and the corresponding TFRecord file path."""
+
+        split_tf_record_pairs: List[Tuple[str, Path]] = []
+        split_name_mapping: Dict[str, str] = {
+            "wopd_train": "training",
+            "wopd_val": "validation",
+            "wopd_test": "testing",
+        }
+
+        for split in self._splits:
+            assert split in split_name_mapping.keys()
+            split_folder = self._wopd_data_root / split_name_mapping[split]
+            source_log_paths = [log_file for log_file in split_folder.glob("*.tfrecord")]
+            for source_log_path in source_log_paths:
+                split_tf_record_pairs.append((split, source_log_path))
+
+        return split_tf_record_pairs
+
+    def get_number_of_maps(self) -> int:
+        """Inherited, see superclass."""
+        return len(self._split_tf_record_pairs)
+
+    def get_number_of_logs(self) -> int:
+        """Inherited, see superclass."""
+        return len(self._split_tf_record_pairs)
+
+    def convert_map(self, map_index: int, map_writer: AbstractMapWriter) -> None:
+        """Inherited, see superclass."""
+        split, source_tf_record_path = self._split_tf_record_pairs[map_index]
+        initial_frame = _get_initial_frame_from_tfrecord(source_tf_record_path)
+
+        map_metadata = _get_wopd_map_metadata(initial_frame, split)
+        map_needs_writing = map_writer.reset(self.dataset_converter_config, map_metadata)
+        if map_needs_writing:
+            convert_wopd_map(initial_frame, map_writer)
+
+        map_writer.close()
+
+    def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None:
+        """Inherited, see superclass."""
+
+        split, source_tf_record_path = self._split_tf_record_pairs[log_index]
+
+        initial_frame = _get_initial_frame_from_tfrecord(source_tf_record_path, keep_dataset=False)
+        log_name = str(initial_frame.context.name)
+        dataset = tf.data.TFRecordDataset(source_tf_record_path, compression_type="")
+
+        # 1. 
Initialize Metadata + log_metadata = LogMetadata( + dataset="wopd", + split=split, + log_name=log_name, + location=str(initial_frame.context.stats.location), + timestep_seconds=0.1, + vehicle_parameters=get_wopd_chrysler_pacifica_parameters(), + box_detection_label_class=WOPDBoxDetectionLabel, + pinhole_camera_metadata=_get_wopd_camera_metadata( + initial_frame, + self.dataset_converter_config, + ), + lidar_metadata=_get_wopd_lidar_metadata( + initial_frame, + self._keep_polar_features, + self.dataset_converter_config, + ), + map_metadata=_get_wopd_map_metadata(initial_frame, split), + ) + + # 2. Prepare log writer + log_needs_writing = log_writer.reset(self.dataset_converter_config, log_metadata) + + # 3. Process source log data + if log_needs_writing: + try: + for frame_idx, data in enumerate(dataset): + frame = dataset_pb2.Frame() + frame.ParseFromString(data.numpy()) + + map_pose_offset: Vector3D = Vector3D(0.0, 0.0, 0.0) + if self._add_map_pose_offset: + map_pose_offset = Vector3D( + x=frame.map_pose_offset.x, + y=frame.map_pose_offset.y, + z=frame.map_pose_offset.z, + ) + + log_writer.write( + timestamp=TimePoint.from_us(frame.timestamp_micros), + ego_state=_extract_wopd_ego_state(frame, map_pose_offset), + box_detections=_extract_wopd_box_detections(frame, map_pose_offset, self._zero_roll_pitch), + traffic_lights=None, # TODO: Check if WOPD has traffic light information + pinhole_cameras=_extract_wopd_cameras(frame, self.dataset_converter_config), + lidars=_extract_wopd_lidars( + frame, + self._keep_polar_features, + frame_idx, + self.dataset_converter_config, + source_tf_record_path, + self._wopd_data_root, + ), + ) + except Exception as e: + logger.error(f"Error processing log {log_name}: {e}") + traceback.print_exc() + + log_writer.close() + + +def _get_initial_frame_from_tfrecord( + tf_record_path: Path, + keep_dataset: bool = False, +) -> Union[dataset_pb2.Frame, Tuple[dataset_pb2.Frame, tf.data.TFRecordDataset]]: + dataset = tf.data.TFRecordDataset(tf_record_path, compression_type="") + for data in dataset: + initial_frame = dataset_pb2.Frame() + initial_frame.ParseFromString(data.numpy()) + break + + if keep_dataset: + return initial_frame, dataset + + del dataset + return initial_frame + + +def _get_wopd_map_metadata(initial_frame: dataset_pb2.Frame, split: str) -> MapMetadata: + + map_metadata = MapMetadata( + dataset="wopd", + split=split, + log_name=str(initial_frame.context.name), + location=None, # TODO: Add location information. 
+        map_has_z=True,
+        map_is_local=True,  # True, if map is per log
+    )
+
+    return map_metadata
+
+
+def _get_wopd_camera_metadata(
+    initial_frame: dataset_pb2.Frame, dataset_converter_config: DatasetConverterConfig
+) -> Dict[PinholeCameraType, PinholeCameraMetadata]:
+
+    camera_metadata_dict: Dict[PinholeCameraType, PinholeCameraMetadata] = {}
+
+    if dataset_converter_config.pinhole_camera_store_option is not None:
+        for calibration in initial_frame.context.camera_calibrations:
+            camera_type = WOPD_CAMERA_TYPES[calibration.name]
+            # https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L96
+            # https://github.com/waymo-research/waymo-open-dataset/issues/834#issuecomment-2134995440
+            fx, fy, cx, cy, k1, k2, p1, p2, k3 = calibration.intrinsic
+            intrinsics = PinholeIntrinsics(fx=fx, fy=fy, cx=cx, cy=cy)
+            distortion = PinholeDistortion(k1=k1, k2=k2, p1=p1, p2=p2, k3=k3)
+            if camera_type in WOPD_CAMERA_TYPES.values():
+                camera_metadata_dict[camera_type] = PinholeCameraMetadata(
+                    camera_type=camera_type,
+                    width=calibration.width,
+                    height=calibration.height,
+                    intrinsics=intrinsics,
+                    distortion=distortion,
+                )
+
+    return camera_metadata_dict
+
+
+def _get_wopd_lidar_metadata(
+    initial_frame: dataset_pb2.Frame,
+    keep_polar_features: bool,
+    dataset_converter_config: DatasetConverterConfig,
+) -> Dict[LiDARType, LiDARMetadata]:
+
+    laser_metadatas: Dict[LiDARType, LiDARMetadata] = {}
+
+    # NOTE: Use the WOPD-specific point layout when polar features are kept, otherwise the default layout.
+    lidar_index = WOPDLiDARIndex if keep_polar_features else DefaultLiDARIndex
+    if dataset_converter_config.lidar_store_option is not None:
+        for laser_calibration in initial_frame.context.laser_calibrations:
+
+            lidar_type = WOPD_LIDAR_TYPES[laser_calibration.name]
+
+            extrinsic: Optional[StateSE3] = None
+            if laser_calibration.extrinsic:
+                extrinsic_transform = np.array(laser_calibration.extrinsic.transform, dtype=np.float64).reshape(4, 4)
+                extrinsic = StateSE3.from_transformation_matrix(extrinsic_transform)
+
+            laser_metadatas[lidar_type] = LiDARMetadata(
+                lidar_type=lidar_type,
+                lidar_index=lidar_index,
+                extrinsic=extrinsic,
+            )
+
+    return laser_metadatas
+
+
+def _get_ego_pose_se3(frame: dataset_pb2.Frame, map_pose_offset: Vector3D) -> StateSE3:
+    ego_pose_matrix = np.array(frame.pose.transform, dtype=np.float64).reshape(4, 4)
+    ego_pose_se3 = StateSE3.from_transformation_matrix(ego_pose_matrix)
+    ego_pose_se3.array[StateSE3Index.XYZ] += map_pose_offset.array[Vector3DIndex.XYZ]
+    return ego_pose_se3
+
+
+def _extract_wopd_ego_state(frame: dataset_pb2.Frame, map_pose_offset: Vector3D) -> EgoStateSE3:
+    rear_axle_pose = _get_ego_pose_se3(frame, map_pose_offset)
+
+    vehicle_parameters = get_wopd_chrysler_pacifica_parameters()
+    # FIXME: Find dynamic state in waymo open perception dataset
+    # https://github.com/waymo-research/waymo-open-dataset/issues/55#issuecomment-546152290
+    dynamic_state = DynamicStateSE3(
+        velocity=Vector3D(*np.zeros(3)),
+        acceleration=Vector3D(*np.zeros(3)),
+        angular_velocity=Vector3D(*np.zeros(3)),
+    )
+
+    return EgoStateSE3.from_rear_axle(
+        rear_axle_se3=rear_axle_pose,
+        dynamic_state_se3=dynamic_state,
+        vehicle_parameters=vehicle_parameters,
+        time_point=None,
+    )
+
+
+def _extract_wopd_box_detections(
+    frame: dataset_pb2.Frame, map_pose_offset: Vector3D, zero_roll_pitch: bool = True
+) -> BoxDetectionWrapper:
+
+    ego_pose_se3 = _get_ego_pose_se3(frame, map_pose_offset)
+
+    num_detections = len(frame.laser_labels)
+    detections_state = np.zeros((num_detections, len(BoundingBoxSE3Index)), dtype=np.float64)
+    detections_velocity = np.zeros((num_detections, len(Vector3DIndex)), dtype=np.float64)
+    detections_types: List[int] = []
+    detections_token: List[str] = []
+
+    for detection_idx, detection in enumerate(frame.laser_labels):
+
+        # 1. Build a yaw-only quaternion from the box heading
+        detection_quaternion = EulerAngles(
+            roll=DEFAULT_ROLL,
+            pitch=DEFAULT_PITCH,
+            yaw=detection.box.heading,
+        ).quaternion
+
+        # 2. Fill SE3 Bounding Box
+        detections_state[detection_idx, BoundingBoxSE3Index.X] = detection.box.center_x
+        detections_state[detection_idx, BoundingBoxSE3Index.Y] = detection.box.center_y
+        detections_state[detection_idx, BoundingBoxSE3Index.Z] = detection.box.center_z
+        detections_state[detection_idx, BoundingBoxSE3Index.QUATERNION] = detection_quaternion
+        detections_state[detection_idx, BoundingBoxSE3Index.LENGTH] = detection.box.length
+        detections_state[detection_idx, BoundingBoxSE3Index.WIDTH] = detection.box.width
+        detections_state[detection_idx, BoundingBoxSE3Index.HEIGHT] = detection.box.height
+
+        # 3. Velocity TODO: check if velocity needs to be rotated
+        detections_velocity[detection_idx] = Vector3D(
+            x=detection.metadata.speed_x,
+            y=detection.metadata.speed_y,
+            z=detection.metadata.speed_z,
+        ).array
+
+        # 4. Type and track token
+        detections_types.append(WOPD_DETECTION_NAME_DICT[detection.type])
+        detections_token.append(str(detection.id))
+
+    detections_state[:, BoundingBoxSE3Index.STATE_SE3] = convert_relative_to_absolute_se3_array(
+        origin=ego_pose_se3, se3_array=detections_state[:, BoundingBoxSE3Index.STATE_SE3]
+    )
+    if zero_roll_pitch:
+        euler_array = get_euler_array_from_quaternion_array(detections_state[:, BoundingBoxSE3Index.QUATERNION])
+        euler_array[..., EulerAnglesIndex.ROLL] = DEFAULT_ROLL
+        euler_array[..., EulerAnglesIndex.PITCH] = DEFAULT_PITCH
+        detections_state[..., BoundingBoxSE3Index.QUATERNION] = get_quaternion_array_from_euler_array(euler_array)
+
+    box_detections: List[BoxDetectionSE3] = []
+    for detection_idx in range(num_detections):
+        box_detections.append(
+            BoxDetectionSE3(
+                metadata=BoxDetectionMetadata(
+                    label=detections_types[detection_idx],
+                    timepoint=None,
+                    track_token=detections_token[detection_idx],
+                    confidence=None,
+                ),
+                bounding_box_se3=BoundingBoxSE3.from_array(detections_state[detection_idx]),
+                velocity=Vector3D.from_array(detections_velocity[detection_idx]),
+            )
+        )
+
+    return BoxDetectionWrapper(box_detections=box_detections)
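What the `zero_roll_pitch` branch above does, sketched with SciPy instead of the py123d rotation utilities (assuming `DEFAULT_ROLL`/`DEFAULT_PITCH` are zero, as the names suggest): round-trip each quaternion through euler angles, pin roll and pitch, and rebuild the quaternion so only yaw survives.

```python
import numpy as np
from scipy.spatial.transform import Rotation

# A box orientation with yaw=0.7, pitch=0.1, roll=-0.05 (intrinsic ZYX order).
quat_xyzw = Rotation.from_euler("ZYX", [0.7, 0.1, -0.05]).as_quat()

# Quaternion -> euler, zero roll/pitch, euler -> quaternion.
yaw, _pitch, _roll = Rotation.from_quat(quat_xyzw).as_euler("ZYX")
flattened = Rotation.from_euler("ZYX", [yaw, 0.0, 0.0])

assert np.allclose(flattened.as_euler("ZYX"), [0.7, 0.0, 0.0])
```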
+
+
+def _extract_wopd_cameras(
+    frame: dataset_pb2.Frame, dataset_converter_config: DatasetConverterConfig
+) -> List[CameraData]:
+
+    camera_data_list: List[CameraData] = []
+
+    if dataset_converter_config.include_pinhole_cameras:
+
+        # NOTE @DanielDauner: The extrinsic matrix in frame.context.camera_calibration is fixed to model the ego to camera transformation.
+        # The poses in frame.images[idx] are the motion compensated ego poses when the camera triggers.
+        # TODO: Verify if this is correct.
+        camera_extrinsic: Dict[PinholeCameraType, StateSE3] = {}
+        for calibration in frame.context.camera_calibrations:
+            camera_type = WOPD_CAMERA_TYPES[calibration.name]
+            camera_transform = np.array(calibration.extrinsic.transform, dtype=np.float64).reshape(4, 4)
+            camera_pose = StateSE3.from_transformation_matrix(camera_transform)
+            # NOTE: WOPD uses a different camera convention than py123d
+            # https://arxiv.org/pdf/1912.04838 (Figure 1.)
+            camera_pose = convert_camera_convention(
+                camera_pose,
+                from_convention=CameraConvention.pXpZmY,
+                to_convention=CameraConvention.pZmYpX,
+            )
+            camera_extrinsic[camera_type] = camera_pose
+
+        for image_proto in frame.images:
+            camera_type = WOPD_CAMERA_TYPES[image_proto.name]
+            camera_data_list.append(
+                CameraData(
+                    camera_type=camera_type,
+                    extrinsic=camera_extrinsic[camera_type],
+                    jpeg_binary=image_proto.image,
+                )
+            )
+
+    return camera_data_list
+
+
+def _extract_wopd_lidars(
+    frame: dataset_pb2.Frame,
+    keep_polar_features: bool,
+    frame_idx: int,
+    dataset_converter_config: DatasetConverterConfig,
+    absolute_tf_record_path: Path,
+    wopd_data_root: Path,
+) -> List[LiDARData]:
+
+    lidars: List[LiDARData] = []
+
+    if dataset_converter_config.include_lidars:
+
+        relative_path = absolute_tf_record_path.relative_to(wopd_data_root)
+        lidars.append(
+            LiDARData(
+                lidar_type=LiDARType.LIDAR_MERGED,
+                iteration=frame_idx,
+                dataset_root=wopd_data_root,
+                relative_path=relative_path,
+            )
+        )
+
+    return lidars
diff --git a/d123/common/utils/__init__.py b/src/py123d/conversion/log_writer/__init__.py
similarity index 100%
rename from d123/common/utils/__init__.py
rename to src/py123d/conversion/log_writer/__init__.py
diff --git a/src/py123d/conversion/log_writer/abstract_log_writer.py b/src/py123d/conversion/log_writer/abstract_log_writer.py
new file mode 100644
index 00000000..238919d6
--- /dev/null
+++ b/src/py123d/conversion/log_writer/abstract_log_writer.py
@@ -0,0 +1,117 @@
+from __future__ import annotations
+
+import abc
+from dataclasses import dataclass
+from pathlib import Path
+from typing import List, Optional, Union
+
+import numpy as np
+import numpy.typing as npt
+
+from py123d.conversion.dataset_converter_config import DatasetConverterConfig
+from py123d.datatypes.detections.box_detections import BoxDetectionWrapper
+from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper
+from py123d.datatypes.scene.scene_metadata import LogMetadata
+from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraType
+from py123d.datatypes.sensors.lidar import LiDARType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType
+from py123d.datatypes.time.time_point import TimePoint
+from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3
+from py123d.geometry import StateSE3
+
+
+class AbstractLogWriter(abc.ABC):
+    """Abstract base class for log writers.
+
+    A log writer is responsible for specifying the output format of a converted log.
+    This includes how data is organized, how it is serialized, and how it is stored.
+    """
+
+    @abc.abstractmethod
+    def reset(
+        self,
+        dataset_converter_config: DatasetConverterConfig,
+        log_metadata: LogMetadata,
+    ) -> bool:
+        """
+        Reset the log writer for a new log.
+        :return: True if the log still needs to be written, False if it can be skipped.
+ """ + + @abc.abstractmethod + def write( + self, + timestamp: TimePoint, + ego_state: Optional[EgoStateSE3] = None, + box_detections: Optional[BoxDetectionWrapper] = None, + traffic_lights: Optional[TrafficLightDetectionWrapper] = None, + pinhole_cameras: Optional[List[CameraData]] = None, + fisheye_mei_cameras: Optional[List[CameraData]] = None, + lidars: Optional[List[LiDARData]] = None, + scenario_tags: Optional[List[str]] = None, + route_lane_group_ids: Optional[List[int]] = None, + **kwargs, + ) -> None: + pass + + @abc.abstractmethod + def close(self) -> None: + pass + + +@dataclass +class LiDARData: + + lidar_type: LiDARType + + timestamp: Optional[TimePoint] = None + iteration: Optional[int] = None + dataset_root: Optional[Union[str, Path]] = None + relative_path: Optional[Union[str, Path]] = None + point_cloud: Optional[npt.NDArray[np.float32]] = None + + def __post_init__(self): + assert ( + self.has_file_path or self.has_point_cloud + ), "Either file path (dataset_root and relative_path) or point_cloud must be provided for LiDARData." + + @property + def has_file_path(self) -> bool: + return self.dataset_root is not None and self.relative_path is not None + + @property + def has_point_cloud(self) -> bool: + return self.point_cloud is not None + + +@dataclass +class CameraData: + + camera_type: Union[PinholeCameraType, FisheyeMEICameraType] + extrinsic: StateSE3 + + timestamp: Optional[TimePoint] = None + jpeg_binary: Optional[bytes] = None + numpy_image: Optional[npt.NDArray[np.uint8]] = None + dataset_root: Optional[Union[str, Path]] = None + relative_path: Optional[Union[str, Path]] = None + + def __post_init__(self): + assert ( + self.has_file_path or self.has_jpeg_binary or self.has_numpy_image + ), "Either file path (dataset_root and relative_path) or jpeg_binary or numpy_image must be provided for CameraData." 
+ + if self.has_file_path: + absolute_path = Path(self.dataset_root) / self.relative_path + assert absolute_path.exists(), f"Camera file not found: {absolute_path}" + + @property + def has_file_path(self) -> bool: + return self.dataset_root is not None and self.relative_path is not None + + @property + def has_jpeg_binary(self) -> bool: + return self.jpeg_binary is not None + + @property + def has_numpy_image(self) -> bool: + return self.numpy_image is not None diff --git a/src/py123d/conversion/log_writer/arrow_log_writer.py b/src/py123d/conversion/log_writer/arrow_log_writer.py new file mode 100644 index 00000000..acbcbb3b --- /dev/null +++ b/src/py123d/conversion/log_writer/arrow_log_writer.py @@ -0,0 +1,525 @@ +from pathlib import Path +from typing import Any, Dict, List, Literal, Optional, Tuple, Union + +import numpy as np +import pyarrow as pa + +from py123d.common.utils.uuid_utils import create_deterministic_uuid +from py123d.conversion.abstract_dataset_converter import AbstractLogWriter, DatasetConverterConfig +from py123d.conversion.log_writer.abstract_log_writer import CameraData, LiDARData +from py123d.conversion.sensor_io.camera.jpeg_camera_io import ( + decode_image_from_jpeg_binary, + encode_image_as_jpeg_binary, + load_image_from_jpeg_file, + load_jpeg_binary_from_jpeg_file, +) +from py123d.conversion.sensor_io.camera.mp4_camera_io import MP4Writer +from py123d.conversion.sensor_io.lidar.draco_lidar_io import encode_lidar_pc_as_draco_binary +from py123d.conversion.sensor_io.lidar.file_lidar_io import load_lidar_pcs_from_file +from py123d.conversion.sensor_io.lidar.laz_lidar_io import encode_lidar_pc_as_laz_binary +from py123d.datatypes.detections.box_detections import BoxDetectionWrapper +from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper +from py123d.datatypes.scene.arrow.utils.arrow_metadata_utils import add_log_metadata_to_arrow_schema +from py123d.datatypes.scene.scene_metadata import LogMetadata +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType +from py123d.datatypes.time.time_point import TimePoint +from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3, EgoStateSE3Index +from py123d.geometry import BoundingBoxSE3Index, StateSE3, StateSE3Index, Vector3DIndex + + +def _get_logs_root() -> Path: + from py123d.script.utils.dataset_path_utils import get_dataset_paths + + DATASET_PATHS = get_dataset_paths() + return Path(DATASET_PATHS.py123d_logs_root) + + +def _get_sensors_root() -> Path: + from py123d.script.utils.dataset_path_utils import get_dataset_paths + + DATASET_PATHS = get_dataset_paths() + return Path(DATASET_PATHS.py123d_sensors_root) + + +class ArrowLogWriter(AbstractLogWriter): + + def __init__( + self, + logs_root: Optional[Union[str, Path]] = None, + sensors_root: Optional[Union[str, Path]] = None, + ipc_compression: Optional[Literal["lz4", "zstd"]] = None, + ipc_compression_level: Optional[int] = None, + lidar_compression: Optional[Literal["draco", "laz"]] = "draco", + ) -> None: + + self._logs_root = Path(logs_root) if logs_root is not None else _get_logs_root() + self._sensors_root = Path(sensors_root) if sensors_root is not None else _get_sensors_root() + self._ipc_compression = ipc_compression + self._ipc_compression_level = ipc_compression_level + self._lidar_compression = lidar_compression + + # Loaded during .reset() and cleared during .close() + self._dataset_converter_config: Optional[DatasetConverterConfig] = None + 
self._log_metadata: Optional[LogMetadata] = None
+        self._schema: Optional[pa.Schema] = None
+        self._source: Optional[pa.NativeFile] = None
+        self._record_batch_writer: Optional[pa.ipc.RecordBatchWriter] = None
+        self._pinhole_mp4_writers: Dict[str, MP4Writer] = {}
+        self._fisheye_mei_mp4_writers: Dict[str, MP4Writer] = {}
+
+    def reset(self, dataset_converter_config: DatasetConverterConfig, log_metadata: LogMetadata) -> bool:
+
+        log_needs_writing: bool = False
+        sink_log_path: Path = self._logs_root / log_metadata.split / f"{log_metadata.log_name}.arrow"
+
+        # Check if the log file already exists or needs to be overwritten
+        if not sink_log_path.exists() or dataset_converter_config.force_log_conversion:
+            log_needs_writing = True
+
+            # Delete the file if it exists (no error if it doesn't)
+            sink_log_path.unlink(missing_ok=True)
+            if not sink_log_path.parent.exists():
+                sink_log_path.parent.mkdir(parents=True, exist_ok=True)
+
+        # Load config and metadata
+        self._dataset_converter_config = dataset_converter_config
+        self._log_metadata = log_metadata
+        self._schema = self._build_schema(dataset_converter_config, log_metadata)
+
+        # Initialize Arrow IPC writer, optionally with compression
+        # NOTE @DanielDauner: I tried some compression settings, which did not lead to significant reductions.
+        compression = (
+            pa.Codec(self._ipc_compression, compression_level=self._ipc_compression_level)
+            if self._ipc_compression is not None
+            else None
+        )
+
+        options = pa.ipc.IpcWriteOptions(compression=compression)
+        self._source = pa.OSFile(str(sink_log_path), "wb")
+        self._record_batch_writer = pa.ipc.new_file(self._source, schema=self._schema, options=options)
+
+        self._pinhole_mp4_writers = {}
+        self._fisheye_mei_mp4_writers = {}
+
+        return log_needs_writing
+
+    def write(
+        self,
+        timestamp: TimePoint,
+        ego_state: Optional[EgoStateSE3] = None,
+        box_detections: Optional[BoxDetectionWrapper] = None,
+        traffic_lights: Optional[TrafficLightDetectionWrapper] = None,
+        pinhole_cameras: Optional[List[CameraData]] = None,
+        fisheye_mei_cameras: Optional[List[CameraData]] = None,
+        lidars: Optional[List[LiDARData]] = None,
+        scenario_tags: Optional[List[str]] = None,
+        route_lane_group_ids: Optional[List[int]] = None,
+    ) -> None:
+
+        assert self._dataset_converter_config is not None, "Log writer is not initialized."
+        assert self._log_metadata is not None, "Log writer is not initialized."
+        assert self._schema is not None, "Log writer is not initialized."
+        assert self._record_batch_writer is not None, "Log writer is not initialized."
+        assert self._source is not None, "Log writer is not initialized."
+
+        record_batch_data = {
+            "uuid": [
+                create_deterministic_uuid(
+                    split=self._log_metadata.split,
+                    log_name=self._log_metadata.log_name,
+                    timestamp_us=timestamp.time_us,
+                ).bytes
+            ],
+            "timestamp": [timestamp.time_us],
+        }
+
+        # --------------------------------------------------------------------------------------------------------------
+        # Ego State
+        # --------------------------------------------------------------------------------------------------------------
+        if self._dataset_converter_config.include_ego:
+            assert ego_state is not None, "Ego state is required but not provided."
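+            # EgoStateSE3.array flattens the ego pose and dynamics into one float
+            # vector; EgoStateSE3Index (imported above) documents the layout.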
+            record_batch_data["ego_state"] = [ego_state.array]
+
+        # --------------------------------------------------------------------------------------------------------------
+        # Box Detections
+        # --------------------------------------------------------------------------------------------------------------
+        if self._dataset_converter_config.include_box_detections:
+            assert box_detections is not None, "Box detections are required but not provided."
+            # TODO: Figure out a more elegant way without for-loops.
+
+            # Accumulate box detection data
+            box_detection_state = []
+            box_detection_velocity = []
+            box_detection_token = []
+            box_detection_label = []
+
+            for box_detection in box_detections:
+                box_detection_state.append(box_detection.bounding_box.array)
+                box_detection_velocity.append(box_detection.velocity.array)  # TODO: make optional
+                box_detection_token.append(box_detection.metadata.track_token)
+                box_detection_label.append(int(box_detection.metadata.label))
+
+            # Add to record batch data
+            record_batch_data["box_detection_state"] = [box_detection_state]
+            record_batch_data["box_detection_velocity"] = [box_detection_velocity]
+            record_batch_data["box_detection_token"] = [box_detection_token]
+            record_batch_data["box_detection_label"] = [box_detection_label]
+
+        # --------------------------------------------------------------------------------------------------------------
+        # Traffic Lights
+        # --------------------------------------------------------------------------------------------------------------
+        if self._dataset_converter_config.include_traffic_lights:
+            assert traffic_lights is not None, "Traffic light detections are required but not provided."
+            # TODO: Figure out a more elegant way without for-loops.
+
+            # Accumulate traffic light data
+            traffic_light_ids = []
+            traffic_light_types = []
+
+            for traffic_light in traffic_lights:
+                traffic_light_ids.append(traffic_light.lane_id)
+                traffic_light_types.append(int(traffic_light.status))
+
+            # Add to record batch data
+            record_batch_data["traffic_light_ids"] = [traffic_light_ids]
+            record_batch_data["traffic_light_types"] = [traffic_light_types]
+
+        # --------------------------------------------------------------------------------------------------------------
+        # Pinhole Cameras
+        # --------------------------------------------------------------------------------------------------------------
+        if self._dataset_converter_config.include_pinhole_cameras:
+            assert pinhole_cameras is not None, "Pinhole camera data is required but not provided."
+            provided_pinhole_data = self._prepare_camera_data_dict(
+                pinhole_cameras, self._dataset_converter_config.pinhole_camera_store_option
+            )
+            provided_pinhole_extrinsics = {
+                camera_data.camera_type: camera_data.extrinsic for camera_data in pinhole_cameras
+            }
+            expected_pinhole_cameras = set(self._log_metadata.pinhole_camera_metadata.keys())
+
+            for pinhole_camera_type in expected_pinhole_cameras:
+                pinhole_camera_name = pinhole_camera_type.serialize()
+
+                # NOTE @DanielDauner: Missing cameras are allowed, e.g., for synchronization mismatches.
+                # In this case, we write None/null to the arrow table.
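+                # Downstream readers are therefore expected to treat a null entry as "no image at this
+                # timestamp" rather than as an error (an assumption about reader behavior, not enforced here).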
+                pinhole_camera_data: Optional[Any] = None
+                pinhole_camera_pose: Optional[StateSE3] = None
+                if pinhole_camera_type in provided_pinhole_data:
+                    pinhole_camera_data = provided_pinhole_data[pinhole_camera_type]
+                    pinhole_camera_pose = provided_pinhole_extrinsics[pinhole_camera_type]
+
+                record_batch_data[f"{pinhole_camera_name}_data"] = [pinhole_camera_data]
+                record_batch_data[f"{pinhole_camera_name}_extrinsic"] = [
+                    pinhole_camera_pose.array if pinhole_camera_pose else None
+                ]
+
+        # --------------------------------------------------------------------------------------------------------------
+        # Fisheye MEI Cameras
+        # --------------------------------------------------------------------------------------------------------------
+        if self._dataset_converter_config.include_fisheye_mei_cameras:
+            assert fisheye_mei_cameras is not None, "Fisheye MEI camera data is required but not provided."
+            provided_fisheye_mei_data = self._prepare_camera_data_dict(
+                fisheye_mei_cameras, self._dataset_converter_config.fisheye_mei_camera_store_option
+            )
+            provided_fisheye_mei_extrinsics = {
+                camera_data.camera_type: camera_data.extrinsic for camera_data in fisheye_mei_cameras
+            }
+            expected_fisheye_mei_cameras = set(self._log_metadata.fisheye_mei_camera_metadata.keys())
+
+            for fisheye_mei_camera_type in expected_fisheye_mei_cameras:
+                fisheye_mei_camera_name = fisheye_mei_camera_type.serialize()
+
+                # NOTE @DanielDauner: Missing cameras are allowed, e.g., for synchronization mismatches.
+                # In this case, we write None/null to the arrow table.
+                fisheye_mei_camera_data: Optional[Any] = None
+                fisheye_mei_camera_pose: Optional[StateSE3] = None
+                if fisheye_mei_camera_type in provided_fisheye_mei_data:
+                    fisheye_mei_camera_data = provided_fisheye_mei_data[fisheye_mei_camera_type]
+                    fisheye_mei_camera_pose = provided_fisheye_mei_extrinsics[fisheye_mei_camera_type]
+
+                record_batch_data[f"{fisheye_mei_camera_name}_data"] = [fisheye_mei_camera_data]
+                record_batch_data[f"{fisheye_mei_camera_name}_extrinsic"] = [
+                    fisheye_mei_camera_pose.array if fisheye_mei_camera_pose else None
+                ]
+
+        # --------------------------------------------------------------------------------------------------------------
+        # LiDARs
+        # --------------------------------------------------------------------------------------------------------------
+        if self._dataset_converter_config.include_lidars and len(self._log_metadata.lidar_metadata) > 0:
+            assert lidars is not None, "LiDAR data is required but not provided."
+
+            if self._dataset_converter_config.lidar_store_option == "path_merged":
+                # NOTE @DanielDauner: The path_merged option is necessary for datasets that natively store multiple
+                # LiDAR point clouds in a single file. In this case, writing the file path several times is wasteful.
+                # Instead, we store the file path once, and divide the point clouds during reading.
+                assert len(lidars) == 1, "Exactly one LiDAR data must be provided for merged LiDAR storage."
+                assert lidars[0].has_file_path, "LiDAR data must provide a file path for merged LiDAR storage."
+                merged_lidar_data: Optional[str] = str(lidars[0].relative_path)
+
+                record_batch_data[f"{LiDARType.LIDAR_MERGED.serialize()}_data"] = [merged_lidar_data]
+
+            else:
+                # NOTE @DanielDauner: for "path" and "binary" options, we write each LiDAR in a separate column.
+                # We currently assume that all lidars are provided at the same time step.
+                # Theoretically, we could extend this to asynchronous LiDARs in the future by storing the lidar
+                # data as a dictionary, list, or struct-like object in the columns.
+                expected_lidars = set(self._log_metadata.lidar_metadata.keys())
+                lidar_data_dict = self._prepare_lidar_data_dict(lidars)
+
+                for lidar_type in expected_lidars:
+                    lidar_name = lidar_type.serialize()
+                    lidar_data: Optional[Union[str, bytes]] = lidar_data_dict.get(lidar_type, None)
+                    record_batch_data[f"{lidar_name}_data"] = [lidar_data]
+
+        # --------------------------------------------------------------------------------------------------------------
+        # Miscellaneous (Scenario Tags / Route)
+        # --------------------------------------------------------------------------------------------------------------
+        if self._dataset_converter_config.include_scenario_tags:
+            assert scenario_tags is not None, "Scenario tags are required but not provided."
+            record_batch_data["scenario_tags"] = [scenario_tags]
+
+        if self._dataset_converter_config.include_route:
+            assert route_lane_group_ids is not None, "Route lane group IDs are required but not provided."
+            record_batch_data["route_lane_group_ids"] = [route_lane_group_ids]
+
+        record_batch = pa.record_batch(record_batch_data, schema=self._schema)
+        self._record_batch_writer.write_batch(record_batch)
+
+    def close(self) -> None:
+        if self._record_batch_writer is not None:
+            self._record_batch_writer.close()
+        self._record_batch_writer = None
+
+        if self._source is not None:
+            self._source.close()
+        self._source = None
+
+        self._dataset_converter_config = None
+        self._log_metadata = None
+        self._schema = None
+
+        for mp4_writer in self._pinhole_mp4_writers.values():
+            mp4_writer.close()
+        self._pinhole_mp4_writers = {}
+        for mp4_writer in self._fisheye_mei_mp4_writers.values():
+            mp4_writer.close()
+        self._fisheye_mei_mp4_writers = {}
+
+    @staticmethod
+    def _build_schema(dataset_converter_config: DatasetConverterConfig, log_metadata: LogMetadata) -> pa.Schema:
+
+        schema_list: List[Tuple[str, pa.DataType]] = [
+            ("uuid", pa.uuid()),
+            ("timestamp", pa.int64()),
+        ]
+
+        # --------------------------------------------------------------------------------------------------------------
+        # Ego State
+        # --------------------------------------------------------------------------------------------------------------
+        if dataset_converter_config.include_ego:
+            schema_list.extend(
+                [
+                    ("ego_state", pa.list_(pa.float64(), len(EgoStateSE3Index))),
+                ]
+            )
+
+        # --------------------------------------------------------------------------------------------------------------
+        # Box Detections
+        # --------------------------------------------------------------------------------------------------------------
+        if dataset_converter_config.include_box_detections:
+            schema_list.extend(
+                [
+                    ("box_detection_state", pa.list_(pa.list_(pa.float64(), len(BoundingBoxSE3Index)))),
+                    ("box_detection_velocity", pa.list_(pa.list_(pa.float64(), len(Vector3DIndex)))),
+                    ("box_detection_token", pa.list_(pa.string())),
+                    ("box_detection_label", pa.list_(pa.int16())),
+                ]
+            )
+
+        # --------------------------------------------------------------------------------------------------------------
+        # Traffic Lights
+        # --------------------------------------------------------------------------------------------------------------
+        if dataset_converter_config.include_traffic_lights:
+ schema_list.extend( + [ + ("traffic_light_ids", pa.list_(pa.int64())), + ("traffic_light_types", pa.list_(pa.int16())), + ] + ) + + # -------------------------------------------------------------------------------------------------------------- + # Pinhole Cameras + # -------------------------------------------------------------------------------------------------------------- + if dataset_converter_config.include_pinhole_cameras: + for pinhole_camera_type in log_metadata.pinhole_camera_metadata.keys(): + pinhole_camera_name = pinhole_camera_type.serialize() + + # Depending on the storage option, define the schema for camera data + if dataset_converter_config.pinhole_camera_store_option == "path": + schema_list.append((f"{pinhole_camera_name}_data", pa.string())) + + elif dataset_converter_config.pinhole_camera_store_option == "binary": + schema_list.append((f"{pinhole_camera_name}_data", pa.binary())) + + elif dataset_converter_config.pinhole_camera_store_option == "mp4": + schema_list.append((f"{pinhole_camera_name}_data", pa.int64())) + + # Add camera pose + schema_list.append((f"{pinhole_camera_name}_extrinsic", pa.list_(pa.float64(), len(StateSE3Index)))) + + # -------------------------------------------------------------------------------------------------------------- + # Fisheye MEI Cameras + # -------------------------------------------------------------------------------------------------------------- + if dataset_converter_config.include_fisheye_mei_cameras: + for fisheye_mei_camera_type in log_metadata.fisheye_mei_camera_metadata.keys(): + fisheye_mei_camera_name = fisheye_mei_camera_type.serialize() + + # Depending on the storage option, define the schema for camera data + if dataset_converter_config.fisheye_mei_camera_store_option == "path": + schema_list.append((f"{fisheye_mei_camera_name}_data", pa.string())) + + elif dataset_converter_config.fisheye_mei_camera_store_option == "binary": + schema_list.append((f"{fisheye_mei_camera_name}_data", pa.binary())) + + elif dataset_converter_config.fisheye_mei_camera_store_option == "mp4": + schema_list.append((f"{fisheye_mei_camera_name}_data", pa.int64())) + + # Add camera pose + schema_list.append((f"{fisheye_mei_camera_name}_extrinsic", pa.list_(pa.float64(), len(StateSE3Index)))) + + # -------------------------------------------------------------------------------------------------------------- + # LiDARs + # -------------------------------------------------------------------------------------------------------------- + if dataset_converter_config.include_lidars and len(log_metadata.lidar_metadata) > 0: + if dataset_converter_config.lidar_store_option == "path_merged": + schema_list.append((f"{LiDARType.LIDAR_MERGED.serialize()}_data", pa.string())) + else: + for lidar_type in log_metadata.lidar_metadata.keys(): + lidar_name = lidar_type.serialize() + + # Depending on the storage option, define the schema for LiDAR data + if dataset_converter_config.lidar_store_option == "path": + schema_list.append((f"{lidar_name}_data", pa.string())) + + elif dataset_converter_config.lidar_store_option == "binary": + schema_list.append((f"{lidar_name}_data", pa.binary())) + + # -------------------------------------------------------------------------------------------------------------- + # Miscellaneous (Scenario Tags / Route) + # -------------------------------------------------------------------------------------------------------------- + if dataset_converter_config.include_scenario_tags: + schema_list.append(("scenario_tags", 
pa.list_(pa.string())))
+
+        if dataset_converter_config.include_route:
+            schema_list.append(("route_lane_group_ids", pa.list_(pa.int64())))
+
+        return add_log_metadata_to_arrow_schema(pa.schema(schema_list), log_metadata)
+
+    def _prepare_lidar_data_dict(self, lidars: List[LiDARData]) -> Dict[LiDARType, Union[str, bytes]]:
+        lidar_data_dict: Dict[LiDARType, Union[str, bytes]] = {}
+
+        if self._dataset_converter_config.lidar_store_option == "path":
+            for lidar_data in lidars:
+                assert lidar_data.has_file_path, "LiDAR data must provide a file path for path storage."
+                lidar_data_dict[lidar_data.lidar_type] = str(lidar_data.relative_path)
+
+        elif self._dataset_converter_config.lidar_store_option == "binary":
+            lidar_pcs_dict: Dict[LiDARType, np.ndarray] = {}
+
+            # 1. Load point clouds from files
+            for lidar_data in lidars:
+                if lidar_data.has_point_cloud:
+                    lidar_pcs_dict[lidar_data.lidar_type] = lidar_data.point_cloud
+                elif lidar_data.has_file_path:
+                    lidar_pcs_dict.update(
+                        load_lidar_pcs_from_file(
+                            lidar_data.relative_path,
+                            self._log_metadata,
+                            lidar_data.iteration,
+                            lidar_data.dataset_root,
+                        )
+                    )
+
+            # 2. Compress the point clouds to bytes
+            for lidar_type, point_cloud in lidar_pcs_dict.items():
+                lidar_metadata = self._log_metadata.lidar_metadata[lidar_type]
+                binary: Optional[bytes] = None
+                if self._lidar_compression == "draco":
+                    binary = encode_lidar_pc_as_draco_binary(point_cloud, lidar_metadata)
+                elif self._lidar_compression == "laz":
+                    binary = encode_lidar_pc_as_laz_binary(point_cloud, lidar_metadata)
+                lidar_data_dict[lidar_type] = binary
+
+        return lidar_data_dict
+
+    def _prepare_camera_data_dict(
+        self, cameras: List[CameraData], store_option: Literal["path", "binary", "mp4"]
+    ) -> Dict[PinholeCameraType, Union[str, int, bytes]]:
+        camera_data_dict: Dict[PinholeCameraType, Union[str, int, bytes]] = {}
+
+        for camera_data in cameras:
+            if store_option == "path":
+                if camera_data.has_file_path:
+                    camera_data_dict[camera_data.camera_type] = str(camera_data.relative_path)
+                else:
+                    raise NotImplementedError("Camera data must provide a file path for path storage.")
+            elif store_option == "binary":
+                camera_data_dict[camera_data.camera_type] = _get_jpeg_binary_from_camera_data(camera_data)
+            elif store_option == "mp4":
+                camera_name = camera_data.camera_type.serialize()
+                if camera_name not in self._pinhole_mp4_writers:
+                    mp4_path = (
+                        self._sensors_root
+                        / self._log_metadata.split
+                        / self._log_metadata.log_name
+                        / f"{camera_name}.mp4"
+                    )
+                    frame_interval = self._log_metadata.timestep_seconds
+                    self._pinhole_mp4_writers[camera_name] = MP4Writer(mp4_path, fps=1 / frame_interval)
+
+                image = _get_numpy_image_from_camera_data(camera_data)
+                camera_data_dict[camera_data.camera_type] = self._pinhole_mp4_writers[camera_name].write_frame(image)
+
+            else:
+                raise NotImplementedError(f"Unsupported camera store option: {store_option}")
+
+        return camera_data_dict
+
+
+def _get_jpeg_binary_from_camera_data(camera_data: CameraData) -> bytes:
+    jpeg_binary: Optional[bytes] = None
+
+    if camera_data.has_jpeg_binary:
+        jpeg_binary = camera_data.jpeg_binary
+    elif camera_data.has_numpy_image:
+        jpeg_binary = encode_image_as_jpeg_binary(camera_data.numpy_image)
+    elif camera_data.has_file_path:
+        absolute_path = Path(camera_data.dataset_root) / camera_data.relative_path
+
+        if absolute_path.suffix.lower() in [".jpg", ".jpeg"]:
+            jpeg_binary = load_jpeg_binary_from_jpeg_file(absolute_path)
+        else:
+            raise NotImplementedError(f"Unsupported camera file format: {absolute_path.suffix} for binary storage.")
+    else:
+        raise NotImplementedError("Camera data must provide jpeg_binary, numpy_image, or file path for binary storage.")
+
+    assert jpeg_binary is not None
+    return jpeg_binary
+
+
+def _get_numpy_image_from_camera_data(camera_data: CameraData) -> np.ndarray:
+    numpy_image: Optional[np.ndarray] = None
+
+    if camera_data.has_numpy_image:
+        numpy_image = camera_data.numpy_image
+    elif camera_data.has_jpeg_binary:
+        numpy_image = decode_image_from_jpeg_binary(camera_data.jpeg_binary)
+    elif camera_data.has_file_path:
+        absolute_path = Path(camera_data.dataset_root) / camera_data.relative_path
+        numpy_image = load_image_from_jpeg_file(absolute_path)
+    else:
+        raise NotImplementedError("Camera data must provide numpy_image, jpeg_binary, or file path for numpy image.")
+
+    assert numpy_image is not None
+    return numpy_image
diff --git a/src/py123d/conversion/map_writer/abstract_map_writer.py b/src/py123d/conversion/map_writer/abstract_map_writer.py
new file mode 100644
index 00000000..99c867e6
--- /dev/null
+++ b/src/py123d/conversion/map_writer/abstract_map_writer.py
@@ -0,0 +1,69 @@
+import abc
+from abc import abstractmethod
+
+from py123d.conversion.dataset_converter_config import DatasetConverterConfig
+from py123d.datatypes.maps.abstract_map_objects import (
+    AbstractCarpark,
+    AbstractCrosswalk,
+    AbstractGenericDrivable,
+    AbstractIntersection,
+    AbstractLane,
+    AbstractLaneGroup,
+    AbstractRoadEdge,
+    AbstractRoadLine,
+    AbstractStopLine,
+    AbstractWalkway,
+)
+from py123d.datatypes.maps.map_metadata import MapMetadata
+
+
+class AbstractMapWriter(abc.ABC):
+    """Abstract base class for map writers."""
+
+    @abstractmethod
+    def reset(self, dataset_converter_config: DatasetConverterConfig, map_metadata: MapMetadata) -> bool:
+        """Reset the writer to its initial state."""
+
+    @abstractmethod
+    def write_lane(self, lane: AbstractLane) -> None:
+        """Write a lane to the map."""
+
+    @abstractmethod
+    def write_lane_group(self, lane_group: AbstractLaneGroup) -> None:
+        """Write a group of lanes to the map."""
+
+    @abstractmethod
+    def write_intersection(self, intersection: AbstractIntersection) -> None:
+        """Write an intersection to the map."""
+
+    @abstractmethod
+    def write_crosswalk(self, crosswalk: AbstractCrosswalk) -> None:
+        """Write a crosswalk to the map."""
+
+    @abstractmethod
+    def write_carpark(self, carpark: AbstractCarpark) -> None:
+        """Write a car park to the map."""
+
+    @abstractmethod
+    def write_walkway(self, walkway: AbstractWalkway) -> None:
+        """Write a walkway to the map."""
+
+    @abstractmethod
+    def write_generic_drivable(self, obj: AbstractGenericDrivable) -> None:
+        """Write a generic drivable area to the map."""
+
+    @abstractmethod
+    def write_stop_line(self, stop_line: AbstractStopLine) -> None:
+        """Write a stop line to the map."""
+
+    @abstractmethod
+    def write_road_edge(self, road_edge: AbstractRoadEdge) -> None:
+        """Write a road edge to the map."""
+
+    @abstractmethod
+    def write_road_line(self, road_line: AbstractRoadLine) -> None:
+        """Write a road line to the map."""
+
+    @abstractmethod
+    def close(self) -> None:
+        """Close the writer and finalize any resources."""
diff --git a/src/py123d/conversion/map_writer/gpkg_map_writer.py b/src/py123d/conversion/map_writer/gpkg_map_writer.py
new file mode 100644
index 00000000..289e1cc2
--- /dev/null
+++ b/src/py123d/conversion/map_writer/gpkg_map_writer.py
@@ -0,0 +1,257 @@
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict, List, Optional, Union
+
+import geopandas as gpd
+import pandas as pd
+import shapely.geometry as geom
+
+from py123d.conversion.dataset_converter_config import DatasetConverterConfig
+from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
+from py123d.conversion.map_writer.utils.gpkg_utils import IntIDMapping
+from py123d.datatypes.maps.abstract_map_objects import (
+    AbstractCarpark,
+    AbstractCrosswalk,
+    AbstractGenericDrivable,
+    AbstractIntersection,
+    AbstractLane,
+    AbstractLaneGroup,
+    AbstractLineMapObject,
+    AbstractRoadEdge,
+    AbstractRoadLine,
+    AbstractStopLine,
+    AbstractSurfaceMapObject,
+    AbstractWalkway,
+)
+from py123d.datatypes.maps.map_datatypes import MapLayer
+from py123d.datatypes.maps.map_metadata import MapMetadata
+from py123d.geometry.polyline import Polyline3D
+
+MAP_OBJECT_DATA = Dict[str, List[Union[str, int, float, bool, geom.base.BaseGeometry]]]
+
+
+class GPKGMapWriter(AbstractMapWriter):
+    """Map writer that stores map layers in a GeoPackage (GPKG) file."""
+
+    def __init__(self, maps_root: Union[str, Path], remap_ids: bool = False) -> None:
+        self._maps_root = Path(maps_root)
+        self._crs: str = "EPSG:4326"  # WGS84
+        self._remap_ids = remap_ids
+
+        # Data to be written to the map for each object type
+        self._map_data: Optional[Dict[MapLayer, MAP_OBJECT_DATA]] = None
+        self._map_file: Optional[Path] = None
+        self._map_metadata: Optional[MapMetadata] = None
+
+    def reset(self, dataset_converter_config: DatasetConverterConfig, map_metadata: MapMetadata) -> bool:
+        """Inherited, see superclass."""
+
+        map_needs_writing: bool = False
+
+        if dataset_converter_config.include_map:
+            if map_metadata.map_is_local:
+                split, log_name = map_metadata.split, map_metadata.log_name
+                map_file = self._maps_root / split / f"{log_name}.gpkg"
+            else:
+                dataset, location = map_metadata.dataset, map_metadata.location
+                map_file = self._maps_root / dataset / f"{dataset}_{location}.gpkg"
+
+            map_needs_writing = dataset_converter_config.force_map_conversion or not map_file.exists()
+            if map_needs_writing:
+                # Reset all map layers and update map file / metadata
+                self._map_data = {map_layer: defaultdict(list) for map_layer in MapLayer}
+                self._map_file = map_file
+                self._map_metadata = map_metadata
+
+        return map_needs_writing
+
+    def write_lane(self, lane: AbstractLane) -> None:
+        """Inherited, see superclass."""
+        self._write_surface_layer(MapLayer.LANE, lane)
+        self._map_data[MapLayer.LANE]["lane_group_id"].append(lane.lane_group_id)
+        self._map_data[MapLayer.LANE]["left_boundary"].append(lane.left_boundary.linestring)
+        self._map_data[MapLayer.LANE]["right_boundary"].append(lane.right_boundary.linestring)
+        self._map_data[MapLayer.LANE]["centerline"].append(lane.centerline.linestring)
+        self._map_data[MapLayer.LANE]["left_lane_id"].append(lane.left_lane_id)
+        self._map_data[MapLayer.LANE]["right_lane_id"].append(lane.right_lane_id)
+        self._map_data[MapLayer.LANE]["predecessor_ids"].append(lane.predecessor_ids)
+        self._map_data[MapLayer.LANE]["successor_ids"].append(lane.successor_ids)
+        self._map_data[MapLayer.LANE]["speed_limit_mps"].append(lane.speed_limit_mps)
+
+    def write_lane_group(self, lane_group: AbstractLaneGroup) -> None:
+        """Inherited, see superclass."""
+        self._write_surface_layer(MapLayer.LANE_GROUP, lane_group)
+        self._map_data[MapLayer.LANE_GROUP]["lane_ids"].append(lane_group.lane_ids)
+        self._map_data[MapLayer.LANE_GROUP]["intersection_id"].append(lane_group.intersection_id)
+        self._map_data[MapLayer.LANE_GROUP]["predecessor_ids"].append(lane_group.predecessor_ids)
+        self._map_data[MapLayer.LANE_GROUP]["successor_ids"].append(lane_group.successor_ids)
+        self._map_data[MapLayer.LANE_GROUP]["left_boundary"].append(lane_group.left_boundary.linestring)
+        self._map_data[MapLayer.LANE_GROUP]["right_boundary"].append(lane_group.right_boundary.linestring)
+
+    def write_intersection(self, intersection: AbstractIntersection) -> None:
+        """Inherited, see superclass."""
+        self._write_surface_layer(MapLayer.INTERSECTION, intersection)
+        self._map_data[MapLayer.INTERSECTION]["lane_group_ids"].append(intersection.lane_group_ids)
+
+    def write_crosswalk(self, crosswalk: AbstractCrosswalk) -> None:
+        """Inherited, see superclass."""
+        self._write_surface_layer(MapLayer.CROSSWALK, crosswalk)
+
+    def write_carpark(self, carpark: AbstractCarpark) -> None:
+        """Inherited, see superclass."""
+        self._write_surface_layer(MapLayer.CARPARK, carpark)
+
+    def write_walkway(self, walkway: AbstractWalkway) -> None:
+        """Inherited, see superclass."""
+        self._write_surface_layer(MapLayer.WALKWAY, walkway)
+
+    def write_generic_drivable(self, obj: AbstractGenericDrivable) -> None:
+        """Inherited, see superclass."""
+        self._write_surface_layer(MapLayer.GENERIC_DRIVABLE, obj)
+
+    def write_stop_line(self, stop_line: AbstractStopLine) -> None:
+        """Inherited, see superclass."""
+        # self._write_line_layer(MapLayer.STOP_LINE, stop_line)
+        raise NotImplementedError("Stop lines are not yet supported in GPKG maps.")
+
+    def write_road_edge(self, road_edge: AbstractRoadEdge) -> None:
+        """Inherited, see superclass."""
+        self._write_line_layer(MapLayer.ROAD_EDGE, road_edge)
+        self._map_data[MapLayer.ROAD_EDGE]["road_edge_type"].append(road_edge.road_edge_type)
+
+    def write_road_line(self, road_line: AbstractRoadLine) -> None:
+        """Inherited, see superclass."""
+        self._write_line_layer(MapLayer.ROAD_LINE, road_line)
+        self._map_data[MapLayer.ROAD_LINE]["road_line_type"].append(road_line.road_line_type)
+
+    def close(self) -> None:
+        """Inherited, see superclass."""
+
+        if self._map_file is not None and self._map_data is not None:
+            if not self._map_file.parent.exists():
+                self._map_file.parent.mkdir(parents=True, exist_ok=True)
+
+            # Accumulate GeoDataFrames for each map layer
+            map_gdf: Dict[MapLayer, gpd.GeoDataFrame] = {}
+            for map_layer, layer_data in self._map_data.items():
+                if len(layer_data["id"]) > 0:
+                    df = pd.DataFrame(layer_data)
+                    map_gdf[map_layer] = gpd.GeoDataFrame(df, geometry="geometry", crs=self._crs)
+                else:
+                    map_gdf[map_layer] = gpd.GeoDataFrame(
+                        {"id": [], "geometry": []}, geometry="geometry", crs=self._crs
+                    )
+
+            # Optionally remap string IDs to integers
+            if self._remap_ids:
+                _map_ids_to_integer(map_gdf)
+
+            # Write each map layer to the GPKG file
+            for map_layer, gdf in map_gdf.items():
+                gdf.to_file(self._map_file, driver="GPKG", layer=map_layer.serialize())
+
+            # Write map metadata as a separate layer
+            metadata_df = gpd.GeoDataFrame(pd.DataFrame([self._map_metadata.to_dict()]))
+            metadata_df.to_file(self._map_file, driver="GPKG", layer="map_metadata")
+
+        self._map_file = None
+        self._map_data = None
+        self._map_metadata = None
+
+    def _assert_initialized(self) -> None:
+        assert self._map_data is not None, "Call reset() before writing data."
+        assert self._map_file is not None, "Call reset() before writing data."
+        assert self._map_metadata is not None, "Call reset() before writing data."
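+
+    # Typical usage (a sketch; `config`, `metadata`, and the map objects come from a dataset
+    # converter and are illustrative names, not part of this module):
+    #
+    #     writer = GPKGMapWriter(maps_root="/path/to/maps", remap_ids=True)
+    #     if writer.reset(config, metadata):
+    #         for lane in lanes:
+    #             writer.write_lane(lane)
+    #         writer.close()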
+
+    def _write_surface_layer(self, layer: MapLayer, surface_object: AbstractSurfaceMapObject) -> None:
+        """Helper to write surface map objects.
+
+        :param layer: map layer of surface object
+        :param surface_object: surface map object to write
+        """
+        self._assert_initialized()
+        self._map_data[layer]["id"].append(surface_object.object_id)
+        # NOTE: if the outline has a z-coordinate, we store it; otherwise, the outline can be inferred from the polygon
+        if isinstance(surface_object.outline, Polyline3D):
+            self._map_data[layer]["outline"].append(surface_object.outline.linestring)
+        self._map_data[layer]["geometry"].append(surface_object.shapely_polygon)
+
+    def _write_line_layer(self, layer: MapLayer, line_object: AbstractLineMapObject) -> None:
+        """Helper to write line map objects.
+
+        :param layer: map layer of line object
+        :param line_object: line map object to write
+        """
+        self._assert_initialized()
+        self._map_data[layer]["id"].append(line_object.object_id)
+        self._map_data[layer]["geometry"].append(line_object.shapely_linestring)
+
+
+def _map_ids_to_integer(map_dfs: Dict[MapLayer, gpd.GeoDataFrame]) -> None:
+    """Helper function to remap string IDs to integers in the map dataframes."""
+
+    # initialize id mappings
+    lane_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.LANE]["id"])
+    lane_group_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.LANE_GROUP]["id"])
+    intersection_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.INTERSECTION]["id"])
+
+    walkway_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.WALKWAY]["id"])
+    carpark_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.CARPARK]["id"])
+    generic_drivable_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.GENERIC_DRIVABLE]["id"])
+    road_line_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.ROAD_LINE]["id"])
+    road_edge_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.ROAD_EDGE]["id"])
+
+    # 1. Remap lane ids in LANE layer
+    if len(map_dfs[MapLayer.LANE]) > 0:
+        map_dfs[MapLayer.LANE]["id"] = map_dfs[MapLayer.LANE]["id"].apply(lambda x: lane_id_mapping.map(x))
+        map_dfs[MapLayer.LANE]["lane_group_id"] = map_dfs[MapLayer.LANE]["lane_group_id"].apply(
+            lambda x: lane_group_id_mapping.map(x)
+        )
+        for column in ["predecessor_ids", "successor_ids"]:
+            map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply(lambda x: lane_id_mapping.map_list(x))
+        for column in ["left_lane_id", "right_lane_id"]:
+            map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply(lambda x: lane_id_mapping.map(x))
+
+    # 2. Remap lane group ids in LANE_GROUP
+    if len(map_dfs[MapLayer.LANE_GROUP]) > 0:
+        map_dfs[MapLayer.LANE_GROUP]["id"] = map_dfs[MapLayer.LANE_GROUP]["id"].apply(
+            lambda x: lane_group_id_mapping.map(x)
+        )
+        map_dfs[MapLayer.LANE_GROUP]["lane_ids"] = map_dfs[MapLayer.LANE_GROUP]["lane_ids"].apply(
+            lambda x: lane_id_mapping.map_list(x)
+        )
+        map_dfs[MapLayer.LANE_GROUP]["intersection_id"] = map_dfs[MapLayer.LANE_GROUP]["intersection_id"].apply(
+            lambda x: intersection_id_mapping.map(x)
+        )
+        for column in ["predecessor_ids", "successor_ids"]:
+            map_dfs[MapLayer.LANE_GROUP][column] = map_dfs[MapLayer.LANE_GROUP][column].apply(
+                lambda x: lane_group_id_mapping.map_list(x)
+            )
+
+    # 3.
Remap lane group ids in INTERSECTION + if len(map_dfs[MapLayer.INTERSECTION]) > 0: + map_dfs[MapLayer.INTERSECTION]["id"] = map_dfs[MapLayer.INTERSECTION]["id"].apply( + lambda x: intersection_id_mapping.map(x) + ) + map_dfs[MapLayer.INTERSECTION]["lane_group_ids"] = map_dfs[MapLayer.INTERSECTION]["lane_group_ids"].apply( + lambda x: lane_group_id_mapping.map_list(x) + ) + + # 4. Remap ids in other layers + if len(map_dfs[MapLayer.WALKWAY]) > 0: + map_dfs[MapLayer.WALKWAY]["id"] = map_dfs[MapLayer.WALKWAY]["id"].apply(lambda x: walkway_id_mapping.map(x)) + if len(map_dfs[MapLayer.CARPARK]) > 0: + map_dfs[MapLayer.CARPARK]["id"] = map_dfs[MapLayer.CARPARK]["id"].apply(lambda x: carpark_id_mapping.map(x)) + if len(map_dfs[MapLayer.GENERIC_DRIVABLE]) > 0: + map_dfs[MapLayer.GENERIC_DRIVABLE]["id"] = map_dfs[MapLayer.GENERIC_DRIVABLE]["id"].apply( + lambda x: generic_drivable_id_mapping.map(x) + ) + if len(map_dfs[MapLayer.ROAD_LINE]) > 0: + map_dfs[MapLayer.ROAD_LINE]["id"] = map_dfs[MapLayer.ROAD_LINE]["id"].apply( + lambda x: road_line_id_mapping.map(x) + ) + if len(map_dfs[MapLayer.ROAD_EDGE]) > 0: + map_dfs[MapLayer.ROAD_EDGE]["id"] = map_dfs[MapLayer.ROAD_EDGE]["id"].apply( + lambda x: road_edge_id_mapping.map(x) + ) diff --git a/src/py123d/conversion/map_writer/utils/gpkg_utils.py b/src/py123d/conversion/map_writer/utils/gpkg_utils.py new file mode 100644 index 00000000..2b9ab334 --- /dev/null +++ b/src/py123d/conversion/map_writer/utils/gpkg_utils.py @@ -0,0 +1,56 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Dict, List, Optional + +import pandas as pd + + +@dataclass +class IntIDMapping: + + str_to_int: Dict[str, int] + + def __post_init__(self): + self.int_to_str = {v: k for k, v in self.str_to_int.items()} + + @classmethod + def from_series(cls, series: pd.Series) -> IntIDMapping: + # Drop NaN values and convert all to strings + unique_ids = series.dropna().astype(str).unique() + str_to_int = {str_id: idx for idx, str_id in enumerate(unique_ids)} + return IntIDMapping(str_to_int) + + def map(self, str_like: Any) -> Optional[int]: + # Handle NaN and None values + if pd.isna(str_like) or str_like is None: + return None + + # Convert to string for uniform handling + str_key = str(str_like) + return self.str_to_int.get(str_key, None) + + def map_list(self, id_list: Optional[List[str]]) -> List[int]: + if id_list is None: + return [] + list_ = [] + for id_str in id_list: + mapped_id = self.map(id_str) + if mapped_id is not None: + list_.append(mapped_id) + return list_ + + +class IncrementalIntIDMapping: + + def __init__(self): + self.str_to_int: Dict[str, int] = {} + self.int_to_str: Dict[int, str] = {} + self.next_id: int = 0 + + def get_int_id(self, str_id: str) -> int: + if str_id not in self.str_to_int: + self.str_to_int[str_id] = self.next_id + self.int_to_str[self.next_id] = str_id + self.next_id += 1 + return self.str_to_int[str_id] diff --git a/d123/common/visualization/__init__.py b/src/py123d/conversion/registry/__init__.py similarity index 100% rename from d123/common/visualization/__init__.py rename to src/py123d/conversion/registry/__init__.py diff --git a/src/py123d/conversion/registry/box_detection_label_registry.py b/src/py123d/conversion/registry/box_detection_label_registry.py new file mode 100644 index 00000000..0d39e2d3 --- /dev/null +++ b/src/py123d/conversion/registry/box_detection_label_registry.py @@ -0,0 +1,351 @@ +from __future__ import annotations + +import abc + +from py123d.common.utils.enums import 
SerialIntEnum

+BOX_DETECTION_LABEL_REGISTRY = {}
+
+
+def register_box_detection_label(enum_class):
+    BOX_DETECTION_LABEL_REGISTRY[enum_class.__name__] = enum_class
+    return enum_class
+
+
+class BoxDetectionLabel(SerialIntEnum):
+    """Base class for dataset-specific box detection labels."""
+
+    @abc.abstractmethod
+    def to_default(self) -> DefaultBoxDetectionLabel:
+        raise NotImplementedError("Subclasses must implement this method.")
+
+
+@register_box_detection_label
+class DefaultBoxDetectionLabel(BoxDetectionLabel):
+    """
+    Enum for agents in py123d.
+    """
+
+    VEHICLE = 0
+    BICYCLE = 1
+    PEDESTRIAN = 2
+
+    TRAFFIC_CONE = 3
+    BARRIER = 4
+    CZONE_SIGN = 5
+    GENERIC_OBJECT = 6
+
+    EGO = 7
+    SIGN = 8  # TODO: Remove or extend
+
+    def to_default(self) -> DefaultBoxDetectionLabel:
+        """Inherited, see superclass."""
+        return self
+
+
+@register_box_detection_label
+class AV2SensorBoxDetectionLabel(BoxDetectionLabel):
+    """Argoverse 2 Sensor dataset annotation categories."""
+
+    ANIMAL = 1
+    ARTICULATED_BUS = 2
+    BICYCLE = 3
+    BICYCLIST = 4
+    BOLLARD = 5
+    BOX_TRUCK = 6
+    BUS = 7
+    CONSTRUCTION_BARREL = 8
+    CONSTRUCTION_CONE = 9
+    DOG = 10
+    LARGE_VEHICLE = 11
+    MESSAGE_BOARD_TRAILER = 12
+    MOBILE_PEDESTRIAN_CROSSING_SIGN = 13
+    MOTORCYCLE = 14
+    MOTORCYCLIST = 15
+    OFFICIAL_SIGNALER = 16
+    PEDESTRIAN = 17
+    RAILED_VEHICLE = 18
+    REGULAR_VEHICLE = 19
+    SCHOOL_BUS = 20
+    SIGN = 21
+    STOP_SIGN = 22
+    STROLLER = 23
+    TRAFFIC_LIGHT_TRAILER = 24
+    TRUCK = 25
+    TRUCK_CAB = 26
+    VEHICULAR_TRAILER = 27
+    WHEELCHAIR = 28
+    WHEELED_DEVICE = 29
+    WHEELED_RIDER = 30
+
+    def to_default(self) -> DefaultBoxDetectionLabel:
+        """Inherited, see superclass."""
+        mapping = {
+            AV2SensorBoxDetectionLabel.ANIMAL: DefaultBoxDetectionLabel.GENERIC_OBJECT,
+            AV2SensorBoxDetectionLabel.ARTICULATED_BUS: DefaultBoxDetectionLabel.VEHICLE,
+            AV2SensorBoxDetectionLabel.BICYCLE: DefaultBoxDetectionLabel.BICYCLE,
+            AV2SensorBoxDetectionLabel.BICYCLIST: DefaultBoxDetectionLabel.PEDESTRIAN,
+            AV2SensorBoxDetectionLabel.BOLLARD: DefaultBoxDetectionLabel.BARRIER,
+            AV2SensorBoxDetectionLabel.BOX_TRUCK: DefaultBoxDetectionLabel.VEHICLE,
+            AV2SensorBoxDetectionLabel.BUS: DefaultBoxDetectionLabel.VEHICLE,
+            AV2SensorBoxDetectionLabel.CONSTRUCTION_BARREL: DefaultBoxDetectionLabel.BARRIER,
+            AV2SensorBoxDetectionLabel.CONSTRUCTION_CONE: DefaultBoxDetectionLabel.TRAFFIC_CONE,
+            AV2SensorBoxDetectionLabel.DOG: DefaultBoxDetectionLabel.GENERIC_OBJECT,
+            AV2SensorBoxDetectionLabel.LARGE_VEHICLE: DefaultBoxDetectionLabel.VEHICLE,
+            AV2SensorBoxDetectionLabel.MESSAGE_BOARD_TRAILER: DefaultBoxDetectionLabel.VEHICLE,
+            AV2SensorBoxDetectionLabel.MOBILE_PEDESTRIAN_CROSSING_SIGN: DefaultBoxDetectionLabel.CZONE_SIGN,
+            AV2SensorBoxDetectionLabel.MOTORCYCLE: DefaultBoxDetectionLabel.BICYCLE,
+            AV2SensorBoxDetectionLabel.MOTORCYCLIST: DefaultBoxDetectionLabel.BICYCLE,
+            AV2SensorBoxDetectionLabel.OFFICIAL_SIGNALER: DefaultBoxDetectionLabel.PEDESTRIAN,
+            AV2SensorBoxDetectionLabel.PEDESTRIAN: DefaultBoxDetectionLabel.PEDESTRIAN,
+            AV2SensorBoxDetectionLabel.RAILED_VEHICLE: DefaultBoxDetectionLabel.VEHICLE,
+            AV2SensorBoxDetectionLabel.REGULAR_VEHICLE: DefaultBoxDetectionLabel.VEHICLE,
+            AV2SensorBoxDetectionLabel.SCHOOL_BUS: DefaultBoxDetectionLabel.VEHICLE,
+            AV2SensorBoxDetectionLabel.SIGN: DefaultBoxDetectionLabel.SIGN,
+            AV2SensorBoxDetectionLabel.STOP_SIGN: DefaultBoxDetectionLabel.SIGN,
+            AV2SensorBoxDetectionLabel.STROLLER: DefaultBoxDetectionLabel.PEDESTRIAN,
+            AV2SensorBoxDetectionLabel.TRAFFIC_LIGHT_TRAILER: DefaultBoxDetectionLabel.VEHICLE,
+            AV2SensorBoxDetectionLabel.TRUCK: DefaultBoxDetectionLabel.VEHICLE,
+ AV2SensorBoxDetectionLabel.TRUCK_CAB: DefaultBoxDetectionLabel.VEHICLE, + AV2SensorBoxDetectionLabel.VEHICULAR_TRAILER: DefaultBoxDetectionLabel.VEHICLE, + AV2SensorBoxDetectionLabel.WHEELCHAIR: DefaultBoxDetectionLabel.PEDESTRIAN, + AV2SensorBoxDetectionLabel.WHEELED_DEVICE: DefaultBoxDetectionLabel.GENERIC_OBJECT, + AV2SensorBoxDetectionLabel.WHEELED_RIDER: DefaultBoxDetectionLabel.BICYCLE, + } + return mapping[self] + + +@register_box_detection_label +class KITTI360BoxDetectionLabel(BoxDetectionLabel): + + BICYCLE = 0 + BOX = 1 + BUS = 2 + CAR = 3 + CARAVAN = 4 + LAMP = 5 + MOTORCYCLE = 6 + PERSON = 7 + POLE = 8 + RIDER = 9 + SMALLPOLE = 10 + STOP = 11 + TRAFFIC_LIGHT = 12 + TRAFFIC_SIGN = 13 + TRAILER = 14 + TRAIN = 15 + TRASH_BIN = 16 + TRUCK = 17 + VENDING_MACHINE = 18 + + def to_default(self) -> DefaultBoxDetectionLabel: + mapping = { + KITTI360BoxDetectionLabel.BICYCLE: DefaultBoxDetectionLabel.BICYCLE, + KITTI360BoxDetectionLabel.BOX: DefaultBoxDetectionLabel.GENERIC_OBJECT, + KITTI360BoxDetectionLabel.BUS: DefaultBoxDetectionLabel.VEHICLE, + KITTI360BoxDetectionLabel.CAR: DefaultBoxDetectionLabel.VEHICLE, + KITTI360BoxDetectionLabel.CARAVAN: DefaultBoxDetectionLabel.VEHICLE, + KITTI360BoxDetectionLabel.LAMP: DefaultBoxDetectionLabel.GENERIC_OBJECT, + KITTI360BoxDetectionLabel.MOTORCYCLE: DefaultBoxDetectionLabel.BICYCLE, + KITTI360BoxDetectionLabel.PERSON: DefaultBoxDetectionLabel.PEDESTRIAN, + KITTI360BoxDetectionLabel.POLE: DefaultBoxDetectionLabel.GENERIC_OBJECT, + KITTI360BoxDetectionLabel.RIDER: DefaultBoxDetectionLabel.BICYCLE, + KITTI360BoxDetectionLabel.SMALLPOLE: DefaultBoxDetectionLabel.GENERIC_OBJECT, + KITTI360BoxDetectionLabel.STOP: DefaultBoxDetectionLabel.SIGN, + KITTI360BoxDetectionLabel.TRAFFIC_LIGHT: DefaultBoxDetectionLabel.SIGN, + KITTI360BoxDetectionLabel.TRAFFIC_SIGN: DefaultBoxDetectionLabel.SIGN, + KITTI360BoxDetectionLabel.TRAILER: DefaultBoxDetectionLabel.VEHICLE, + KITTI360BoxDetectionLabel.TRAIN: DefaultBoxDetectionLabel.VEHICLE, + KITTI360BoxDetectionLabel.TRASH_BIN: DefaultBoxDetectionLabel.GENERIC_OBJECT, + KITTI360BoxDetectionLabel.TRUCK: DefaultBoxDetectionLabel.VEHICLE, + KITTI360BoxDetectionLabel.VENDING_MACHINE: DefaultBoxDetectionLabel.GENERIC_OBJECT, + } + return mapping[self] + + +@register_box_detection_label +class NuPlanBoxDetectionLabel(BoxDetectionLabel): + """ + Semantic labels for nuPlan bounding box detections. + + Descriptions in `.db` files: + - vehicle: Includes all four or more wheeled vehicles, as well as trailers. + - bicycle: Includes bicycles, motorcycles and tricycles. + - pedestrian: All types of pedestrians, incl. strollers and wheelchairs. + - traffic_cone: Cones that are temporarily placed to control the flow of traffic. + - barrier: Solid barriers that can be either temporary or permanent. + - czone_sign: Temporary signs that indicate construction zones. + - generic_object: Animals, debris, pushable/pullable objects, permanent poles. 
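+
+    A minimal conversion sketch:
+
+        NuPlanBoxDetectionLabel.VEHICLE.to_default()  # -> DefaultBoxDetectionLabel.VEHICLE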
+ """ + + VEHICLE = 0 + BICYCLE = 1 + PEDESTRIAN = 2 + TRAFFIC_CONE = 3 + BARRIER = 4 + CZONE_SIGN = 5 + GENERIC_OBJECT = 6 + + def to_default(self) -> DefaultBoxDetectionLabel: + mapping = { + NuPlanBoxDetectionLabel.VEHICLE: DefaultBoxDetectionLabel.VEHICLE, + NuPlanBoxDetectionLabel.BICYCLE: DefaultBoxDetectionLabel.BICYCLE, + NuPlanBoxDetectionLabel.PEDESTRIAN: DefaultBoxDetectionLabel.PEDESTRIAN, + NuPlanBoxDetectionLabel.TRAFFIC_CONE: DefaultBoxDetectionLabel.TRAFFIC_CONE, + NuPlanBoxDetectionLabel.BARRIER: DefaultBoxDetectionLabel.BARRIER, + NuPlanBoxDetectionLabel.CZONE_SIGN: DefaultBoxDetectionLabel.CZONE_SIGN, + NuPlanBoxDetectionLabel.GENERIC_OBJECT: DefaultBoxDetectionLabel.GENERIC_OBJECT, + } + return mapping[self] + + +@register_box_detection_label +class NuScenesBoxDetectionLabel(BoxDetectionLabel): + """ + Semantic labels for nuScenes bounding box detections. + [1] https://github.com/nutonomy/nuscenes-devkit/blob/master/docs/instructions_nuscenes.md#labels + """ + + VEHICLE_CAR = 0 + VEHICLE_TRUCK = 1 + VEHICLE_BUS_BENDY = 2 + VEHICLE_BUS_RIGID = 3 + VEHICLE_CONSTRUCTION = 4 + VEHICLE_EMERGENCY_AMBULANCE = 5 + VEHICLE_EMERGENCY_POLICE = 6 + VEHICLE_TRAILER = 7 + VEHICLE_BICYCLE = 8 + VEHICLE_MOTORCYCLE = 9 + HUMAN_PEDESTRIAN_ADULT = 10 + HUMAN_PEDESTRIAN_CHILD = 11 + HUMAN_PEDESTRIAN_CONSTRUCTION_WORKER = 12 + HUMAN_PEDESTRIAN_PERSONAL_MOBILITY = 13 + HUMAN_PEDESTRIAN_POLICE_OFFICER = 14 + HUMAN_PEDESTRIAN_STROLLER = 15 + HUMAN_PEDESTRIAN_WHEELCHAIR = 16 + MOVABLE_OBJECT_TRAFFICCONE = 17 + MOVABLE_OBJECT_BARRIER = 18 + MOVABLE_OBJECT_PUSHABLE_PULLABLE = 19 + MOVABLE_OBJECT_DEBRIS = 20 + STATIC_OBJECT_BICYCLE_RACK = 21 + ANIMAL = 22 + + def to_default(self): + mapping = { + NuScenesBoxDetectionLabel.VEHICLE_CAR: DefaultBoxDetectionLabel.VEHICLE, + NuScenesBoxDetectionLabel.VEHICLE_TRUCK: DefaultBoxDetectionLabel.VEHICLE, + NuScenesBoxDetectionLabel.VEHICLE_BUS_BENDY: DefaultBoxDetectionLabel.VEHICLE, + NuScenesBoxDetectionLabel.VEHICLE_BUS_RIGID: DefaultBoxDetectionLabel.VEHICLE, + NuScenesBoxDetectionLabel.VEHICLE_CONSTRUCTION: DefaultBoxDetectionLabel.VEHICLE, + NuScenesBoxDetectionLabel.VEHICLE_EMERGENCY_AMBULANCE: DefaultBoxDetectionLabel.VEHICLE, + NuScenesBoxDetectionLabel.VEHICLE_EMERGENCY_POLICE: DefaultBoxDetectionLabel.VEHICLE, + NuScenesBoxDetectionLabel.VEHICLE_TRAILER: DefaultBoxDetectionLabel.VEHICLE, + NuScenesBoxDetectionLabel.VEHICLE_BICYCLE: DefaultBoxDetectionLabel.BICYCLE, + NuScenesBoxDetectionLabel.VEHICLE_MOTORCYCLE: DefaultBoxDetectionLabel.BICYCLE, + NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_ADULT: DefaultBoxDetectionLabel.PEDESTRIAN, + NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_CHILD: DefaultBoxDetectionLabel.PEDESTRIAN, + NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_CONSTRUCTION_WORKER: DefaultBoxDetectionLabel.PEDESTRIAN, + NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_PERSONAL_MOBILITY: DefaultBoxDetectionLabel.PEDESTRIAN, + NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_POLICE_OFFICER: DefaultBoxDetectionLabel.PEDESTRIAN, + NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_STROLLER: DefaultBoxDetectionLabel.PEDESTRIAN, + NuScenesBoxDetectionLabel.HUMAN_PEDESTRIAN_WHEELCHAIR: DefaultBoxDetectionLabel.PEDESTRIAN, + NuScenesBoxDetectionLabel.MOVABLE_OBJECT_TRAFFICCONE: DefaultBoxDetectionLabel.TRAFFIC_CONE, + NuScenesBoxDetectionLabel.MOVABLE_OBJECT_BARRIER: DefaultBoxDetectionLabel.BARRIER, + NuScenesBoxDetectionLabel.MOVABLE_OBJECT_PUSHABLE_PULLABLE: DefaultBoxDetectionLabel.GENERIC_OBJECT, + NuScenesBoxDetectionLabel.MOVABLE_OBJECT_DEBRIS: 
DefaultBoxDetectionLabel.GENERIC_OBJECT, + NuScenesBoxDetectionLabel.STATIC_OBJECT_BICYCLE_RACK: DefaultBoxDetectionLabel.GENERIC_OBJECT, + NuScenesBoxDetectionLabel.ANIMAL: DefaultBoxDetectionLabel.GENERIC_OBJECT, + } + return mapping[self] + + +@register_box_detection_label +class PandasetBoxDetectionLabel(BoxDetectionLabel): + """ + Semantic labels for Pandaset bounding box detections. + [1] https://github.com/scaleapi/pandaset-devkit/blob/master/docs/annotation_instructions_cuboids.pdf + """ + + ANIMALS_BIRD = 0 + ANIMALS_OTHER = 1 + BICYCLE = 2 + BUS = 3 + CAR = 4 + CONES = 5 + CONSTRUCTION_SIGNS = 6 + EMERGENCY_VEHICLE = 7 + MEDIUM_SIZED_TRUCK = 8 + MOTORCYCLE = 9 + MOTORIZED_SCOOTER = 10 + OTHER_VEHICLE_CONSTRUCTION_VEHICLE = 11 + OTHER_VEHICLE_PEDICAB = 12 + OTHER_VEHICLE_UNCOMMON = 13 + PEDESTRIAN = 14 + PEDESTRIAN_WITH_OBJECT = 15 + PERSONAL_MOBILITY_DEVICE = 16 + PICKUP_TRUCK = 17 + PYLONS = 18 + ROAD_BARRIERS = 19 + ROLLING_CONTAINERS = 20 + SEMI_TRUCK = 21 + SIGNS = 22 + TEMPORARY_CONSTRUCTION_BARRIERS = 23 + TOWED_OBJECT = 24 + TRAIN = 25 + TRAM_SUBWAY = 26 + + def to_default(self) -> DefaultBoxDetectionLabel: + mapping = { + PandasetBoxDetectionLabel.ANIMALS_BIRD: DefaultBoxDetectionLabel.GENERIC_OBJECT, # TODO: Adjust default types + PandasetBoxDetectionLabel.ANIMALS_OTHER: DefaultBoxDetectionLabel.GENERIC_OBJECT, # TODO: Adjust default types + PandasetBoxDetectionLabel.BICYCLE: DefaultBoxDetectionLabel.BICYCLE, + PandasetBoxDetectionLabel.BUS: DefaultBoxDetectionLabel.VEHICLE, + PandasetBoxDetectionLabel.CAR: DefaultBoxDetectionLabel.VEHICLE, + PandasetBoxDetectionLabel.CONES: DefaultBoxDetectionLabel.TRAFFIC_CONE, + PandasetBoxDetectionLabel.CONSTRUCTION_SIGNS: DefaultBoxDetectionLabel.CZONE_SIGN, + PandasetBoxDetectionLabel.EMERGENCY_VEHICLE: DefaultBoxDetectionLabel.VEHICLE, + PandasetBoxDetectionLabel.MEDIUM_SIZED_TRUCK: DefaultBoxDetectionLabel.VEHICLE, + PandasetBoxDetectionLabel.MOTORCYCLE: DefaultBoxDetectionLabel.BICYCLE, + PandasetBoxDetectionLabel.MOTORIZED_SCOOTER: DefaultBoxDetectionLabel.BICYCLE, + PandasetBoxDetectionLabel.OTHER_VEHICLE_CONSTRUCTION_VEHICLE: DefaultBoxDetectionLabel.VEHICLE, + PandasetBoxDetectionLabel.OTHER_VEHICLE_PEDICAB: DefaultBoxDetectionLabel.BICYCLE, + PandasetBoxDetectionLabel.OTHER_VEHICLE_UNCOMMON: DefaultBoxDetectionLabel.VEHICLE, + PandasetBoxDetectionLabel.PEDESTRIAN: DefaultBoxDetectionLabel.PEDESTRIAN, + PandasetBoxDetectionLabel.PEDESTRIAN_WITH_OBJECT: DefaultBoxDetectionLabel.PEDESTRIAN, + PandasetBoxDetectionLabel.PERSONAL_MOBILITY_DEVICE: DefaultBoxDetectionLabel.BICYCLE, + PandasetBoxDetectionLabel.PICKUP_TRUCK: DefaultBoxDetectionLabel.VEHICLE, + PandasetBoxDetectionLabel.PYLONS: DefaultBoxDetectionLabel.TRAFFIC_CONE, + PandasetBoxDetectionLabel.ROAD_BARRIERS: DefaultBoxDetectionLabel.BARRIER, + PandasetBoxDetectionLabel.ROLLING_CONTAINERS: DefaultBoxDetectionLabel.GENERIC_OBJECT, + PandasetBoxDetectionLabel.SEMI_TRUCK: DefaultBoxDetectionLabel.VEHICLE, + PandasetBoxDetectionLabel.SIGNS: DefaultBoxDetectionLabel.SIGN, + PandasetBoxDetectionLabel.TEMPORARY_CONSTRUCTION_BARRIERS: DefaultBoxDetectionLabel.BARRIER, + PandasetBoxDetectionLabel.TOWED_OBJECT: DefaultBoxDetectionLabel.VEHICLE, + PandasetBoxDetectionLabel.TRAIN: DefaultBoxDetectionLabel.GENERIC_OBJECT, # TODO: Adjust default types + PandasetBoxDetectionLabel.TRAM_SUBWAY: DefaultBoxDetectionLabel.GENERIC_OBJECT, # TODO: Adjust default types + } + return mapping[self] + + +@register_box_detection_label +class WOPDBoxDetectionLabel(BoxDetectionLabel): + """ + 
Semantic labels for Waymo Open Dataset bounding box detections.
+    [1] https://github.com/waymo-research/waymo-open-dataset/blob/master/docs/labeling_specifications.md
+    [2] https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/label.proto#L63-L69
+    """
+
+    TYPE_UNKNOWN = 0
+    TYPE_VEHICLE = 1
+    TYPE_PEDESTRIAN = 2
+    TYPE_SIGN = 3
+    TYPE_CYCLIST = 4
+
+    def to_default(self) -> DefaultBoxDetectionLabel:
+        mapping = {
+            WOPDBoxDetectionLabel.TYPE_UNKNOWN: DefaultBoxDetectionLabel.GENERIC_OBJECT,
+            WOPDBoxDetectionLabel.TYPE_VEHICLE: DefaultBoxDetectionLabel.VEHICLE,
+            WOPDBoxDetectionLabel.TYPE_PEDESTRIAN: DefaultBoxDetectionLabel.PEDESTRIAN,
+            WOPDBoxDetectionLabel.TYPE_SIGN: DefaultBoxDetectionLabel.SIGN,
+            WOPDBoxDetectionLabel.TYPE_CYCLIST: DefaultBoxDetectionLabel.BICYCLE,
+        }
+        return mapping[self]
diff --git a/d123/common/datatypes/sensor/lidar_index.py b/src/py123d/conversion/registry/lidar_index_registry.py
similarity index 51%
rename from d123/common/datatypes/sensor/lidar_index.py
rename to src/py123d/conversion/registry/lidar_index_registry.py
index 0df92cff..a65903b4 100644
--- a/d123/common/datatypes/sensor/lidar_index.py
+++ b/src/py123d/conversion/registry/lidar_index_registry.py
@@ -1,6 +1,6 @@
 from enum import IntEnum
 
-from d123.common.utils.enums import classproperty
+from py123d.common.utils.enums import classproperty
 
 LIDAR_INDEX_REGISTRY = {}
 
@@ -28,24 +28,23 @@ def XYZ(self) -> slice:
 
 
 @register_lidar_index
-class DefaultLidarIndex(LiDARIndex):
+class DefaultLiDARIndex(LiDARIndex):
     X = 0
     Y = 1
     Z = 2
 
 
 @register_lidar_index
-class NuplanLidarIndex(LiDARIndex):
+class NuPlanLiDARIndex(LiDARIndex):
     X = 0
     Y = 1
     Z = 2
     INTENSITY = 3
     RING = 4
-    ID = 5
 
 
 @register_lidar_index
-class CarlaLidarIndex(LiDARIndex):
+class CARLALiDARIndex(LiDARIndex):
     X = 0
     Y = 1
     Z = 2
@@ -53,10 +52,50 @@ class CarlaLidarIndex(LiDARIndex):
 
 
 @register_lidar_index
-class WopdLidarIndex(LiDARIndex):
+class WOPDLiDARIndex(LiDARIndex):
     RANGE = 0
     INTENSITY = 1
     ELONGATION = 2
     X = 3
     Y = 4
     Z = 5
+
+
+@register_lidar_index
+class Kitti360LiDARIndex(LiDARIndex):
+    X = 0
+    Y = 1
+    Z = 2
+    INTENSITY = 3
+
+
+@register_lidar_index
+class AVSensorLiDARIndex(LiDARIndex):
+    """Argoverse Sensor LiDAR Indexing Scheme.
+
+    NOTE: The LiDAR files also include 'offset_ns', which we do not currently store.
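+
+    A minimal indexing sketch (assuming `pc` is the (N, 4) float32 point cloud array):
+
+        xyz = pc[:, AVSensorLiDARIndex.XYZ]              # (N, 3) positions
+        intensity = pc[:, AVSensorLiDARIndex.INTENSITY]  # (N,) intensity values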
+    """
+
+    X = 0
+    Y = 1
+    Z = 2
+    INTENSITY = 3
+
+
+@register_lidar_index
+class PandasetLiDARIndex(LiDARIndex):
+    """Pandaset LiDAR Indexing Scheme."""
+
+    X = 0
+    Y = 1
+    Z = 2
+    INTENSITY = 3
+
+
+@register_lidar_index
+class NuScenesLiDARIndex(LiDARIndex):
+    X = 0
+    Y = 1
+    Z = 2
+    INTENSITY = 3
+    RING = 4
diff --git a/d123/common/visualization/color/__init__.py b/src/py123d/conversion/sensor_io/__init__.py
similarity index 100%
rename from d123/common/visualization/color/__init__.py
rename to src/py123d/conversion/sensor_io/__init__.py
diff --git a/d123/common/visualization/matplotlib/__init__.py b/src/py123d/conversion/sensor_io/camera/__init__.py
similarity index 100%
rename from d123/common/visualization/matplotlib/__init__.py
rename to src/py123d/conversion/sensor_io/camera/__init__.py
diff --git a/src/py123d/conversion/sensor_io/camera/jpeg_camera_io.py b/src/py123d/conversion/sensor_io/camera/jpeg_camera_io.py
new file mode 100644
index 00000000..d942e729
--- /dev/null
+++ b/src/py123d/conversion/sensor_io/camera/jpeg_camera_io.py
@@ -0,0 +1,29 @@
+from pathlib import Path
+
+import cv2
+import numpy as np
+import numpy.typing as npt
+
+
+def encode_image_as_jpeg_binary(image: npt.NDArray[np.uint8]) -> bytes:
+    _, encoded_img = cv2.imencode(".jpg", image)
+    jpeg_binary = encoded_img.tobytes()
+    return jpeg_binary
+
+
+def decode_image_from_jpeg_binary(jpeg_binary: bytes) -> npt.NDArray[np.uint8]:
+    image = cv2.imdecode(np.frombuffer(jpeg_binary, np.uint8), cv2.IMREAD_UNCHANGED)
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    return image
+
+
+def load_jpeg_binary_from_jpeg_file(jpeg_path: Path) -> bytes:
+    with open(jpeg_path, "rb") as f:
+        jpeg_binary = f.read()
+    return jpeg_binary
+
+
+def load_image_from_jpeg_file(jpeg_path: Path) -> npt.NDArray[np.uint8]:
+    image = cv2.imread(str(jpeg_path), cv2.IMREAD_COLOR)
+    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
+    return image
diff --git a/src/py123d/conversion/sensor_io/camera/mp4_camera_io.py b/src/py123d/conversion/sensor_io/camera/mp4_camera_io.py
new file mode 100644
index 00000000..9cb91aad
--- /dev/null
+++ b/src/py123d/conversion/sensor_io/camera/mp4_camera_io.py
@@ -0,0 +1,133 @@
+from functools import lru_cache
+from pathlib import Path
+from typing import Optional, Union
+
+import cv2
+import numpy as np
+
+
+# TODO: add method of handling camera mp4 io
+def load_image_from_mp4_file() -> None:
+    raise NotImplementedError
+
+
+class MP4Writer:
+    """Write images sequentially to an MP4 video file."""
+
+    def __init__(self, output_path: Union[str, Path], fps: float = 30.0, codec: str = "mp4v"):
+        """
+        Initialize MP4 writer.
+
+        Args:
+            output_path: Path to output MP4 file
+            fps: Frames per second
+            codec: Video codec ('mp4v', 'avc1', 'h264')
+        """
+        self.output_path = Path(output_path)
+        self.fps = fps
+        self.codec = codec
+        self.writer: Optional[cv2.VideoWriter] = None
+        self.frame_size: Optional[tuple] = None
+        self.frame_count = 0
+
+    def write_frame(self, frame: np.ndarray) -> int:
+        """
+        Write a single frame to the video.
+
+        Args:
+            frame: Image as numpy array (RGB format)
+
+        Returns:
+            Zero-based index of the written frame within the video.
+        """
+        frame_idx = int(self.frame_count)
+        if self.writer is None:
+            # Initialize writer with first frame's dimensions
+            h, w = frame.shape[:2]
+            self.frame_size = (w, h)
+            fourcc = cv2.VideoWriter_fourcc(*self.codec)
+            self.output_path.parent.mkdir(parents=True, exist_ok=True)
+            self.writer = cv2.VideoWriter(str(self.output_path), fourcc, self.fps, self.frame_size)
+
+        if frame.shape[:2][::-1] != self.frame_size:
+            raise ValueError(f"Frame size {frame.shape[:2][::-1]} doesn't match " f"video size {self.frame_size}")
+
+        frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+        self.writer.write(frame)
+        self.frame_count += 1
+        return frame_idx
+
+    def close(self):
+        """Release the video writer."""
+        if self.writer is not None:
+            self.writer.release()
+            self.writer = None
+
+
+class MP4Reader:
+    """Read MP4 video with random frame access."""
+
+    def __init__(self, video_path: Union[str, Path], read_all: bool = False):
+        """
+        Initialize MP4 reader.
+
+        Args:
+            video_path: Path to MP4 file
+            read_all: If True, decode and cache all frames in memory on initialization
+        """
+        self.video_path = video_path
+        if not Path(video_path).exists():
+            raise FileNotFoundError(f"Video file not found: {video_path}")
+
+        self.cap = cv2.VideoCapture(str(video_path))
+        if not self.cap.isOpened():
+            raise ValueError(f"Cannot open video file: {video_path}")
+
+        # Get video properties
+        self.frame_count = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT))
+        self.fps = self.cap.get(cv2.CAP_PROP_FPS)
+        self.width = int(self.cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+        self.height = int(self.cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        self.read_all = read_all
+
+        if read_all:
+            self.frames = []
+            for _ in range(self.frame_count):
+                ret, frame = self.cap.read()
+                if not ret:
+                    break
+                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                self.frames.append(frame)
+            self.cap.release()
+            self.cap = None
+
+    def get_frame(self, frame_index: int) -> Optional[np.ndarray]:
+        """
+        Get a specific frame by index.
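+
+        When `read_all` is True, frames are served from the in-memory cache; otherwise, the
+        capture seeks to the requested index before decoding.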
+
+        Args:
+            frame_index: Zero-based frame index
+
+        Returns:
+            Frame as numpy array (RGB format) or None if the frame could not be read
+        """
+
+        if frame_index < 0 or frame_index >= self.frame_count:
+            raise IndexError(f"Frame index {frame_index} out of range " f"[0, {self.frame_count})")
+
+        if self.read_all:
+            return self.frames[frame_index]
+
+        # Set the frame position
+        self.cap.set(cv2.CAP_PROP_POS_FRAMES, frame_index)
+        ret, frame = self.cap.read()
+        if ret:
+            # Convert BGR to RGB for sane convention
+            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+
+        return frame if ret else None
+
+    def __getitem__(self, index: int) -> Optional[np.ndarray]:
+        """Allow indexing like reader[10]"""
+        return self.get_frame(index)
+
+
+@lru_cache(maxsize=64)
+def get_mp4_reader_from_path(mp4_path: str) -> MP4Reader:
+    return MP4Reader(mp4_path, read_all=False)
diff --git a/d123/common/visualization/viser/__init__.py b/src/py123d/conversion/sensor_io/lidar/__init__.py
similarity index 100%
rename from d123/common/visualization/viser/__init__.py
rename to src/py123d/conversion/sensor_io/lidar/__init__.py
diff --git a/src/py123d/conversion/sensor_io/lidar/draco_lidar_io.py b/src/py123d/conversion/sensor_io/lidar/draco_lidar_io.py
new file mode 100644
index 00000000..61473f08
--- /dev/null
+++ b/src/py123d/conversion/sensor_io/lidar/draco_lidar_io.py
@@ -0,0 +1,52 @@
+from typing import Final
+
+import DracoPy
+import numpy as np
+import numpy.typing as npt
+
+from py123d.datatypes.sensors.lidar import LiDAR, LiDARMetadata
+
+# TODO: add to config
+DRACO_QUANTIZATION_BITS: Final[int] = 14
+DRACO_COMPRESSION_LEVEL: Final[int] = 10  # Range: 0 (fastest) to 10 (slowest, best compression)
+DRACO_QUANTIZATION_RANGE: Final[int] = -1  # Use default range
+DRACO_PRESERVE_ORDER: Final[bool] = False
+
+
+def encode_lidar_pc_as_draco_binary(lidar_pc: npt.NDArray[np.float32], lidar_metadata: LiDARMetadata) -> bytes:
+    """Compress LiDAR point cloud data using Draco format.
+
+    :param lidar_pc: The LiDAR point cloud data to compress, as numpy array.
+    :param lidar_metadata: Metadata associated with the LiDAR data.
+    :return: The compressed Draco binary data.
+    """
+    lidar_index = lidar_metadata.lidar_index
+
+    binary = DracoPy.encode(
+        lidar_pc[:, lidar_index.XYZ],
+        quantization_bits=DRACO_QUANTIZATION_BITS,
+        compression_level=DRACO_COMPRESSION_LEVEL,
+        quantization_range=DRACO_QUANTIZATION_RANGE,
+        quantization_origin=None,
+        create_metadata=False,
+        preserve_order=DRACO_PRESERVE_ORDER,
+    )
+
+    return binary
+
+
+def load_lidar_from_draco_binary(draco_binary: bytes, lidar_metadata: LiDARMetadata) -> LiDAR:
+    """Decompress LiDAR point cloud data from Draco format.
+
+    :param draco_binary: The compressed Draco binary data.
+    :param lidar_metadata: Metadata associated with the LiDAR data.
+    :return: The decompressed LiDAR point cloud data as a LiDAR object.
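+
+    A round-trip sketch (assuming `pc` is an (N, F) float32 array laid out according to
+    `lidar_metadata.lidar_index`; note that only the XYZ columns survive the round trip):
+
+        binary = encode_lidar_pc_as_draco_binary(pc, lidar_metadata)
+        lidar = load_lidar_from_draco_binary(binary, lidar_metadata)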
+    """
+
+    # Decompress using Draco
+    mesh = DracoPy.decode(draco_binary)
+    points = mesh.points
+    points = np.array(points, dtype=np.float32)
+
+    return LiDAR(point_cloud=points, metadata=lidar_metadata)
diff --git a/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py b/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py
new file mode 100644
index 00000000..1a9e2583
--- /dev/null
+++ b/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py
@@ -0,0 +1,75 @@
+from pathlib import Path
+from typing import Dict, Optional, Union
+
+import numpy as np
+import numpy.typing as npt
+from omegaconf import DictConfig
+
+from py123d.datatypes.scene.scene_metadata import LogMetadata
+from py123d.datatypes.sensors.lidar import LiDARType
+from py123d.script.utils.dataset_path_utils import get_dataset_paths
+
+DATASET_PATHS: DictConfig = get_dataset_paths()
+DATASET_SENSOR_ROOT: Dict[str, Path] = {
+    "nuplan": DATASET_PATHS.nuplan_sensor_root,
+    "av2-sensor": DATASET_PATHS.av2_sensor_data_root,
+    "wopd": DATASET_PATHS.wopd_data_root,
+    "pandaset": DATASET_PATHS.pandaset_data_root,
+    "kitti360": DATASET_PATHS.kitti360_data_root,
+    "nuscenes": DATASET_PATHS.nuscenes_sensor_root,
+}
+
+
+def load_lidar_pcs_from_file(
+    relative_path: Union[str, Path],
+    log_metadata: LogMetadata,
+    index: Optional[int] = None,
+    sensor_root: Optional[Union[str, Path]] = None,
+) -> Dict[LiDARType, npt.NDArray[np.float32]]:
+    assert relative_path is not None, "Relative path to LiDAR file must be provided."
+
+    if sensor_root is None:
+        assert (
+            log_metadata.dataset in DATASET_SENSOR_ROOT.keys()
+        ), f"Dataset path for sensor loading not found for dataset: {log_metadata.dataset}."
+        sensor_root = DATASET_SENSOR_ROOT[log_metadata.dataset]
+        assert sensor_root is not None, f"Dataset path for sensor loading not found for dataset: {log_metadata.dataset}"
+
+    full_lidar_path = Path(sensor_root) / relative_path
+    assert full_lidar_path.exists(), f"LiDAR file not found: {full_lidar_path}"
+
+    # NOTE: Dataset-specific imports are kept inside the if/elif blocks, so a missing optional dependency only breaks loading for that dataset.
+    if log_metadata.dataset == "nuplan":
+        from py123d.conversion.datasets.nuplan.nuplan_sensor_io import load_nuplan_lidar_pcs_from_file
+
+        lidar_pcs_dict = load_nuplan_lidar_pcs_from_file(full_lidar_path)
+
+    elif log_metadata.dataset == "av2-sensor":
+        from py123d.conversion.datasets.av2.av2_sensor_io import load_av2_sensor_lidar_pcs_from_file
+
+        lidar_pcs_dict = load_av2_sensor_lidar_pcs_from_file(full_lidar_path)
+
+    elif log_metadata.dataset == "wopd":
+        from py123d.conversion.datasets.wopd.waymo_sensor_io import load_wopd_lidar_pcs_from_file
+
+        lidar_pcs_dict = load_wopd_lidar_pcs_from_file(full_lidar_path, index, keep_polar_features=False)
+
+    elif log_metadata.dataset == "pandaset":
+        from py123d.conversion.datasets.pandaset.pandaset_sensor_io import load_pandaset_lidars_pcs_from_file
+
+        lidar_pcs_dict = load_pandaset_lidars_pcs_from_file(full_lidar_path, index)
+
+    elif log_metadata.dataset == "kitti360":
+        from py123d.conversion.datasets.kitti360.kitti360_sensor_io import load_kitti360_lidar_pcs_from_file
+
+        lidar_pcs_dict = load_kitti360_lidar_pcs_from_file(full_lidar_path, log_metadata)
+
+    elif log_metadata.dataset == "nuscenes":
+        from py123d.conversion.datasets.nuscenes.nuscenes_sensor_io import load_nuscenes_lidar_pcs_from_file
+
+        lidar_pcs_dict = load_nuscenes_lidar_pcs_from_file(full_lidar_path, log_metadata)
+
+    else:
+        raise NotImplementedError(f"Loading LiDAR data for dataset {log_metadata.dataset} is not 
implemented.")
+
+    return lidar_pcs_dict
diff --git a/src/py123d/conversion/sensor_io/lidar/laz_lidar_io.py b/src/py123d/conversion/sensor_io/lidar/laz_lidar_io.py
new file mode 100644
index 00000000..b109c7ca
--- /dev/null
+++ b/src/py123d/conversion/sensor_io/lidar/laz_lidar_io.py
@@ -0,0 +1,71 @@
+import io
+
+import laspy
+import numpy as np
+import numpy.typing as npt
+
+from py123d.datatypes.sensors.lidar import LiDAR, LiDARMetadata
+
+
+def encode_lidar_pc_as_laz_binary(point_cloud: npt.NDArray[np.float32], lidar_metadata: LiDARMetadata) -> bytes:
+    """Compress LiDAR point cloud data using LAZ format.
+
+    :param point_cloud: The LiDAR point cloud data to compress, as numpy array.
+    :param lidar_metadata: Metadata associated with the LiDAR data.
+    :return: The compressed LAZ binary data.
+    """
+
+    lidar_index = lidar_metadata.lidar_index
+
+    # Create a LAS file in memory, and populate it with point cloud data
+    las = laspy.create(point_format=3, file_version="1.4")
+    las.x = point_cloud[:, lidar_index.X]
+    las.y = point_cloud[:, lidar_index.Y]
+    las.z = point_cloud[:, lidar_index.Z]
+
+    # Add additional LiDAR features if present
+    for feature in lidar_metadata.lidar_index:
+        if feature.name in ["X", "Y", "Z"]:
+            continue  # Already saved above
+        las.add_extra_dim(laspy.ExtraBytesParams(name=feature.name, type="float32"))
+        las[feature.name] = point_cloud[:, feature.value]
+
+    # Write to memory buffer and return compressed binary data
+    buffer = io.BytesIO()
+    las.write(buffer, do_compress=True)
+    laz_binary = buffer.getvalue()
+
+    return laz_binary
+
+
+def load_lidar_from_laz_binary(laz_binary: bytes, lidar_metadata: LiDARMetadata) -> LiDAR:
+    """Decompress LiDAR point cloud data from LAZ format.
+
+    :param laz_binary: The compressed LAZ binary data.
+    :param lidar_metadata: Metadata associated with the LiDAR data.
+    :raises ValueError: If the LiDAR features are not found in the LAS file.
+    :return: The decompressed LiDAR point cloud data as a LiDAR object.
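+
+    Example (illustrative round trip; ``pc`` and ``meta`` stand in for a real point cloud and its metadata):
+        >>> laz_binary = encode_lidar_pc_as_laz_binary(pc, meta)
+        >>> lidar = load_lidar_from_laz_binary(laz_binary, meta)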
+    """
+
+    lidar_index = lidar_metadata.lidar_index
+
+    # Read the LAS file from memory buffer
+    buffer = io.BytesIO(laz_binary)
+    las = laspy.read(buffer)
+
+    # Extract the point cloud data
+    xyz = las.xyz
+
+    num_points = len(xyz)
+    point_cloud = np.zeros((num_points, len(lidar_metadata.lidar_index)), dtype=np.float32)
+    point_cloud[:, lidar_index.XYZ] = xyz
+
+    for feature in lidar_index:
+        if feature.name in ["X", "Y", "Z"]:
+            continue  # Already loaded above
+        if hasattr(las, feature.name):
+            point_cloud[:, feature.value] = las[feature.name]
+        else:
+            raise ValueError(f"LiDAR feature {feature.name} not found in LAS file.")
+
+    return LiDAR(point_cloud=point_cloud, metadata=lidar_metadata)
diff --git a/d123/dataset/__init__.py b/src/py123d/conversion/utils/__init__.py
similarity index 100%
rename from d123/dataset/__init__.py
rename to src/py123d/conversion/utils/__init__.py
diff --git a/d123/dataset/conversion/__init__.py b/src/py123d/conversion/utils/map_utils/__init__.py
similarity index 100%
rename from d123/dataset/conversion/__init__.py
rename to src/py123d/conversion/utils/map_utils/__init__.py
diff --git a/d123/dataset/conversion/map/__init__.py b/src/py123d/conversion/utils/map_utils/opendrive/__init__.py
similarity index 100%
rename from d123/dataset/conversion/map/__init__.py
rename to src/py123d/conversion/utils/map_utils/opendrive/__init__.py
diff --git a/src/py123d/conversion/utils/map_utils/opendrive/opendrive_map_conversion.py b/src/py123d/conversion/utils/map_utils/opendrive/opendrive_map_conversion.py
new file mode 100644
index 00000000..2bb831c8
--- /dev/null
+++ b/src/py123d/conversion/utils/map_utils/opendrive/opendrive_map_conversion.py
@@ -0,0 +1,345 @@
+import logging
+from pathlib import Path
+from typing import Dict, Final, List
+
+import numpy as np
+import shapely
+
+from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
+from py123d.conversion.utils.map_utils.opendrive.parser.opendrive import Junction, OpenDrive
+from py123d.conversion.utils.map_utils.opendrive.utils.collection import collect_element_helpers
+from py123d.conversion.utils.map_utils.opendrive.utils.lane_helper import (
+    OpenDriveLaneGroupHelper,
+    OpenDriveLaneHelper,
+)
+from py123d.conversion.utils.map_utils.opendrive.utils.objects_helper import OpenDriveObjectHelper
+from py123d.conversion.utils.map_utils.road_edge.road_edge_2d_utils import (
+    get_road_edge_linear_rings,
+    split_line_geometry_by_max_length,
+)
+from py123d.conversion.utils.map_utils.road_edge.road_edge_3d_utils import (
+    get_road_edges_3d_from_drivable_surfaces,
+    lift_outlines_to_3d,
+)
+from py123d.datatypes.maps.cache.cache_map_objects import (
+    CacheCarpark,
+    CacheCrosswalk,
+    CacheGenericDrivable,
+    CacheIntersection,
+    CacheLane,
+    CacheLaneGroup,
+    CacheRoadEdge,
+    CacheRoadLine,
+    CacheWalkway,
+)
+from py123d.datatypes.maps.map_datatypes import RoadEdgeType, RoadLineType
+from py123d.geometry.geometry_index import Point3DIndex
+from py123d.geometry.polyline import Polyline3D
+
+logger = logging.getLogger(__name__)
+
+MAX_ROAD_EDGE_LENGTH: Final[float] = 100.0  # [m]
+
+
+def convert_xodr_map(
+    xodr_file: Path,
+    map_writer: AbstractMapWriter,
+    interpolation_step_size: float = 1.0,
+    connection_distance_threshold: float = 0.1,
+) -> None:
+
+    opendrive = OpenDrive.parse_from_file(xodr_file)
+
+    _, junction_dict, lane_helper_dict, lane_group_helper_dict, object_helper_dict = collect_element_helpers(
+        opendrive, interpolation_step_size, connection_distance_threshold
+    )
+
+    # Collect 
cached map elements and write them (needed for road edge/line extraction)
+    lanes = _extract_and_write_lanes(lane_group_helper_dict, map_writer)
+    lane_groups = _extract_and_write_lane_groups(lane_group_helper_dict, map_writer)
+    car_parks = _extract_and_write_carparks(lane_helper_dict, map_writer)
+    generic_drivables = _extract_and_write_generic_drivables(lane_helper_dict, map_writer)
+
+    # Write other map elements
+    _write_walkways(lane_helper_dict, map_writer)
+    _write_intersections(junction_dict, lane_group_helper_dict, map_writer)
+    _write_crosswalks(object_helper_dict, map_writer)
+
+    # Extract polyline elements that are inferred from other road surfaces.
+    _write_road_lines(lanes, lane_groups, map_writer)
+    _write_road_edges(
+        lanes=lanes,
+        lane_groups=lane_groups,
+        car_parks=car_parks,
+        generic_drivables=generic_drivables,
+        map_writer=map_writer,
+    )
+
+
+def _extract_and_write_lanes(
+    lane_group_helper_dict: Dict[str, OpenDriveLaneGroupHelper],
+    map_writer: AbstractMapWriter,
+) -> List[CacheLane]:
+
+    lanes: List[CacheLane] = []
+    for lane_group_helper in lane_group_helper_dict.values():
+        lane_group_id = lane_group_helper.lane_group_id
+        lane_helpers = lane_group_helper.lane_helpers
+        num_lanes = len(lane_helpers)
+        # NOTE: Lanes are ordered left to right, i.e. inner to outer
+        for lane_idx, lane_helper in enumerate(lane_helpers):
+            left_lane_id = lane_helpers[lane_idx - 1].lane_id if lane_idx > 0 else None
+            right_lane_id = lane_helpers[lane_idx + 1].lane_id if lane_idx < num_lanes - 1 else None
+            lane = CacheLane(
+                object_id=lane_helper.lane_id,
+                lane_group_id=lane_group_id,
+                left_boundary=lane_helper.inner_polyline_3d,
+                right_boundary=lane_helper.outer_polyline_3d,
+                centerline=lane_helper.center_polyline_3d,
+                left_lane_id=left_lane_id,
+                right_lane_id=right_lane_id,
+                predecessor_ids=lane_helper.predecessor_lane_ids,
+                successor_ids=lane_helper.successor_lane_ids,
+                speed_limit_mps=lane_helper.speed_limit_mps,
+                outline=lane_helper.outline_polyline_3d,
+                geometry=None,
+            )
+            lanes.append(lane)
+            map_writer.write_lane(lane)
+
+    return lanes
+
+
+def _extract_and_write_lane_groups(
+    lane_group_helper_dict: Dict[str, OpenDriveLaneGroupHelper], map_writer: AbstractMapWriter
+) -> List[CacheLaneGroup]:
+
+    lane_groups: List[CacheLaneGroup] = []
+    for lane_group_helper in lane_group_helper_dict.values():
+        lane_group_helper: OpenDriveLaneGroupHelper
+        lane_group = CacheLaneGroup(
+            object_id=lane_group_helper.lane_group_id,
+            lane_ids=[lane_helper.lane_id for lane_helper in lane_group_helper.lane_helpers],
+            left_boundary=lane_group_helper.inner_polyline_3d,
+            right_boundary=lane_group_helper.outer_polyline_3d,
+            intersection_id=lane_group_helper.junction_id,
+            predecessor_ids=lane_group_helper.predecessor_lane_group_ids,
+            successor_ids=lane_group_helper.successor_lane_group_ids,
+            outline=lane_group_helper.outline_polyline_3d,
+            geometry=None,
+        )
+        lane_groups.append(lane_group)
+        map_writer.write_lane_group(lane_group)
+
+    return lane_groups
+
+
+def _write_walkways(lane_helper_dict: Dict[str, OpenDriveLaneHelper], map_writer: AbstractMapWriter) -> None:
+    for lane_helper in lane_helper_dict.values():
+        if lane_helper.type == "sidewalk":
+            map_writer.write_walkway(
+                CacheWalkway(
+                    object_id=lane_helper.lane_id,
+                    outline=lane_helper.outline_polyline_3d,
+                    geometry=None,
+                )
+            )
+
+
+def _extract_and_write_carparks(
+    lane_helper_dict: Dict[str, OpenDriveLaneHelper], map_writer: AbstractMapWriter
+) -> List[CacheCarpark]:
+
+    carparks: List[CacheCarpark] = []
+    for lane_helper in 
lane_helper_dict.values():
+        if lane_helper.type == "parking":
+            carpark = CacheCarpark(
+                object_id=lane_helper.lane_id,
+                outline=lane_helper.outline_polyline_3d,
+                geometry=None,
+            )
+            carparks.append(carpark)
+            map_writer.write_carpark(carpark)
+
+    return carparks
+
+
+def _extract_and_write_generic_drivables(
+    lane_helper_dict: Dict[str, OpenDriveLaneHelper], map_writer: AbstractMapWriter
+) -> List[CacheGenericDrivable]:
+
+    generic_drivables: List[CacheGenericDrivable] = []
+    for lane_helper in lane_helper_dict.values():
+        if lane_helper.type in ["none", "border", "bidirectional"]:
+            generic_drivable = CacheGenericDrivable(
+                object_id=lane_helper.lane_id,
+                outline=lane_helper.outline_polyline_3d,
+                geometry=None,
+            )
+            generic_drivables.append(generic_drivable)
+            map_writer.write_generic_drivable(generic_drivable)
+    return generic_drivables
+
+
+def _write_intersections(
+    junction_dict: Dict[str, Junction],
+    lane_group_helper_dict: Dict[str, OpenDriveLaneGroupHelper],
+    map_writer: AbstractMapWriter,
+) -> None:
+
+    def _find_lane_group_helpers_with_junction_id(junction_id: int) -> List[OpenDriveLaneGroupHelper]:
+        return [
+            lane_group_helper
+            for lane_group_helper in lane_group_helper_dict.values()
+            if lane_group_helper.junction_id == junction_id
+        ]
+
+    for junction in junction_dict.values():
+        lane_group_helpers = _find_lane_group_helpers_with_junction_id(junction.id)
+        lane_group_ids_ = [lane_group_helper.lane_group_id for lane_group_helper in lane_group_helpers]
+        if len(lane_group_ids_) == 0:
+            logger.debug(f"Skipping Junction {junction.id} without lane groups!")
+            continue
+
+        # TODO @DanielDauner: Create a method that extracts 3D outlines of intersections.
+        outline = _extract_intersection_outline(lane_group_helpers, junction.id)
+        map_writer.write_intersection(
+            CacheIntersection(
+                object_id=junction.id,
+                lane_group_ids=lane_group_ids_,
+                outline=outline,
+                geometry=None,
+            )
+        )
+
+
+def _write_crosswalks(object_helper_dict: Dict[int, OpenDriveObjectHelper], map_writer: AbstractMapWriter) -> None:
+    for object_helper in object_helper_dict.values():
+        map_writer.write_crosswalk(
+            CacheCrosswalk(
+                object_id=object_helper.object_id,
+                outline=object_helper.outline_polyline_3d,
+                geometry=None,
+            )
+        )
+
+
+def _write_road_lines(lanes: List[CacheLane], lane_groups: List[CacheLaneGroup], map_writer: AbstractMapWriter) -> None:
+
+    # NOTE @DanielDauner: This method of extracting road lines is very simplistic and needs improvement.
+    # The OpenDRIVE format provides lane boundary types that could be used here.
+    # Additionally, the logic of inferring road lines is somewhat flawed, e.g., assuming constant types/colors of lines.
+
+    lane_group_on_intersection: Dict[str, bool] = {
+        lane_group.object_id: lane_group.intersection_id is not None for lane_group in lane_groups
+    }
+
+    ids: List[int] = []
+    road_line_types: List[RoadLineType] = []
+    polylines: List[Polyline3D] = []
+
+    running_id = 0
+    for lane in lanes:
+
+        on_intersection = lane_group_on_intersection.get(lane.lane_group_id, False)
+        if on_intersection:
+            # Skip road lines on intersections
+            continue
+
+        if lane.right_lane_id is None:
+            # This is a boundary lane, e.g. 
a border or sidewalk
+            ids.append(running_id)
+            road_line_types.append(RoadLineType.SOLID_WHITE)
+            polylines.append(lane.right_boundary)
+            running_id += 1
+        else:
+            # This is a regular lane
+            ids.append(running_id)
+            road_line_types.append(RoadLineType.DASHED_WHITE)
+            polylines.append(lane.right_boundary)
+            running_id += 1
+        if lane.left_lane_id is None:
+            # This is a boundary lane, e.g. a border or sidewalk
+            ids.append(running_id)
+            road_line_types.append(RoadLineType.SOLID_WHITE)
+            polylines.append(lane.left_boundary)
+            running_id += 1
+
+    for object_id, road_line_type, polyline in zip(ids, road_line_types, polylines):
+        map_writer.write_road_line(
+            CacheRoadLine(
+                object_id=object_id,
+                road_line_type=road_line_type,
+                polyline=polyline,
+            )
+        )
+
+
+def _write_road_edges(
+    lanes: List[CacheLane],
+    lane_groups: List[CacheLaneGroup],
+    car_parks: List[CacheCarpark],
+    generic_drivables: List[CacheGenericDrivable],
+    map_writer: AbstractMapWriter,
+) -> None:
+
+    road_edges_ = get_road_edges_3d_from_drivable_surfaces(
+        lanes=lanes,
+        lane_groups=lane_groups,
+        car_parks=car_parks,
+        generic_drivables=generic_drivables,
+    )
+    road_edge_linestrings = split_line_geometry_by_max_length(
+        [road_edges.linestring for road_edges in road_edges_], MAX_ROAD_EDGE_LENGTH
+    )
+
+    running_id = 0
+    for road_edge_linestring in road_edge_linestrings:
+        # TODO @DanielDauner: Figure out if other types should/could be assigned here.
+        map_writer.write_road_edge(
+            CacheRoadEdge(
+                object_id=running_id,
+                road_edge_type=RoadEdgeType.ROAD_EDGE_BOUNDARY,
+                polyline=Polyline3D.from_linestring(road_edge_linestring),
+            )
+        )
+        running_id += 1
+
+
+def _extract_intersection_outline(lane_group_helpers: List[OpenDriveLaneGroupHelper], junction_id: str) -> Polyline3D:
+    """Helper method to extract intersection outline in 3D from lane group helpers."""
+
+    # 1. Extract the intersection outlines in 2D
+    intersection_polygons: List[shapely.Polygon] = [
+        lane_group_helper.shapely_polygon for lane_group_helper in lane_group_helpers
+    ]
+    intersection_edges = get_road_edge_linear_rings(
+        intersection_polygons,
+        buffer_distance=0.25,
+        add_interiors=False,
+    )
+
+    # 2. Lift the 2D outlines to 3D
+    lane_group_outlines: List[Polyline3D] = [
+        lane_group_helper.outline_polyline_3d for lane_group_helper in lane_group_helpers
+    ]
+    intersection_outlines = lift_outlines_to_3d(intersection_edges, lane_group_outlines)
+
+    # NOTE: When the intersection has multiple non-overlapping outlines, we cannot return a single outline in 3D.
+    # For now, we return the longest outline.
+    valid_outlines = [outline for outline in intersection_outlines if outline.array.shape[0] > 2]
+    if len(valid_outlines) == 0:
+        logger.warning(
+            f"Could not extract valid outline for intersection {junction_id} with {len(intersection_edges)} edges!"
+ ) + longest_outline_2d = max(intersection_edges, key=lambda outline: outline.length) + average_z = sum(outline.array[:, 2].mean() for outline in intersection_outlines) / len(intersection_outlines) + + outline_3d_array = np.zeros((len(longest_outline_2d.coords), 3)) + outline_3d_array[:, Point3DIndex.XY] = np.array(longest_outline_2d.coords) + outline_3d_array[:, Point3DIndex.Z] = average_z + longest_outline = Polyline3D.from_array(outline_3d_array) + else: + longest_outline = max(valid_outlines, key=lambda outline: outline.length) + + return longest_outline diff --git a/d123/dataset/conversion/map/opendrive/__init__.py b/src/py123d/conversion/utils/map_utils/opendrive/parser/__init__.py similarity index 100% rename from d123/dataset/conversion/map/opendrive/__init__.py rename to src/py123d/conversion/utils/map_utils/opendrive/parser/__init__.py diff --git a/d123/dataset/conversion/map/opendrive/parser/elevation.py b/src/py123d/conversion/utils/map_utils/opendrive/parser/elevation.py similarity index 97% rename from d123/dataset/conversion/map/opendrive/parser/elevation.py rename to src/py123d/conversion/utils/map_utils/opendrive/parser/elevation.py index 7ee7aa67..2a1261ca 100644 --- a/d123/dataset/conversion/map/opendrive/parser/elevation.py +++ b/src/py123d/conversion/utils/map_utils/opendrive/parser/elevation.py @@ -4,7 +4,7 @@ from typing import List, Optional from xml.etree.ElementTree import Element -from d123.dataset.conversion.map.opendrive.parser.polynomial import Polynomial +from py123d.conversion.utils.map_utils.opendrive.parser.polynomial import Polynomial @dataclass diff --git a/d123/dataset/conversion/map/opendrive/parser/geometry.py b/src/py123d/conversion/utils/map_utils/opendrive/parser/geometry.py similarity index 99% rename from d123/dataset/conversion/map/opendrive/parser/geometry.py rename to src/py123d/conversion/utils/map_utils/opendrive/parser/geometry.py index c1f337c0..3ddb24a8 100644 --- a/d123/dataset/conversion/map/opendrive/parser/geometry.py +++ b/src/py123d/conversion/utils/map_utils/opendrive/parser/geometry.py @@ -8,7 +8,7 @@ import numpy.typing as npt from scipy.special import fresnel -from d123.common.geometry.base import StateSE2Index +from py123d.geometry import StateSE2Index @dataclass diff --git a/d123/dataset/conversion/map/opendrive/parser/lane.py b/src/py123d/conversion/utils/map_utils/opendrive/parser/lane.py similarity index 98% rename from d123/dataset/conversion/map/opendrive/parser/lane.py rename to src/py123d/conversion/utils/map_utils/opendrive/parser/lane.py index ccbfc9ff..bb6df9f5 100644 --- a/d123/dataset/conversion/map/opendrive/parser/lane.py +++ b/src/py123d/conversion/utils/map_utils/opendrive/parser/lane.py @@ -4,7 +4,7 @@ from typing import List, Optional from xml.etree.ElementTree import Element -from d123.dataset.conversion.map.opendrive.parser.polynomial import Polynomial +from py123d.conversion.utils.map_utils.opendrive.parser.polynomial import Polynomial @dataclass diff --git a/d123/dataset/conversion/map/opendrive/parser/objects.py b/src/py123d/conversion/utils/map_utils/opendrive/parser/objects.py similarity index 100% rename from d123/dataset/conversion/map/opendrive/parser/objects.py rename to src/py123d/conversion/utils/map_utils/opendrive/parser/objects.py diff --git a/d123/dataset/conversion/map/opendrive/parser/opendrive.py b/src/py123d/conversion/utils/map_utils/opendrive/parser/opendrive.py similarity index 98% rename from d123/dataset/conversion/map/opendrive/parser/opendrive.py rename to 
src/py123d/conversion/utils/map_utils/opendrive/parser/opendrive.py index 2a07bb5f..719f7bf3 100644 --- a/d123/dataset/conversion/map/opendrive/parser/opendrive.py +++ b/src/py123d/conversion/utils/map_utils/opendrive/parser/opendrive.py @@ -6,7 +6,7 @@ from typing import List, Literal, Optional from xml.etree.ElementTree import Element, parse -from d123.dataset.conversion.map.opendrive.parser.road import Road +from py123d.conversion.utils.map_utils.opendrive.parser.road import Road @dataclass diff --git a/d123/dataset/conversion/map/opendrive/parser/polynomial.py b/src/py123d/conversion/utils/map_utils/opendrive/parser/polynomial.py similarity index 100% rename from d123/dataset/conversion/map/opendrive/parser/polynomial.py rename to src/py123d/conversion/utils/map_utils/opendrive/parser/polynomial.py diff --git a/d123/dataset/conversion/map/opendrive/parser/reference.py b/src/py123d/conversion/utils/map_utils/opendrive/parser/reference.py similarity index 86% rename from d123/dataset/conversion/map/opendrive/parser/reference.py rename to src/py123d/conversion/utils/map_utils/opendrive/parser/reference.py index b4c88dc1..9ea3f0f0 100644 --- a/d123/dataset/conversion/map/opendrive/parser/reference.py +++ b/src/py123d/conversion/utils/map_utils/opendrive/parser/reference.py @@ -9,11 +9,11 @@ import numpy as np import numpy.typing as npt -from d123.common.geometry.base import Point3DIndex, StateSE2Index -from d123.dataset.conversion.map.opendrive.parser.elevation import Elevation -from d123.dataset.conversion.map.opendrive.parser.geometry import Arc, Geometry, Line, Spiral -from d123.dataset.conversion.map.opendrive.parser.lane import LaneOffset, Width -from d123.dataset.conversion.map.opendrive.parser.polynomial import Polynomial +from py123d.conversion.utils.map_utils.opendrive.parser.elevation import Elevation +from py123d.conversion.utils.map_utils.opendrive.parser.geometry import Arc, Geometry, Line, Spiral +from py123d.conversion.utils.map_utils.opendrive.parser.lane import LaneOffset, Width +from py123d.conversion.utils.map_utils.opendrive.parser.polynomial import Polynomial +from py123d.geometry import Point3DIndex, StateSE2Index TOLERANCE: Final[float] = 1e-3 @@ -60,7 +60,7 @@ def interpolate_se2(self, s: float, t: float = 0.0, lane_section_end: bool = Fal s = self.length else: raise ValueError( - f"s={s} is beyond the end of the plan view (length={self.length}) with tolerance={TOLERANCE}." + f"PlanView: s={s} is beyond the end of the plan view (length={self.length}) with tolerance={TOLERANCE}." 
) # Find the geometry segment containing s @@ -131,6 +131,11 @@ def _find_polynomial(s: float, polynomials: List[Polynomial], lane_section_end: out_polynomial = polynomial break + # s_values = np.array([poly.s for poly in polynomials]) + # side = "left" if lane_section_end else "right" + # poly_idx = np.searchsorted(s_values, s, side=side) - 1 + # poly_idx = int(np.clip(poly_idx, 0, len(polynomials) - 1)) + # return polynomials[poly_idx] return out_polynomial def interpolate_se2(self, s: float, t: float = 0.0, lane_section_end: bool = False) -> npt.NDArray[np.float64]: diff --git a/d123/dataset/conversion/map/opendrive/parser/road.py b/src/py123d/conversion/utils/map_utils/opendrive/parser/road.py similarity index 93% rename from d123/dataset/conversion/map/opendrive/parser/road.py rename to src/py123d/conversion/utils/map_utils/opendrive/parser/road.py index e0171429..7db82b69 100644 --- a/d123/dataset/conversion/map/opendrive/parser/road.py +++ b/src/py123d/conversion/utils/map_utils/opendrive/parser/road.py @@ -4,10 +4,10 @@ from typing import List, Optional from xml.etree.ElementTree import Element -from d123.dataset.conversion.map.opendrive.parser.elevation import ElevationProfile, LateralProfile -from d123.dataset.conversion.map.opendrive.parser.lane import Lanes -from d123.dataset.conversion.map.opendrive.parser.objects import Object -from d123.dataset.conversion.map.opendrive.parser.reference import PlanView +from py123d.conversion.utils.map_utils.opendrive.parser.elevation import ElevationProfile, LateralProfile +from py123d.conversion.utils.map_utils.opendrive.parser.lane import Lanes +from py123d.conversion.utils.map_utils.opendrive.parser.objects import Object +from py123d.conversion.utils.map_utils.opendrive.parser.reference import PlanView @dataclass diff --git a/d123/dataset/conversion/map/opendrive/parser/__init__.py b/src/py123d/conversion/utils/map_utils/opendrive/utils/__init__.py similarity index 100% rename from d123/dataset/conversion/map/opendrive/parser/__init__.py rename to src/py123d/conversion/utils/map_utils/opendrive/utils/__init__.py diff --git a/d123/dataset/conversion/map/opendrive/utils/collection.py b/src/py123d/conversion/utils/map_utils/opendrive/utils/collection.py similarity index 96% rename from d123/dataset/conversion/map/opendrive/utils/collection.py rename to src/py123d/conversion/utils/map_utils/opendrive/utils/collection.py index be631445..fc6f96f3 100644 --- a/d123/dataset/conversion/map/opendrive/utils/collection.py +++ b/src/py123d/conversion/utils/map_utils/opendrive/utils/collection.py @@ -3,21 +3,21 @@ import numpy as np -from d123.dataset.conversion.map.opendrive.parser.opendrive import Junction, OpenDrive -from d123.dataset.conversion.map.opendrive.parser.reference import ReferenceLine -from d123.dataset.conversion.map.opendrive.parser.road import Road -from d123.dataset.conversion.map.opendrive.utils.id_system import ( +from py123d.conversion.utils.map_utils.opendrive.parser.opendrive import Junction, OpenDrive +from py123d.conversion.utils.map_utils.opendrive.parser.reference import ReferenceLine +from py123d.conversion.utils.map_utils.opendrive.parser.road import Road +from py123d.conversion.utils.map_utils.opendrive.utils.id_system import ( build_lane_id, derive_lane_section_id, lane_group_id_from_lane_id, road_id_from_lane_group_id, ) -from d123.dataset.conversion.map.opendrive.utils.lane_helper import ( +from py123d.conversion.utils.map_utils.opendrive.utils.lane_helper import ( OpenDriveLaneGroupHelper, OpenDriveLaneHelper, 
lane_section_to_lane_helpers, ) -from d123.dataset.conversion.map.opendrive.utils.objects_helper import OpenDriveObjectHelper, get_object_helper +from py123d.conversion.utils.map_utils.opendrive.utils.objects_helper import OpenDriveObjectHelper, get_object_helper logger = logging.getLogger(__name__) diff --git a/d123/dataset/conversion/map/opendrive/utils/id_system.py b/src/py123d/conversion/utils/map_utils/opendrive/utils/id_system.py similarity index 100% rename from d123/dataset/conversion/map/opendrive/utils/id_system.py rename to src/py123d/conversion/utils/map_utils/opendrive/utils/id_system.py diff --git a/d123/dataset/conversion/map/opendrive/utils/lane_helper.py b/src/py123d/conversion/utils/map_utils/opendrive/utils/lane_helper.py similarity index 68% rename from d123/dataset/conversion/map/opendrive/utils/lane_helper.py rename to src/py123d/conversion/utils/map_utils/opendrive/utils/lane_helper.py index 79e0f0c1..5b8045d2 100644 --- a/d123/dataset/conversion/map/opendrive/utils/lane_helper.py +++ b/src/py123d/conversion/utils/map_utils/opendrive/utils/lane_helper.py @@ -6,16 +6,17 @@ import numpy.typing as npt import shapely -from d123.common.geometry.base import StateSE2Index -from d123.common.geometry.units import kmph_to_mps, mph_to_mps -from d123.dataset.conversion.map.opendrive.parser.lane import Lane, LaneSection -from d123.dataset.conversion.map.opendrive.parser.reference import ReferenceLine -from d123.dataset.conversion.map.opendrive.parser.road import RoadType -from d123.dataset.conversion.map.opendrive.utils.id_system import ( +from py123d.conversion.utils.map_utils.opendrive.parser.lane import Lane, LaneSection +from py123d.conversion.utils.map_utils.opendrive.parser.reference import ReferenceLine +from py123d.conversion.utils.map_utils.opendrive.parser.road import RoadType +from py123d.conversion.utils.map_utils.opendrive.utils.id_system import ( derive_lane_group_id, derive_lane_id, lane_group_id_from_lane_id, ) +from py123d.geometry import StateSE2Index +from py123d.geometry.polyline import Polyline3D, PolylineSE2 +from py123d.geometry.utils.units import kmph_to_mps, mph_to_mps @dataclass @@ -50,9 +51,9 @@ def type(self) -> str: def _s_positions(self) -> npt.NDArray[np.float64]: length = self.s_range[1] - self.s_range[0] _s_positions = np.linspace( - self.s_range[0], - self.s_range[1], - int(np.ceil(length / self.interpolation_step_size)) + 1, + start=self.s_range[0], + stop=self.s_range[1], + num=int(np.ceil(length / self.interpolation_step_size)) + 1, endpoint=True, dtype=np.float64, ) @@ -66,7 +67,7 @@ def _lane_section_end_mask(self) -> npt.NDArray[np.float64]: return lane_section_end_mask @cached_property - def inner_polyline_se2(self) -> npt.NDArray[np.float64]: + def inner_polyline_se2(self) -> PolylineSE2: inner_polyline = np.array( [ self.inner_boundary.interpolate_se2(self.s_inner_offset + s - self.s_range[0], lane_section_end=end) @@ -74,10 +75,11 @@ def inner_polyline_se2(self) -> npt.NDArray[np.float64]: ], dtype=np.float64, ) - return np.flip(inner_polyline, axis=0) if self.id > 0 else inner_polyline + polyline_array = np.flip(inner_polyline, axis=0) if self.id > 0 else inner_polyline + return PolylineSE2.from_array(polyline_array) @cached_property - def inner_polyline_3d(self) -> npt.NDArray[np.float64]: + def inner_polyline_3d(self) -> Polyline3D: inner_polyline = np.array( [ self.inner_boundary.interpolate_3d(self.s_inner_offset + s - self.s_range[0], lane_section_end=end) @@ -85,10 +87,11 @@ def inner_polyline_3d(self) -> npt.NDArray[np.float64]: 
], dtype=np.float64, ) - return np.flip(inner_polyline, axis=0) if self.id > 0 else inner_polyline + polyline_array = np.flip(inner_polyline, axis=0) if self.id > 0 else inner_polyline + return Polyline3D.from_array(polyline_array) @cached_property - def outer_polyline_se2(self) -> npt.NDArray[np.float64]: + def outer_polyline_se2(self) -> PolylineSE2: outer_polyline = np.array( [ self.outer_boundary.interpolate_se2(s - self.s_range[0], lane_section_end=end) @@ -96,10 +99,11 @@ def outer_polyline_se2(self) -> npt.NDArray[np.float64]: ], dtype=np.float64, ) - return np.flip(outer_polyline, axis=0) if self.id > 0 else outer_polyline + polyline_array = np.flip(outer_polyline, axis=0) if self.id > 0 else outer_polyline + return PolylineSE2.from_array(polyline_array) @cached_property - def outer_polyline_3d(self) -> npt.NDArray[np.float64]: + def outer_polyline_3d(self) -> Polyline3D: outer_polyline = np.array( [ self.outer_boundary.interpolate_3d(s - self.s_range[0], lane_section_end=end) @@ -107,32 +111,62 @@ def outer_polyline_3d(self) -> npt.NDArray[np.float64]: ], dtype=np.float64, ) - return np.flip(outer_polyline, axis=0) if self.id > 0 else outer_polyline + polyline_array = np.flip(outer_polyline, axis=0) if self.id > 0 else outer_polyline + return Polyline3D.from_array(polyline_array) @property - def center_polyline_se2(self) -> npt.NDArray[np.float64]: - return np.concatenate([self.inner_polyline_se2[None, ...], self.outer_polyline_se2[None, ...]], axis=0).mean( - axis=0 + def center_polyline_se2(self) -> PolylineSE2: + return PolylineSE2.from_array( + np.concatenate( + [ + self.inner_polyline_se2.array[None, ...], + self.outer_polyline_se2.array[None, ...], + ], + axis=0, + ).mean(axis=0) ) @property - def center_polyline_3d(self) -> npt.NDArray[np.float64]: - return np.concatenate([self.outer_polyline_3d[None, ...], self.inner_polyline_3d[None, ...]], axis=0).mean( - axis=0 + def center_polyline_3d(self) -> Polyline3D: + return Polyline3D.from_array( + np.concatenate( + [ + self.outer_polyline_3d.array[None, ...], + self.inner_polyline_3d.array[None, ...], + ], + axis=0, + ).mean(axis=0) ) @property - def outline_polyline_3d(self) -> npt.NDArray[np.float64]: - inner_polyline = self.inner_polyline_3d[::-1] - outer_polyline = self.outer_polyline_3d - return np.concatenate([inner_polyline, outer_polyline, inner_polyline[None, 0]], axis=0, dtype=np.float64) + def outline_polyline_3d(self) -> Polyline3D: + inner_polyline = self.inner_polyline_3d.array + outer_polyline = self.outer_polyline_3d.array[::-1] + return Polyline3D.from_array( + np.concatenate( + [ + inner_polyline, + outer_polyline, + inner_polyline[None, 0], + ], + axis=0, + dtype=np.float64, + ) + ) @property def shapely_polygon(self) -> shapely.Polygon: - inner_polyline = self.inner_polyline_se2[..., StateSE2Index.XY][::-1] - outer_polyline = self.outer_polyline_se2[..., StateSE2Index.XY] - polygon_exterior = np.concatenate([inner_polyline, outer_polyline], axis=0, dtype=np.float64) - + inner_polyline = self.inner_polyline_se2[..., StateSE2Index.XY] + outer_polyline = self.outer_polyline_se2[..., StateSE2Index.XY][::-1] + polygon_exterior = np.concatenate( + [ + inner_polyline, + outer_polyline, + inner_polyline[None, 0], + ], + axis=0, + dtype=np.float64, + ) return shapely.Polygon(polygon_exterior) @@ -172,26 +206,50 @@ def _get_outer_lane_helper(self) -> OpenDriveLaneHelper: return self.lane_helpers[outer_lane_helper_idx] @cached_property - def inner_polyline_se2(self): + def inner_polyline_se2(self) -> PolylineSE2: 
return self._get_inner_lane_helper().inner_polyline_se2 @cached_property - def outer_polyline_se2(self): + def outer_polyline_se2(self) -> PolylineSE2: return self._get_outer_lane_helper().outer_polyline_se2 @cached_property - def inner_polyline_3d(self): + def inner_polyline_3d(self) -> Polyline3D: return self._get_inner_lane_helper().inner_polyline_3d @cached_property - def outer_polyline_3d(self): + def outer_polyline_3d(self) -> Polyline3D: return self._get_outer_lane_helper().outer_polyline_3d + @property + def outline_polyline_3d(self) -> Polyline3D: + inner_polyline = self.inner_polyline_3d.array + outer_polyline = self.outer_polyline_3d.array[::-1] + return Polyline3D.from_array( + np.concatenate( + [ + inner_polyline, + outer_polyline, + inner_polyline[None, 0], + ], + axis=0, + dtype=np.float64, + ) + ) + @property def shapely_polygon(self) -> shapely.Polygon: - inner_polyline = self.inner_polyline_se2[..., StateSE2Index.XY][::-1] - outer_polyline = self.outer_polyline_se2[..., StateSE2Index.XY] - polygon_exterior = np.concatenate([inner_polyline, outer_polyline], axis=0, dtype=np.float64) + inner_polyline = self.inner_polyline_se2[..., StateSE2Index.XY] + outer_polyline = self.outer_polyline_se2[..., StateSE2Index.XY][::-1] + polygon_exterior = np.concatenate( + [ + inner_polyline, + outer_polyline, + inner_polyline[None, 0], + ], + axis=0, + dtype=np.float64, + ) return shapely.Polygon(polygon_exterior) diff --git a/d123/dataset/conversion/map/opendrive/utils/objects_helper.py b/src/py123d/conversion/utils/map_utils/opendrive/utils/objects_helper.py similarity index 58% rename from d123/dataset/conversion/map/opendrive/utils/objects_helper.py rename to src/py123d/conversion/utils/map_utils/opendrive/utils/objects_helper.py index 3cbb569e..2e0117a4 100644 --- a/d123/dataset/conversion/map/opendrive/utils/objects_helper.py +++ b/src/py123d/conversion/utils/map_utils/opendrive/utils/objects_helper.py @@ -5,13 +5,13 @@ import numpy.typing as npt import shapely -from d123.common.geometry.base import Point2D, Point3D, Point3DIndex, StateSE2 -from d123.common.geometry.transform.tranform_2d import translate_along_yaw -from d123.common.geometry.utils import normalize_angle -from d123.dataset.conversion.map.opendrive.parser.objects import Object -from d123.dataset.conversion.map.opendrive.parser.reference import ReferenceLine - -# TODO: make naming consistent with group_collections.py +from py123d.conversion.utils.map_utils.opendrive.parser.objects import Object +from py123d.conversion.utils.map_utils.opendrive.parser.reference import ReferenceLine +from py123d.geometry import Point3D, Point3DIndex, StateSE2, Vector2D +from py123d.geometry.geometry_index import StateSE2Index +from py123d.geometry.polyline import Polyline3D +from py123d.geometry.transform.transform_se2 import translate_se2_along_body_frame +from py123d.geometry.utils.rotation_utils import normalize_angle @dataclass @@ -24,6 +24,10 @@ def __post_init__(self) -> None: assert self.outline_3d.ndim == 2 assert self.outline_3d.shape[1] == len(Point3DIndex) + @property + def outline_polyline_3d(self) -> Polyline3D: + return Polyline3D.from_array(self.outline_3d) + @property def shapely_polygon(self) -> shapely.Polygon: return shapely.geometry.Polygon(self.outline_3d[:, Point3DIndex.XY]) @@ -39,23 +43,24 @@ def get_object_helper(object: Object, reference_line: ReferenceLine) -> OpenDriv object_3d: Point3D = Point3D.from_array(reference_line.interpolate_3d(s=object.s, t=object.t)) # Adjust yaw angle from object data - 
object_se2.yaw = normalize_angle(object_se2.yaw + object.hdg) + # TODO: Consider adding setters to StateSE2 to make this cleaner + object_se2._array[StateSE2Index.YAW] = normalize_angle(object_se2.yaw + object.hdg) if len(object.outline) == 0: outline_3d = np.zeros((4, len(Point3DIndex)), dtype=np.float64) # Fill XY - outline_3d[0, Point3DIndex.XY] = translate_along_yaw( - object_se2, Point2D(object.length / 2.0, object.width / 2.0) + outline_3d[0, Point3DIndex.XY] = translate_se2_along_body_frame( + object_se2, Vector2D(object.length / 2.0, object.width / 2.0) ).point_2d.array - outline_3d[1, Point3DIndex.XY] = translate_along_yaw( - object_se2, Point2D(object.length / 2.0, -object.width / 2.0) + outline_3d[1, Point3DIndex.XY] = translate_se2_along_body_frame( + object_se2, Vector2D(object.length / 2.0, -object.width / 2.0) ).point_2d.array - outline_3d[2, Point3DIndex.XY] = translate_along_yaw( - object_se2, Point2D(-object.length / 2.0, -object.width / 2.0) + outline_3d[2, Point3DIndex.XY] = translate_se2_along_body_frame( + object_se2, Vector2D(-object.length / 2.0, -object.width / 2.0) ).point_2d.array - outline_3d[3, Point3DIndex.XY] = translate_along_yaw( - object_se2, Point2D(-object.length / 2.0, object.width / 2.0) + outline_3d[3, Point3DIndex.XY] = translate_se2_along_body_frame( + object_se2, Vector2D(-object.length / 2.0, object.width / 2.0) ).point_2d.array # Fill Z @@ -66,8 +71,8 @@ def get_object_helper(object: Object, reference_line: ReferenceLine) -> OpenDriv assert len(object.outline) > 3, f"Object outline must have at least 3 corners, got {len(object.outline)}" outline_3d = np.zeros((len(object.outline), len(Point3DIndex)), dtype=np.float64) for corner_idx, corner_local in enumerate(object.outline): - outline_3d[corner_idx, Point3DIndex.XY] = translate_along_yaw( - object_se2, Point2D(corner_local.u, corner_local.v) + outline_3d[corner_idx, Point3DIndex.XY] = translate_se2_along_body_frame( + object_se2, Vector2D(corner_local.u, corner_local.v) ).point_2d.array outline_3d[corner_idx, Point3DIndex.Z] = object_3d.z + corner_local.z object_helper = OpenDriveObjectHelper(object_id=object.id, outline_3d=outline_3d) diff --git a/d123/dataset/conversion/map/road_edge/__init__.py b/src/py123d/conversion/utils/map_utils/road_edge/__init__.py similarity index 100% rename from d123/dataset/conversion/map/road_edge/__init__.py rename to src/py123d/conversion/utils/map_utils/road_edge/__init__.py diff --git a/src/py123d/conversion/utils/map_utils/road_edge/road_edge_2d_utils.py b/src/py123d/conversion/utils/map_utils/road_edge/road_edge_2d_utils.py new file mode 100644 index 00000000..f8059b12 --- /dev/null +++ b/src/py123d/conversion/utils/map_utils/road_edge/road_edge_2d_utils.py @@ -0,0 +1,107 @@ +from typing import List, Union + +import numpy as np +import shapely +from shapely import LinearRing, LineString, Polygon, box, union_all +from shapely.strtree import STRtree + + +def get_road_edge_linear_rings( + drivable_polygons: List[Polygon], + buffer_distance: float = 0.05, + add_interiors: bool = True, +) -> List[LinearRing]: + """ + Helper function to extract road edges (i.e. linear rings) from drivable area polygons. + TODO: Move and rename for general use. 
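+
+    Example (illustrative; two touching unit squares merge into a single outer ring):
+        >>> rings = get_road_edge_linear_rings([box(0, 0, 1, 1), box(1, 0, 2, 1)])
+        >>> len(rings)
+        1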
+ """ + + def _polygon_to_linear_rings(polygon: Polygon) -> List[LinearRing]: + assert polygon.geom_type == "Polygon" + linear_ring_list = [] + linear_ring_list.append(polygon.exterior) + if add_interiors: + for interior in polygon.interiors: + linear_ring_list.append(interior) + return linear_ring_list + + union_polygon = union_all([polygon.buffer(buffer_distance, join_style=2) for polygon in drivable_polygons]).buffer( + -buffer_distance, join_style=2 + ) + + linear_ring_list = [] + if union_polygon.geom_type == "Polygon": + for polyline in _polygon_to_linear_rings(union_polygon): + linear_ring_list.append(LinearRing(polyline)) + elif union_polygon.geom_type == "MultiPolygon": + for polygon in union_polygon.geoms: + for polyline in _polygon_to_linear_rings(polygon): + linear_ring_list.append(LinearRing(polyline)) + + return linear_ring_list + + +def split_line_geometry_by_max_length( + geometries: Union[LineString, LinearRing, List[Union[LineString, LinearRing]]], + max_length_meters: float, +) -> List[LineString]: + """ + Splits LineString or LinearRing geometries into smaller segments based on a maximum length. + TODO: Move and rename for general use. + """ + + if not isinstance(geometries, list): + geometries = [geometries] + + all_segments = [] + for geom in geometries: + if geom.length <= max_length_meters: + all_segments.append(LineString(geom.coords)) + continue + + num_segments = int(np.ceil(geom.length / max_length_meters)) + segment_length = geom.length / num_segments + + for i in range(num_segments): + start_dist = i * segment_length + end_dist = min((i + 1) * segment_length, geom.length) + segment = shapely.ops.substring(geom, start_dist, end_dist) + all_segments.append(segment) + + return all_segments + + +def split_polygon_by_grid(polygon: Polygon, cell_size: float) -> List[Polygon]: + """ + Split a polygon by grid-like cells of given size. + TODO: Move and rename for general use. 
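+
+    Example (illustrative; a 2x1 rectangle yields two unit cells):
+        >>> cells = split_polygon_by_grid(box(0, 0, 2, 1), cell_size=1.0)
+        >>> len(cells)
+        2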
+ """ + + minx, miny, maxx, maxy = polygon.bounds + + # Generate all grid cells + x_coords = np.arange(minx, maxx, cell_size) + y_coords = np.arange(miny, maxy, cell_size) + + grid_cells = [box(x, y, x + cell_size, y + cell_size) for x in x_coords for y in y_coords] + + # Build spatial index for fast queries + tree = STRtree(grid_cells) + + # Query cells that potentially intersect + candidate_indices = tree.query(polygon, predicate="intersects") + + cells = [] + for idx in candidate_indices: + cell = grid_cells[idx] + intersection = polygon.intersection(cell) + + if intersection.is_empty: + continue + + if intersection.geom_type == "Polygon": + cells.append(intersection) + elif intersection.geom_type == "MultiPolygon": + cells.extend(intersection.geoms) + + return cells diff --git a/src/py123d/conversion/utils/map_utils/road_edge/road_edge_3d_utils.py b/src/py123d/conversion/utils/map_utils/road_edge/road_edge_3d_utils.py new file mode 100644 index 00000000..42d01faf --- /dev/null +++ b/src/py123d/conversion/utils/map_utils/road_edge/road_edge_3d_utils.py @@ -0,0 +1,384 @@ +import logging +from collections import defaultdict +from typing import Dict, List, Set + +import networkx as nx +import numpy as np +import numpy.typing as npt +import shapely +import shapely.geometry as geom + +from py123d.conversion.utils.map_utils.road_edge.road_edge_2d_utils import get_road_edge_linear_rings +from py123d.datatypes.maps.abstract_map_objects import ( + AbstractCarpark, + AbstractGenericDrivable, + AbstractLane, + AbstractLaneGroup, + AbstractSurfaceMapObject, + MapObjectIDType, +) +from py123d.geometry import Point3DIndex +from py123d.geometry.occupancy_map import OccupancyMap2D +from py123d.geometry.polyline import Polyline3D + +logger = logging.getLogger(__name__) + + +def get_road_edges_3d_from_drivable_surfaces( + lanes: List[AbstractLane], + lane_groups: List[AbstractLaneGroup], + car_parks: List[AbstractCarpark], + generic_drivables: List[AbstractGenericDrivable], +) -> List[Polyline3D]: + """Generates 3D road edges from drivable surfaces, i.e., lane groups, car parks, and generic drivables. + This method merges polygons in 2D and lifts them to 3D using the boundaries/outlines of elements. + Conflicting lane groups (e.g., bridges) are merged/lifted separately to ensure correct Z-values. + + :param lanes: A list of lanes in the map. + :param lane_groups: A list of lane groups in the map. + :param car_parks: A list of car parks in the map. + :param generic_drivables: A list of generic drivable areas in the map. + :return: A list of 3D interpolatable polylines representing the road edges. + """ + + # 1. Find conflicting lane groups, e.g. groups of lanes that overlap in 2D but have different Z-values (bridges) + conflicting_lane_groups = _get_conflicting_lane_groups(lane_groups, lanes) + + # 2. Extract road edges in 2D (including conflicting lane groups) + drivable_polygons: List[shapely.Polygon] = [] + for map_surface in lane_groups + car_parks + generic_drivables: + map_surface: AbstractSurfaceMapObject + drivable_polygons.append(map_surface.shapely_polygon) + road_edges_2d = get_road_edge_linear_rings(drivable_polygons) + + # 3. 
Collect 3D boundaries of non-conflicting lane groups and other drivable areas + non_conflicting_boundaries: List[Polyline3D] = [] + for lane_group in lane_groups: + lane_group_id = lane_group.object_id + if lane_group_id not in conflicting_lane_groups.keys(): + non_conflicting_boundaries.append(lane_group.left_boundary) + non_conflicting_boundaries.append(lane_group.right_boundary) + for drivable_surface in car_parks + generic_drivables: + non_conflicting_boundaries.append(drivable_surface.outline) + + # 4. Lift road edges to 3D using the boundaries of non-conflicting elements + non_conflicting_road_edges = lift_road_edges_to_3d(road_edges_2d, non_conflicting_boundaries) + + # 5. Add road edges from conflicting lane groups + resolved_road_edges = _resolve_conflicting_lane_groups(conflicting_lane_groups, lane_groups) + + all_road_edges = non_conflicting_road_edges + resolved_road_edges + + return all_road_edges + + +def _get_conflicting_lane_groups( + lane_groups: List[AbstractLaneGroup], lanes: List[AbstractLane], z_threshold: float = 5.0 +) -> Dict[int, List[int]]: + """Identifies conflicting lane groups based on their 2D footprints and Z-values. + The z-values are inferred from the centerlines of the lanes within each lane group. + + :param lane_groups: List of all lane groups in the map. + :param lanes: List of all lanes in the map. + :param z_threshold: Z-value threshold over which a 2D overlap is considered a conflict. + :return: A dictionary mapping lane group IDs to conflicting lane IDs. + """ + + # Convert to regular dictionaries for simpler access + lane_group_dict: Dict[MapObjectIDType, AbstractLaneGroup] = { + lane_group.object_id: lane_group for lane_group in lane_groups + } + lane_centerline_dict: Dict[MapObjectIDType, Polyline3D] = {lane.object_id: lane.centerline for lane in lanes} + + # Pre-compute all centerlines + centerlines_cache: Dict[MapObjectIDType, npt.NDArray[np.float64]] = {} + polygons: List[geom.Polygon] = [] + ids: List[MapObjectIDType] = [] + + for lane_group_id, lane_group in lane_group_dict.items(): + centerlines = [lane_centerline_dict[lane_id].array for lane_id in lane_group.lane_ids] + centerlines_3d_array = np.concatenate(centerlines, axis=0) + + centerlines_cache[lane_group_id] = centerlines_3d_array + polygons.append(lane_group.shapely_polygon) + ids.append(lane_group_id) + + occupancy_map = OccupancyMap2D(polygons, ids) + conflicting_lane_groups: Dict[MapObjectIDType, List[MapObjectIDType]] = defaultdict(list) + processed_pairs = set() + + for i, lane_group_id in enumerate(ids): + lane_group_polygon = polygons[i] + lane_group_centerlines = centerlines_cache[lane_group_id] + + # Get all intersecting geometries at once + intersecting_ids = occupancy_map.intersects(lane_group_polygon) + intersecting_ids.remove(lane_group_id) + + for intersecting_id in intersecting_ids: + pair_key = tuple(sorted([lane_group_id, intersecting_id])) + if pair_key in processed_pairs: + continue + processed_pairs.add(pair_key) + + intersecting_geometry = occupancy_map[intersecting_id] + if intersecting_geometry.geom_type != "Polygon": + continue + try: + # Compute actual intersection for better centroid + intersection = lane_group_polygon.intersection(intersecting_geometry) + except shapely.errors.GEOSException as e: + logger.debug(f"Error computing intersection for {pair_key}: {e}") + continue + + if intersection.is_empty: + continue + + # NOTE @DanielDauner: We query the centroid of the intersection polygon to get a representative point + # We cannot calculate the 
Z-difference at any area, e.g. due to arcs or complex shapes of bridges. + intersection_centroid = np.array(intersection.centroid.coords[0], dtype=np.float64) + intersecting_centerlines = centerlines_cache[intersecting_id] + + z_at_intersecting = _get_nearest_z_from_points_3d(intersecting_centerlines, intersection_centroid) + z_at_lane_group = _get_nearest_z_from_points_3d(lane_group_centerlines, intersection_centroid) + if np.abs(z_at_lane_group - z_at_intersecting) >= z_threshold: + conflicting_lane_groups[lane_group_id].append(intersecting_id) + conflicting_lane_groups[intersecting_id].append(lane_group_id) + + return conflicting_lane_groups + + +def lift_road_edges_to_3d( + road_edges_2d: List[shapely.LinearRing], + boundaries: List[Polyline3D], + max_distance: float = 0.01, +) -> List[Polyline3D]: + """Lift 2D road edges to 3D by querying elevation from boundary segments. + + :param road_edges_2d: List of 2D road edge geometries. + :param boundaries: List of 3D boundary geometries. + :param max_distance: Maximum 2D distance for edge-boundary association. + :return: List of lifted 3D road edge geometries. + """ + + road_edges_3d: List[Polyline3D] = [] + + if len(road_edges_2d) >= 1 and len(boundaries) >= 1: + + # 1. Build comprehensive spatial index with all boundary segments + # NOTE @DanielDauner: We split each boundary polyline into small segments. + # The spatial indexing uses axis-aligned bounding boxes, where small geometries lead to better performance. + boundary_segments = [] + for boundary in boundaries: + coords = boundary.array.reshape(-1, 1, 3) + segment_coords_boundary = np.concatenate([coords[:-1], coords[1:]], axis=1) + boundary_segments.append(segment_coords_boundary) + + boundary_segments = np.concatenate(boundary_segments, axis=0) + boundary_segment_linestrings = shapely.creation.linestrings(boundary_segments) + occupancy_map = OccupancyMap2D(boundary_segment_linestrings) + + for linear_ring in road_edges_2d: + points_2d = np.array(linear_ring.coords, dtype=np.float64) + points_3d = np.zeros((len(points_2d), len(Point3DIndex)), dtype=np.float64) + points_3d[..., Point3DIndex.XY] = points_2d + + # 3. Batch query for all points + query_points = shapely.creation.points(points_2d) + results = occupancy_map.query_nearest(query_points, max_distance=max_distance, exclusive=True) + + for query_idx, geometry_idx in zip(*results): + query_point = query_points[query_idx] + segment_coords = boundary_segments[geometry_idx] + best_z = _interpolate_z_on_segment(query_point, segment_coords) + points_3d[query_idx, Point3DIndex.Z] = best_z + + continuous_segments = _split_continuous_segments(np.array(results[0])) + for segment_indices in continuous_segments: + if len(segment_indices) >= 2: + segment_points = points_3d[segment_indices] + road_edges_3d.append(Polyline3D.from_array(segment_points)) + + return road_edges_3d + + +def lift_outlines_to_3d( + outlines_2d: List[shapely.LinearRing], + boundaries: List[Polyline3D], + max_distance: float = 10.0, +) -> List[Polyline3D]: + """Lift 2D outlines to 3D by querying elevation from boundary segments. + + :param outlines_2d: List of 2D outline geometries. + :param boundaries: List of 3D boundary geometries. + :param max_distance: Maximum 2D distance for outline-boundary association. + :return: List of lifted 3D outline geometries. 
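+
+    Example (illustrative; ``ring`` is a 2D shapely LinearRing, ``boundary`` a 3D Polyline3D):
+        >>> outlines_3d = lift_outlines_to_3d([ring], [boundary])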
+ """ + + outlines_3d: List[Polyline3D] = [] + if len(outlines_2d) >= 1 and len(boundaries) >= 1: + boundary_segments = [] + for boundary in boundaries: + coords = boundary.array.reshape(-1, 1, 3) + segment_coords_boundary = np.concatenate([coords[:-1], coords[1:]], axis=1) + boundary_segments.append(segment_coords_boundary) + + boundary_segments = np.concatenate(boundary_segments, axis=0) + boundary_segment_linestrings = shapely.creation.linestrings(boundary_segments) + occupancy_map = OccupancyMap2D(boundary_segment_linestrings) + + for linear_ring in outlines_2d: + points_2d = np.array(linear_ring.coords, dtype=np.float64) + points_3d = np.zeros((len(points_2d), len(Point3DIndex)), dtype=np.float64) + points_3d[..., Point3DIndex.XY] = points_2d + + # 3. Batch query for all points + query_points = shapely.creation.points(points_2d) + results = occupancy_map.query_nearest(query_points, max_distance=max_distance, exclusive=True) + + found_nearest = np.zeros(len(points_2d), dtype=bool) + for query_idx, geometry_idx in zip(*results): + query_point = query_points[query_idx] + segment_coords = boundary_segments[geometry_idx] + best_z = _interpolate_z_on_segment(query_point, segment_coords) + points_3d[query_idx, Point3DIndex.Z] = best_z + found_nearest[query_idx] = True + + if not np.all(found_nearest): + logger.warning("Some outline points could not find a nearest boundary segment for Z-lifting.") + points_3d[~found_nearest, Point3DIndex.Z] = np.mean(points_3d[found_nearest, Point3DIndex.Z]) + + outlines_3d.append(Polyline3D.from_array(points_3d)) + + return outlines_3d + + +def _resolve_conflicting_lane_groups( + conflicting_lane_groups: Dict[MapObjectIDType, List[MapObjectIDType]], + lane_groups: List[AbstractLaneGroup], +) -> List[Polyline3D]: + """Resolve conflicting lane groups by merging their geometries. + + :param conflicting_lane_groups: A dictionary mapping lane group IDs to their conflicting lane group IDs. + :param lane_groups: A list of all lane groups. + :return: A list of merged 3D road edge geometries. + """ + + # Helper dictionary for easy access to lane group data + lane_group_dict: Dict[MapObjectIDType, AbstractLaneGroup] = { + lane_group.object_id: lane_group for lane_group in lane_groups + } + + # NOTE @DanielDauner: A non-conflicting set has overlapping lane groups separated into different layers (e.g., bridges). + # For each non-conflicting set, we can repeat the process of merging polygons in 2D and lifting to 3D. + # For edge-continuity, we include the neighboring lane groups (predecessors and successors) as well in the 2D merging + # but only use the original lane group boundaries for lifting to 3D. 
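+    # Illustrative example: if a bridge lane group A overlaps lane groups B and C below it,
+    # the conflict graph has edges A-B and A-C; the bipartite split yields the non-conflicting
+    # sets {A} and {B, C}, which are then merged in 2D and lifted to 3D independently.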
+ + # Split conflicting lane groups into non-conflicting sets for further merging + non_conflicting_sets = _create_non_conflicting_sets(conflicting_lane_groups) + + road_edges_3d: List[Polyline3D] = [] + for non_conflicting_set in non_conflicting_sets: + + # Collect 2D polygons of non-conflicting lane group set and their neighbors + merge_lane_group_data: Dict[MapObjectIDType, geom.Polygon] = {} + for lane_group_id in non_conflicting_set: + merge_lane_group_data[lane_group_id] = lane_group_dict[lane_group_id].shapely_polygon + for neighbor_id in ( + lane_group_dict[lane_group_id].predecessor_ids + lane_group_dict[lane_group_id].successor_ids + ): + merge_lane_group_data[neighbor_id] = lane_group_dict[neighbor_id].shapely_polygon + + # Get 2D road edge linestrings for the non-conflicting set + set_road_edges_2d = get_road_edge_linear_rings(list(merge_lane_group_data.values())) + + # Collect 3D boundaries only of non-conflicting lane groups + set_boundaries_3d: List[Polyline3D] = [] + for lane_group_id in non_conflicting_set: + set_boundaries_3d.append(lane_group_dict[lane_group_id].left_boundary) + set_boundaries_3d.append(lane_group_dict[lane_group_id].right_boundary) + + # Lift road edges to 3D using the boundaries of non-conflicting lane groups + lifted_road_edges_3d = lift_road_edges_to_3d(set_road_edges_2d, set_boundaries_3d) + road_edges_3d.extend(lifted_road_edges_3d) + + return road_edges_3d + + +def _get_nearest_z_from_points_3d(points_3d: npt.NDArray[np.float64], query_point: npt.NDArray[np.float64]) -> float: + """Helper function to get the Z-value of the nearest 3D point to a query point.""" + assert points_3d.ndim == 2 and points_3d.shape[1] == len( + Point3DIndex + ), "points_3d must be a 2D array with shape (N, 3)" + distances = np.linalg.norm(points_3d[..., Point3DIndex.XY] - query_point[..., Point3DIndex.XY], axis=1) + closest_point = points_3d[np.argmin(distances)] + return closest_point[2] + + +def _interpolate_z_on_segment(point: shapely.Point, segment_coords: npt.NDArray[np.float64]) -> float: + """Helper function to interpolate the Z-value on a 3D segment given a 2D point.""" + p1, p2 = segment_coords[0], segment_coords[1] + + # Project point onto segment + segment_vec = p2[:2] - p1[:2] + point_vec = np.array([point.x, point.y]) - p1[:2] + + # Handle degenerate case + segment_length_sq = np.dot(segment_vec, segment_vec) + if segment_length_sq == 0: + return p1[2] + + # Calculate projection parameter + t = np.dot(point_vec, segment_vec) / segment_length_sq + t = np.clip(t, 0, 1) # Clamp to segment bounds + + # Interpolate Z + return p1[2] + t * (p2[2] - p1[2]) + + +def _split_continuous_segments(indices: npt.NDArray[np.int64]) -> List[npt.NDArray[np.int64]]: + """Helper function to find continuous segments in a list of indices.""" + if len(indices) == 0: + return [] + + # Find breaks in continuity + breaks = np.where(np.diff(indices) != 1)[0] + 1 + segments = np.split(indices, breaks) + + # Filter segments with at least 2 points + return [seg for seg in segments if len(seg) >= 2] + + +def _create_non_conflicting_sets(conflicts: Dict[MapObjectIDType, List[MapObjectIDType]]) -> List[Set[MapObjectIDType]]: + """Helper function to create non-conflicting sets from a conflict dictionary.""" + + # NOTE @DanielDauner: The conflict problem is a graph coloring problem. Map objects are nodes, conflicts are edges.
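+ # A 2-coloring suffices for the common bipartite case (simple over/under crossings); non-bipartite conflicts fall back to greedy coloring.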
+ # https://en.wikipedia.org/wiki/Graph_coloring + + # Create graph from conflicts + G = nx.Graph() + for idx, conflict_list in conflicts.items(): + for conflict_idx in conflict_list: + G.add_edge(idx, conflict_idx) + + result: List[Set[MapObjectIDType]] = [] + + # Process each connected component + for component in nx.connected_components(G): + subgraph = G.subgraph(component) + + # Try bipartite coloring first + if nx.is_bipartite(subgraph): + sets = nx.bipartite.sets(subgraph) + result.extend([set(s) for s in sets]) + else: + # Fall back to greedy coloring for non-bipartite graphs + coloring = nx.greedy_color(subgraph, strategy="largest_first") + color_groups = {} + for node, color in coloring.items(): + if color not in color_groups: + color_groups[color] = set() + color_groups[color].add(node) + result.extend(color_groups.values()) + + return result diff --git a/src/py123d/conversion/utils/sensor_utils/camera_conventions.py b/src/py123d/conversion/utils/sensor_utils/camera_conventions.py new file mode 100644 index 00000000..75837932 --- /dev/null +++ b/src/py123d/conversion/utils/sensor_utils/camera_conventions.py @@ -0,0 +1,95 @@ +""" +Default Camera Coordinate System in 123D: + + -Y (up) /| H + | / | e + | / | i + | / | g + | / | h + | | | t + O────────────|──●──|──────────── +Z (forward), aka. optical/principal axis + / | / h + / | / t + / | / d + / | / i ++X (right) |/ W + +We use COLMAP/OpenCV convention (+Z forward, -Y up, +X right), + abbreviated as "pZmYpX" for the forward-up-right axes. + +Other common conventions include, for forward-up-right axes. + - (+X forward, +Z up, -Y right), "pXpZmY", e.g. Waymo Open Dataset + +NOTE: This file should be extended if other conventions are needed in the future. +""" + +from enum import Enum +from typing import Union + +import numpy as np + +from py123d.geometry import StateSE3 + + +class CameraConvention(Enum): + """Camera coordinate system conventions + p/m: positive/negative + X/Y/Z: axes directions + order: forward, up, right + + Example: pZmYpX means +Z forward, -Y up, +X right + + TODO: Figure out a more intuitive naming scheme. + """ + + pZmYpX = "pZmYpX" # Default in 123D (OpenCV/COLMAP) + pXpZmY = "pXpZmY" # e.g. Waymo Open Dataset + + +def convert_camera_convention( + from_pose: StateSE3, + from_convention: Union[CameraConvention, str], + to_convention: Union[CameraConvention, str], +) -> StateSE3: + """Convert camera pose between different conventions. + 123D default is pZmYpX (+Z forward, -Y up, +X right). 
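+ + Example (a sketch; `waymo_cam_pose` is a hypothetical StateSE3 given in the Waymo convention): + + pose_123d = convert_camera_convention(waymo_cam_pose, "pXpZmY", "pZmYpX")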
+ + :param from_pose: StateSE3 representing the camera pose to convert + :param from_convention: CameraConvention representing the current convention of the pose + :param to_convention: CameraConvention representing the target convention to convert to + :return: StateSE3 representing the converted camera pose + """ + # TODO: Write tests for this function + # TODO: Create function over batch/array of poses + + if isinstance(from_convention, str): + from_convention = CameraConvention(from_convention) + if isinstance(to_convention, str): + to_convention = CameraConvention(to_convention) + + if from_convention == to_convention: + return from_pose + + flip_matrices = { + (CameraConvention.pXpZmY, CameraConvention.pZmYpX): np.array( + [ + [0.0, -1.0, 0.0], # new X = old -Y (right) + [0.0, 0.0, -1.0], # new Y = old -Z (down) + [1.0, 0.0, 0.0], # new Z = old X (forward) + ], + dtype=np.float64, + ).T, + (CameraConvention.pZmYpX, CameraConvention.pXpZmY): np.array( + [ + [0.0, 0.0, 1.0], # new X = old Z (right) + [-1.0, 0.0, 0.0], # new Y = old -X (down) + [0.0, -1.0, 0.0], # new Z = old -Y (forward) + ], + dtype=np.float64, + ).T, + } + + pose_transformation = from_pose.transformation_matrix.copy() + F = flip_matrices[(from_convention, to_convention)] + pose_transformation[:3, :3] = pose_transformation[:3, :3] @ F + return StateSE3.from_transformation_matrix(pose_transformation) diff --git a/d123/common/datatypes/__init__.py b/src/py123d/datatypes/__init__.py similarity index 100% rename from d123/common/datatypes/__init__.py rename to src/py123d/datatypes/__init__.py diff --git a/d123/dataset/dataset_specific/__init__.py b/src/py123d/datatypes/detections/__init__.py similarity index 100% rename from d123/dataset/dataset_specific/__init__.py rename to src/py123d/datatypes/detections/__init__.py diff --git a/d123/common/datatypes/detection/detection.py b/src/py123d/datatypes/detections/box_detections.py similarity index 53% rename from d123/common/datatypes/detection/detection.py rename to src/py123d/datatypes/detections/box_detections.py index a836fd4f..64ebb851 100644 --- a/d123/common/datatypes/detection/detection.py +++ b/src/py123d/datatypes/detections/box_detections.py @@ -1,25 +1,26 @@ from dataclasses import dataclass from functools import cached_property -from typing import Iterable +from typing import List, Optional, Union import shapely -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.time.time_point import TimePoint -from d123.common.geometry.base import StateSE2, StateSE3 -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2, BoundingBoxSE3 -from d123.common.geometry.occupancy_map import OccupancyMap2D -from d123.common.geometry.vector import Vector2D, Vector3D -from d123.common.utils.enums import SerialIntEnum +from py123d.conversion.registry.box_detection_label_registry import BoxDetectionLabel, DefaultBoxDetectionLabel +from py123d.datatypes.time.time_point import TimePoint +from py123d.geometry import BoundingBoxSE2, BoundingBoxSE3, OccupancyMap2D, StateSE2, StateSE3, Vector2D, Vector3D @dataclass class BoxDetectionMetadata: - detection_type: DetectionType - timepoint: TimePoint + label: BoxDetectionLabel track_token: str - confidence: float | None = None + confidence: Optional[float] = None + num_lidar_points: Optional[int] = None + timepoint: Optional[TimePoint] = None + + @property + def default_label(self) -> DefaultBoxDetectionLabel: + return self.label.to_default() @dataclass @@ -27,7 +28,7 @@ class 
BoxDetectionSE2: metadata: BoxDetectionMetadata bounding_box_se2: BoundingBoxSE2 - velocity: Vector2D | None = None + velocity: Optional[Vector2D] = None @property def shapely_polygon(self) -> shapely.geometry.Polygon: @@ -47,7 +48,7 @@ class BoxDetectionSE3: metadata: BoxDetectionMetadata bounding_box_se3: BoundingBoxSE3 - velocity: Vector3D | None = None + velocity: Optional[Vector3D] = None @property def shapely_polygon(self) -> shapely.geometry.Polygon: @@ -78,13 +79,13 @@ def box_detection_se2(self) -> BoxDetectionSE2: ) -BoxDetection = BoxDetectionSE2 | BoxDetectionSE3 +BoxDetection = Union[BoxDetectionSE2, BoxDetectionSE3] @dataclass class BoxDetectionWrapper: - box_detections: list[BoxDetection] + box_detections: List[BoxDetection] def __getitem__(self, index: int) -> BoxDetection: return self.box_detections[index] @@ -95,11 +96,8 @@ def __len__(self) -> int: def __iter__(self): return iter(self.box_detections) - def get_box_detections_by_types(self, detection_types: Iterable[DetectionType]) -> list[BoxDetection]: - return [detection for detection in self.box_detections if detection.metadata.detection_type in detection_types] - - def get_detection_by_track_token(self, track_token: str) -> BoxDetection | None: - box_detection: BoxDetection | None = None + def get_detection_by_track_token(self, track_token: str) -> Optional[BoxDetection]: + box_detection: Optional[BoxDetection] = None for detection in self.box_detections: if detection.metadata.track_token == track_token: box_detection = detection @@ -111,45 +109,3 @@ def occupancy_map(self) -> OccupancyMap2D: ids = [detection.metadata.track_token for detection in self.box_detections] geometries = [detection.bounding_box.shapely_polygon for detection in self.box_detections] return OccupancyMap2D(geometries=geometries, ids=ids) - - -class TrafficLightStatus(SerialIntEnum): - """ - Enum for TrafficLightStatus. - """ - - GREEN = 0 - YELLOW = 1 - RED = 2 - OFF = 3 - UNKNOWN = 4 - - -@dataclass -class TrafficLightDetection: - - timepoint: TimePoint - lane_id: int - status: TrafficLightStatus - - -@dataclass -class TrafficLightDetectionWrapper: - traffic_light_detections: list[TrafficLightDetection] - - def __getitem__(self, index: int) -> TrafficLightDetection: - return self.traffic_light_detections[index] - - def __len__(self) -> int: - return len(self.traffic_light_detections) - - def __iter__(self): - return iter(self.traffic_light_detections) - - def get_detection_by_lane_id(self, lane_id: int) -> TrafficLightDetection | None: - traffic_light_detection: TrafficLightDetection | None = None - for detection in self.traffic_light_detections: - if int(detection.lane_id) == int(lane_id): - traffic_light_detection = detection - break - return traffic_light_detection diff --git a/src/py123d/datatypes/detections/traffic_light_detections.py b/src/py123d/datatypes/detections/traffic_light_detections.py new file mode 100644 index 00000000..c4f1c57a --- /dev/null +++ b/src/py123d/datatypes/detections/traffic_light_detections.py @@ -0,0 +1,48 @@ +from dataclasses import dataclass +from typing import List, Optional + +from py123d.common.utils.enums import SerialIntEnum +from py123d.datatypes.time.time_point import TimePoint + + +class TrafficLightStatus(SerialIntEnum): + """ + Enum for TrafficLightStatus. 
+ """ + + GREEN = 0 + YELLOW = 1 + RED = 2 + OFF = 3 + UNKNOWN = 4 + + +@dataclass +class TrafficLightDetection: + + lane_id: int + status: TrafficLightStatus + timepoint: Optional[TimePoint] = None + + +@dataclass +class TrafficLightDetectionWrapper: + + traffic_light_detections: List[TrafficLightDetection] + + def __getitem__(self, index: int) -> TrafficLightDetection: + return self.traffic_light_detections[index] + + def __len__(self) -> int: + return len(self.traffic_light_detections) + + def __iter__(self): + return iter(self.traffic_light_detections) + + def get_detection_by_lane_id(self, lane_id: int) -> Optional[TrafficLightDetection]: + traffic_light_detection: Optional[TrafficLightDetection] = None + for detection in self.traffic_light_detections: + if int(detection.lane_id) == int(lane_id): + traffic_light_detection = detection + break + return traffic_light_detection diff --git a/d123/dataset/maps/abstract_map.py b/src/py123d/datatypes/maps/abstract_map.py similarity index 86% rename from d123/dataset/maps/abstract_map.py rename to src/py123d/datatypes/maps/abstract_map.py index 681c90cf..3aee0e99 100644 --- a/d123/dataset/maps/abstract_map.py +++ b/src/py123d/datatypes/maps/abstract_map.py @@ -5,9 +5,10 @@ import shapely -from d123.common.geometry.base import Point2D -from d123.dataset.maps.abstract_map_objects import AbstractMapObject -from d123.dataset.maps.map_datatypes import MapLayer +from py123d.datatypes.maps.abstract_map_objects import AbstractMapObject +from py123d.datatypes.maps.map_datatypes import MapLayer +from py123d.datatypes.maps.map_metadata import MapMetadata +from py123d.geometry import Point2D # TODO: # - add docstrings @@ -18,9 +19,8 @@ class AbstractMap(abc.ABC): - @property @abc.abstractmethod - def map_name(self) -> str: + def get_map_metadata(self) -> MapMetadata: pass @abc.abstractmethod @@ -82,3 +82,7 @@ def query_nearest( exclusive: bool = False, ) -> Dict[MapLayer, Union[List[AbstractMapObject], Dict[int, List[AbstractMapObject]]]]: pass + + @property + def location(self) -> str: + return self.get_map_metadata().location diff --git a/d123/dataset/maps/abstract_map_objects.py b/src/py123d/datatypes/maps/abstract_map_objects.py similarity index 61% rename from d123/dataset/maps/abstract_map_objects.py rename to src/py123d/datatypes/maps/abstract_map_objects.py index 96ce79de..baea8d87 100644 --- a/d123/dataset/maps/abstract_map_objects.py +++ b/src/py123d/datatypes/maps/abstract_map_objects.py @@ -1,13 +1,18 @@ from __future__ import annotations import abc -from typing import List, Optional, Tuple +from typing import List, Optional, Tuple, Union import shapely.geometry as geom import trimesh +from typing_extensions import TypeAlias -from d123.common.geometry.line.polylines import Polyline2D, Polyline3D, PolylineSE2 -from d123.dataset.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType +from py123d.datatypes.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType +from py123d.geometry import Polyline2D, Polyline3D, PolylineSE2 + +# TODO: Refactor and just use int +# type MapObjectIDType = Union[str, int] for Python >= 3.12 +MapObjectIDType: TypeAlias = Union[str, int] class AbstractMapObject(abc.ABC): @@ -15,12 +20,12 @@ class AbstractMapObject(abc.ABC): Base interface representation of all map objects. """ - def __init__(self, object_id: str): + def __init__(self, object_id: MapObjectIDType): """ Constructor of the base map object type. :param object_id: unique identifier of the map object. 
""" - self.id = str(object_id) + self.object_id: MapObjectIDType = object_id @property @abc.abstractmethod @@ -46,10 +51,10 @@ def shapely_polygon(self) -> geom.Polygon: @property @abc.abstractmethod - def outline_3d(self) -> Polyline3D: + def outline(self) -> Union[Polyline2D, Polyline3D]: """ - Returns the 3D outline of the map object. - :return: 3D polyline + Returns the 2D or 3D outline of the map surface, if available. + :return: 2D or 3D polyline """ @property @@ -60,19 +65,49 @@ def trimesh_mesh(self) -> trimesh.Trimesh: :return: Trimesh """ + @property + def outline_3d(self) -> Polyline3D: + """Returns the 3D outline of the map surface, or converts 2D to 3D if necessary. + + :return: 3D polyline + """ + if isinstance(self.outline, Polyline3D): + return self.outline + # Converts 2D polyline to 3D by adding a default (zero) z-coordinate + return Polyline3D.from_linestring(self.outline.linestring) + + @property def outline_2d(self) -> Polyline2D: - return self.outline_3d.polyline_2d + """Returns the 2D outline of the map surface, or converts 3D to 2D if necessary. + + :return: 2D polyline + """ + if isinstance(self.outline, Polyline2D): + return self.outline + # Converts 3D polyline to 2D by dropping the z-coordinate + return self.outline.polyline_2d class AbstractLineMapObject(AbstractMapObject): @property @abc.abstractmethod + def polyline(self) -> Union[Polyline2D, Polyline3D]: + """ + Returns the polyline of the road edge, either 2D or 3D. + :return: polyline + """ + + @property def polyline_3d(self) -> Polyline3D: """ Returns the 3D polyline of the road edge. :return: 3D polyline """ + if isinstance(self.polyline, Polyline3D): + return self.polyline + # Converts 2D polyline to 3D by adding a default (zero) z-coordinate + return Polyline3D.from_linestring(self.polyline.linestring) @property def polyline_2d(self) -> Polyline2D: @@ -80,7 +115,10 @@ def polyline_2d(self) -> Polyline2D: Returns the 2D polyline of the road line. :return: 2D polyline """ - return self.polyline_3d.polyline_2d + if isinstance(self.polyline, Polyline2D): + return self.polyline + # Converts 3D polyline to 2D by dropping the z-coordinate + return self.polyline.polyline_2d @property def polyline_se2(self) -> PolylineSE2: @@ -88,7 +126,15 @@ def polyline_se2(self) -> PolylineSE2: Returns the 2D polyline of the road line in SE(2) coordinates. :return: 2D polyline in SE(2) """ - return self.polyline_3d.polyline_se2 + return self.polyline_2d.polyline_se2 + + @property + def shapely_linestring(self) -> geom.LineString: + """ + Returns the shapely linestring of the line, either 2D or 3D. + :return: shapely linestring + """ + return self.polyline.linestring class AbstractLane(AbstractSurfaceMapObject): @@ -106,6 +152,14 @@ def speed_limit_mps(self) -> Optional[float]: :return: float or none """ + @property + @abc.abstractmethod + def successor_ids(self) -> List[MapObjectIDType]: + """ + Property of succeeding lane object ids (front). + :return: list of lane ids + """ + @property @abc.abstractmethod def successors(self) -> List[AbstractLane]: @@ -114,6 +168,14 @@ def successors(self) -> List[AbstractLane]: :return: list of lane class """ + @property + @abc.abstractmethod + def predecessor_ids(self) -> List[MapObjectIDType]: + """ + Property of preceding lane object ids (behind). 
+ :return: list of lane ids + """ + @property @abc.abstractmethod def predecessors(self) -> List[AbstractLane]: @@ -138,6 +200,14 @@ def right_boundary(self) -> Polyline3D: :return: returns 3D polyline """ + @property + @abc.abstractmethod + def left_lane_id(self) -> Optional[MapObjectIDType]: + """ + Property of left lane id of lane. + :return: returns left lane id or none, if no left lane + """ + @property @abc.abstractmethod def left_lane(self) -> Optional[AbstractLane]: @@ -146,6 +216,14 @@ def right_lane(self) -> Optional[AbstractLane]: """ + @property + @abc.abstractmethod + def right_lane_id(self) -> Optional[MapObjectIDType]: + """ + Property of right lane id of lane. + :return: returns right lane id or none, if no right lane + """ + @property @abc.abstractmethod def right_lane(self) -> Optional[AbstractLane]: @@ -162,6 +240,14 @@ def centerline(self) -> Polyline3D: :return: returns 3D polyline """ + @property + @abc.abstractmethod + def lane_group_id(self) -> MapObjectIDType: + """ + Property of lane group id of lane. + :return: returns lane group id + """ + @property @abc.abstractmethod def lane_group(self) -> AbstractLaneGroup: @@ -186,20 +272,36 @@ class AbstractLaneGroup(AbstractSurfaceMapObject): def layer(self) -> MapLayer: return MapLayer.LANE_GROUP + @property + @abc.abstractmethod + def successor_ids(self) -> List[MapObjectIDType]: + """ + Property of succeeding lane group ids (front). + :return: list of lane group ids + """ + @property @abc.abstractmethod def successors(self) -> List[AbstractLaneGroup]: """ - Property of succeeding lane objects (front). - :return: list of lane class + Property of succeeding lane group objects (front). + :return: list of lane group class + """ + + @property + @abc.abstractmethod + def predecessor_ids(self) -> List[MapObjectIDType]: + """ + Property of preceding lane group ids (behind). + :return: list of lane group ids """ @property @abc.abstractmethod def predecessors(self) -> List[AbstractLaneGroup]: """ - Property of preceding lane objects (behind). - :return: list of lane class + Property of preceding lane group objects (behind). + :return: list of lane group class """ @property @@ -218,6 +320,14 @@ def right_boundary(self) -> Polyline3D: :return: returns 3D polyline """ + @property + @abc.abstractmethod + def lane_ids(self) -> List[MapObjectIDType]: + """ + Property of interior lane ids of a lane group. + :return: returns list of lane ids + """ + @property @abc.abstractmethod def lanes(self) -> List[AbstractLane]: @@ -226,6 +336,14 @@ def lanes(self) -> List[AbstractLane]: :return: returns list of lanes """ + @property + @abc.abstractmethod + def intersection_id(self) -> Optional[MapObjectIDType]: + """ + Property of intersection id of a lane group. + :return: returns intersection id or none, if lane group not in intersection + """ + @property @abc.abstractmethod def intersection(self) -> Optional[AbstractIntersection]: @@ -242,6 +360,14 @@ class AbstractIntersection(AbstractSurfaceMapObject): def layer(self) -> MapLayer: return MapLayer.INTERSECTION + @property + @abc.abstractmethod + def lane_group_ids(self) -> List[MapObjectIDType]: + """ + Property of lane group ids of intersection.
+ :return: returns list of lane group ids + """ + @property @abc.abstractmethod def lane_groups(self) -> List[AbstractLaneGroup]: diff --git a/d123/dataset/dataset_specific/carla/__init__.py b/src/py123d/datatypes/maps/cache/__init__.py similarity index 100% rename from d123/dataset/dataset_specific/carla/__init__.py rename to src/py123d/datatypes/maps/cache/__init__.py diff --git a/src/py123d/datatypes/maps/cache/cache_map_objects.py b/src/py123d/datatypes/maps/cache/cache_map_objects.py new file mode 100644 index 00000000..498c01c1 --- /dev/null +++ b/src/py123d/datatypes/maps/cache/cache_map_objects.py @@ -0,0 +1,311 @@ +from __future__ import annotations + +from typing import List, Optional, Union + +import numpy as np +import shapely.geometry as geom +import trimesh + +from py123d.datatypes.maps.abstract_map_objects import ( + AbstractCarpark, + AbstractCrosswalk, + AbstractGenericDrivable, + AbstractIntersection, + AbstractLane, + AbstractLaneGroup, + AbstractLineMapObject, + AbstractRoadEdge, + AbstractRoadLine, + AbstractSurfaceMapObject, + AbstractWalkway, + MapObjectIDType, +) +from py123d.datatypes.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType +from py123d.geometry import Polyline3D +from py123d.geometry.polyline import Polyline2D + + +class CacheSurfaceObject(AbstractSurfaceMapObject): + """ + Base interface representation of all map objects. + """ + + def __init__( + self, + object_id: MapObjectIDType, + outline: Optional[Union[Polyline2D, Polyline3D]] = None, + geometry: Optional[geom.Polygon] = None, + ) -> None: + super().__init__(object_id) + + assert outline is not None or geometry is not None, "Either outline or geometry must be provided." + + if outline is None: + outline = Polyline3D.from_linestring(geometry.exterior) + + if geometry is None: + geometry = geom.Polygon(outline.array[:, :2]) + + self._outline = outline + self._geometry = geometry + + outline = property(lambda self: self._outline) + + @property + def shapely_polygon(self) -> geom.Polygon: + """Inherited, see superclass.""" + return self._geometry + + @property + def outline_3d(self) -> Polyline3D: + """Inherited, see superclass.""" + if isinstance(self.outline, Polyline3D): + return self.outline + # Converts 2D polyline to 3D by adding a default (zero) z-coordinate + return Polyline3D.from_linestring(self.outline.linestring) + + @property + def trimesh_mesh(self) -> trimesh.Trimesh: + """Inherited, see superclass.""" + raise NotImplementedError + + +class CacheLineObject(AbstractLineMapObject): + + def __init__(self, object_id: MapObjectIDType, polyline: Union[Polyline2D, Polyline3D]) -> None: + """ + Constructor of the base line map object type. + :param object_id: unique identifier of a line map object. 
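+ :param polyline: 2D or 3D polyline geometry of the line map object.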
+ """ + super().__init__(object_id) + self._polyline = polyline + + polyline = property(lambda self: self._polyline) + + +class CacheLane(CacheSurfaceObject, AbstractLane): + + def __init__( + self, + object_id: MapObjectIDType, + lane_group_id: MapObjectIDType, + left_boundary: Polyline3D, + right_boundary: Polyline3D, + centerline: Polyline3D, + left_lane_id: Optional[MapObjectIDType] = None, + right_lane_id: Optional[MapObjectIDType] = None, + predecessor_ids: List[MapObjectIDType] = [], + successor_ids: List[MapObjectIDType] = [], + speed_limit_mps: Optional[float] = None, + outline: Optional[Polyline3D] = None, + geometry: Optional[geom.Polygon] = None, + ) -> None: + + if outline is None: + outline_array = np.vstack( + ( + left_boundary.array, + right_boundary.array[::-1], + left_boundary.array[0], + ) + ) + outline = Polyline3D.from_linestring(geom.LineString(outline_array)) + + super().__init__(object_id, outline, geometry) + + self._lane_group_id = lane_group_id + self._left_boundary = left_boundary + self._right_boundary = right_boundary + self._centerline = centerline + self._left_lane_id = left_lane_id + self._right_lane_id = right_lane_id + self._predecessor_ids = predecessor_ids + self._successor_ids = successor_ids + self._speed_limit_mps = speed_limit_mps + + lane_group_id = property(lambda self: self._lane_group_id) + left_boundary = property(lambda self: self._left_boundary) + right_boundary = property(lambda self: self._right_boundary) + centerline = property(lambda self: self._centerline) + left_lane_id = property(lambda self: self._left_lane_id) + right_lane_id = property(lambda self: self._right_lane_id) + predecessor_ids = property(lambda self: self._predecessor_ids) + successor_ids = property(lambda self: self._successor_ids) + speed_limit_mps = property(lambda self: self._speed_limit_mps) + + @property + def layer(self) -> MapLayer: + """Inherited, see superclass.""" + return MapLayer.LANE + + @property + def successors(self) -> List[AbstractLane]: + """Inherited, see superclass.""" + raise NotImplementedError + + @property + def predecessors(self) -> List[AbstractLane]: + """Inherited, see superclass.""" + raise NotImplementedError + + @property + def left_lane(self) -> Optional[AbstractLane]: + """Inherited, see superclass.""" + raise NotImplementedError + + @property + def right_lane(self) -> Optional[AbstractLane]: + """Inherited, see superclass.""" + raise NotImplementedError + + @property + def lane_group(self) -> AbstractLaneGroup: + """Inherited, see superclass.""" + raise NotImplementedError + + +class CacheLaneGroup(CacheSurfaceObject, AbstractLaneGroup): + def __init__( + self, + object_id: MapObjectIDType, + lane_ids: List[MapObjectIDType], + left_boundary: Polyline3D, + right_boundary: Polyline3D, + intersection_id: Optional[MapObjectIDType] = None, + predecessor_ids: List[MapObjectIDType] = [], + successor_ids: List[MapObjectIDType] = [], + outline: Optional[Polyline3D] = None, + geometry: Optional[geom.Polygon] = None, + ): + if outline is None: + outline_array = np.vstack( + ( + left_boundary.array, + right_boundary.array[::-1], + left_boundary.array[0], + ) + ) + outline = Polyline3D.from_linestring(geom.LineString(outline_array)) + super().__init__(object_id, outline, geometry) + + self._lane_ids = lane_ids + self._left_boundary = left_boundary + self._right_boundary = right_boundary + self._intersection_id = intersection_id + self._predecessor_ids = predecessor_ids + self._successor_ids = successor_ids + + layer = property(lambda self: 
MapLayer.LANE_GROUP) + lane_ids = property(lambda self: self._lane_ids) + intersection_id = property(lambda self: self._intersection_id) + predecessor_ids = property(lambda self: self._predecessor_ids) + successor_ids = property(lambda self: self._successor_ids) + left_boundary = property(lambda self: self._left_boundary) + right_boundary = property(lambda self: self._right_boundary) + + @property + def successors(self) -> List[AbstractLaneGroup]: + """Inherited, see superclass.""" + raise NotImplementedError + + @property + def predecessors(self) -> List[AbstractLaneGroup]: + """Inherited, see superclass.""" + raise NotImplementedError + + @property + def lanes(self) -> List[AbstractLane]: + """Inherited, see superclass.""" + raise NotImplementedError + + @property + def intersection(self) -> Optional[AbstractIntersection]: + """Inherited, see superclass.""" + raise NotImplementedError + + +class CacheIntersection(CacheSurfaceObject, AbstractIntersection): + def __init__( + self, + object_id: MapObjectIDType, + lane_group_ids: List[MapObjectIDType], + outline: Optional[Union[Polyline2D, Polyline3D]] = None, + geometry: Optional[geom.Polygon] = None, + ): + + super().__init__(object_id, outline, geometry) + self._lane_group_ids = lane_group_ids + + layer = property(lambda self: MapLayer.INTERSECTION) + lane_group_ids = property(lambda self: self._lane_group_ids) + + @property + def lane_groups(self) -> List[CacheLaneGroup]: + """Inherited, see superclass.""" + raise NotImplementedError + + +class CacheCrosswalk(CacheSurfaceObject, AbstractCrosswalk): + def __init__( + self, + object_id: MapObjectIDType, + outline: Optional[Union[Polyline2D, Polyline3D]] = None, + geometry: Optional[geom.Polygon] = None, + ): + super().__init__(object_id, outline, geometry) + + +class CacheCarpark(CacheSurfaceObject, AbstractCarpark): + def __init__( + self, + object_id: MapObjectIDType, + outline: Optional[Union[Polyline2D, Polyline3D]] = None, + geometry: Optional[geom.Polygon] = None, + ): + super().__init__(object_id, outline, geometry) + + +class CacheWalkway(CacheSurfaceObject, AbstractWalkway): + def __init__( + self, + object_id: MapObjectIDType, + outline: Optional[Union[Polyline2D, Polyline3D]] = None, + geometry: Optional[geom.Polygon] = None, + ): + super().__init__(object_id, outline, geometry) + + +class CacheGenericDrivable(CacheSurfaceObject, AbstractGenericDrivable): + def __init__( + self, + object_id: MapObjectIDType, + outline: Optional[Union[Polyline2D, Polyline3D]] = None, + geometry: Optional[geom.Polygon] = None, + ): + super().__init__(object_id, outline, geometry) + + +class CacheRoadEdge(CacheLineObject, AbstractRoadEdge): + def __init__( + self, + object_id: MapObjectIDType, + road_edge_type: RoadEdgeType, + polyline: Union[Polyline2D, Polyline3D], + ): + super().__init__(object_id, polyline) + self._road_edge_type = road_edge_type + + road_edge_type = property(lambda self: self._road_edge_type) + + +class CacheRoadLine(CacheLineObject, AbstractRoadLine): + def __init__( + self, + object_id: MapObjectIDType, + road_line_type: RoadLineType, + polyline: Union[Polyline2D, Polyline3D], + ): + super().__init__(object_id, polyline) + self._road_line_type = road_line_type + + road_line_type = property(lambda self: self._road_line_type) diff --git a/d123/dataset/dataset_specific/nuplan/__init__.py b/src/py123d/datatypes/maps/gpkg/__init__.py similarity index 100% rename from d123/dataset/dataset_specific/nuplan/__init__.py rename to src/py123d/datatypes/maps/gpkg/__init__.py diff --git 
a/d123/dataset/maps/gpkg/gpkg_map.py b/src/py123d/datatypes/maps/gpkg/gpkg_map.py similarity index 90% rename from d123/dataset/maps/gpkg/gpkg_map.py rename to src/py123d/datatypes/maps/gpkg/gpkg_map.py index 32b02d41..acc1cfbb 100644 --- a/d123/dataset/maps/gpkg/gpkg_map.py +++ b/src/py123d/datatypes/maps/gpkg/gpkg_map.py @@ -1,20 +1,18 @@ from __future__ import annotations -import os import warnings from collections import defaultdict from functools import lru_cache from pathlib import Path -from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union +from typing import Callable, Dict, Final, Iterable, List, Optional, Tuple, Union import geopandas as gpd import shapely import shapely.geometry as geom -from d123.common.geometry.base import Point2D -from d123.dataset.maps.abstract_map import AbstractMap -from d123.dataset.maps.abstract_map_objects import AbstractMapObject -from d123.dataset.maps.gpkg.gpkg_map_objects import ( +from py123d.datatypes.maps.abstract_map import AbstractMap +from py123d.datatypes.maps.abstract_map_objects import AbstractMapObject +from py123d.datatypes.maps.gpkg.gpkg_map_objects import ( GPKGCarpark, GPKGCrosswalk, GPKGGenericDrivable, @@ -25,10 +23,14 @@ GPKGRoadLine, GPKGWalkway, ) -from d123.dataset.maps.gpkg.utils import load_gdf_with_geometry_columns -from d123.dataset.maps.map_datatypes import MapLayer +from py123d.datatypes.maps.gpkg.gpkg_utils import load_gdf_with_geometry_columns +from py123d.datatypes.maps.map_datatypes import MapLayer +from py123d.datatypes.maps.map_metadata import MapMetadata +from py123d.geometry import Point2D +from py123d.script.utils.dataset_path_utils import get_dataset_paths USE_ARROW: bool = True +MAX_LRU_CACHED_TABLES: Final[int] = 128 # TODO: add to some configs class GPKGMap(AbstractMap): @@ -49,11 +51,7 @@ def __init__(self, file_path: Path) -> None: # loaded during `.initialize()` self._gpd_dataframes: Dict[MapLayer, gpd.GeoDataFrame] = {} - - @property - def map_name(self) -> str: - """Inherited, see superclass.""" - return self._file_path.with_suffix("").name + self._map_metadata: Optional[MapMetadata] = None def initialize(self) -> None: """Inherited, see superclass.""" @@ -67,7 +65,7 @@ def initialize(self) -> None: ) load_gdf_with_geometry_columns( self._gpd_dataframes[map_layer], - geometry_column_names=["baseline_path", "right_boundary", "left_boundary", "outline"], + geometry_column_names=["centerline", "right_boundary", "left_boundary", "outline"], ) # TODO: remove the temporary fix and enforce consistent id types in the GPKG files if "id" in self._gpd_dataframes[map_layer].columns: @@ -76,6 +74,10 @@ def initialize(self) -> None: warnings.warn(f"GPKGMap: {map_layer_name} not available in {str(self._file_path)}") self._gpd_dataframes[map_layer] = None + assert "map_metadata" in list(gpd.list_layers(self._file_path).name) + metadata_gdf = gpd.read_file(self._file_path, layer="map_metadata", use_arrow=USE_ARROW) + self._map_metadata = MapMetadata.from_dict(metadata_gdf.iloc[0].to_dict()) + def _assert_initialize(self) -> None: "Checks if `.initialize()` was called, before retrieving data." assert len(self._gpd_dataframes) > 0, "GPKGMap: Call `.initialize()` before retrieving data!" @@ -84,6 +86,9 @@ def _assert_layer_available(self, layer: MapLayer) -> None: "Checks if layer is available." assert layer in self.get_available_map_objects(), f"GPKGMap: MapLayer {layer.name} is unavailable." 
+ def get_map_metadata(self) -> MapMetadata: + return self._map_metadata + def get_available_map_objects(self) -> List[MapLayer]: """Inherited, see superclass.""" self._assert_initialize() @@ -370,21 +375,20 @@ def _get_road_line(self, id: str) -> Optional[GPKGRoadLine]: # return list(map_object_ids) -@lru_cache(maxsize=32) -def get_map_api_from_names(dataset: str, location: str) -> GPKGMap: - D123_MAPS_ROOT = Path(os.environ.get("D123_MAPS_ROOT")) - gpkg_path = D123_MAPS_ROOT / f"{dataset}_{location}.gpkg" - assert gpkg_path.is_file(), f"{dataset}_{location}.gpkg not found in {str(D123_MAPS_ROOT)}." +@lru_cache(maxsize=MAX_LRU_CACHED_TABLES) +def get_global_map_api(dataset: str, location: str) -> GPKGMap: + PY123D_MAPS_ROOT: Path = Path(get_dataset_paths().py123d_maps_root) + gpkg_path = PY123D_MAPS_ROOT / dataset / f"{dataset}_{location}.gpkg" + assert gpkg_path.is_file(), f"{dataset}_{location}.gpkg not found in {str(PY123D_MAPS_ROOT)}." map_api = GPKGMap(gpkg_path) map_api.initialize() return map_api def get_local_map_api(split_name: str, log_name: str) -> GPKGMap: - print(split_name, log_name) - D123_MAPS_ROOT = Path(os.environ.get("D123_MAPS_ROOT")) - gpkg_path = D123_MAPS_ROOT / split_name / f"{log_name}.gpkg" - assert gpkg_path.is_file(), f"{log_name}.gpkg not found in {str(D123_MAPS_ROOT)}." + PY123D_MAPS_ROOT: Path = Path(get_dataset_paths().py123d_maps_root) + gpkg_path = PY123D_MAPS_ROOT / split_name / f"{log_name}.gpkg" + assert gpkg_path.is_file(), f"{log_name}.gpkg not found in {str(PY123D_MAPS_ROOT)}." map_api = GPKGMap(gpkg_path) map_api.initialize() return map_api diff --git a/d123/dataset/maps/gpkg/gpkg_map_objects.py b/src/py123d/datatypes/maps/gpkg/gpkg_map_objects.py similarity index 69% rename from d123/dataset/maps/gpkg/gpkg_map_objects.py rename to src/py123d/datatypes/maps/gpkg/gpkg_map_objects.py index 1fa6de6a..97b11d73 100644 --- a/d123/dataset/maps/gpkg/gpkg_map_objects.py +++ b/src/py123d/datatypes/maps/gpkg/gpkg_map_objects.py @@ -2,7 +2,7 @@ import ast from functools import cached_property -from typing import List, Optional +from typing import List, Optional, Union import geopandas as gpd import numpy as np @@ -10,10 +10,7 @@ import shapely.geometry as geom import trimesh -from d123.common.geometry.base import Point3DIndex -from d123.common.geometry.line.polylines import Polyline3D -from d123.common.visualization.viser.utils import get_trimesh_from_boundaries -from d123.dataset.maps.abstract_map_objects import ( +from py123d.datatypes.maps.abstract_map_objects import ( AbstractCarpark, AbstractCrosswalk, AbstractGenericDrivable, @@ -25,9 +22,12 @@ AbstractRoadLine, AbstractSurfaceMapObject, AbstractWalkway, + MapObjectIDType, ) -from d123.dataset.maps.gpkg.utils import get_row_with_value -from d123.dataset.maps.map_datatypes import RoadEdgeType, RoadLineType +from py123d.datatypes.maps.gpkg.gpkg_utils import get_row_with_value, get_trimesh_from_boundaries +from py123d.datatypes.maps.map_datatypes import RoadEdgeType, RoadLineType +from py123d.geometry import Point3DIndex, Polyline3D +from py123d.geometry.polyline import Polyline2D class GPKGSurfaceObject(AbstractSurfaceMapObject): @@ -35,7 +35,7 @@ class GPKGSurfaceObject(AbstractSurfaceMapObject): """ Base interface representation of all map objects. """ - def __init__(self, object_id: str, surface_df: gpd.GeoDataFrame) -> None: + def __init__(self, object_id: MapObjectIDType, surface_df: gpd.GeoDataFrame) -> None: """ Constructor of the base surface map object type.
:param object_id: unique identifier of a surface map object. @@ -51,10 +51,10 @@ def shapely_polygon(self) -> geom.Polygon: @cached_property def _object_row(self) -> gpd.GeoSeries: - return get_row_with_value(self._object_df, "id", self.id) + return get_row_with_value(self._object_df, "id", self.object_id) - @cached_property - def outline_3d(self) -> Polyline3D: + @property + def outline(self) -> Polyline3D: """Inherited, see superclass.""" outline_3d: Optional[Polyline3D] = None if "outline" in self._object_df.columns: @@ -92,7 +92,7 @@ def trimesh_mesh(self) -> trimesh.Trimesh: class GPKGLineObject(AbstractLineMapObject): - def __init__(self, object_id: str, line_df: gpd.GeoDataFrame) -> None: + def __init__(self, object_id: MapObjectIDType, line_df: gpd.GeoDataFrame) -> None: """ Constructor of the base line map object type. :param object_id: unique identifier of a line map object. @@ -103,10 +103,10 @@ def __init__(self, object_id: str, line_df: gpd.GeoDataFrame) -> None: @cached_property def _object_row(self) -> gpd.GeoSeries: - return get_row_with_value(self._object_df, "id", self.id) + return get_row_with_value(self._object_df, "id", self.object_id) @property - def polyline_3d(self) -> Polyline3D: + def polyline(self) -> Union[Polyline2D, Polyline3D]: """Inherited, see superclass.""" return Polyline3D.from_linestring(self._object_row.geometry) @@ -114,7 +114,7 @@ class GPKGLane(GPKGSurfaceObject, AbstractLane): def __init__( self, - object_id: str, + object_id: MapObjectIDType, object_df: gpd.GeoDataFrame, lane_group_df: gpd.GeoDataFrame, intersection_df: gpd.GeoDataFrame, @@ -128,17 +128,25 @@ def speed_limit_mps(self) -> Optional[float]: """Inherited, see superclass.""" return self._object_row.speed_limit_mps + @property + def successor_ids(self) -> List[MapObjectIDType]: + """Inherited, see superclass.""" + return ast.literal_eval(self._object_row.successor_ids) + @property def successors(self) -> List[GPKGLane]: """Inherited, see superclass.""" - successor_ids = ast.literal_eval(self._object_row.successor_ids) - return [GPKGLane(lane_id, self._object_df) for lane_id in successor_ids] + return [GPKGLane(lane_id, self._object_df) for lane_id in self.successor_ids] + + @property + def predecessor_ids(self) -> List[MapObjectIDType]: + """Inherited, see superclass.""" + return ast.literal_eval(self._object_row.predecessor_ids) @property def predecessors(self) -> List[GPKGLane]: """Inherited, see superclass.""" - predecessor_ids = ast.literal_eval(self._object_row.predecessor_ids) - return [GPKGLane(lane_id, self._object_df) for lane_id in predecessor_ids] + return [GPKGLane(lane_id, self._object_df) for lane_id in self.predecessor_ids] @property def left_boundary(self) -> Polyline3D: @@ -150,30 +158,38 @@ def right_boundary(self) -> Polyline3D: """Inherited, see superclass.""" return Polyline3D.from_linestring(self._object_row.right_boundary) + @property + def left_lane_id(self) -> Optional[MapObjectIDType]: + """Inherited, see superclass.""" + return self._object_row.left_lane_id + @property def left_lane(self) -> Optional[GPKGLane]: """Inherited, see superclass.""" - left_lane_id = self._object_row.left_lane_id return ( - GPKGLane(left_lane_id, self._object_df, self._lane_group_df, self._intersection_df) - if left_lane_id is not None and not pd.isna(left_lane_id) + GPKGLane(self.left_lane_id, self._object_df, self._lane_group_df, self._intersection_df) + if self.left_lane_id is not None and not pd.isna(self.left_lane_id) else None ) +
@property + def right_lane_id(self) -> Optional[MapObjectIDType]: + """Inherited, see superclass.""" + return self._object_row.right_lane_id + @property def right_lane(self) -> Optional[GPKGLane]: """Inherited, see superclass.""" - right_lane_id = self._object_row.right_lane_id return ( - GPKGLane(right_lane_id, self._object_df, self._lane_group_df, self._intersection_df) - if right_lane_id is not None and not pd.isna(right_lane_id) + GPKGLane(self.right_lane_id, self._object_df, self._lane_group_df, self._intersection_df) + if self.right_lane_id is not None and not pd.isna(self.right_lane_id) else None ) @property def centerline(self) -> Polyline3D: """Inherited, see superclass.""" - return Polyline3D.from_linestring(self._object_row.baseline_path) + return Polyline3D.from_linestring(self._object_row.centerline) @property def outline_3d(self) -> Polyline3D: @@ -182,12 +198,16 @@ def outline_3d(self) -> Polyline3D: outline_array = np.vstack((outline_array, outline_array[0])) return Polyline3D.from_linestring(geom.LineString(outline_array)) + @property + def lane_group_id(self) -> MapObjectIDType: + """Inherited, see superclass.""" + return self._object_row.lane_group_id + @property def lane_group(self) -> GPKGLaneGroup: """Inherited, see superclass.""" - lane_group_id = self._object_row.lane_group_id return GPKGLaneGroup( - lane_group_id, + self.lane_group_id, self._lane_group_df, self._object_df, self._intersection_df, @@ -197,7 +217,7 @@ def lane_group(self) -> GPKGLaneGroup: class GPKGLaneGroup(GPKGSurfaceObject, AbstractLaneGroup): def __init__( self, - object_id: str, + object_id: MapObjectIDType, object_df: gpd.GeoDataFrame, lane_df: gpd.GeoDataFrame, intersection_df: gpd.GeoDataFrame, @@ -206,22 +226,30 @@ def __init__( self._lane_df = lane_df self._intersection_df = intersection_df + @property + def successor_ids(self) -> List[MapObjectIDType]: + """Inherited, see superclass.""" + return ast.literal_eval(self._object_row.successor_ids) + @property def successors(self) -> List[GPKGLaneGroup]: """Inherited, see superclass.""" - successor_ids = ast.literal_eval(self._object_row.successor_ids) return [ GPKGLaneGroup(lane_group_id, self._object_df, self._lane_df, self._intersection_df) - for lane_group_id in successor_ids + for lane_group_id in self.successor_ids ] + @property + def predecessor_ids(self) -> List[MapObjectIDType]: + """Inherited, see superclass.""" + return ast.literal_eval(self._object_row.predecessor_ids) + @property def predecessors(self) -> List[GPKGLaneGroup]: """Inherited, see superclass.""" - predecessor_ids = ast.literal_eval(self._object_row.predecessor_ids) return [ GPKGLaneGroup(lane_group_id, self._object_df, self._lane_df, self._intersection_df) - for lane_group_id in predecessor_ids + for lane_group_id in self.predecessor_ids ] @property @@ -240,10 +268,14 @@ def outline_3d(self) -> Polyline3D: outline_array = np.vstack((self.left_boundary.array, self.right_boundary.array[::-1])) return Polyline3D.from_linestring(geom.LineString(outline_array)) + @property + def lane_ids(self) -> List[MapObjectIDType]: + """Inherited, see superclass.""" + return ast.literal_eval(self._object_row.lane_ids) + @property def lanes(self) -> List[GPKGLane]: """Inherited, see superclass.""" - lane_ids = ast.literal_eval(self._object_row.lane_ids) return [ GPKGLane( lane_id, @@ -251,21 +283,25 @@ def lanes(self) -> List[GPKGLane]: self._object_df, self._intersection_df, ) - for lane_id in lane_ids + for lane_id in self.lane_ids ] + @property + def intersection_id(self) -> 
Optional[MapObjectIDType]: + """Inherited, see superclass.""" + return self._object_row.intersection_id + @property def intersection(self) -> Optional[GPKGIntersection]: """Inherited, see superclass.""" - intersection_id = self._object_row.intersection_id return ( GPKGIntersection( - intersection_id, + self.intersection_id, self._intersection_df, self._lane_df, self._object_df, ) - if intersection_id is not None and not pd.isna(intersection_id) + if self.intersection_id is not None and not pd.isna(self.intersection_id) else None ) @@ -273,7 +309,7 @@ def intersection(self) -> Optional[GPKGIntersection]: class GPKGIntersection(GPKGSurfaceObject, AbstractIntersection): def __init__( self, - object_id: str, + object_id: MapObjectIDType, object_df: gpd.GeoDataFrame, lane_df: gpd.GeoDataFrame, lane_group_df: gpd.GeoDataFrame, @@ -282,10 +318,14 @@ def __init__( self._lane_df = lane_df self._lane_group_df = lane_group_df + @property + def lane_group_ids(self) -> List[MapObjectIDType]: + """Inherited, see superclass.""" + return ast.literal_eval(self._object_row.lane_group_ids) + @property def lane_groups(self) -> List[GPKGLaneGroup]: """Inherited, see superclass.""" - lane_group_ids = ast.literal_eval(self._object_row.lane_group_ids) return [ GPKGLaneGroup( lane_group_id, @@ -293,37 +333,37 @@ def lane_groups(self) -> List[GPKGLaneGroup]: self._lane_df, self._object_df, ) - for lane_group_id in lane_group_ids + for lane_group_id in self.lane_group_ids ] class GPKGCrosswalk(GPKGSurfaceObject, AbstractCrosswalk): - def __init__(self, object_id: str, object_df: gpd.GeoDataFrame): + def __init__(self, object_id: MapObjectIDType, object_df: gpd.GeoDataFrame): super().__init__(object_id, object_df) class GPKGCarpark(GPKGSurfaceObject, AbstractCarpark): - def __init__(self, object_id: str, object_df: gpd.GeoDataFrame): + def __init__(self, object_id: MapObjectIDType, object_df: gpd.GeoDataFrame): super().__init__(object_id, object_df) class GPKGWalkway(GPKGSurfaceObject, AbstractWalkway): - def __init__(self, object_id: str, object_df: gpd.GeoDataFrame): + def __init__(self, object_id: MapObjectIDType, object_df: gpd.GeoDataFrame): super().__init__(object_id, object_df) class GPKGGenericDrivable(GPKGSurfaceObject, AbstractGenericDrivable): - def __init__(self, object_id: str, object_df: gpd.GeoDataFrame): + def __init__(self, object_id: MapObjectIDType, object_df: gpd.GeoDataFrame): super().__init__(object_id, object_df) class GPKGRoadEdge(GPKGLineObject, AbstractRoadEdge): - def __init__(self, object_id: str, object_df: gpd.GeoDataFrame): + def __init__(self, object_id: MapObjectIDType, object_df: gpd.GeoDataFrame): super().__init__(object_id, object_df) @cached_property def _object_row(self) -> gpd.GeoSeries: - return get_row_with_value(self._object_df, "id", self.id) + return get_row_with_value(self._object_df, "id", self.object_id) @property def road_edge_type(self) -> RoadEdgeType: @@ -332,12 +372,12 @@ def road_edge_type(self) -> RoadEdgeType: class GPKGRoadLine(GPKGLineObject, AbstractRoadLine): - def __init__(self, object_id: str, object_df: gpd.GeoDataFrame): + def __init__(self, object_id: MapObjectIDType, object_df: gpd.GeoDataFrame): super().__init__(object_id, object_df) @cached_property def _object_row(self) -> gpd.GeoSeries: - return get_row_with_value(self._object_df, "id", self.id) + return get_row_with_value(self._object_df, "id", self.object_id) @property def road_line_type(self) -> RoadLineType: diff --git a/d123/dataset/maps/gpkg/utils.py 
b/src/py123d/datatypes/maps/gpkg/gpkg_utils.py similarity index 55% rename from d123/dataset/maps/gpkg/utils.py rename to src/py123d/datatypes/maps/gpkg/gpkg_utils.py index 8b359f75..54dd93e6 100644 --- a/d123/dataset/maps/gpkg/utils.py +++ b/src/py123d/datatypes/maps/gpkg/gpkg_utils.py @@ -2,8 +2,12 @@ import geopandas as gpd import numpy as np +import numpy.typing as npt +import trimesh from shapely import wkt +from py123d.geometry.polyline import Polyline3D + def load_gdf_with_geometry_columns(gdf: gpd.GeoDataFrame, geometry_column_names: List[str] = []): # TODO: refactor @@ -46,3 +50,41 @@ def get_row_with_value(elements: gpd.geodataframe.GeoDataFrame, column_label: st f"{len(matching_rows)} matching keys found. Expected to only find one." "Try using get_all_rows_with_value" ) return matching_rows.iloc[0] + + +def get_trimesh_from_boundaries( + left_boundary: Polyline3D, right_boundary: Polyline3D, resolution: float = 0.25 +) -> trimesh.Trimesh: + + def _interpolate_polyline(polyline_3d: Polyline3D, num_samples: int) -> npt.NDArray[np.float64]: + if num_samples < 2: + num_samples = 2 + distances = np.linspace(0, polyline_3d.length, num=num_samples, endpoint=True, dtype=np.float64) + return polyline_3d.interpolate(distances) + + average_length = (left_boundary.length + right_boundary.length) / 2 + num_samples = int(average_length // resolution) + 1 + left_boundary_array = _interpolate_polyline(left_boundary, num_samples) + right_boundary_array = _interpolate_polyline(right_boundary, num_samples) + return _create_lane_mesh_from_boundary_arrays(left_boundary_array, right_boundary_array) + + +def _create_lane_mesh_from_boundary_arrays( + left_boundary_array: npt.NDArray[np.float64], right_boundary_array: npt.NDArray[np.float64] +) -> trimesh.Trimesh: + + # Ensure both polylines have the same number of points + if left_boundary_array.shape[0] != right_boundary_array.shape[0]: + raise ValueError("Both polylines must have the same number of points") + + n_points = left_boundary_array.shape[0] + vertices = np.vstack([left_boundary_array, right_boundary_array]) + + faces = [] + for i in range(n_points - 1): + faces.append([i, i + n_points, i + 1]) + faces.append([i + 1, i + n_points, i + n_points + 1]) + + faces = np.array(faces) + mesh = trimesh.Trimesh(vertices=vertices, faces=faces) + return mesh diff --git a/d123/dataset/maps/map_datatypes.py b/src/py123d/datatypes/maps/map_datatypes.py similarity index 80% rename from d123/dataset/maps/map_datatypes.py rename to src/py123d/datatypes/maps/map_datatypes.py index f948422f..dc9e4820 100644 --- a/d123/dataset/maps/map_datatypes.py +++ b/src/py123d/datatypes/maps/map_datatypes.py @@ -1,8 +1,12 @@ from __future__ import annotations -from d123.common.utils.enums import SerialIntEnum +from py123d.common.utils.enums import SerialIntEnum # TODO: Add stop pads or stop lines. +# - Add type for stop zones. +# - Add type for carparks, e.g. outline, driveway (Waymo), or other types. +# - Check if intersections should have types. +# - Use consistent naming conventions unknown, undefined, none, etc. class MapLayer(SerialIntEnum): @@ -25,6 +29,7 @@ class MapLayer(SerialIntEnum): class LaneType(SerialIntEnum): """ Enum for LaneType. + NOTE: We use the lane types from Waymo. https://github.com/waymo-research/waymo-open-dataset/blob/99a4cb3ff07e2fe06c2ce73da001f850f628e45a/src/waymo_open_dataset/protos/map.proto#L147 """ @@ -49,8 +54,7 @@ class RoadEdgeType(SerialIntEnum): class RoadLineType(SerialIntEnum): """ Enum for RoadLineType. 
- NOTE: We use the road line types from Waymo. - TODO: Use the Argoverse 2 road line types instead. + TODO: Use the Argoverse 2 road line types. https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/protos/map.proto#L208 """ diff --git a/src/py123d/datatypes/maps/map_metadata.py b/src/py123d/datatypes/maps/map_metadata.py new file mode 100644 index 00000000..14fd13c8 --- /dev/null +++ b/src/py123d/datatypes/maps/map_metadata.py @@ -0,0 +1,28 @@ +from __future__ import annotations + +from dataclasses import asdict, dataclass +from typing import Any, Dict, Optional + +import py123d + +# TODO: Refactor the usage of the map metadata in this repo. + + +@dataclass +class MapMetadata: + """Class to hold metadata information about a map.""" + + dataset: str + split: Optional[str] # None, if map is not per log + log_name: Optional[str] # None, if map is not per log + location: str + map_has_z: bool + map_is_local: bool # True, if map is per log + version: str = str(py123d.__version__) + + def to_dict(self) -> dict: + return asdict(self) + + @staticmethod + def from_dict(data_dict: Dict[str, Any]) -> MapMetadata: + return MapMetadata(**data_dict) diff --git a/d123/dataset/logs/__init__.py b/src/py123d/datatypes/scene/__init__.py similarity index 100% rename from d123/dataset/logs/__init__.py rename to src/py123d/datatypes/scene/__init__.py diff --git a/src/py123d/datatypes/scene/abstract_scene.py b/src/py123d/datatypes/scene/abstract_scene.py new file mode 100644 index 00000000..33611539 --- /dev/null +++ b/src/py123d/datatypes/scene/abstract_scene.py @@ -0,0 +1,116 @@ +from __future__ import annotations + +import abc +from typing import List, Optional + +from py123d.datatypes.detections.box_detections import BoxDetectionWrapper +from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper +from py123d.datatypes.maps.abstract_map import AbstractMap +from py123d.datatypes.scene.scene_metadata import LogMetadata, SceneExtractionMetadata +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDAR, LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraType +from py123d.datatypes.time.time_point import TimePoint +from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 +from py123d.datatypes.vehicle_state.vehicle_parameters import VehicleParameters + + +class AbstractScene(abc.ABC): + + #################################################################################################################### + # Abstract Methods, to be implemented by subclasses + #################################################################################################################### + + @abc.abstractmethod + def get_log_metadata(self) -> LogMetadata: + raise NotImplementedError + + @abc.abstractmethod + def get_scene_extraction_metadata(self) -> SceneExtractionMetadata: + raise NotImplementedError + + @abc.abstractmethod + def get_map_api(self) -> Optional[AbstractMap]: + raise NotImplementedError + + @abc.abstractmethod + def get_timepoint_at_iteration(self, iteration: int) -> TimePoint: + raise NotImplementedError + + @abc.abstractmethod + def get_ego_state_at_iteration(self, iteration: int) -> Optional[EgoStateSE3]: + raise NotImplementedError + + @abc.abstractmethod + def get_box_detections_at_iteration(self, iteration: int) -> Optional[BoxDetectionWrapper]: + raise NotImplementedError + + @abc.abstractmethod + def
get_traffic_light_detections_at_iteration(self, iteration: int) -> Optional[TrafficLightDetectionWrapper]: + raise NotImplementedError + + @abc.abstractmethod + def get_route_lane_group_ids(self, iteration: int) -> Optional[List[int]]: + raise NotImplementedError + + @abc.abstractmethod + def get_pinhole_camera_at_iteration( + self, iteration: int, camera_type: PinholeCameraType + ) -> Optional[PinholeCamera]: + raise NotImplementedError + + @abc.abstractmethod + def get_fisheye_mei_camera_at_iteration( + self, iteration: int, camera_type: FisheyeMEICameraType + ) -> Optional[FisheyeMEICamera]: + raise NotImplementedError + + @abc.abstractmethod + def get_lidar_at_iteration(self, iteration: int, lidar_type: LiDARType) -> Optional[LiDAR]: + raise NotImplementedError + + #################################################################################################################### + # Syntactic Sugar / Properties, for easier access to common attributes + #################################################################################################################### + + # 1. Log Metadata properties + @property + def log_metadata(self) -> LogMetadata: + return self.get_log_metadata() + + @property + def log_name(self) -> str: + return self.log_metadata.log_name + + @property + def vehicle_parameters(self) -> VehicleParameters: + return self.log_metadata.vehicle_parameters + + @property + def available_pinhole_camera_types(self) -> List[PinholeCameraType]: + return list(self.log_metadata.pinhole_camera_metadata.keys()) + + @property + def available_fisheye_mei_camera_types(self) -> List[FisheyeMEICameraType]: + return list(self.log_metadata.fisheye_mei_camera_metadata.keys()) + + @property + def available_lidar_types(self) -> List[LiDARType]: + return list(self.log_metadata.lidar_metadata.keys()) + + # 2. Scene Extraction Metadata properties + @property + def scene_extraction_metadata(self) -> SceneExtractionMetadata: + return self.get_scene_extraction_metadata() + + @property + def uuid(self) -> str: + return self.scene_extraction_metadata.initial_uuid + + @property + def number_of_iterations(self) -> int: + return self.scene_extraction_metadata.number_of_iterations + + @property + def number_of_history_iterations(self) -> int: + return self.scene_extraction_metadata.number_of_history_iterations diff --git a/src/py123d/datatypes/scene/abstract_scene_builder.py b/src/py123d/datatypes/scene/abstract_scene_builder.py new file mode 100644 index 00000000..17652549 --- /dev/null +++ b/src/py123d/datatypes/scene/abstract_scene_builder.py @@ -0,0 +1,22 @@ +import abc +from typing import Iterator + +from py123d.common.multithreading.worker_utils import WorkerPool +from py123d.datatypes.scene.abstract_scene import AbstractScene +from py123d.datatypes.scene.scene_filter import SceneFilter + + +class SceneBuilder(abc.ABC): + """ + Abstract base class for building scenes from a dataset. + """ + + @abc.abstractmethod + def get_scenes(self, filter: SceneFilter, worker: WorkerPool) -> Iterator[AbstractScene]: + """ + Returns an iterator over scenes that match the given filter. + :param filter: SceneFilter object to filter the scenes. + :param worker: WorkerPool to parallelize the scene extraction. + :return: Iterator over AbstractScene objects. 
+ """ + raise NotImplementedError diff --git a/d123/dataset/maps/gpkg/__init__.py b/src/py123d/datatypes/scene/arrow/__init__.py similarity index 100% rename from d123/dataset/maps/gpkg/__init__.py rename to src/py123d/datatypes/scene/arrow/__init__.py diff --git a/src/py123d/datatypes/scene/arrow/arrow_scene.py b/src/py123d/datatypes/scene/arrow/arrow_scene.py new file mode 100644 index 00000000..ee26a5f4 --- /dev/null +++ b/src/py123d/datatypes/scene/arrow/arrow_scene.py @@ -0,0 +1,170 @@ +from pathlib import Path +from typing import List, Optional, Union + +import pyarrow as pa + +from py123d.common.utils.arrow_helper import get_lru_cached_arrow_table +from py123d.datatypes.detections.box_detections import BoxDetectionWrapper +from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper +from py123d.datatypes.maps.abstract_map import AbstractMap +from py123d.datatypes.maps.gpkg.gpkg_map import get_global_map_api, get_local_map_api +from py123d.datatypes.scene.abstract_scene import AbstractScene +from py123d.datatypes.scene.arrow.utils.arrow_getters import ( + get_box_detections_from_arrow_table, + get_camera_from_arrow_table, + get_ego_vehicle_state_from_arrow_table, + get_lidar_from_arrow_table, + get_timepoint_from_arrow_table, + get_traffic_light_detections_from_arrow_table, +) +from py123d.datatypes.scene.arrow.utils.arrow_metadata_utils import get_log_metadata_from_arrow +from py123d.datatypes.scene.scene_metadata import LogMetadata, SceneExtractionMetadata +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDAR, LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraType +from py123d.datatypes.time.time_point import TimePoint +from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 + + +class ArrowScene(AbstractScene): + + def __init__( + self, + arrow_file_path: Union[Path, str], + scene_extraction_metadata: Optional[SceneExtractionMetadata] = None, + ) -> None: + + self._arrow_file_path: Path = Path(arrow_file_path) + self._log_metadata: LogMetadata = get_log_metadata_from_arrow(arrow_file_path) + + with pa.memory_map(str(self._arrow_file_path), "r") as source: + reader = pa.ipc.open_file(source) + table = reader.read_all() + num_rows = table.num_rows + initial_uuid = table["uuid"][0].as_py() + + if scene_extraction_metadata is None: + scene_extraction_metadata = SceneExtractionMetadata( + initial_uuid=initial_uuid, + initial_idx=0, + duration_s=self._log_metadata.timestep_seconds * num_rows, + history_s=0.0, + iteration_duration_s=self._log_metadata.timestep_seconds, + ) + self._scene_extraction_metadata: SceneExtractionMetadata = scene_extraction_metadata + + # NOTE: Lazy load a log-specific map API, and keep reference. + # Global maps are LRU cached internally. 
+ self._local_map_api: Optional[AbstractMap] = None + + #################################################################################################################### + # Helpers for ArrowScene + #################################################################################################################### + + def __reduce__(self): + """Helper for pickling the object.""" + return ( + self.__class__, + ( + self._arrow_file_path, + self._scene_extraction_metadata, + ), + ) + + def _get_recording_table(self) -> pa.Table: + """Helper function to return an LRU cached reference to the arrow table.""" + return get_lru_cached_arrow_table(self._arrow_file_path) + + def _get_table_index(self, iteration: int) -> int: + assert -self.number_of_history_iterations <= iteration < self.number_of_iterations, "Iteration out of bounds" + table_index = self._scene_extraction_metadata.initial_idx + iteration + return table_index + + #################################################################################################################### + # Implementation of AbstractScene + #################################################################################################################### + + def get_log_metadata(self) -> LogMetadata: + return self._log_metadata + + def get_scene_extraction_metadata(self) -> SceneExtractionMetadata: + return self._scene_extraction_metadata + + def get_map_api(self) -> Optional[AbstractMap]: + map_api: Optional[AbstractMap] = None + if self.log_metadata.map_metadata is not None: + if self.log_metadata.map_metadata.map_is_local: + if self._local_map_api is None: + map_api = get_local_map_api(self.log_metadata.split, self.log_name) + self._local_map_api = map_api + else: + map_api = self._local_map_api + else: + map_api = get_global_map_api(self.log_metadata.dataset, self.log_metadata.location) + return map_api + + def get_timepoint_at_iteration(self, iteration: int) -> TimePoint: + return get_timepoint_from_arrow_table(self._get_recording_table(), self._get_table_index(iteration)) + + def get_ego_state_at_iteration(self, iteration: int) -> Optional[EgoStateSE3]: + return get_ego_vehicle_state_from_arrow_table( + self._get_recording_table(), + self._get_table_index(iteration), + self.log_metadata.vehicle_parameters, + ) + + def get_box_detections_at_iteration(self, iteration: int) -> Optional[BoxDetectionWrapper]: + return get_box_detections_from_arrow_table( + self._get_recording_table(), + self._get_table_index(iteration), + self.log_metadata, + ) + + def get_traffic_light_detections_at_iteration(self, iteration: int) -> Optional[TrafficLightDetectionWrapper]: + return get_traffic_light_detections_from_arrow_table( + self._get_recording_table(), self._get_table_index(iteration) + ) + + def get_route_lane_group_ids(self, iteration: int) -> Optional[List[int]]: + route_lane_group_ids: Optional[List[int]] = None + table = self._get_recording_table() + if "route_lane_group_ids" in table.column_names: + route_lane_group_ids = table["route_lane_group_ids"][self._get_table_index(iteration)].as_py() + return route_lane_group_ids + + def get_pinhole_camera_at_iteration( + self, iteration: int, camera_type: Union[PinholeCameraType, FisheyeMEICameraType] + ) -> Optional[Union[PinholeCamera, FisheyeMEICamera]]: + pinhole_camera: Optional[PinholeCamera] = None + if camera_type in self.available_pinhole_camera_types: + pinhole_camera = get_camera_from_arrow_table( + self._get_recording_table(), + self._get_table_index(iteration), + camera_type, + self.log_metadata, + ) + 
return pinhole_camera + + def get_fisheye_mei_camera_at_iteration( + self, iteration: int, camera_type: FisheyeMEICameraType + ) -> Optional[FisheyeMEICamera]: + fisheye_mei_camera: Optional[FisheyeMEICamera] = None + if camera_type in self.available_fisheye_mei_camera_types: + fisheye_mei_camera = get_camera_from_arrow_table( + self._get_recording_table(), + self._get_table_index(iteration), + camera_type, + self.log_metadata, + ) + return fisheye_mei_camera + + def get_lidar_at_iteration(self, iteration: int, lidar_type: LiDARType) -> Optional[LiDAR]: + lidar: Optional[LiDAR] = None + if lidar_type in self.available_lidar_types or lidar_type == LiDARType.LIDAR_MERGED: + lidar = get_lidar_from_arrow_table( + self._get_recording_table(), + self._get_table_index(iteration), + lidar_type, + self.log_metadata, + ) + return lidar diff --git a/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py b/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py new file mode 100644 index 00000000..2afebbba --- /dev/null +++ b/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py @@ -0,0 +1,200 @@ +import random +from functools import partial +from pathlib import Path +from typing import Iterator, List, Optional, Set, Union + +from py123d.common.multithreading.worker_utils import WorkerPool, worker_map +from py123d.common.utils.arrow_helper import get_lru_cached_arrow_table +from py123d.datatypes.scene.abstract_scene import AbstractScene +from py123d.datatypes.scene.abstract_scene_builder import SceneBuilder +from py123d.datatypes.scene.arrow.arrow_scene import ArrowScene +from py123d.datatypes.scene.arrow.utils.arrow_metadata_utils import get_log_metadata_from_arrow +from py123d.datatypes.scene.scene_filter import SceneFilter +from py123d.datatypes.scene.scene_metadata import SceneExtractionMetadata +from py123d.script.utils.dataset_path_utils import get_dataset_paths + + +class ArrowSceneBuilder(SceneBuilder): + """ + A class to build a scene from a dataset. + """ + + def __init__( + self, + logs_root: Optional[Union[str, Path]] = None, + maps_root: Optional[Union[str, Path]] = None, + ): + if logs_root is None: + logs_root = get_dataset_paths().py123d_logs_root + if maps_root is None: + maps_root = get_dataset_paths().py123d_maps_root + + self._logs_root = Path(logs_root) + self._maps_root = Path(maps_root) + + def get_scenes(self, filter: SceneFilter, worker: WorkerPool) -> Iterator[AbstractScene]: + """See superclass.""" + + split_types = set(filter.split_types) if filter.split_types else {"train", "val", "test"} + split_names = ( + set(filter.split_names) if filter.split_names else _discover_split_names(self._logs_root, split_types) + ) + filter_log_names = set(filter.log_names) if filter.log_names else None + log_paths = _discover_log_paths(self._logs_root, split_names, filter_log_names) + if len(log_paths) == 0: + return [] + scenes = worker_map(worker, partial(_extract_scenes_from_logs, filter=filter), log_paths) + + if filter.shuffle: + random.shuffle(scenes) + + if filter.max_num_scenes is not None: + scenes = scenes[: filter.max_num_scenes] + + return scenes + + +def _discover_split_names(logs_root: Path, split_types: Set[str]) -> Set[str]: + assert set(split_types).issubset( + {"train", "val", "test"} + ), f"Invalid split types: {split_types}. Valid split types are 'train', 'val', 'test'." 
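# --- Illustrative aside (not part of the patch): a hedged usage sketch of the
# ArrowSceneBuilder defined above. The paths are hypothetical; WorkerPool is an
# abstract interface here, so the pool construction depends on the runner setup.
from py123d.datatypes.scene.arrow.arrow_scene_builder import ArrowSceneBuilder
from py123d.datatypes.scene.scene_filter import SceneFilter

builder = ArrowSceneBuilder(logs_root="/data/py123d/logs", maps_root="/data/py123d/maps")
scene_filter = SceneFilter(split_types=["val"], duration_s=10.0, history_s=3.0, max_num_scenes=8, shuffle=True)
# worker = ...  # any WorkerPool implementation from py123d.common.multithreading.worker_utils
# for scene in builder.get_scenes(scene_filter, worker):
#     print(scene.log_name, scene.number_of_iterations)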
+ split_names: Set[str] = set() + for split in logs_root.iterdir(): + split_name = split.name + if split.is_dir() and split.name != "maps": + if any(split_type in split_name for split_type in split_types): + split_names.add(split_name) + + return split_names + + +def _discover_log_paths(logs_root: Path, split_names: Set[str], log_names: Optional[List[str]]) -> List[Path]: + log_paths: List[Path] = [] + for split_name in split_names: + for log_path in (logs_root / split_name).iterdir(): + if log_path.is_file() and log_path.name.endswith(".arrow"): + if log_names is None or log_path.stem in log_names: + log_paths.append(log_path) + return log_paths + + +def _extract_scenes_from_logs(log_paths: List[Path], filter: SceneFilter) -> List[AbstractScene]: + scenes: List[AbstractScene] = [] + for log_path in log_paths: + try: + scene_extraction_metadatas = _get_scene_extraction_metadatas(log_path, filter) + except Exception as e: + print(f"Error extracting scenes from {log_path}: {e}") + continue + for scene_extraction_metadata in scene_extraction_metadatas: + scenes.append( + ArrowScene( + arrow_file_path=log_path, + scene_extraction_metadata=scene_extraction_metadata, + ) + ) + return scenes + + +def _get_scene_extraction_metadatas(log_path: Union[str, Path], filter: SceneFilter) -> List[SceneExtractionMetadata]: + scene_extraction_metadatas: List[SceneExtractionMetadata] = [] + + recording_table = get_lru_cached_arrow_table(log_path) + log_metadata = get_log_metadata_from_arrow(log_path) + + start_idx = int(filter.history_s / log_metadata.timestep_seconds) if filter.history_s is not None else 0 + end_idx = ( + len(recording_table) - int(filter.duration_s / log_metadata.timestep_seconds) + if filter.duration_s is not None + else len(recording_table) + ) + + # 1. 
Filter location + if ( + filter.locations is not None + and log_metadata.map_metadata is not None + and log_metadata.map_metadata.location not in filter.locations + ): + pass + + elif filter.duration_s is None: + scene_extraction_metadatas.append( + SceneExtractionMetadata( + initial_uuid=str(recording_table["uuid"][start_idx].as_py()), + initial_idx=start_idx, + duration_s=(end_idx - start_idx) * log_metadata.timestep_seconds, + history_s=filter.history_s if filter.history_s is not None else 0.0, + iteration_duration_s=log_metadata.timestep_seconds, + ) + ) + else: + scene_uuid_set = set(filter.scene_uuids) if filter.scene_uuids is not None else None + for idx in range(start_idx, end_idx): + scene_extraction_metadata: Optional[SceneExtractionMetadata] = None + + if scene_uuid_set is None: + scene_extraction_metadata = SceneExtractionMetadata( + initial_uuid=str(recording_table["uuid"][idx].as_py()), + initial_idx=idx, + duration_s=filter.duration_s, + history_s=filter.history_s if filter.history_s is not None else 0.0, + iteration_duration_s=log_metadata.timestep_seconds, + ) + elif str(recording_table["uuid"][idx].as_py()) in scene_uuid_set: + scene_extraction_metadata = SceneExtractionMetadata( + initial_uuid=str(recording_table["uuid"][idx].as_py()), + initial_idx=idx, + duration_s=filter.duration_s, + history_s=filter.history_s if filter.history_s is not None else 0.0, + iteration_duration_s=log_metadata.timestep_seconds, + ) + + if scene_extraction_metadata is not None: + # Enforce the minimum timestamp gap to the previously added scene, if specified in the filter + if filter.timestamp_threshold_s is not None and len(scene_extraction_metadatas) > 0: + iteration_delta = idx - scene_extraction_metadatas[-1].initial_idx + if (iteration_delta * log_metadata.timestep_seconds) < filter.timestamp_threshold_s: + continue + + scene_extraction_metadatas.append(scene_extraction_metadata) + + scene_extraction_metadatas_ = [] + for scene_extraction_metadata in scene_extraction_metadatas: + + add_scene = True + start_idx = scene_extraction_metadata.initial_idx + if filter.pinhole_camera_types is not None: + for pinhole_camera_type in filter.pinhole_camera_types: + column_name = f"{pinhole_camera_type.serialize()}_data" + + if ( + pinhole_camera_type in log_metadata.pinhole_camera_metadata + and column_name in recording_table.schema.names + and recording_table[column_name][start_idx].as_py() is not None + ): + continue + else: + add_scene = False + break + + if filter.fisheye_mei_camera_types is not None: + for fisheye_mei_camera_type in filter.fisheye_mei_camera_types: + column_name = f"{fisheye_mei_camera_type.serialize()}_data" + + if ( + fisheye_mei_camera_type in log_metadata.fisheye_mei_camera_metadata + and column_name in recording_table.schema.names + and recording_table[column_name][start_idx].as_py() is not None + ): + continue + else: + add_scene = False + break + + if add_scene: + scene_extraction_metadatas_.append(scene_extraction_metadata) + + scene_extraction_metadatas = scene_extraction_metadatas_ + + del recording_table, log_metadata + return scene_extraction_metadatas diff --git a/d123/dataset/scene/__init__.py b/src/py123d/datatypes/scene/arrow/utils/__init__.py similarity index 100% rename from d123/dataset/scene/__init__.py rename to src/py123d/datatypes/scene/arrow/utils/__init__.py diff --git a/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py b/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py new file mode 100644 index 00000000..71c08154 --- /dev/null +++ b/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py @@ -0,0 +1,230 @@ +from pathlib import Path 
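# --- Illustrative aside (not part of the patch): the timestamp-threshold logic in
# _get_scene_extraction_metadatas above thins candidate start indices so that kept
# scenes begin at least threshold_s apart. Standalone sketch with made-up numbers:
def _thin_start_indices(indices, timestep_s, threshold_s):
    kept = []
    for idx in indices:
        # Skip a candidate that starts too soon after the previously kept one.
        if kept and (idx - kept[-1]) * timestep_s < threshold_s:
            continue
        kept.append(idx)
    return kept

assert _thin_start_indices([0, 5, 10, 60, 70], timestep_s=0.1, threshold_s=1.0) == [0, 10, 60, 70]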
+from typing import Dict, List, Optional, Union + +import cv2 +import numpy as np +import numpy.typing as npt +import pyarrow as pa +from omegaconf import DictConfig + +from py123d.conversion.registry.lidar_index_registry import DefaultLiDARIndex +from py123d.conversion.sensor_io.camera.jpeg_camera_io import decode_image_from_jpeg_binary +from py123d.conversion.sensor_io.camera.mp4_camera_io import get_mp4_reader_from_path +from py123d.conversion.sensor_io.lidar.draco_lidar_io import load_lidar_from_draco_binary +from py123d.conversion.sensor_io.lidar.file_lidar_io import load_lidar_pcs_from_file +from py123d.conversion.sensor_io.lidar.laz_lidar_io import load_lidar_from_laz_binary +from py123d.datatypes.detections.box_detections import ( + BoxDetection, + BoxDetectionMetadata, + BoxDetectionSE3, + BoxDetectionWrapper, +) +from py123d.datatypes.detections.traffic_light_detections import ( + TrafficLightDetection, + TrafficLightDetectionWrapper, + TrafficLightStatus, +) +from py123d.datatypes.scene.scene_metadata import LogMetadata +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDAR, LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraType +from py123d.datatypes.time.time_point import TimePoint +from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 +from py123d.datatypes.vehicle_state.vehicle_parameters import VehicleParameters +from py123d.geometry import BoundingBoxSE3, StateSE3, Vector3D +from py123d.script.utils.dataset_path_utils import get_dataset_paths + +DATASET_PATHS: DictConfig = get_dataset_paths() +DATASET_SENSOR_ROOT: Dict[str, Path] = { + "av2-sensor": DATASET_PATHS.av2_sensor_data_root, + "nuplan": DATASET_PATHS.nuplan_sensor_root, + "nuscenes": DATASET_PATHS.nuscenes_data_root, + "wopd": DATASET_PATHS.wopd_data_root, + "pandaset": DATASET_PATHS.pandaset_data_root, + "kitti360": DATASET_PATHS.kitti360_data_root, +} + + +def get_timepoint_from_arrow_table(arrow_table: pa.Table, index: int) -> TimePoint: + return TimePoint.from_us(arrow_table["timestamp"][index].as_py()) + + +def get_ego_vehicle_state_from_arrow_table( + arrow_table: pa.Table, index: int, vehicle_parameters: VehicleParameters +) -> EgoStateSE3: + timepoint = get_timepoint_from_arrow_table(arrow_table, index) + return EgoStateSE3.from_array( + array=pa.array(arrow_table["ego_state"][index]).to_numpy(), + vehicle_parameters=vehicle_parameters, + timepoint=timepoint, + ) + + +def get_box_detections_from_arrow_table( + arrow_table: pa.Table, + index: int, + log_metadata: LogMetadata, +) -> BoxDetectionWrapper: + timepoint = get_timepoint_from_arrow_table(arrow_table, index) + box_detections: List[BoxDetection] = [] + box_detection_label_class = log_metadata.box_detection_label_class + + for detection_state, detection_velocity, detection_token, detection_label in zip( + arrow_table["box_detection_state"][index].as_py(), + arrow_table["box_detection_velocity"][index].as_py(), + arrow_table["box_detection_token"][index].as_py(), + arrow_table["box_detection_label"][index].as_py(), + ): + box_detection = BoxDetectionSE3( + metadata=BoxDetectionMetadata( + label=box_detection_label_class(detection_label), + timepoint=timepoint, + track_token=detection_token, + confidence=None, + ), + bounding_box_se3=BoundingBoxSE3.from_array(np.array(detection_state)), + velocity=Vector3D.from_array(np.array(detection_velocity)) if detection_velocity else None, + ) + 
box_detections.append(box_detection) + return BoxDetectionWrapper(box_detections=box_detections) + + +def get_traffic_light_detections_from_arrow_table(arrow_table: pa.Table, index: int) -> TrafficLightDetectionWrapper: + timepoint = get_timepoint_from_arrow_table(arrow_table, index) + traffic_light_detections: Optional[List[TrafficLightDetection]] = None + + if "traffic_light_ids" in arrow_table.schema.names and "traffic_light_types" in arrow_table.schema.names: + traffic_light_detections: List[TrafficLightDetection] = [] + for lane_id, status in zip( + arrow_table["traffic_light_ids"][index].as_py(), + arrow_table["traffic_light_types"][index].as_py(), + ): + traffic_light_detection = TrafficLightDetection( + timepoint=timepoint, + lane_id=lane_id, + status=TrafficLightStatus(status), + ) + traffic_light_detections.append(traffic_light_detection) + + traffic_light_detections = TrafficLightDetectionWrapper(traffic_light_detections=traffic_light_detections) + + return traffic_light_detections + + +def get_camera_from_arrow_table( + arrow_table: pa.Table, + index: int, + camera_type: Union[PinholeCameraType, FisheyeMEICameraType], + log_metadata: LogMetadata, +) -> Union[PinholeCamera, FisheyeMEICamera]: + + camera_name = camera_type.serialize() + table_data = arrow_table[f"{camera_name}_data"][index].as_py() + extrinsic_values = arrow_table[f"{camera_name}_extrinsic"][index].as_py() + extrinsic = StateSE3.from_list(extrinsic_values) if extrinsic_values is not None else None + + if table_data is None or extrinsic is None: + return None + + image: Optional[npt.NDArray[np.uint8]] = None + + if isinstance(table_data, str): + sensor_root = DATASET_SENSOR_ROOT[log_metadata.dataset] + assert sensor_root is not None, f"Dataset path for sensor loading not found for dataset: {log_metadata.dataset}" + full_image_path = Path(sensor_root) / table_data + assert full_image_path.exists(), f"Camera file not found: {full_image_path}" + + image = cv2.imread(str(full_image_path), cv2.IMREAD_COLOR) + image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + + elif isinstance(table_data, bytes): + image = decode_image_from_jpeg_binary(table_data) + elif isinstance(table_data, int): + image = _unoptimized_demo_mp4_read(log_metadata, camera_name, table_data) + else: + raise NotImplementedError( + f"Only string file paths, bytes, or int frame indices are supported for camera data, got {type(table_data)}" + ) + + if camera_name.startswith("fcam"): + camera_metadata = log_metadata.fisheye_mei_camera_metadata[camera_type] + return FisheyeMEICamera( + metadata=camera_metadata, + image=image, + extrinsic=extrinsic, + ) + else: + camera_metadata = log_metadata.pinhole_camera_metadata[camera_type] + return PinholeCamera( + metadata=camera_metadata, + image=image, + extrinsic=extrinsic, + ) + + +def get_lidar_from_arrow_table( + arrow_table: pa.Table, + index: int, + lidar_type: LiDARType, + log_metadata: LogMetadata, +) -> LiDAR: + + lidar: Optional[LiDAR] = None + lidar_column_name = f"{lidar_type.serialize()}_data" + lidar_column_name = ( + f"{LiDARType.LIDAR_MERGED.serialize()}_data" + if lidar_column_name not in arrow_table.schema.names + else lidar_column_name + ) + if lidar_column_name in arrow_table.schema.names: + + lidar_data = arrow_table[lidar_column_name][index].as_py() + if isinstance(lidar_data, str): + lidar_pc_dict = load_lidar_pcs_from_file(relative_path=lidar_data, log_metadata=log_metadata, index=index) + if lidar_type == LiDARType.LIDAR_MERGED: + # Merge all available LiDAR point clouds into one + merged_pc = 
np.vstack(list(lidar_pc_dict.values())) + lidar = LiDAR( + metadata=LiDARMetadata( + lidar_type=LiDARType.LIDAR_MERGED, + lidar_index=DefaultLiDARIndex, + extrinsic=None, + ), + point_cloud=merged_pc, + ) + elif lidar_type in lidar_pc_dict: + lidar = LiDAR( + metadata=log_metadata.lidar_metadata[lidar_type], + point_cloud=lidar_pc_dict[lidar_type], + ) + elif isinstance(lidar_data, bytes): + lidar_metadata = log_metadata.lidar_metadata[lidar_type] + if lidar_data.startswith(b"DRACO"): + # NOTE: DRACO only allows XYZ compression, so we need to override the lidar index here. + lidar_metadata.lidar_index = DefaultLiDARIndex + + lidar = load_lidar_from_draco_binary(lidar_data, lidar_metadata) + elif lidar_data.startswith(b"LASF"): + + lidar = load_lidar_from_laz_binary(lidar_data, lidar_metadata) + elif lidar_data is None: + lidar = None + else: + raise NotImplementedError( + f"Only string file paths or bytes for LiDAR data are supported, got {type(lidar_data)}" + ) + + return lidar + + +def _unoptimized_demo_mp4_read(log_metadata: LogMetadata, camera_name: str, frame_index: int) -> Optional[np.ndarray]: + """A quick and dirty MP4 reader for testing purposes only. Not optimized for performance.""" + image: Optional[npt.NDArray[np.uint8]] = None + + py123d_sensor_root = Path(DATASET_PATHS.py123d_sensors_root) + mp4_path = py123d_sensor_root / log_metadata.split / log_metadata.log_name / f"{camera_name}.mp4" + if mp4_path.exists(): + reader = get_mp4_reader_from_path(str(mp4_path)) + image = reader.get_frame(frame_index) + + return image diff --git a/src/py123d/datatypes/scene/arrow/utils/arrow_metadata_utils.py b/src/py123d/datatypes/scene/arrow/utils/arrow_metadata_utils.py new file mode 100644 index 00000000..3f264b62 --- /dev/null +++ b/src/py123d/datatypes/scene/arrow/utils/arrow_metadata_utils.py @@ -0,0 +1,21 @@ +import json +from functools import lru_cache +from pathlib import Path +from typing import Union + +import pyarrow as pa + +from py123d.common.utils.arrow_helper import get_lru_cached_arrow_table +from py123d.datatypes.scene.scene_metadata import LogMetadata + + +@lru_cache(maxsize=10000) +def get_log_metadata_from_arrow(arrow_file_path: Union[Path, str]) -> LogMetadata: + table = get_lru_cached_arrow_table(arrow_file_path) + log_metadata = LogMetadata.from_dict(json.loads(table.schema.metadata[b"log_metadata"].decode())) + return log_metadata + + +def add_log_metadata_to_arrow_schema(schema: pa.schema, log_metadata: LogMetadata) -> pa.schema: + schema = schema.with_metadata({"log_metadata": json.dumps(log_metadata.to_dict())}) + return schema diff --git a/src/py123d/datatypes/scene/scene_filter.py b/src/py123d/datatypes/scene/scene_filter.py new file mode 100644 index 00000000..62ad9301 --- /dev/null +++ b/src/py123d/datatypes/scene/scene_filter.py @@ -0,0 +1,44 @@ +from dataclasses import dataclass +from typing import List, Optional, Union + +from py123d.common.utils.enums import SerialIntEnum +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType + +# TODO: Add more filter options (e.g. 
scene tags, ego movement, or whatever appropriate) + + +@dataclass +class SceneFilter: + + split_types: Optional[List[str]] = None + split_names: Optional[List[str]] = None + log_names: Optional[List[str]] = None + + locations: Optional[List[str]] = None # TODO: + scene_uuids: Optional[List[str]] = None # TODO: + + timestamp_threshold_s: Optional[float] = None # TODO: + ego_displacement_minimum_m: Optional[float] = None # TODO: + + duration_s: Optional[float] = 10.0 + history_s: Optional[float] = 3.0 + + pinhole_camera_types: Optional[List[PinholeCameraType]] = None + fisheye_mei_camera_types: Optional[List[FisheyeMEICameraType]] = None + + max_num_scenes: Optional[int] = None + shuffle: bool = False + + def __post_init__(self): + def _resolve_enum_arguments( + serial_enum_cls: SerialIntEnum, input: Optional[List[Union[int, str, SerialIntEnum]]] + ) -> List[SerialIntEnum]: + + if input is None: + return None + assert isinstance(input, list), f"input must be a list of {serial_enum_cls.__name__}" + return [serial_enum_cls.from_arbitrary(value) for value in input] + + self.pinhole_camera_types = _resolve_enum_arguments(PinholeCameraType, self.pinhole_camera_types) + self.fisheye_mei_camera_types = _resolve_enum_arguments(FisheyeMEICameraType, self.fisheye_mei_camera_types) diff --git a/src/py123d/datatypes/scene/scene_metadata.py b/src/py123d/datatypes/scene/scene_metadata.py new file mode 100644 index 00000000..2bc271f1 --- /dev/null +++ b/src/py123d/datatypes/scene/scene_metadata.py @@ -0,0 +1,111 @@ +from __future__ import annotations + +from dataclasses import asdict, dataclass, field +from typing import Dict, Optional, Type + +import py123d +from py123d.conversion.registry.box_detection_label_registry import BOX_DETECTION_LABEL_REGISTRY, BoxDetectionLabel +from py123d.datatypes.maps.map_metadata import MapMetadata +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraMetadata, FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraMetadata, PinholeCameraType +from py123d.datatypes.vehicle_state.vehicle_parameters import VehicleParameters + + +@dataclass +class LogMetadata: + + dataset: str + split: str + log_name: str + location: str + timestep_seconds: float + + vehicle_parameters: Optional[VehicleParameters] = None + box_detection_label_class: Optional[Type[BoxDetectionLabel]] = None + pinhole_camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = field(default_factory=dict) + fisheye_mei_camera_metadata: Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata] = field(default_factory=dict) + lidar_metadata: Dict[LiDARType, LiDARMetadata] = field(default_factory=dict) + + map_metadata: Optional[MapMetadata] = None + version: str = str(py123d.__version__) + + @classmethod + def from_dict(cls, data_dict: Dict) -> LogMetadata: + + # Ego Vehicle Parameters + if data_dict["vehicle_parameters"] is not None: + data_dict["vehicle_parameters"] = VehicleParameters.from_dict(data_dict["vehicle_parameters"]) + + # Box detection label class specific to the dataset + if data_dict["box_detection_label_class"] in BOX_DETECTION_LABEL_REGISTRY: + data_dict["box_detection_label_class"] = BOX_DETECTION_LABEL_REGISTRY[ + data_dict["box_detection_label_class"] + ] + elif data_dict["box_detection_label_class"] is None: + data_dict["box_detection_label_class"] = None + else: + raise ValueError(f"Unknown box detection label class: {data_dict['box_detection_label_class']}") + + # 
Pinhole Camera Metadata + pinhole_camera_metadata = {} + for key, value in data_dict.get("pinhole_camera_metadata", {}).items(): + pinhole_camera_metadata[PinholeCameraType.deserialize(key)] = PinholeCameraMetadata.from_dict(value) + data_dict["pinhole_camera_metadata"] = pinhole_camera_metadata + + # Fisheye MEI Camera Metadata + fisheye_mei_camera_metadata = {} + for key, value in data_dict.get("fisheye_mei_camera_metadata", {}).items(): + fisheye_mei_camera_metadata[FisheyeMEICameraType.deserialize(key)] = FisheyeMEICameraMetadata.from_dict( + value + ) + data_dict["fisheye_mei_camera_metadata"] = fisheye_mei_camera_metadata + + # LiDAR Metadata + data_dict["lidar_metadata"] = { + LiDARType.deserialize(key): LiDARMetadata.from_dict(value) + for key, value in data_dict.get("lidar_metadata", {}).items() + } + + # Map Metadata + if data_dict["map_metadata"] is not None: + data_dict["map_metadata"] = MapMetadata.from_dict(data_dict["map_metadata"]) + + return LogMetadata(**data_dict) + + def to_dict(self) -> Dict: + data_dict = asdict(self) + data_dict["vehicle_parameters"] = self.vehicle_parameters.to_dict() if self.vehicle_parameters else None + if self.box_detection_label_class is not None: + data_dict["box_detection_label_class"] = self.box_detection_label_class.__name__ + data_dict["pinhole_camera_metadata"] = { + key.serialize(): value.to_dict() for key, value in self.pinhole_camera_metadata.items() + } + data_dict["fisheye_mei_camera_metadata"] = { + key.serialize(): value.to_dict() for key, value in self.fisheye_mei_camera_metadata.items() + } + data_dict["lidar_metadata"] = {key.serialize(): value.to_dict() for key, value in self.lidar_metadata.items()} + data_dict["map_metadata"] = self.map_metadata.to_dict() if self.map_metadata else None + return data_dict + + +@dataclass(frozen=True) +class SceneExtractionMetadata: + + initial_uuid: str + initial_idx: int + duration_s: float + history_s: float + iteration_duration_s: float + + @property + def number_of_iterations(self) -> int: + return round(self.duration_s / self.iteration_duration_s) + + @property + def number_of_history_iterations(self) -> int: + return round(self.history_s / self.iteration_duration_s) + + @property + def end_idx(self) -> int: + return self.initial_idx + self.number_of_iterations diff --git a/src/py123d/datatypes/sensors/__init__.py b/src/py123d/datatypes/sensors/__init__.py new file mode 100644 index 00000000..54cd70a1 --- /dev/null +++ b/src/py123d/datatypes/sensors/__init__.py @@ -0,0 +1,23 @@ +from py123d.datatypes.sensors.pinhole_camera import ( + PinholeCameraType, + PinholeCamera, + PinholeIntrinsicsIndex, + PinholeIntrinsics, + PinholeDistortionIndex, + PinholeDistortion, + PinholeCameraMetadata, +) +from py123d.datatypes.sensors.fisheye_mei_camera import ( + FisheyeMEICameraType, + FisheyeMEICamera, + FisheyeMEIDistortionIndex, + FisheyeMEIDistortion, + FisheyeMEIProjectionIndex, + FisheyeMEIProjection, + FisheyeMEICameraMetadata, +) +from py123d.datatypes.sensors.lidar import ( + LiDARType, + LiDARMetadata, + LiDAR, +) diff --git a/src/py123d/datatypes/sensors/fisheye_mei_camera.py b/src/py123d/datatypes/sensors/fisheye_mei_camera.py new file mode 100644 index 00000000..4a98afad --- /dev/null +++ b/src/py123d/datatypes/sensors/fisheye_mei_camera.py @@ -0,0 +1,197 @@ +from __future__ import annotations + +from dataclasses import asdict, dataclass +from enum import IntEnum +from typing import Any, Dict, Optional + +import numpy as np +import numpy.typing as npt + +from py123d.common.utils.enums import SerialIntEnum +from 
py123d.common.utils.mixin import ArrayMixin +from py123d.geometry.se import StateSE3 + + +class FisheyeMEICameraType(SerialIntEnum): + """ + Enum for fisheye cameras in d123. + """ + + FCAM_L = 0 + FCAM_R = 1 + + +@dataclass +class FisheyeMEICamera: + + metadata: FisheyeMEICameraMetadata + image: npt.NDArray[np.uint8] + extrinsic: StateSE3 + + +class FisheyeMEIDistortionIndex(IntEnum): + + K1 = 0 + K2 = 1 + P1 = 2 + P2 = 3 + + +class FisheyeMEIDistortion(ArrayMixin): + _array: npt.NDArray[np.float64] + + def __init__(self, k1: float, k2: float, p1: float, p2: float) -> None: + array = np.zeros(len(FisheyeMEIDistortionIndex), dtype=np.float64) + array[FisheyeMEIDistortionIndex.K1] = k1 + array[FisheyeMEIDistortionIndex.K2] = k2 + array[FisheyeMEIDistortionIndex.P1] = p1 + array[FisheyeMEIDistortionIndex.P2] = p2 + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> FisheyeMEIDistortion: + assert array.ndim == 1 + assert array.shape[-1] == len(FisheyeMEIDistortionIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def array(self) -> npt.NDArray[np.float64]: + return self._array + + @property + def k1(self) -> float: + return self._array[FisheyeMEIDistortionIndex.K1] + + @property + def k2(self) -> float: + return self._array[FisheyeMEIDistortionIndex.K2] + + @property + def p1(self) -> float: + return self._array[FisheyeMEIDistortionIndex.P1] + + @property + def p2(self) -> float: + return self._array[FisheyeMEIDistortionIndex.P2] + + +class FisheyeMEIProjectionIndex(IntEnum): + + GAMMA1 = 0 + GAMMA2 = 1 + U0 = 2 + V0 = 3 + + +class FisheyeMEIProjection(ArrayMixin): + _array: npt.NDArray[np.float64] + + def __init__(self, gamma1: float, gamma2: float, u0: float, v0: float) -> None: + array = np.zeros(len(FisheyeMEIProjectionIndex), dtype=np.float64) + array[FisheyeMEIProjectionIndex.GAMMA1] = gamma1 + array[FisheyeMEIProjectionIndex.GAMMA2] = gamma2 + array[FisheyeMEIProjectionIndex.U0] = u0 + array[FisheyeMEIProjectionIndex.V0] = v0 + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> FisheyeMEIProjection: + assert array.ndim == 1 + assert array.shape[-1] == len(FisheyeMEIProjectionIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def array(self) -> npt.NDArray[np.float64]: + return self._array + + @property + def gamma1(self) -> float: + return self._array[FisheyeMEIProjectionIndex.GAMMA1] + + @property + def gamma2(self) -> float: + return self._array[FisheyeMEIProjectionIndex.GAMMA2] + + @property + def u0(self) -> float: + return self._array[FisheyeMEIProjectionIndex.U0] + + @property + def v0(self) -> float: + return self._array[FisheyeMEIProjectionIndex.V0] + + +@dataclass +class FisheyeMEICameraMetadata: + + camera_type: FisheyeMEICameraType + mirror_parameter: Optional[float] + distortion: Optional[FisheyeMEIDistortion] + projection: Optional[FisheyeMEIProjection] + width: int + height: int + + @classmethod + def from_dict(cls, data_dict: Dict[str, Any]) -> FisheyeMEICameraMetadata: + data_dict["camera_type"] = FisheyeMEICameraType(data_dict["camera_type"]) + data_dict["distortion"] = ( + FisheyeMEIDistortion.from_array(np.array(data_dict["distortion"])) + if data_dict["distortion"] is not None + else None + ) + 
data_dict["projection"] = ( + FisheyeMEIProjection.from_array(np.array(data_dict["projection"])) + if data_dict["projection"] is not None + else None + ) + return FisheyeMEICameraMetadata(**data_dict) + + @property + def aspect_ratio(self) -> float: + return self.width / self.height + + def to_dict(self) -> Dict[str, Any]: + data_dict = asdict(self) + data_dict["camera_type"] = int(self.camera_type) + data_dict["distortion"] = self.distortion.array.tolist() if self.distortion is not None else None + data_dict["projection"] = self.projection.array.tolist() if self.projection is not None else None + return data_dict + + def cam2image(self, points_3d: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: + """camera coordinate to image plane""" + norm = np.linalg.norm(points_3d, axis=1) + + x = points_3d[:, 0] / norm + y = points_3d[:, 1] / norm + z = points_3d[:, 2] / norm + + x /= z + self.mirror_parameter + y /= z + self.mirror_parameter + + if self.distortion is not None: + k1 = self.distortion.k1 + k2 = self.distortion.k2 + else: + k1 = k2 = 0.0 + + if self.projection is not None: + gamma1 = self.projection.gamma1 + gamma2 = self.projection.gamma2 + u0 = self.projection.u0 + v0 = self.projection.v0 + else: + gamma1 = gamma2 = 1.0 + u0 = v0 = 0.0 + + ro2 = x * x + y * y + x *= 1 + k1 * ro2 + k2 * ro2 * ro2 + y *= 1 + k1 * ro2 + k2 * ro2 * ro2 + + x = gamma1 * x + u0 + y = gamma2 * y + v0 + + return x, y, norm * points_3d[:, 2] / np.abs(points_3d[:, 2]) diff --git a/d123/common/datatypes/sensor/lidar.py b/src/py123d/datatypes/sensors/lidar.py similarity index 53% rename from d123/common/datatypes/sensor/lidar.py rename to src/py123d/datatypes/sensors/lidar.py index fe178760..0950c77c 100644 --- a/d123/common/datatypes/sensor/lidar.py +++ b/src/py123d/datatypes/sensors/lidar.py @@ -1,14 +1,14 @@ from __future__ import annotations -import json from dataclasses import dataclass -from typing import Dict, Optional, Type +from typing import Optional, Type import numpy as np import numpy.typing as npt -from d123.common.datatypes.sensor.lidar_index import LIDAR_INDEX_REGISTRY, LiDARIndex -from d123.common.utils.enums import SerialIntEnum +from py123d.common.utils.enums import SerialIntEnum +from py123d.conversion.registry.lidar_index_registry import LIDAR_INDEX_REGISTRY, LiDARIndex +from py123d.geometry import StateSE3 class LiDARType(SerialIntEnum): @@ -20,6 +20,7 @@ class LiDARType(SerialIntEnum): LIDAR_SIDE_LEFT = 4 LIDAR_SIDE_RIGHT = 5 LIDAR_BACK = 6 + LIDAR_DOWN = 7 @dataclass @@ -27,8 +28,7 @@ class LiDARMetadata: lidar_type: LiDARType lidar_index: Type[LiDARIndex] - extrinsic: Optional[npt.NDArray[np.float64]] = None # 4x4 matrix - + extrinsic: Optional[StateSE3] = None # TODO: add identifier if point cloud is returned in lidar or ego frame. 
def to_dict(self) -> dict: @@ -39,40 +39,15 @@ def to_dict(self) -> dict: } @classmethod - def from_dict(cls, json_dict: dict) -> LiDARMetadata: - lidar_type = LiDARType[json_dict["lidar_type"]] - if json_dict["lidar_index"] not in LIDAR_INDEX_REGISTRY: - raise ValueError(f"Unknown lidar index: {json_dict['lidar_index']}") - lidar_index_class = LIDAR_INDEX_REGISTRY[json_dict["lidar_index"]] - extrinsic = np.array(json_dict["extrinsic"]) if json_dict["extrinsic"] is not None else None + def from_dict(cls, data_dict: dict) -> LiDARMetadata: + lidar_type = LiDARType[data_dict["lidar_type"]] + if data_dict["lidar_index"] not in LIDAR_INDEX_REGISTRY: + raise ValueError(f"Unknown lidar index: {data_dict['lidar_index']}") + lidar_index_class = LIDAR_INDEX_REGISTRY[data_dict["lidar_index"]] + extrinsic = StateSE3.from_list(data_dict["extrinsic"]) if data_dict["extrinsic"] is not None else None return cls(lidar_type=lidar_type, lidar_index=lidar_index_class, extrinsic=extrinsic) -def lidar_metadata_dict_to_json(lidar_metadata: Dict[LiDARType, LiDARMetadata]) -> str: - """ - Converts a dictionary of LiDARMetadata to a JSON-serializable format. - :param lidar_metadata: Dictionary of LiDARMetadata. - :return: JSON string. - """ - lidar_metadata_dict = { - lidar_type.serialize(): metadata.to_dict() for lidar_type, metadata in lidar_metadata.items() - } - return json.dumps(lidar_metadata_dict) - - -def lidar_metadata_dict_from_json(json_str: str) -> Dict[LiDARType, LiDARMetadata]: - """ - Converts a JSON string back to a dictionary of LiDARMetadata. - :param json_str: JSON string. - :return: Dictionary of LiDARMetadata. - """ - lidar_metadata_dict = json.loads(json_str) - return { - LiDARType.deserialize(lidar_type): LiDARMetadata.from_dict(metadata) - for lidar_type, metadata in lidar_metadata_dict.items() - } - - @dataclass class LiDAR: @@ -84,14 +59,14 @@ def xyz(self) -> npt.NDArray[np.float32]: """ Returns the point cloud as an Nx3 array of x, y, z coordinates. """ - return self.point_cloud[self.metadata.lidar_index.XYZ].T + return self.point_cloud[:, self.metadata.lidar_index.XYZ] @property def xy(self) -> npt.NDArray[np.float32]: """ Returns the point cloud as an Nx2 array of x, y coordinates. """ - return self.point_cloud[self.metadata.lidar_index.XY].T + return self.point_cloud[:, self.metadata.lidar_index.XY] @property def intensity(self) -> Optional[npt.NDArray[np.float32]]: @@ -100,7 +75,7 @@ def intensity(self) -> Optional[npt.NDArray[np.float32]]: Returns None if intensity is not part of the point cloud. """ if hasattr(self.metadata.lidar_index, "INTENSITY"): - return self.point_cloud[self.metadata.lidar_index.INTENSITY] + return self.point_cloud[:, self.metadata.lidar_index.INTENSITY] return None @property @@ -110,7 +85,7 @@ def range(self) -> Optional[npt.NDArray[np.float32]]: Returns None if range is not part of the point cloud. """ if hasattr(self.metadata.lidar_index, "RANGE"): - return self.point_cloud[self.metadata.lidar_index.RANGE] + return self.point_cloud[:, self.metadata.lidar_index.RANGE] return None @property @@ -120,5 +95,5 @@ def elongation(self) -> Optional[npt.NDArray[np.float32]]: Returns None if elongation is not part of the point cloud. 
""" if hasattr(self.metadata.lidar_index, "ELONGATION"): - return self.point_cloud[self.metadata.lidar_index.ELONGATION] + return self.point_cloud[:, self.metadata.lidar_index.ELONGATION] return None diff --git a/src/py123d/datatypes/sensors/pinhole_camera.py b/src/py123d/datatypes/sensors/pinhole_camera.py new file mode 100644 index 00000000..beefa883 --- /dev/null +++ b/src/py123d/datatypes/sensors/pinhole_camera.py @@ -0,0 +1,222 @@ +from __future__ import annotations + +from dataclasses import asdict, dataclass +from typing import Any, Dict, Optional + +import numpy as np +import numpy.typing as npt +from zmq import IntEnum + +from py123d.common.utils.enums import SerialIntEnum +from py123d.common.utils.mixin import ArrayMixin +from py123d.geometry.se import StateSE3 + + +class PinholeCameraType(SerialIntEnum): + + PCAM_F0 = 0 + PCAM_B0 = 1 + PCAM_L0 = 2 + PCAM_L1 = 3 + PCAM_L2 = 4 + PCAM_R0 = 5 + PCAM_R1 = 6 + PCAM_R2 = 7 + PCAM_STEREO_L = 8 + PCAM_STEREO_R = 9 + + +@dataclass +class PinholeCamera: + + metadata: PinholeCameraMetadata + image: npt.NDArray[np.uint8] + extrinsic: StateSE3 + + +class PinholeIntrinsicsIndex(IntEnum): + + FX = 0 + FY = 1 + CX = 2 + CY = 3 + SKEW = 4 # NOTE: not used, but added for completeness + + +class PinholeIntrinsics(ArrayMixin): + + _array: npt.NDArray[np.float64] + + def __init__(self, fx: float, fy: float, cx: float, cy: float, skew: float = 0.0) -> None: + array = np.zeros(len(PinholeIntrinsicsIndex), dtype=np.float64) + array[PinholeIntrinsicsIndex.FX] = fx + array[PinholeIntrinsicsIndex.FY] = fy + array[PinholeIntrinsicsIndex.CX] = cx + array[PinholeIntrinsicsIndex.CY] = cy + array[PinholeIntrinsicsIndex.SKEW] = skew + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> PinholeIntrinsics: + assert array.ndim == 1 + assert array.shape[-1] == len(PinholeIntrinsicsIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @classmethod + def from_camera_matrix(cls, intrinsic: npt.NDArray[np.float64]) -> PinholeIntrinsics: + """ + Create a PinholeIntrinsics from a 3x3 intrinsic matrix. + :param intrinsic: A 3x3 numpy array representing the intrinsic matrix. + :return: A PinholeIntrinsics instance. + """ + assert intrinsic.shape == (3, 3) + fx = intrinsic[0, 0] + fy = intrinsic[1, 1] + cx = intrinsic[0, 2] + cy = intrinsic[1, 2] + skew = intrinsic[0, 1] # Not used in most cases. + array = np.array([fx, fy, cx, cy, skew], dtype=np.float64) + return cls.from_array(array, copy=False) + + @property + def array(self) -> npt.NDArray[np.float64]: + return self._array + + @property + def fx(self) -> float: + return self._array[PinholeIntrinsicsIndex.FX] + + @property + def fy(self) -> float: + return self._array[PinholeIntrinsicsIndex.FY] + + @property + def cx(self) -> float: + return self._array[PinholeIntrinsicsIndex.CX] + + @property + def cy(self) -> float: + return self._array[PinholeIntrinsicsIndex.CY] + + @property + def skew(self) -> float: + return self._array[PinholeIntrinsicsIndex.SKEW] + + @property + def camera_matrix(self) -> npt.NDArray[np.float64]: + """ + Returns the intrinsic matrix. + :return: A 3x3 numpy array representing the intrinsic matrix. 
+ """ + K = np.array( + [ + [self.fx, self.skew, self.cx], + [0.0, self.fy, self.cy], + [0.0, 0.0, 1.0], + ], + dtype=np.float64, + ) + return K + + +class PinholeDistortionIndex(IntEnum): + K1 = 0 + K2 = 1 + P1 = 2 + P2 = 3 + K3 = 4 + + +class PinholeDistortion(ArrayMixin): + _array: npt.NDArray[np.float64] + + def __init__(self, k1: float, k2: float, p1: float, p2: float, k3: float) -> None: + array = np.zeros(len(PinholeDistortionIndex), dtype=np.float64) + array[PinholeDistortionIndex.K1] = k1 + array[PinholeDistortionIndex.K2] = k2 + array[PinholeDistortionIndex.P1] = p1 + array[PinholeDistortionIndex.P2] = p2 + array[PinholeDistortionIndex.K3] = k3 + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> PinholeDistortion: + assert array.ndim == 1 + assert array.shape[-1] == len(PinholeDistortionIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def array(self) -> npt.NDArray[np.float64]: + return self._array + + @property + def k1(self) -> float: + return self._array[PinholeDistortionIndex.K1] + + @property + def k2(self) -> float: + return self._array[PinholeDistortionIndex.K2] + + @property + def p1(self) -> float: + return self._array[PinholeDistortionIndex.P1] + + @property + def p2(self) -> float: + return self._array[PinholeDistortionIndex.P2] + + @property + def k3(self) -> float: + return self._array[PinholeDistortionIndex.K3] + + +@dataclass +class PinholeCameraMetadata: + + camera_type: PinholeCameraType + intrinsics: Optional[PinholeIntrinsics] + distortion: Optional[PinholeDistortion] + width: int + height: int + + @classmethod + def from_dict(cls, data_dict: Dict[str, Any]) -> PinholeCameraMetadata: + data_dict["camera_type"] = PinholeCameraType(data_dict["camera_type"]) + data_dict["intrinsics"] = ( + PinholeIntrinsics.from_list(data_dict["intrinsics"]) if data_dict["intrinsics"] is not None else None + ) + data_dict["distortion"] = ( + PinholeDistortion.from_list(data_dict["distortion"]) if data_dict["distortion"] is not None else None + ) + return PinholeCameraMetadata(**data_dict) + + def to_dict(self) -> Dict[str, Any]: + data_dict = asdict(self) + data_dict["camera_type"] = int(self.camera_type) + data_dict["intrinsics"] = self.intrinsics.tolist() if self.intrinsics is not None else None + data_dict["distortion"] = self.distortion.tolist() if self.distortion is not None else None + return data_dict + + @property + def aspect_ratio(self) -> float: + return self.width / self.height + + @property + def fov_x(self) -> float: + """ + Calculates the horizontal field of view (FOV) in radian. + """ + fov_x_rad = 2 * np.arctan(self.width / (2 * self.intrinsics.fx)) + return fov_x_rad + + @property + def fov_y(self) -> float: + """ + Calculates the vertical field of view (FOV) in radian. 
+ """ + fov_y_rad = 2 * np.arctan(self.height / (2 * self.intrinsics.fy)) + return fov_y_rad diff --git a/d123/script/__init__.py b/src/py123d/datatypes/time/__init__.py similarity index 100% rename from d123/script/__init__.py rename to src/py123d/datatypes/time/__init__.py diff --git a/d123/common/datatypes/time/time_point.py b/src/py123d/datatypes/time/time_point.py similarity index 100% rename from d123/common/datatypes/time/time_point.py rename to src/py123d/datatypes/time/time_point.py diff --git a/d123/script/builders/__init__.py b/src/py123d/datatypes/vehicle_state/__init__.py similarity index 100% rename from d123/script/builders/__init__.py rename to src/py123d/datatypes/vehicle_state/__init__.py diff --git a/d123/common/datatypes/vehicle_state/ego_state.py b/src/py123d/datatypes/vehicle_state/ego_state.py similarity index 89% rename from d123/common/datatypes/vehicle_state/ego_state.py rename to src/py123d/datatypes/vehicle_state/ego_state.py index 9ee67593..3ddc09a5 100644 --- a/d123/common/datatypes/vehicle_state/ego_state.py +++ b/src/py123d/datatypes/vehicle_state/ego_state.py @@ -2,61 +2,62 @@ from dataclasses import dataclass from enum import IntEnum -from functools import cached_property from typing import Final, Optional import numpy as np import numpy.typing as npt -from d123.common.datatypes.detection.detection import ( - BoxDetectionMetadata, - BoxDetectionSE2, - BoxDetectionSE3, -) -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.time.time_point import TimePoint -from d123.common.datatypes.vehicle_state.vehicle_parameters import ( +from py123d.common.utils.enums import classproperty +from py123d.conversion.registry.box_detection_label_registry import DefaultBoxDetectionLabel +from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE2, BoxDetectionSE3 +from py123d.datatypes.time.time_point import TimePoint +from py123d.datatypes.vehicle_state.vehicle_parameters import ( VehicleParameters, center_se2_to_rear_axle_se2, center_se3_to_rear_axle_se3, rear_axle_se2_to_center_se2, rear_axle_se3_to_center_se3, ) -from d123.common.geometry.base import StateSE2, StateSE3 -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2, BoundingBoxSE3 -from d123.common.geometry.vector import Vector2D, Vector3D -from d123.common.utils.enums import classproperty - -# TODO: Find an appropriate way to handle SE2 and SE3 states. 
+from py123d.geometry import BoundingBoxSE2, BoundingBoxSE3, StateSE2, StateSE3, Vector2D, Vector3D EGO_TRACK_TOKEN: Final[str] = "ego_vehicle" class EgoStateSE3Index(IntEnum): + X = 0 Y = 1 Z = 2 - ROLL = 3 - PITCH = 4 - YAW = 5 - VELOCITY_X = 6 - VELOCITY_Y = 7 - VELOCITY_Z = 8 - ACCELERATION_X = 9 - ACCELERATION_Y = 10 - ACCELERATION_Z = 11 - ANGULAR_VELOCITY_X = 12 - ANGULAR_VELOCITY_Y = 13 - ANGULAR_VELOCITY_Z = 14 + QW = 3 + QX = 4 + QY = 5 + QZ = 6 + VELOCITY_X = 7 + VELOCITY_Y = 8 + VELOCITY_Z = 9 + ACCELERATION_X = 10 + ACCELERATION_Y = 11 + ACCELERATION_Z = 12 + ANGULAR_VELOCITY_X = 13 + ANGULAR_VELOCITY_Y = 14 + ANGULAR_VELOCITY_Z = 15 @classproperty - def SE3(cls) -> slice: - return slice(cls.X, cls.YAW + 1) + def STATE_SE3(cls) -> slice: + return slice(cls.X, cls.QZ + 1) @classproperty def DYNAMIC_VEHICLE_STATE(cls) -> slice: return slice(cls.VELOCITY_X, cls.ANGULAR_VELOCITY_Z + 1) + @classproperty + def SCALAR(cls) -> slice: + return slice(cls.QW, cls.QW + 1) + + @classproperty + def VECTOR(cls) -> slice: + return slice(cls.QX, cls.QZ + 1) + @dataclass class EgoStateSE3: @@ -74,7 +75,7 @@ def from_array( vehicle_parameters: VehicleParameters, timepoint: Optional[TimePoint] = None, ) -> EgoStateSE3: - state_se3 = StateSE3.from_array(array[EgoStateSE3Index.SE3]) + state_se3 = StateSE3.from_array(array[EgoStateSE3Index.STATE_SE3]) dynamic_state = DynamicStateSE3.from_array(array[EgoStateSE3Index.DYNAMIC_VEHICLE_STATE]) return EgoStateSE3(state_se3, dynamic_state, vehicle_parameters, timepoint) @@ -126,7 +127,7 @@ def rear_axle_se2(self) -> StateSE2: def rear_axle(self) -> StateSE3: return self.rear_axle_se3 - @cached_property + @property def bounding_box(self) -> BoundingBoxSE3: return BoundingBoxSE3( center=self.center_se3, @@ -147,7 +148,7 @@ def bounding_box_se2(self) -> BoundingBoxSE2: def box_detection(self) -> BoxDetectionSE3: return BoxDetectionSE3( metadata=BoxDetectionMetadata( - detection_type=DetectionType.EGO, + label=DefaultBoxDetectionLabel.EGO, timepoint=self.timepoint, track_token=EGO_TRACK_TOKEN, confidence=1.0, @@ -214,7 +215,7 @@ def rear_axle_se2(self) -> StateSE2: def rear_axle(self) -> StateSE2: return self.rear_axle_se2 - @cached_property + @property def bounding_box(self) -> BoundingBoxSE2: return BoundingBoxSE2( center=self.center_se2, @@ -230,7 +231,7 @@ def bounding_box_se2(self) -> BoundingBoxSE2: def box_detection(self) -> BoxDetectionSE2: return BoxDetectionSE2( metadata=BoxDetectionMetadata( - detection_type=DetectionType.EGO, + label=DefaultBoxDetectionLabel.EGO, timepoint=self.timepoint, track_token=EGO_TRACK_TOKEN, confidence=1.0, @@ -271,6 +272,8 @@ def ANGULAR_VELOCITY(cls) -> slice: @dataclass class DynamicStateSE3: + # TODO: Make class array like + velocity: Vector3D acceleration: Vector3D angular_velocity: Vector3D @@ -328,6 +331,7 @@ def dynamic_state_se2(self) -> DynamicStateSE2: @dataclass class DynamicStateSE2: + velocity: Vector2D acceleration: Vector2D angular_velocity: float diff --git a/d123/common/datatypes/vehicle_state/vehicle_parameters.py b/src/py123d/datatypes/vehicle_state/vehicle_parameters.py similarity index 56% rename from d123/common/datatypes/vehicle_state/vehicle_parameters.py rename to src/py123d/datatypes/vehicle_state/vehicle_parameters.py index 3b4f04b9..ca2a1944 100644 --- a/d123/common/datatypes/vehicle_state/vehicle_parameters.py +++ b/src/py123d/datatypes/vehicle_state/vehicle_parameters.py @@ -1,9 +1,10 @@ -from d123.common.geometry.base import StateSE2, StateSE3, dataclass -from 
d123.common.geometry.transform.se3 import translate_se3_along_x, translate_se3_along_z -from d123.common.geometry.transform.tranform_2d import translate_along_yaw -from d123.common.geometry.vector import Vector2D +from __future__ import annotations -# TODO: Add more vehicle parameters, potentially extend the parameters. +from dataclasses import asdict, dataclass + +from py123d.geometry import StateSE2, StateSE3, Vector2D, Vector3D +from py123d.geometry.transform.transform_se2 import translate_se2_along_body_frame +from py123d.geometry.transform.transform_se3 import translate_se3_along_body_frame @dataclass @@ -19,6 +20,25 @@ class VehicleParameters: rear_axle_to_center_vertical: float rear_axle_to_center_longitudinal: float + @classmethod + def from_dict(cls, data_dict: dict) -> VehicleParameters: + return VehicleParameters(**data_dict) + + def to_dict(self) -> dict: + return asdict(self) + + @property + def half_width(self) -> float: + return self.width / 2.0 + + @property + def half_length(self) -> float: + return self.length / 2.0 + + @property + def half_height(self) -> float: + return self.height / 2.0 + def get_nuplan_chrysler_pacifica_parameters() -> VehicleParameters: # NOTE: use parameters from nuPlan dataset @@ -33,6 +53,19 @@ def get_nuplan_chrysler_pacifica_parameters() -> VehicleParameters: ) +def get_nuscenes_renault_zoe_parameters() -> VehicleParameters: + # https://en.wikipedia.org/wiki/Renault_Zoe + return VehicleParameters( + vehicle_name="nuscenes_renault_zoe", + width=1.730, + length=4.084, + height=1.562, + wheel_base=2.588, + rear_axle_to_center_vertical=1.562 / 2, # NOTE: missing in nuscenes, TODO: find more accurate value + rear_axle_to_center_longitudinal=1.385, + ) + + def get_carla_lincoln_mkz_2020_parameters() -> VehicleParameters: # NOTE: values are extracted from CARLA return VehicleParameters( @@ -60,6 +93,23 @@ def get_wopd_chrysler_pacifica_parameters() -> VehicleParameters: ) +def get_kitti360_vw_passat_parameters() -> VehicleParameters: + # The KITTI-360 dataset uses a 2006 VW Passat Variant B6. + # https://en.wikipedia.org/wiki/Volkswagen_Passat_(B6) + # [1] https://scispace.com/pdf/team-annieway-s-autonomous-system-18ql8b7kki.pdf + # NOTE: Parameters are estimated from the vehicle model. + # https://www.cvlibs.net/datasets/kitti-360/documentation.php + return VehicleParameters( + vehicle_name="kitti360_vw_passat", + width=1.820, + length=4.775, + height=1.516, + wheel_base=2.709, + rear_axle_to_center_vertical=1.516 / 2 - 0.9, + rear_axle_to_center_longitudinal=1.3369, + ) + + def get_av2_ford_fusion_hybrid_parameters() -> VehicleParameters: # NOTE: Parameters are estimated from the vehicle model. # https://en.wikipedia.org/wiki/Ford_Fusion_Hybrid#Second_generation @@ -75,6 +125,18 @@ def get_av2_ford_fusion_hybrid_parameters() -> VehicleParameters: ) +def get_pandaset_chrysler_pacifica_parameters() -> VehicleParameters: + return VehicleParameters( + vehicle_name="pandaset_chrysler_pacifica", + width=2.297, + length=5.176, + height=1.777, + wheel_base=3.089, + rear_axle_to_center_vertical=0.45, + rear_axle_to_center_longitudinal=1.461, + ) + + def center_se3_to_rear_axle_se3(center_se3: StateSE3, vehicle_parameters: VehicleParameters) -> StateSE3: """ Converts a center state to a rear axle state. @@ -82,12 +144,13 @@ def center_se3_to_rear_axle_se3(center_se3: StateSE3, vehicle_parameters: Vehicl :param vehicle_parameters: The vehicle parameters. :return: The rear axle state. 
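+
+    Example (editor's sketch; with identity orientation the body frame aligns with the world frame,
+    so the rear axle sits ``rear_axle_to_center_longitudinal`` behind the center):
+        >>> params = get_nuplan_chrysler_pacifica_parameters()
+        >>> center = StateSE3(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0)
+        >>> rear_axle = center_se3_to_rear_axle_se3(center, params)
+        >>> bool(rear_axle.x == -params.rear_axle_to_center_longitudinal)
+        True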
""" - return translate_se3_along_z( - translate_se3_along_x( - center_se3, + return translate_se3_along_body_frame( + center_se3, + Vector3D( -vehicle_parameters.rear_axle_to_center_longitudinal, + 0, + -vehicle_parameters.rear_axle_to_center_vertical, ), - -vehicle_parameters.rear_axle_to_center_vertical, ) @@ -98,12 +161,13 @@ def rear_axle_se3_to_center_se3(rear_axle_se3: StateSE3, vehicle_parameters: Veh :param vehicle_parameters: The vehicle parameters. :return: The center state. """ - return translate_se3_along_x( - translate_se3_along_z( - rear_axle_se3, + return translate_se3_along_body_frame( + rear_axle_se3, + Vector3D( + vehicle_parameters.rear_axle_to_center_longitudinal, + 0, vehicle_parameters.rear_axle_to_center_vertical, ), - vehicle_parameters.rear_axle_to_center_longitudinal, ) @@ -114,7 +178,7 @@ def center_se2_to_rear_axle_se2(center_se2: StateSE2, vehicle_parameters: Vehicl :param vehicle_parameters: The vehicle parameters. :return: The rear axle state in 2D. """ - return translate_along_yaw(center_se2, Vector2D(-vehicle_parameters.rear_axle_to_center_longitudinal, 0)) + return translate_se2_along_body_frame(center_se2, Vector2D(-vehicle_parameters.rear_axle_to_center_longitudinal, 0)) def rear_axle_se2_to_center_se2(rear_axle_se2: StateSE2, vehicle_parameters: VehicleParameters) -> StateSE2: @@ -124,4 +188,6 @@ def rear_axle_se2_to_center_se2(rear_axle_se2: StateSE2, vehicle_parameters: Veh :param vehicle_parameters: The vehicle parameters. :return: The center state in 2D. """ - return translate_along_yaw(rear_axle_se2, Vector2D(vehicle_parameters.rear_axle_to_center_longitudinal, 0)) + return translate_se2_along_body_frame( + rear_axle_se2, Vector2D(vehicle_parameters.rear_axle_to_center_longitudinal, 0) + ) diff --git a/src/py123d/geometry/__init__.py b/src/py123d/geometry/__init__.py new file mode 100644 index 00000000..c391b29a --- /dev/null +++ b/src/py123d/geometry/__init__.py @@ -0,0 +1,22 @@ +from py123d.geometry.geometry_index import ( + Point2DIndex, + Point3DIndex, + BoundingBoxSE2Index, + BoundingBoxSE3Index, + Corners2DIndex, + Corners3DIndex, + EulerAnglesIndex, + EulerStateSE3Index, + QuaternionIndex, + StateSE2Index, + StateSE3Index, + Vector2DIndex, + Vector3DIndex, +) +from py123d.geometry.point import Point2D, Point3D +from py123d.geometry.vector import Vector2D, Vector3D +from py123d.geometry.rotation import EulerAngles, Quaternion +from py123d.geometry.se import EulerStateSE3, StateSE2, StateSE3 +from py123d.geometry.bounding_box import BoundingBoxSE2, BoundingBoxSE3 +from py123d.geometry.polyline import Polyline2D, Polyline3D, PolylineSE2 +from py123d.geometry.occupancy_map import OccupancyMap2D diff --git a/src/py123d/geometry/bounding_box.py b/src/py123d/geometry/bounding_box.py new file mode 100644 index 00000000..bc3e1b73 --- /dev/null +++ b/src/py123d/geometry/bounding_box.py @@ -0,0 +1,281 @@ +from __future__ import annotations + +from ast import Dict +from typing import Union + +import numpy as np +import numpy.typing as npt +import shapely.geometry as geom + +from py123d.common.utils.mixin import ArrayMixin +from py123d.geometry.geometry_index import BoundingBoxSE2Index, BoundingBoxSE3Index, Corners2DIndex, Corners3DIndex +from py123d.geometry.point import Point2D, Point3D +from py123d.geometry.se import StateSE2, StateSE3 +from py123d.geometry.utils.bounding_box_utils import bbse2_array_to_corners_array, bbse3_array_to_corners_array + + +class BoundingBoxSE2(ArrayMixin): + """ + Rotated bounding box in 2D defined by center (StateSE2), 
length and width. + + Example: + >>> from py123d.geometry import StateSE2 + >>> bbox = BoundingBoxSE2(center=StateSE2(1.0, 2.0, 0.5), length=4.0, width=2.0) + >>> bbox.array + array([1. , 2. , 0.5, 4. , 2. ]) + >>> bbox.corners_array.shape + (4, 2) + >>> bbox.shapely_polygon.area + 8.0 + """ + + _array: npt.NDArray[np.float64] + + def __init__(self, center: StateSE2, length: float, width: float): + """Initialize BoundingBoxSE2 with center (StateSE2), length and width. + + :param center: Center of the bounding box as a StateSE2 instance. + :param length: Length of the bounding box along the x-axis in the local frame. + :param width: Width of the bounding box along the y-axis in the local frame. + """ + array = np.zeros(len(BoundingBoxSE2Index), dtype=np.float64) + array[BoundingBoxSE2Index.SE2] = center.array + array[BoundingBoxSE2Index.LENGTH] = length + array[BoundingBoxSE2Index.WIDTH] = width + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> BoundingBoxSE2: + """Create a BoundingBoxSE2 from a numpy array. + + :param array: A 1D numpy array containing the bounding box parameters, indexed by \ + :class:`~py123d.geometry.BoundingBoxSE2Index`. + :param copy: Whether to copy the input array. Defaults to True. + :return: A BoundingBoxSE2 instance. + """ + assert array.ndim == 1 + assert array.shape[-1] == len(BoundingBoxSE2Index) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def center(self) -> StateSE2: + """The center of the bounding box as a StateSE2 instance. + + :return: The center of the bounding box as a StateSE2 instance. + """ + return StateSE2.from_array(self._array[BoundingBoxSE2Index.SE2]) + + @property + def center_se2(self) -> StateSE2: + """The center of the bounding box as a StateSE2 instance. + + :return: The center of the bounding box as a StateSE2 instance. + """ + return self.center + + @property + def length(self) -> float: + """The length of the bounding box along the x-axis in the local frame. + + :return: The length of the bounding box. + """ + return self._array[BoundingBoxSE2Index.LENGTH] + + @property + def width(self) -> float: + """The width of the bounding box along the y-axis in the local frame. + + :return: The width of the bounding box. + """ + return self._array[BoundingBoxSE2Index.WIDTH] + + @property + def array(self) -> npt.NDArray[np.float64]: + """Converts the BoundingBoxSE2 instance to a numpy array, indexed by :class:`~py123d.geometry.BoundingBoxSE2Index`. + + :return: A numpy array of shape (5,) containing the bounding box parameters [x, y, yaw, length, width]. + """ + return self._array + + @property + def shapely_polygon(self) -> geom.Polygon: + """Return a Shapely polygon representation of the bounding box. + + :return: A Shapely polygon representing the bounding box. + """ + return geom.Polygon(self.corners_array) + + @property + def bounding_box_se2(self) -> BoundingBoxSE2: + """Returns bounding box itself for polymorphism. + + :return: A BoundingBoxSE2 instance representing the 2D bounding box. + """ + return self + + @property + def corners_array(self) -> npt.NDArray[np.float64]: + """Returns the corner points of the bounding box as a numpy array. + + :return: A numpy array of shape (4, 2) containing the corner points of the bounding box, \ + indexed by :class:`~py123d.geometry.Corners2DIndex` and :class:`~py123d.geometry.Point2DIndex`. 
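+
+        A sketch of the corner convention (editor's addition; assumes the front-left corner lies at
+        (+length/2, +width/2) in the local frame, per :class:`~py123d.geometry.Corners2DIndex`):
+        >>> bbox = BoundingBoxSE2(center=StateSE2(0.0, 0.0, 0.0), length=4.0, width=2.0)
+        >>> bool(np.allclose(bbox.corners_array[Corners2DIndex.FRONT_LEFT], [2.0, 1.0]))
+        True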
+ """ + return bbse2_array_to_corners_array(self.array) + + @property + def corners_dict(self) -> Dict[Corners2DIndex, Point2D]: + """Returns the corner points of the bounding box as a dictionary. + + :return: A dictionary mapping :class:`~py123d.geometry.Corners2DIndex` to :class:`~py123d.geometry.Point2D` instances. + """ + corners_array = self.corners_array + return {index: Point2D.from_array(corners_array[index]) for index in Corners2DIndex} + + +class BoundingBoxSE3(ArrayMixin): + """ + Rotated bounding box in 3D defined by center with quaternion rotation (StateSE3), length, width and height. + + Example: + >>> from py123d.geometry import StateSE3 + >>> bbox = BoundingBoxSE3(center=StateSE3(1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0), length=4.0, width=2.0, height=1.5) + >>> bbox.array + array([1. , 2. , 3. , 1. , 0. , 0. , 0. , 4. , 2. , 1.5]) + >>> bbox.bounding_box_se2.array + array([1., 2., 0., 4., 2.]) + >>> bbox.shapely_polygon.area + 8.0 + """ + + _array: npt.NDArray[np.float64] + + def __init__(self, center: StateSE3, length: float, width: float, height: float): + """Initialize BoundingBoxSE3 with center (StateSE3), length, width and height. + + :param center: Center of the bounding box as a StateSE3 instance. + :param length: Length of the bounding box along the x-axis in the local frame. + :param width: Width of the bounding box along the y-axis in the local frame. + :param height: Height of the bounding box along the z-axis in the local frame. + """ + array = np.zeros(len(BoundingBoxSE3Index), dtype=np.float64) + array[BoundingBoxSE3Index.STATE_SE3] = center.array + array[BoundingBoxSE3Index.LENGTH] = length + array[BoundingBoxSE3Index.WIDTH] = width + array[BoundingBoxSE3Index.HEIGHT] = height + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> BoundingBoxSE3: + """Create a BoundingBoxSE3 from a numpy array. + + :param array: A 1D numpy array containing the bounding box parameters, indexed by \ + :class:`~py123d.geometry.BoundingBoxSE3Index`. + :param copy: Whether to copy the input array. Defaults to True. + :return: A BoundingBoxSE3 instance. + """ + assert array.ndim == 1 + assert array.shape[-1] == len(BoundingBoxSE3Index) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def center(self) -> StateSE3: + """The center of the bounding box as a StateSE3 instance. + + :return: The center of the bounding box as a StateSE3 instance. + """ + return StateSE3.from_array(self._array[BoundingBoxSE3Index.STATE_SE3]) + + @property + def center_se3(self) -> StateSE3: + """The center of the bounding box as a StateSE3 instance. + + :return: The center of the bounding box as a StateSE3 instance. + """ + return self.center + + @property + def center_se2(self) -> StateSE2: + """The center of the bounding box as a StateSE2 instance. + + :return: The center of the bounding box as a StateSE2 instance. + """ + return self.center_se3.state_se2 + + @property + def length(self) -> float: + """The length of the bounding box along the x-axis in the local frame. + + :return: The length of the bounding box. + """ + return self._array[BoundingBoxSE3Index.LENGTH] + + @property + def width(self) -> float: + """The width of the bounding box along the y-axis in the local frame. + + :return: The width of the bounding box. 
+ """ + return self._array[BoundingBoxSE3Index.WIDTH] + + @property + def height(self) -> float: + """The height of the bounding box along the z-axis in the local frame. + + :return: The height of the bounding box. + """ + return self._array[BoundingBoxSE3Index.HEIGHT] + + @property + def array(self) -> npt.NDArray[np.float64]: + """Convert the BoundingBoxSE3 instance to a numpy array. + + :return: A 1D numpy array containing the bounding box parameters, indexed by \ + :class:`~py123d.geometry.BoundingBoxSE3Index`. + """ + return self._array + + @property + def bounding_box_se2(self) -> BoundingBoxSE2: + """Converts the 3D bounding box to a 2D bounding box by dropping the z, roll and pitch components. + + :return: A BoundingBoxSE2 instance. + """ + return BoundingBoxSE2( + center=self.center_se2, + length=self.length, + width=self.width, + ) + + @property + def shapely_polygon(self) -> geom.Polygon: + """Return a Shapely polygon representation of the 2D projection of the bounding box. + + :return: A shapely polygon representing the 2D bounding box. + """ + return self.bounding_box_se2.shapely_polygon + + @property + def corners_array(self) -> npt.NDArray[np.float64]: + """Returns the corner points of the bounding box as a numpy array, shape (8, 3). + + :return: A numpy array of shape (8, 3) containing the corner points of the bounding box, \ + indexed by :class:`~py123d.geometry.Corners3DIndex` and :class:`~py123d.geometry.Point3DIndex`. + """ + return bbse3_array_to_corners_array(self.array) + + @property + def corners_dict(self) -> Dict[Corners3DIndex, Point3D]: + """Returns the corner points of the bounding box as a dictionary. + + :return: A dictionary mapping :class:`~py123d.geometry.Corners3DIndex` to \ + :class:`~py123d.geometry.Point3D` instances. + """ + corners_array = self.corners_array + return {index: Point3D.from_array(corners_array[index]) for index in Corners3DIndex} + + +BoundingBox = Union[BoundingBoxSE2, BoundingBoxSE3] diff --git a/src/py123d/geometry/geometry_index.py b/src/py123d/geometry/geometry_index.py new file mode 100644 index 00000000..5d596f77 --- /dev/null +++ b/src/py123d/geometry/geometry_index.py @@ -0,0 +1,268 @@ +from enum import IntEnum + +from py123d.common.utils.enums import classproperty + + +class Point2DIndex(IntEnum): + """ + Indexes array-like representations of 2D points (x,y). + """ + + X = 0 + Y = 1 + + @classproperty + def XY(cls) -> slice: + return slice(cls.X, cls.Y + 1) + + +class Vector2DIndex(IntEnum): + """ + Indexes array-like representations of 2D vectors (x,y). + """ + + X = 0 + Y = 1 + + @classproperty + def XY(cls) -> slice: + return slice(cls.X, cls.Y + 1) + + +class StateSE2Index(IntEnum): + """ + Indexes array-like representations of SE2 states (x,y,yaw). + """ + + X = 0 + Y = 1 + YAW = 2 + + @classproperty + def XY(cls) -> slice: + return slice(cls.X, cls.Y + 1) + + +class Point3DIndex(IntEnum): + """ + Indexes array-like representations of 3D points (x,y,z). + """ + + X = 0 + Y = 1 + Z = 2 + + @classproperty + def XY(cls) -> slice: + return slice(cls.X, cls.Y + 1) + + @classproperty + def XYZ(cls) -> slice: + return slice(cls.X, cls.Z + 1) + + +class Vector3DIndex(IntEnum): + """ + Indexes array-like representations of 3D vectors (x,y,z). + """ + + X = 0 + Y = 1 + Z = 2 + + @classproperty + def XYZ(cls) -> slice: + return slice(cls.X, cls.Z + 1) + + +class EulerAnglesIndex(IntEnum): + """ + Indexes array-like representations of Euler angles (roll,pitch,yaw). 
+ """ + + ROLL = 0 + PITCH = 1 + YAW = 2 + + +class QuaternionIndex(IntEnum): + """ + Indexes array-like representations of quaternions (qw,qx,qy,qz). + """ + + QW = 0 + QX = 1 + QY = 2 + QZ = 3 + + @classproperty + def SCALAR(cls) -> int: + return cls.QW + + @classproperty + def VECTOR(cls) -> slice: + return slice(cls.QX, cls.QZ + 1) + + +class EulerStateSE3Index(IntEnum): + """ + Indexes array-like representations of SE3 states (x,y,z,roll,pitch,yaw). + TODO: Use quaternions for rotation. + """ + + X = 0 + Y = 1 + Z = 2 + ROLL = 3 + PITCH = 4 + YAW = 5 + + @classproperty + def XY(cls) -> slice: + return slice(cls.X, cls.Y + 1) + + @classproperty + def XYZ(cls) -> slice: + return slice(cls.X, cls.Z + 1) + + @classproperty + def EULER_ANGLES(cls) -> slice: + return slice(cls.ROLL, cls.YAW + 1) + + +class StateSE3Index(IntEnum): + """ + Indexes array-like representations of SE3 states with quaternions (x,y,z,qw,qx,qy,qz). + """ + + X = 0 + Y = 1 + Z = 2 + QW = 3 + QX = 4 + QY = 5 + QZ = 6 + + @classproperty + def XY(cls) -> slice: + return slice(cls.X, cls.Y + 1) + + @classproperty + def XYZ(cls) -> slice: + return slice(cls.X, cls.Z + 1) + + @classproperty + def QUATERNION(cls) -> slice: + return slice(cls.QW, cls.QZ + 1) + + @classproperty + def SCALAR(cls) -> slice: + return slice(cls.QW, cls.QW + 1) + + @classproperty + def VECTOR(cls) -> slice: + return slice(cls.QX, cls.QZ + 1) + + +class BoundingBoxSE2Index(IntEnum): + """ + Indexes array-like representations of rotated 2D bounding boxes (x,y,yaw,length,width). + """ + + X = 0 + Y = 1 + YAW = 2 + LENGTH = 3 + WIDTH = 4 + + @classproperty + def XY(cls) -> slice: + return slice(cls.X, cls.Y + 1) + + @classproperty + def SE2(cls) -> slice: + return slice(cls.X, cls.YAW + 1) + + @classproperty + def EXTENT(cls) -> slice: + return slice(cls.LENGTH, cls.WIDTH + 1) + + +class Corners2DIndex(IntEnum): + """ + Indexes the corners of a BoundingBoxSE2 in the order: front-left, front-right, back-right, back-left. + """ + + FRONT_LEFT = 0 + FRONT_RIGHT = 1 + BACK_RIGHT = 2 + BACK_LEFT = 3 + + +class BoundingBoxSE3Index(IntEnum): + """ + Indexes array-like representations of rotated 3D bounding boxes + - center (x,y,z). + - rotation (qw,qx,qy,qz). + - extent (length,width,height). + """ + + X = 0 + Y = 1 + Z = 2 + QW = 3 + QX = 4 + QY = 5 + QZ = 6 + LENGTH = 7 + WIDTH = 8 + HEIGHT = 9 + + @classproperty + def XYZ(cls) -> slice: + return slice(cls.X, cls.Z + 1) + + @classproperty + def STATE_SE3(cls) -> slice: + return slice(cls.X, cls.QZ + 1) + + @classproperty + def QUATERNION(cls) -> slice: + return slice(cls.QW, cls.QZ + 1) + + @classproperty + def EXTENT(cls) -> slice: + return slice(cls.LENGTH, cls.HEIGHT + 1) + + @classproperty + def SCALAR(cls) -> slice: + return slice(cls.QW, cls.QW + 1) + + @classproperty + def VECTOR(cls) -> slice: + return slice(cls.QX, cls.QZ + 1) + + +class Corners3DIndex(IntEnum): + """ + Indexes the corners of a BoundingBoxSE3 in the order: + front-left-bottom, front-right-bottom, back-right-bottom, back-left-bottom, + front-left-top, front-right-top, back-right-top, back-left-top. 
+ """ + + FRONT_LEFT_BOTTOM = 0 + FRONT_RIGHT_BOTTOM = 1 + BACK_RIGHT_BOTTOM = 2 + BACK_LEFT_BOTTOM = 3 + FRONT_LEFT_TOP = 4 + FRONT_RIGHT_TOP = 5 + BACK_RIGHT_TOP = 6 + BACK_LEFT_TOP = 7 + + @classproperty + def BOTTOM(cls) -> slice: + return slice(cls.FRONT_LEFT_BOTTOM, cls.BACK_LEFT_BOTTOM + 1) + + @classproperty + def TOP(cls) -> slice: + return slice(cls.FRONT_LEFT_TOP, cls.BACK_LEFT_TOP + 1) diff --git a/src/py123d/geometry/occupancy_map.py b/src/py123d/geometry/occupancy_map.py new file mode 100644 index 00000000..a0e4021a --- /dev/null +++ b/src/py123d/geometry/occupancy_map.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from typing import Dict, List, Literal, Optional, Sequence, Tuple, Union + +import numpy as np +import numpy.typing as npt +import shapely +from shapely.geometry.base import BaseGeometry +from shapely.strtree import STRtree + +from py123d.geometry.geometry_index import Point2DIndex + + +class OccupancyMap2D: + def __init__( + self, + geometries: Sequence[BaseGeometry], + ids: Optional[Union[List[str], List[int]]] = None, + node_capacity: int = 10, + ): + """Constructs a 2D occupancy map of shapely geometries using an str-tree for efficient spatial queries. + + :param geometries: list/array of shapely geometries + :param ids: optional list of geometry identifiers, either strings or integers + :param node_capacity: max number of child nodes in str-tree, defaults to 10 + """ + + assert ids is None or len(ids) == len(geometries), "Length of ids must match length of geometries" + + self._ids: Union[List[str], List[int]] = ( + ids if ids is not None else [str(idx) for idx in range(len(geometries))] + ) + self._id_to_idx: Dict[Union[str, int], int] = {id: idx for idx, id in enumerate(self._ids)} + + self._geometries = geometries + self._node_capacity = node_capacity + self._str_tree = STRtree(self._geometries, node_capacity) + + @classmethod + def from_dict(cls, geometry_dict: Dict[Union[str, int], BaseGeometry], node_capacity: int = 10) -> OccupancyMap2D: + """Constructs a 2D occupancy map from a dictionary of geometries. + + :param geometry_dict: Dictionary mapping geometry identifiers to shapely geometries + :param node_capacity: Max number of child nodes in str-tree, defaults to 10 + :return: OccupancyMap2D instance + """ + ids = list(geometry_dict.keys()) + geometries = list(geometry_dict.values()) + return cls(geometries=geometries, ids=ids, node_capacity=node_capacity) + + def __getitem__(self, id: Union[str, int]) -> BaseGeometry: + """Retrieves geometry given an ID. + + :param id: geometry identifier + :return: Geometry of ID. + """ + return self._geometries[self._id_to_idx[id]] + + def __len__(self) -> int: + """ + :return: Number of geometries in the occupancy map. + """ + return len(self._ids) + + @property + def ids(self) -> Union[List[str], List[int]]: + """Getter for geometry IDs in occupancy map + + :return: list of IDs + """ + return self._ids + + @property + def geometries(self) -> Sequence[BaseGeometry]: + """Getter for geometries in occupancy map. + + :return: list of geometries + """ + return self._geometries + + @property + def id_to_idx(self) -> Dict[Union[int, str], int]: + """Mapping from geometry IDs to indices in the occupancy map. + + :return: dictionary of IDs and indices + """ + return self._id_to_idx + + def intersects(self, geometry: BaseGeometry) -> Union[List[str], List[int]]: + """Searches for intersecting geometries in the occupancy map. 
+ + :param geometry: geometries to query + :return: list of IDs for intersecting geometries + """ + indices = self.query(geometry, predicate="intersects") + return [self._ids[idx] for idx in indices] + + def query( + self, + geometry: Union[BaseGeometry, np.ndarray], + predicate: Optional[ + Literal[ + "intersects", + "within", + "contains", + "overlaps", + "crosses", + "touches", + "covers", + "covered_by", + "contains_properly", + "dwithin", + ] + ] = None, + distance: Optional[float] = None, + ) -> npt.NDArray[np.int64]: + """Queries the str-tree for geometries that match the given predicate with the input geometry. + + :param geometry: Geometry or array_like + :param predicate: {None, 'intersects', 'within', 'contains', 'overlaps', 'crosses', 'touches', 'covers', \ 'covered_by', 'contains_properly', 'dwithin'}, defaults to None + :param distance: number or array_like, defaults to None. + :return: ndarray with shape (n,) if geometry is a scalar. + Contains tree geometry indices. + :return: ndarray with shape (2, n) if geometry is an array_like + The first subarray contains input geometry indices. + The second subarray contains tree geometry indices. + """ + return self._str_tree.query(geometry, predicate=predicate, distance=distance) + + def query_nearest( + self, + geometry: Union[BaseGeometry, np.ndarray], + max_distance: Optional[float] = None, + return_distance: bool = False, + exclusive: bool = False, + all_matches: bool = True, + ) -> Union[npt.NDArray[np.int64], Tuple[npt.NDArray[np.int64], npt.NDArray[np.float64]]]: + """Queries the str-tree for the nearest geometry to the input geometry. + + :param geometry: The input geometry to query. + :param max_distance: The maximum distance to consider, defaults to None. + :param return_distance: Whether to return the distance to the nearest geometry, defaults to False. + :param exclusive: Whether to exclude the input geometry from the results, defaults to False. + :param all_matches: Whether to return all matching geometries, defaults to True. + :return: The nearest geometry or geometries. + """ + return self._str_tree.query_nearest( + geometry, + max_distance=max_distance, + return_distance=return_distance, + exclusive=exclusive, + all_matches=all_matches, + ) + + def contains_vectorized(self, points: npt.NDArray[np.float64]) -> npt.NDArray[np.bool_]: + """Determines whether input points are in geometries (i.e. polygons) of the occupancy map. + NOTE: This function can be significantly faster than using the str-tree, if the number of geometries is + relatively small compared to the number of input points. + + :param points: array of shape (num_points, 2), indexed by :class:`~py123d.geometry.Point2DIndex`.
+ :return: boolean array of shape (num_geometries, num_points) + """ + output = np.zeros((len(self._geometries), len(points)), dtype=bool) + for i, geometry in enumerate(self._geometries): + output[i] = shapely.contains_xy(geometry, points[..., Point2DIndex.X], points[..., Point2DIndex.Y]) + + return output diff --git a/src/py123d/geometry/point.py b/src/py123d/geometry/point.py new file mode 100644 index 00000000..571567be --- /dev/null +++ b/src/py123d/geometry/point.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from typing import Iterable + +import numpy as np +import numpy.typing as npt +import shapely.geometry as geom + +from py123d.common.utils.mixin import ArrayMixin +from py123d.geometry.geometry_index import Point2DIndex, Point3DIndex + + +class Point2D(ArrayMixin): + """Class to represent 2D points.""" + + _array: npt.NDArray[np.float64] + + def __init__(self, x: float, y: float): + """Initialize Point2D with x, y coordinates.""" + array = np.zeros(len(Point2DIndex), dtype=np.float64) + array[Point2DIndex.X] = x + array[Point2DIndex.Y] = y + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> Point2D: + """Constructs a Point2D from a numpy array. + + :param array: Array of shape (2,) representing the point coordinates [x, y], indexed by \ + :class:`~py123d.geometry.Point2DIndex`. + :param copy: Whether to copy the input array. Defaults to True. + :return: A Point2D instance. + """ + assert array.ndim == 1 + assert array.shape[0] == len(Point2DIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def x(self) -> float: + """The x coordinate of the point. + + :return: The x coordinate of the point. + """ + return self._array[Point2DIndex.X] + + @property + def y(self) -> float: + """The y coordinate of the point. + + :return: The y coordinate of the point. + """ + return self._array[Point2DIndex.Y] + + @property + def array(self) -> npt.NDArray[np.float64]: + """The array representation of the point. + + :return: A numpy array of shape (2,) containing the point coordinates [x, y], indexed by \ + :class:`~py123d.geometry.Point2DIndex`. + """ + return self._array + + @property + def shapely_point(self) -> geom.Point: + """The Shapely Point representation of the 2D point. + + :return: A Shapely Point representation of the 2D point. + """ + return geom.Point(self.x, self.y) + + def __iter__(self) -> Iterable[float]: + """Iterator over point coordinates.""" + return iter((self.x, self.y)) + + def __hash__(self) -> int: + """Hash method""" + return hash((self.x, self.y)) + + +class Point3D(ArrayMixin): + """Class to represent 3D points.""" + + _array: npt.NDArray[np.float64] + + def __init__(self, x: float, y: float, z: float): + """Initialize Point3D with x, y, z coordinates.""" + array = np.zeros(len(Point3DIndex), dtype=np.float64) + array[Point3DIndex.X] = x + array[Point3DIndex.Y] = y + array[Point3DIndex.Z] = z + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> Point3D: + """Constructs a Point3D from a numpy array. + + :param array: Array of shape (3,) representing the point coordinates [x, y, z], indexed by \ + :class:`~py123d.geometry.Point3DIndex`. + :param copy: Whether to copy the input array. Defaults to True. + :return: A Point3D instance.
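+
+        A minimal sketch (editor's addition):
+        >>> import numpy as np
+        >>> point = Point3D.from_array(np.array([1.0, 2.0, 3.0]))
+        >>> float(point.z)
+        3.0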
+ """ + assert array.ndim == 1 + assert array.shape[0] == len(Point3DIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def array(self) -> npt.NDArray[np.float64]: + """The array representation of the point. + + :return: A numpy array of shape (3,) containing the point coordinates [x, y, z], indexed by \ + :class:`~py123d.geometry.Point3DIndex`. + """ + return self._array + + @property + def x(self) -> float: + """The x coordinate of the point. + + :return: The x coordinate of the point. + """ + return self._array[Point3DIndex.X] + + @property + def y(self) -> float: + """The y coordinate of the point. + + :return: The y coordinate of the point. + """ + return self._array[Point3DIndex.Y] + + @property + def z(self) -> float: + """The z coordinate of the point. + + :return: The z coordinate of the point. + """ + return self._array[Point3DIndex.Z] + + @property + def point_2d(self) -> Point2D: + """The 2D projection of the 3D point. + + :return: A Point2D instance representing the 2D projection of the 3D point. + """ + return Point2D.from_array(self.array[Point3DIndex.XY], copy=False) + + @property + def shapely_point(self) -> geom.Point: + """The Shapely Point representation of the 3D point. \ + This geometry contains the z-coordinate, but many Shapely operations ignore it. + + :return: A Shapely Point representation of the 3D point. + """ + return geom.Point(self.x, self.y, self.z) + + def __iter__(self) -> Iterable[float]: + """Iterator over the point coordinates (x, y, z).""" + return iter((self.x, self.y, self.z)) + + def __hash__(self) -> int: + """Hash method""" + return hash((self.x, self.y, self.z)) diff --git a/src/py123d/geometry/polyline.py b/src/py123d/geometry/polyline.py new file mode 100644 index 00000000..3f3b7f65 --- /dev/null +++ b/src/py123d/geometry/polyline.py @@ -0,0 +1,370 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import List, Optional, Union + +import numpy as np +import numpy.typing as npt +import shapely.creation as geom_creation +import shapely.geometry as geom +from scipy.interpolate import interp1d + +from py123d.common.utils.mixin import ArrayMixin +from py123d.geometry.geometry_index import Point2DIndex, Point3DIndex, StateSE2Index +from py123d.geometry.point import Point2D, Point3D +from py123d.geometry.se import StateSE2 +from py123d.geometry.utils.constants import DEFAULT_Z +from py123d.geometry.utils.polyline_utils import get_linestring_yaws, get_path_progress +from py123d.geometry.utils.rotation_utils import normalize_angle + +# TODO: Implement PolylineSE3 +# TODO: Benchmark interpolation performance and reconsider reliance on LineString + + +@dataclass +class Polyline2D(ArrayMixin): + """Represents a interpolatable 2D polyline.""" + + linestring: geom.LineString + + @classmethod + def from_linestring(cls, linestring: geom.LineString) -> Polyline2D: + """Creates a Polyline2D from a Shapely LineString. If the LineString has Z-coordinates, they are ignored. + + :param linestring: A Shapely LineString object. + :return: A Polyline2D instance. + """ + if linestring.has_z: + linestring_ = geom_creation.linestrings(*linestring.xy) + else: + linestring_ = linestring + return Polyline2D(linestring_) + + @classmethod + def from_array(cls, polyline_array: npt.NDArray[np.float32]) -> Polyline2D: + """Creates a Polyline2D from a numpy array. + + :param polyline_array: A numpy array of shape (N, 2) or (N, 3), e.g. 
indexed by \ + :class:`~py123d.geometry.Point2DIndex` or :class:`~py123d.geometry.Point3DIndex`. + :raises ValueError: If the input array is not of the expected shape. + :return: A Polyline2D instance. + """ + assert polyline_array.ndim == 2 + linestring: Optional[geom.LineString] = None + if polyline_array.shape[-1] == len(Point2DIndex): + linestring = geom_creation.linestrings(polyline_array) + elif polyline_array.shape[-1] == len(Point3DIndex): + linestring = geom_creation.linestrings(polyline_array[:, Point3DIndex.XY]) + else: + raise ValueError("Array must have shape (N, 2) or (N, 3) for Point2D or Point3D respectively.") + return Polyline2D(linestring) + + def from_discrete_points(cls, discrete_points: List[Point2D]) -> Polyline2D: + """Creates a Polyline2D from a list of discrete 2D points. + + :param discrete_points: A list of Point2D instances. + :return: A Polyline2D instance. + """ + return Polyline2D.from_array(np.array(discrete_points, dtype=np.float64)) + + @property + def array(self) -> npt.NDArray[np.float64]: + """Converts the polyline to a numpy array, indexed by :class:`~py123d.geometry.Point2DIndex`. + + :return: A numpy array of shape (N, 2) representing the polyline. + """ + x, y = self.linestring.xy + array = np.zeros((len(x), len(Point2DIndex)), dtype=np.float64) + array[:, Point2DIndex.X] = x + array[:, Point2DIndex.Y] = y + return array + + @property + def polyline_se2(self) -> PolylineSE2: + """Converts the 2D polyline to a 2D SE(2) polyline and retrieves the yaw angles. + + :return: A PolylineSE2 instance representing the 2D polyline. + """ + return PolylineSE2.from_linestring(self.linestring) + + @property + def length(self) -> float: + """Returns the length of the polyline. + + :return: The length of the polyline. + """ + return self.linestring.length + + def interpolate( + self, + distances: Union[float, npt.NDArray[np.float64]], + normalized: bool = False, + ) -> Union[Point2D, npt.NDArray[np.float64]]: + """Interpolates the polyline at the given distances. + + :param distances: The distances at which to interpolate the polyline. + :return: The interpolated point(s) on the polyline. + """ + + if isinstance(distances, float) or isinstance(distances, int): + point = self.linestring.interpolate(distances, normalized=normalized) + return Point2D(point.x, point.y) + else: + distances_ = np.asarray(distances, dtype=np.float64) + points = self.linestring.interpolate(distances, normalized=normalized) + return np.array([[p.x, p.y] for p in points], dtype=np.float64) + + def project( + self, + point: Union[geom.Point, Point2D, StateSE2, npt.NDArray[np.float64]], + normalized: bool = False, + ) -> npt.NDArray[np.float64]: + """Projects a point onto the polyline and returns the distance along the polyline to the closest point. + + :param point: The point to project onto the polyline. + :param normalized: Whether to return the normalized distance, defaults to False. + :return: The distance along the polyline to the closest point. 
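+
+        A sketch projecting a point onto a straight segment (editor's addition):
+        >>> import numpy as np
+        >>> line = Polyline2D.from_array(np.array([[0.0, 0.0], [10.0, 0.0]]))
+        >>> float(line.project(Point2D(3.0, 4.0)))
+        3.0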
+ """ + if isinstance(point, Point2D) or isinstance(point, StateSE2): + point_ = point.shapely_point + elif isinstance(point, geom.Point): + point_ = point + else: + point_ = np.array(point, dtype=np.float64) + return self.linestring.project(point_, normalized=normalized) + + +@dataclass +class PolylineSE2(ArrayMixin): + """Represents a interpolatable SE2 polyline.""" + + _array: npt.NDArray[np.float64] + linestring: Optional[geom.LineString] = None + + _progress: Optional[npt.NDArray[np.float64]] = None + _interpolator: Optional[interp1d] = None + + def __post_init__(self): + assert self._array is not None + + if self.linestring is None: + self.linestring = geom_creation.linestrings(self._array[..., StateSE2Index.XY]) + + self._array[:, StateSE2Index.YAW] = np.unwrap(self._array[:, StateSE2Index.YAW], axis=0) + self._progress = get_path_progress(self._array) + self._interpolator = interp1d(self._progress, self._array, axis=0, bounds_error=False, fill_value=0.0) + + @classmethod + def from_linestring(cls, linestring: geom.LineString) -> PolylineSE2: + """Creates a PolylineSE2 from a LineString. This requires computing the yaw angles along the path. + + :param linestring: The LineString to convert. + :return: A PolylineSE2 representing the same path as the LineString. + """ + points_2d = np.array(linestring.coords, dtype=np.float64)[..., StateSE2Index.XY] + se2_array = np.zeros((len(points_2d), len(StateSE2Index)), dtype=np.float64) + se2_array[:, StateSE2Index.XY] = points_2d + se2_array[:, StateSE2Index.YAW] = get_linestring_yaws(linestring) + return PolylineSE2(se2_array, linestring) + + @classmethod + def from_array(cls, polyline_array: npt.NDArray[np.float32]) -> PolylineSE2: + """Creates a PolylineSE2 from a numpy array. + + :param polyline_array: The input numpy array representing, either indexed by \ + :class:`~py123d.geometry.Point2DIndex` or :class:`~py123d.geometry.StateSE2Index`. + :raises ValueError: If the input array is not of the expected shape. + :return: A PolylineSE2 representing the same path as the input array. + """ + assert polyline_array.ndim == 2 + if polyline_array.shape[-1] == len(Point2DIndex): + se2_array = np.zeros((len(polyline_array), len(StateSE2Index)), dtype=np.float64) + se2_array[:, StateSE2Index.XY] = polyline_array + se2_array[:, StateSE2Index.YAW] = get_linestring_yaws(geom_creation.linestrings(*polyline_array.T)) + elif polyline_array.shape[-1] == len(StateSE2Index): + se2_array = np.array(polyline_array, dtype=np.float64) + else: + raise ValueError("Invalid polyline array shape.") + return PolylineSE2(se2_array) + + @classmethod + def from_discrete_se2(cls, discrete_se2: List[StateSE2]) -> PolylineSE2: + """Creates a PolylineSE2 from a list of discrete SE2 states. + + :param discrete_se2: The list of discrete SE2 states. + :return: A PolylineSE2 representing the same path as the discrete SE2 states. + """ + return PolylineSE2.from_array(np.array(discrete_se2, dtype=np.float64)) + + @property + def array(self) -> npt.NDArray[np.float64]: + """Converts the polyline to a numpy array, indexed by :class:`~py123d.geometry.StateSE2Index`. + + :return: A numpy array of shape (N, 3) representing the polyline. + """ + return self._array + + @property + def length(self) -> float: + """Returns the length of the polyline. + + :return: The length of the polyline. 
+ """ + return float(self._progress[-1]) + + def interpolate( + self, + distances: Union[float, npt.NDArray[np.float64]], + normalized: bool = False, + ) -> Union[StateSE2, npt.NDArray[np.float64]]: + """Interpolates the polyline at the given distances. + + :param distances: The distances along the polyline to interpolate. + :param normalized: Whether the distances are normalized (0 to 1), defaults to False + :return: The interpolated StateSE2 or an array of interpolated states, according to + """ + + distances_ = distances * self.length if normalized else distances + clipped_distances = np.clip(distances_, 1e-8, self.length) + + interpolated_se2_array = self._interpolator(clipped_distances) + interpolated_se2_array[..., StateSE2Index.YAW] = normalize_angle(interpolated_se2_array[..., StateSE2Index.YAW]) + + if clipped_distances.ndim == 0: + return StateSE2(*interpolated_se2_array) + else: + return interpolated_se2_array + + def project( + self, + point: Union[geom.Point, Point2D, StateSE2, npt.NDArray[np.float64]], + normalized: bool = False, + ) -> npt.NDArray[np.float64]: + """Projects a point onto the polyline and returns the distance along the polyline to the closest point. + + :param point: The point to project onto the polyline. + :param normalized: Whether to return the normalized distance, defaults to False. + :return: The distance along the polyline to the closest point. + """ + if isinstance(point, Point2D) or isinstance(point, StateSE2): + point_ = point.shapely_point + elif isinstance(point, geom.Point): + point_ = point + else: + point_ = np.array(point, dtype=np.float64) + return self.linestring.project(point_, normalized=normalized) + + +@dataclass +class Polyline3D(ArrayMixin): + """Represents a interpolatable 3D polyline.""" + + linestring: geom.LineString + + @classmethod + def from_linestring(cls, linestring: geom.LineString) -> Polyline3D: + """Creates a Polyline3D from a Shapely LineString. If the LineString does not have Z-coordinates, \ + a default Z-value is added. + + :param linestring: The input LineString. + :return: A Polyline3D instance. + """ + return ( + Polyline3D(linestring) + if linestring.has_z + else Polyline3D(geom_creation.linestrings(*linestring.xy, z=DEFAULT_Z)) + ) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64]) -> Polyline3D: + """Creates a Polyline3D from a numpy array. + + :param array: A numpy array of shape (N, 3) representing 3D points, e.g. indexed by \ + :class:`~py123d.geometry.Point3DIndex`. + :return: A Polyline3D instance. + """ + assert array.ndim == 2 and array.shape[1] == len(Point3DIndex), "Array must be 3D with shape (N, 3)" + linestring = geom_creation.linestrings(*array.T) + return Polyline3D(linestring) + + @property + def polyline_2d(self) -> Polyline2D: + """Converts the 3D polyline to a 2D polyline by dropping the Z-coordinates. + + :return: A Polyline2D instance. + """ + return Polyline2D(geom_creation.linestrings(*self.linestring.xy)) + + @property + def polyline_se2(self) -> PolylineSE2: + """Converts the 3D polyline to a 2D SE(2) polyline. + + :return: A PolylineSE2 instance. + """ + return PolylineSE2.from_linestring(self.linestring) + + @property + def array(self) -> npt.NDArray[np.float64]: + """Converts the 3D polyline to the discrete 3D points. + + :return: A numpy array of shape (N, 3), indexed by :class:`~py123d.geometry.Point3DIndex`. + """ + return np.array(self.linestring.coords, dtype=np.float64) + + @property + def length(self) -> float: + """Returns the length of the 3D polyline. 
+ + :return: The length of the polyline. + """ + return self.linestring.length + + def interpolate( + self, + distances: Union[float, npt.NDArray[np.float64]], + normalized: bool = False, + ) -> Union[Point3D, npt.NDArray[np.float64]]: + """Interpolates the 3D polyline at the given distances. + + :param distances: A float or numpy array of distances along the polyline. + :param normalized: Whether to interpret the distances as fractions of the length. + :return: A Point3D instance or a numpy array of shape (N, 3) representing the interpolated points. + """ + + if isinstance(distances, float) or isinstance(distances, int): + point = self.linestring.interpolate(distances, normalized=normalized) + return Point3D(point.x, point.y, point.z) + else: + distances = np.asarray(distances, dtype=np.float64) + points = self.linestring.interpolate(distances, normalized=normalized) + return np.array([[p.x, p.y, p.z] for p in points], dtype=np.float64) + + def project( + self, + point: Union[geom.Point, Point2D, Point3D, npt.NDArray[np.float64]], + normalized: bool = False, + ) -> npt.NDArray[np.float64]: + """Projects a point onto the 3D polyline and returns the distance along the polyline to the closest point. + + :param point: The point to project. + :param normalized: Whether to return normalized distances, defaults to False. + :return: The distance along the polyline to the closest point. + """ + if isinstance(point, Point2D) or isinstance(point, StateSE2) or isinstance(point, Point3D): + point_ = point.shapely_point + elif isinstance(point, geom.Point): + point_ = point + else: + point_ = np.array(point, dtype=np.float64) + return self.linestring.project(point_, normalized=normalized) + + +@dataclass +class PolylineSE3: + # TODO: Implement PolylineSE3 now that StateSE3 is quaternion-based. + # Interpolating along SE3 states (i.e., 3D position + orientation) is meaningful, + # but more complex than SE2 due to 3D rotations (quaternions or rotation matrices). + # Linear interpolation of positions is straightforward, but orientation interpolation + # should use SLERP (spherical linear interpolation) for quaternions. + # This is commonly needed in robotics, animation, and path planning. + pass diff --git a/src/py123d/geometry/rotation.py b/src/py123d/geometry/rotation.py new file mode 100644 index 00000000..1f54431a --- /dev/null +++ b/src/py123d/geometry/rotation.py @@ -0,0 +1,240 @@ +from __future__ import annotations + +import numpy as np +import numpy.typing as npt +import pyquaternion + +from py123d.common.utils.mixin import ArrayMixin +from py123d.geometry.geometry_index import EulerAnglesIndex, QuaternionIndex +from py123d.geometry.utils.rotation_utils import ( + get_euler_array_from_quaternion_array, + get_euler_array_from_rotation_matrix, + get_quaternion_array_from_euler_array, + get_quaternion_array_from_rotation_matrix, + get_rotation_matrix_from_euler_array, + get_rotation_matrix_from_quaternion_array, +) + + +class EulerAngles(ArrayMixin): + """Class to represent 3D rotation using Euler angles (roll, pitch, yaw) in radians. + NOTE: The rotation order is intrinsic Z-Y'-X'' (yaw-pitch-roll). + See https://en.wikipedia.org/wiki/Euler_angles for more details.
+ """ + + _array: npt.NDArray[np.float64] + + def __init__(self, roll: float, pitch: float, yaw: float): + """Initialize EulerAngles with roll, pitch, yaw angles in radians.""" + array = np.zeros(len(EulerAnglesIndex), dtype=np.float64) + array[EulerAnglesIndex.ROLL] = roll + array[EulerAnglesIndex.PITCH] = pitch + array[EulerAnglesIndex.YAW] = yaw + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> EulerAngles: + """Constructs an EulerAngles from a numpy array. + + :param array: Array of shape (3,) representing the euler angles [roll, pitch, yaw], indexed by \ + :class:`~py123d.geometry.EulerAnglesIndex`. + :param copy: Whether to copy the input array. Defaults to True. + :return: An EulerAngles instance. + """ + assert array.ndim == 1 + assert array.shape[0] == len(EulerAnglesIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @classmethod + def from_rotation_matrix(cls, rotation_matrix: npt.NDArray[np.float64]) -> EulerAngles: + """Constructs an EulerAngles from a 3x3 rotation matrix. + NOTE: The rotation order is intrinsic Z-Y'-X'' (yaw-pitch-roll). + + :param rotation_matrix: A 3x3 numpy array representing the rotation matrix. + :return: An EulerAngles instance. + """ + assert rotation_matrix.ndim == 2 + assert rotation_matrix.shape == (3, 3) + return EulerAngles.from_array(get_euler_array_from_rotation_matrix(rotation_matrix), copy=False) + + @property + def roll(self) -> float: + """The roll (x-axis rotation) angle in radians. + + :return: The roll angle in radians. + """ + return self._array[EulerAnglesIndex.ROLL] + + @property + def pitch(self) -> float: + """The pitch (y-axis rotation) angle in radians. + + :return: The pitch angle in radians. + """ + return self._array[EulerAnglesIndex.PITCH] + + @property + def yaw(self) -> float: + """The yaw (z-axis rotation) angle in radians. + + :return: The yaw angle in radians. + """ + return self._array[EulerAnglesIndex.YAW] + + @property + def array(self) -> npt.NDArray[np.float64]: + """Converts the EulerAngles instance to a numpy array. + + :return: A numpy array of shape (3,) containing the Euler angles [roll, pitch, yaw], indexed by \ + :class:`~py123d.geometry.EulerAnglesIndex`. + """ + return self._array + + @property + def quaternion(self) -> Quaternion: + """Returns the quaternion representation of the Euler angles. + + :return: A Quaternion instance. + """ + return Quaternion.from_euler_angles(self) + + @property + def rotation_matrix(self) -> npt.NDArray[np.float64]: + """Returns the 3x3 rotation matrix representation of the Euler angles. + NOTE: The rotation order is intrinsic Z-Y'-X'' (yaw-pitch-roll). + + :return: A 3x3 numpy array representing the rotation matrix. + """ + return get_rotation_matrix_from_euler_array(self.array) + + def __iter__(self): + """Iterator over euler angles.""" + return iter((self.roll, self.pitch, self.yaw)) + + def __hash__(self): + """Hash function for euler angles.""" + return hash((self.roll, self.pitch, self.yaw)) + + +class Quaternion(ArrayMixin): + """ + Represents a quaternion for 3D rotations.
+ """ + + _array: npt.NDArray[np.float64] + + def __init__(self, qw: float, qx: float, qy: float, qz: float): + """Initialize Quaternion with qw, qx, qy, qz components.""" + array = np.zeros(len(QuaternionIndex), dtype=np.float64) + array[QuaternionIndex.QW] = qw + array[QuaternionIndex.QX] = qx + array[QuaternionIndex.QY] = qy + array[QuaternionIndex.QZ] = qz + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, arr: npt.NDArray[np.float64], copy: bool = True) -> Quaternion: + """Constructs a Quaternion from a numpy array. + + :param arr: A 1D numpy array of shape (4,) containing the quaternion components [qw, qx, qy, qz]. + :param copy: Whether to copy the array data, defaults to True. + :return: A Quaternion instance. + """ + assert arr.ndim == 1 + assert arr.shape[0] == len(QuaternionIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", arr.copy() if copy else arr) + return instance + + @classmethod + def from_rotation_matrix(cls, rotation_matrix: npt.NDArray[np.float64]) -> Quaternion: + """Constructs a Quaternion from a 3x3 rotation matrix. + + :param rotation_matrix: A 3x3 numpy array representing the rotation matrix. + :return: A Quaternion instance. + """ + assert rotation_matrix.ndim == 2 + assert rotation_matrix.shape == (3, 3) + return Quaternion.from_array(get_quaternion_array_from_rotation_matrix(rotation_matrix), copy=False) + + @classmethod + def from_euler_angles(cls, euler_angles: EulerAngles) -> Quaternion: + """Constructs a Quaternion from Euler angles. + NOTE: The rotation order is intrinsic Z-Y'-X'' (yaw-pitch-roll). + + :param euler_angles: An EulerAngles instance representing the Euler angles. + :return: A Quaternion instance. + """ + return Quaternion.from_array(get_quaternion_array_from_euler_array(euler_angles.array), copy=False) + + @property + def qw(self) -> float: + """The scalar part of the quaternion. + + :return: The qw component. + """ + return self._array[QuaternionIndex.QW] + + @property + def qx(self) -> float: + """The x component of the quaternion. + + :return: The qx component. + """ + return self._array[QuaternionIndex.QX] + + @property + def qy(self) -> float: + """The y component of the quaternion. + + :return: The qy component. + """ + return self._array[QuaternionIndex.QY] + + @property + def qz(self) -> float: + """The z component of the quaternion. + + :return: The qz component. + """ + return self._array[QuaternionIndex.QZ] + + @property + def array(self) -> npt.NDArray[np.float64]: + """Converts the Quaternion instance to a numpy array. + + :return: A numpy array of shape (4,) containing the quaternion [qw, qx, qy, qz], indexed by \ + :class:`~py123d.geometry.QuaternionIndex`. + """ + return self._array + + @property + def pyquaternion(self) -> pyquaternion.Quaternion: + """Returns the pyquaternion.Quaternion representation of the quaternion. + + :return: A pyquaternion.Quaternion representation of the quaternion. + """ + return pyquaternion.Quaternion(array=self.array) + + @property + def euler_angles(self) -> EulerAngles: + """Returns the Euler angles (roll, pitch, yaw) representation of the quaternion. + NOTE: The rotation order is intrinsic Z-Y'-X'' (yaw-pitch-roll). + + :return: An EulerAngles instance representing the Euler angles. + """ + return EulerAngles.from_array(get_euler_array_from_quaternion_array(self.array), copy=False) + + @property + def rotation_matrix(self) -> npt.NDArray[np.float64]: + """Returns the 3x3 rotation matrix representation of the quaternion. 
+ + :return: A 3x3 numpy array representing the rotation matrix. + """ + return get_rotation_matrix_from_quaternion_array(self.array) + + def __iter__(self): + """Iterator over quaternion components.""" + return iter((self.qw, self.qx, self.qy, self.qz)) + + def __hash__(self): + """Hash function for quaternion.""" + return hash((self.qw, self.qx, self.qy, self.qz)) diff --git a/src/py123d/geometry/se.py b/src/py123d/geometry/se.py new file mode 100644 index 00000000..b8b30cc8 --- /dev/null +++ b/src/py123d/geometry/se.py @@ -0,0 +1,495 @@ +from __future__ import annotations + +from typing import Iterable + +import numpy as np +import numpy.typing as npt +import shapely.geometry as geom + +from py123d.common.utils.mixin import ArrayMixin +from py123d.geometry.geometry_index import EulerStateSE3Index, Point3DIndex, StateSE2Index, StateSE3Index +from py123d.geometry.point import Point2D, Point3D +from py123d.geometry.rotation import EulerAngles, Quaternion + + +class StateSE2(ArrayMixin): + """Class to represent a 2D pose as SE2 (x, y, yaw).""" + + _array: npt.NDArray[np.float64] + + def __init__(self, x: float, y: float, yaw: float): + """Initialize StateSE2 with x, y, yaw coordinates.""" + array = np.zeros(len(StateSE2Index), dtype=np.float64) + array[StateSE2Index.X] = x + array[StateSE2Index.Y] = y + array[StateSE2Index.YAW] = yaw + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> StateSE2: + """Constructs a StateSE2 from a numpy array. + + :param array: Array of shape (3,) representing the state [x, y, yaw], indexed by \ + :class:`~py123d.geometry.geometry_index.StateSE2Index`. + :param copy: Whether to copy the input array. Defaults to True. + :return: A StateSE2 instance. + """ + assert array.ndim == 1 + assert array.shape[0] == len(StateSE2Index) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def x(self) -> float: + """The x coordinate of the pose.""" + return self._array[StateSE2Index.X] + + @property + def y(self) -> float: + """The y coordinate of the pose.""" + return self._array[StateSE2Index.Y] + + @property + def yaw(self) -> float: + """The yaw angle of the pose in radians.""" + return self._array[StateSE2Index.YAW] + + @property + def array(self) -> npt.NDArray[np.float64]: + """Converts the StateSE2 instance to a numpy array. + + :return: A numpy array of shape (3,) containing the state, indexed by \ + :class:`~py123d.geometry.geometry_index.StateSE2Index`. + """ + return self._array + + @property + def state_se2(self) -> StateSE2: + """The 2D pose itself. Helpful for polymorphism. + + :return: A StateSE2 instance representing the 2D pose. + """ + return self + + @property + def point_2d(self) -> Point2D: + """The 2D position of the pose. + + :return: A Point2D instance representing the position of the pose. + """ + return Point2D.from_array(self.array[StateSE2Index.XY]) + + @property + def rotation_matrix(self) -> npt.NDArray[np.float64]: + """Returns the 2x2 rotation matrix representation of the state's orientation. + + :return: A 2x2 numpy array representing the rotation matrix. + """ + cos_yaw = np.cos(self.yaw) + sin_yaw = np.sin(self.yaw) + return np.array([[cos_yaw, -sin_yaw], [sin_yaw, cos_yaw]], dtype=np.float64) + + @property + def transformation_matrix(self) -> npt.NDArray[np.float64]: + """Returns the 3x3 transformation matrix representation of the state. + + :return: A 3x3 numpy array representing the transformation matrix.
+ """ + matrix = np.zeros((3, 3), dtype=np.float64) + matrix[:2, :2] = self.rotation_matrix + matrix[0, 2] = self.x + matrix[1, 2] = self.y + matrix[2, 2] = 1.0 + return matrix + + @property + def shapely_point(self) -> geom.Point: + return geom.Point(self.x, self.y) + + +class StateSE3(ArrayMixin): + """Class to represent a 3D pose as SE3 with quaternion orientation (x, y, z, qw, qx, qy, qz).""" + + _array: npt.NDArray[np.float64] + + def __init__(self, x: float, y: float, z: float, qw: float, qx: float, qy: float, qz: float): + """Initialize StateSE3 with x, y, z, qw, qx, qy, qz coordinates.""" + array = np.zeros(len(StateSE3Index), dtype=np.float64) + array[StateSE3Index.X] = x + array[StateSE3Index.Y] = y + array[StateSE3Index.Z] = z + array[StateSE3Index.QW] = qw + array[StateSE3Index.QX] = qx + array[StateSE3Index.QY] = qy + array[StateSE3Index.QZ] = qz + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> StateSE3: + """Constructs a StateSE3 from a numpy array. + + :param array: Array of shape (7,), indexed by :class:`~py123d.geometry.geometry_index.StateSE3Index`. + :param copy: Whether to copy the input array. Defaults to True. + :return: A StateSE3 instance. + """ + assert array.ndim == 1 + assert array.shape[0] == len(StateSE3Index) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @classmethod + def from_transformation_matrix(cls, transformation_matrix: npt.NDArray[np.float64]) -> StateSE3: + """Constructs a StateSE3 from a 4x4 transformation matrix. + + :param transformation_matrix: A 4x4 numpy array representing the transformation matrix. + :return: A StateSE3 instance. + """ + assert transformation_matrix.ndim == 2 + assert transformation_matrix.shape == (4, 4) + array = np.zeros(len(StateSE3Index), dtype=np.float64) + array[StateSE3Index.XYZ] = transformation_matrix[:3, 3] + array[StateSE3Index.QUATERNION] = Quaternion.from_rotation_matrix(transformation_matrix[:3, :3]).array + return StateSE3.from_array(array, copy=False) + + @property + def x(self) -> float: + """Returns the x-coordinate of the position. + + :return: The x-coordinate. + """ + return self._array[StateSE3Index.X] + + @property + def y(self) -> float: + """Returns the y-coordinate of the position. + + :return: The y-coordinate. + """ + return self._array[StateSE3Index.Y] + + @property + def z(self) -> float: + """Returns the z-coordinate of the position. + + :return: The z-coordinate. + """ + return self._array[StateSE3Index.Z] + + @property + def qw(self) -> float: + """Returns the w-coordinate of the quaternion. + + :return: The w-coordinate. + """ + return self._array[StateSE3Index.QW] + + @property + def qx(self) -> float: + """Returns the x-coordinate of the quaternion. + + :return: The x-coordinate. + """ + return self._array[StateSE3Index.QX] + + @property + def qy(self) -> float: + """Returns the y-coordinate of the quaternion. + + :return: The y-coordinate. + """ + return self._array[StateSE3Index.QY] + + @property + def qz(self) -> float: + """Returns the z-coordinate of the quaternion. + + :return: The z-coordinate. + """ + return self._array[StateSE3Index.QZ] + + @property + def array(self) -> npt.NDArray[np.float64]: + """Converts the StateSE3 instance to a numpy array. + + :return: A numpy array of shape (7,), indexed by :class:`~py123d.geometry.geometry_index.StateSE3Index`.
+ """ + return self._array + + @property + def state_se2(self) -> StateSE2: + """Returns the pose as a 2D SE2 state by dropping z, roll, and pitch. + + :return: A StateSE2 instance representing the 2D projection of the 3D state. + """ + # Convert quaternion to yaw angle + yaw = self.quaternion.euler_angles.yaw + return StateSE2(self.x, self.y, yaw) + + @property + def point_3d(self) -> Point3D: + """Returns the 3D point representation of the state. + + :return: A Point3D instance representing the 3D point. + """ + return Point3D(self.x, self.y, self.z) + + @property + def point_2d(self) -> Point2D: + """Returns the 2D point representation of the state. + + :return: A Point2D instance representing the 2D point. + """ + return Point2D(self.x, self.y) + + @property + def shapely_point(self) -> geom.Point: + """Returns the Shapely point representation of the state. + + :return: A Shapely Point instance representing the 3D point. + """ + return self.point_3d.shapely_point + + @property + def quaternion(self) -> Quaternion: + """Returns the quaternion (w, x, y, z) representation of the state's orientation. + + :return: A Quaternion instance representing the quaternion. + """ + return Quaternion.from_array(self.array[StateSE3Index.QUATERNION]) + + @property + def euler_angles(self) -> EulerAngles: + """Returns the Euler angles (roll, pitch, yaw) representation of the state's orientation. + + :return: An EulerAngles instance representing the Euler angles. + """ + return self.quaternion.euler_angles + + @property + def roll(self) -> float: + """The roll (x-axis rotation) angle in radians. + + :return: The roll angle in radians. + """ + return self.euler_angles.roll + + @property + def pitch(self) -> float: + """The pitch (y-axis rotation) angle in radians. + + :return: The pitch angle in radians. + """ + return self.euler_angles.pitch + + @property + def yaw(self) -> float: + """The yaw (z-axis rotation) angle in radians. + + :return: The yaw angle in radians. + """ + return self.euler_angles.yaw + + @property + def rotation_matrix(self) -> npt.NDArray[np.float64]: + """Returns the 3x3 rotation matrix representation of the state's orientation. + + :return: A 3x3 numpy array representing the rotation matrix. + """ + return self.quaternion.rotation_matrix + + @property + def transformation_matrix(self) -> npt.NDArray[np.float64]: + """Returns the 4x4 transformation matrix representation of the state. + + :return: A 4x4 numpy array representing the transformation matrix. + """ + transformation_matrix = np.eye(4, dtype=np.float64) + transformation_matrix[:3, :3] = self.rotation_matrix + transformation_matrix[:3, 3] = self.array[StateSE3Index.XYZ] + return transformation_matrix + + +class EulerStateSE3(ArrayMixin): + """ + Class to represent a 3D pose as SE3 (x, y, z, roll, pitch, yaw). + NOTE: This class is deprecated, use :class:`~py123d.geometry.StateSE3` instead (quaternion based).
+ """ + + _array: npt.NDArray[np.float64] + + def __init__(self, x: float, y: float, z: float, roll: float, pitch: float, yaw: float): + """Initialize StateSE3 with x, y, z, roll, pitch, yaw coordinates.""" + array = np.zeros(len(EulerStateSE3Index), dtype=np.float64) + array[EulerStateSE3Index.X] = x + array[EulerStateSE3Index.Y] = y + array[EulerStateSE3Index.Z] = z + array[EulerStateSE3Index.ROLL] = roll + array[EulerStateSE3Index.PITCH] = pitch + array[EulerStateSE3Index.YAW] = yaw + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> EulerStateSE3: + """Constructs a StateSE3 from a numpy array. + + :param array: Array of shape (6,) representing the state [x, y, z, roll, pitch, yaw], indexed by \ + :class:`~py123d.geometry.geometry_index.StateSE3Index`. + :param copy: Whether to copy the input array. Defaults to True. + :return: A StateSE3 instance. + """ + assert array.ndim == 1 + assert array.shape[0] == len(EulerStateSE3Index) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @classmethod + def from_transformation_matrix(cls, transformation_matrix: npt.NDArray[np.float64]) -> EulerStateSE3: + """Constructs a EulerStateSE3 from a 4x4 transformation matrix. + + :param array: A 4x4 numpy array representing the transformation matrix. + :return: A EulerStateSE3 instance. + """ + assert transformation_matrix.ndim == 2 + assert transformation_matrix.shape == (4, 4) + translation = transformation_matrix[:3, 3] + rotation = transformation_matrix[:3, :3] + roll, pitch, yaw = EulerAngles.from_rotation_matrix(rotation) + return EulerStateSE3( + x=translation[Point3DIndex.X], + y=translation[Point3DIndex.Y], + z=translation[Point3DIndex.Z], + roll=roll, + pitch=pitch, + yaw=yaw, + ) + + @property + def x(self) -> float: + """Returns the x-coordinate of the 3D state. + + :return: The x-coordinate. + """ + return self._array[EulerStateSE3Index.X] + + @property + def y(self) -> float: + """Returns the y-coordinate of the 3D state. + + :return: The y-coordinate. + """ + return self._array[EulerStateSE3Index.Y] + + @property + def z(self) -> float: + """Returns the z-coordinate of the 3D state. + + :return: The z-coordinate. + """ + return self._array[EulerStateSE3Index.Z] + + @property + def roll(self) -> float: + """Returns the roll (x-axis rotation) of the 3D state. + + :return: The roll angle. + """ + return self._array[EulerStateSE3Index.ROLL] + + @property + def pitch(self) -> float: + """Returns the pitch (y-axis rotation) of the 3D state. + + :return: The pitch angle. + """ + return self._array[EulerStateSE3Index.PITCH] + + @property + def yaw(self) -> float: + """Returns the yaw (z-axis rotation) of the 3D state. + + :return: The yaw angle. + """ + return self._array[EulerStateSE3Index.YAW] + + @property + def array(self) -> npt.NDArray[np.float64]: + """Returns the StateSE3 instance as a numpy array. + + :return: A numpy array of shape (6,), indexed by \ + :class:`~py123d.geometry.geometry_index.StateSE3Index`. + """ + return self._array + + @property + def state_se2(self) -> StateSE2: + """Returns the 3D state as a 2D state by ignoring the z-axis. + + :return: A StateSE2 instance representing the 2D projection of the 3D state. + """ + return StateSE2(self.x, self.y, self.yaw) + + @property + def point_3d(self) -> Point3D: + """Returns the 3D point representation of the state. 
+
+        :return: A Point3D instance representing the 3D point.
+        """
+        return Point3D(self.x, self.y, self.z)
+
+    @property
+    def point_2d(self) -> Point2D:
+        """Returns the 2D point representation of the state.
+
+        :return: A Point2D instance representing the 2D point.
+        """
+        return Point2D(self.x, self.y)
+
+    @property
+    def shapely_point(self) -> geom.Point:
+        """Returns the Shapely point representation of the state.
+
+        :return: A Shapely Point instance representing the 3D point.
+        """
+        return self.point_3d.shapely_point
+
+    @property
+    def rotation_matrix(self) -> npt.NDArray[np.float64]:
+        """Returns the 3x3 rotation matrix representation of the state's orientation.
+
+        :return: A 3x3 numpy array representing the rotation matrix.
+        """
+        return self.euler_angles.rotation_matrix
+
+    @property
+    def transformation_matrix(self) -> npt.NDArray[np.float64]:
+        """Returns the 4x4 transformation matrix representation of the state.
+
+        :return: A 4x4 numpy array representing the transformation matrix.
+        """
+        rotation_matrix = self.rotation_matrix
+        transformation_matrix = np.eye(4, dtype=np.float64)
+        transformation_matrix[:3, :3] = rotation_matrix
+        transformation_matrix[:3, 3] = self.array[EulerStateSE3Index.XYZ]
+        return transformation_matrix
+
+    @property
+    def euler_angles(self) -> EulerAngles:
+        return EulerAngles.from_array(self.array[EulerStateSE3Index.EULER_ANGLES])
+
+    @property
+    def state_se3(self) -> StateSE3:
+        quaternion_se3_array = np.zeros(len(StateSE3Index), dtype=np.float64)
+        quaternion_se3_array[StateSE3Index.XYZ] = self.array[EulerStateSE3Index.XYZ]
+        quaternion_se3_array[StateSE3Index.QUATERNION] = Quaternion.from_euler_angles(self.euler_angles).array
+        return StateSE3.from_array(quaternion_se3_array, copy=False)
+
+    @property
+    def quaternion(self) -> Quaternion:
+        return Quaternion.from_euler_angles(self.euler_angles)
+
+    def __iter__(self) -> Iterable[float]:
+        """Iterator over the state coordinates (x, y, z, roll, pitch, yaw)."""
+        return iter((self.x, self.y, self.z, self.roll, self.pitch, self.yaw))
+
+    def __hash__(self) -> int:
+        """Hash method."""
+        return hash((self.x, self.y, self.z, self.roll, self.pitch, self.yaw))
diff --git a/d123/common/visualization/bokeh/.gitkeep b/src/py123d/geometry/torch/.gitkeep
similarity index 100%
rename from d123/common/visualization/bokeh/.gitkeep
rename to src/py123d/geometry/torch/.gitkeep
diff --git a/src/py123d/geometry/transform/__init__.py b/src/py123d/geometry/transform/__init__.py
new file mode 100644
index 00000000..cf22b848
--- /dev/null
+++ b/src/py123d/geometry/transform/__init__.py
@@ -0,0 +1,24 @@
+from py123d.geometry.transform.transform_se2 import (
+    convert_absolute_to_relative_point_2d_array,
+    convert_absolute_to_relative_se2_array,
+    convert_relative_to_absolute_point_2d_array,
+    convert_relative_to_absolute_se2_array,
+    translate_se2_along_body_frame,
+    translate_se2_along_x,
+    translate_se2_along_y,
+    translate_se2_array_along_body_frame,
+    translate_2d_along_body_frame,
+)
+from py123d.geometry.transform.transform_se3 import (
+    convert_absolute_to_relative_points_3d_array,
+    convert_absolute_to_relative_se3_array,
+    convert_relative_to_absolute_points_3d_array,
+    convert_points_3d_array_between_origins,
+    convert_relative_to_absolute_se3_array,
+    convert_se3_array_between_origins,
+    translate_se3_along_body_frame,
+    translate_se3_along_x,
+    translate_se3_along_y,
+    translate_se3_along_z,
+    translate_3d_along_body_frame,
+)
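The `transform` package groups all SE2/SE3 frame-conversion helpers behind one import path. A hedged sketch of the intended call pattern (assuming the package is importable and `py123d.geometry` re-exports `StateSE2`):

```python
import numpy as np

from py123d.geometry import StateSE2
from py123d.geometry.transform import (
    convert_absolute_to_relative_se2_array,
    convert_relative_to_absolute_se2_array,
)

origin = StateSE2(5.0, 3.0, np.pi / 4)
poses = np.array([[6.0, 4.0, np.pi / 4], [5.0, 3.0, 0.0]])

# Absolute -> relative -> absolute is the identity (up to float precision).
relative = convert_absolute_to_relative_se2_array(origin, poses)
recovered = convert_relative_to_absolute_se2_array(origin, relative)
assert np.allclose(recovered, poses)
```
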
diff --git a/src/py123d/geometry/transform/transform_euler_se3.py b/src/py123d/geometry/transform/transform_euler_se3.py
new file mode 100644
index 00000000..398b2af4
--- /dev/null
+++ b/src/py123d/geometry/transform/transform_euler_se3.py
@@ -0,0 +1,170 @@
+from typing import Union
+
+import numpy as np
+import numpy.typing as npt
+
+from py123d.geometry import EulerAngles, EulerStateSE3, EulerStateSE3Index, Point3DIndex, Vector3D, Vector3DIndex
+from py123d.geometry.utils.rotation_utils import (
+    get_euler_array_from_rotation_matrices,
+    get_rotation_matrices_from_euler_array,
+    get_rotation_matrix_from_euler_array,
+)
+
+
+def translate_euler_se3_along_z(state_se3: EulerStateSE3, distance: float) -> EulerStateSE3:
+    """Translates an EulerStateSE3 along its local Z-axis."""
+    R = state_se3.rotation_matrix
+    z_axis = R[:, 2]
+
+    state_se3_array = state_se3.array.copy()
+    state_se3_array[EulerStateSE3Index.XYZ] += distance * z_axis[Vector3DIndex.XYZ]
+    return EulerStateSE3.from_array(state_se3_array, copy=False)
+
+
+def translate_euler_se3_along_y(state_se3: EulerStateSE3, distance: float) -> EulerStateSE3:
+    """Translates an EulerStateSE3 along its local Y-axis."""
+    R = state_se3.rotation_matrix
+    y_axis = R[:, 1]
+
+    state_se3_array = state_se3.array.copy()
+    state_se3_array[EulerStateSE3Index.XYZ] += distance * y_axis[Vector3DIndex.XYZ]
+    return EulerStateSE3.from_array(state_se3_array, copy=False)
+
+
+def translate_euler_se3_along_x(state_se3: EulerStateSE3, distance: float) -> EulerStateSE3:
+    """Translates an EulerStateSE3 along its local X-axis."""
+    R = state_se3.rotation_matrix
+    x_axis = R[:, 0]
+
+    state_se3_array = state_se3.array.copy()
+    state_se3_array[EulerStateSE3Index.XYZ] += distance * x_axis[Vector3DIndex.XYZ]
+    return EulerStateSE3.from_array(state_se3_array, copy=False)
+
+
+def translate_euler_se3_along_body_frame(state_se3: EulerStateSE3, vector_3d: Vector3D) -> EulerStateSE3:
+    """Translates an EulerStateSE3 along a vector in the body frame."""
+    R = state_se3.rotation_matrix
+    world_translation = R @ vector_3d.array
+
+    state_se3_array = state_se3.array.copy()
+    state_se3_array[EulerStateSE3Index.XYZ] += world_translation[Vector3DIndex.XYZ]
+    return EulerStateSE3.from_array(state_se3_array, copy=False)
+
+
+def convert_absolute_to_relative_euler_se3_array(
+    origin: Union[EulerStateSE3, npt.NDArray[np.float64]], se3_array: npt.NDArray[np.float64]
+) -> npt.NDArray[np.float64]:
+    """Converts an EulerStateSE3 array from the absolute frame to the relative frame."""
+    if isinstance(origin, EulerStateSE3):
+        origin_array = origin.array
+        t_origin = origin.point_3d.array
+        R_origin = origin.rotation_matrix
+    elif isinstance(origin, np.ndarray):
+        assert origin.ndim == 1 and origin.shape[-1] == len(EulerStateSE3Index)
+        origin_array = origin
+        t_origin = origin_array[EulerStateSE3Index.XYZ]
+        R_origin = get_rotation_matrix_from_euler_array(origin_array[EulerStateSE3Index.EULER_ANGLES])
+    else:
+        raise TypeError(f"Expected EulerStateSE3 or np.ndarray, got {type(origin)}")
+
+    assert se3_array.ndim >= 1
+    assert se3_array.shape[-1] == len(EulerStateSE3Index)
+
+    # Prepare output array
+    rel_se3_array = se3_array.copy()
+
+    # Vectorized relative position calculation
+    abs_positions = se3_array[..., EulerStateSE3Index.XYZ]
+    rel_positions = (abs_positions - t_origin) @ R_origin
+    rel_se3_array[..., EulerStateSE3Index.XYZ] = rel_positions
+
+    # Convert absolute rotation matrices to relative rotation matrices
+    abs_rotation_matrices = get_rotation_matrices_from_euler_array(se3_array[..., EulerStateSE3Index.EULER_ANGLES])
+    rel_rotation_matrices = np.einsum("ij,...jk->...ik", R_origin.T, abs_rotation_matrices)
+
+    if se3_array.shape[0] != 0:
+        # rel_euler_angles = np.array([EulerAngles.from_rotation_matrix(R).array for R in rel_rotation_matrices])
+        # rel_se3_array[..., EulerStateSE3Index.EULER_ANGLES] = normalize_angle(rel_euler_angles)
+        rel_se3_array[..., EulerStateSE3Index.EULER_ANGLES] = get_euler_array_from_rotation_matrices(
+            rel_rotation_matrices
+        )
+
+    return rel_se3_array
+
+
+def convert_relative_to_absolute_euler_se3_array(
+    origin: Union[EulerStateSE3, npt.NDArray[np.float64]], se3_array: npt.NDArray[np.float64]
+) -> npt.NDArray[np.float64]:
+    """Converts an EulerStateSE3 array from the relative frame to the absolute frame."""
+    if isinstance(origin, EulerStateSE3):
+        origin_array = origin.array
+        t_origin = origin.point_3d.array
+        R_origin = origin.rotation_matrix
+    elif isinstance(origin, np.ndarray):
+        assert origin.ndim == 1 and origin.shape[-1] == len(EulerStateSE3Index)
+        origin_array = origin
+        t_origin = origin_array[EulerStateSE3Index.XYZ]
+        R_origin = get_rotation_matrix_from_euler_array(origin_array[EulerStateSE3Index.EULER_ANGLES])
+    else:
+        raise TypeError(f"Expected EulerStateSE3 or np.ndarray, got {type(origin)}")
+
+    assert se3_array.ndim >= 1
+    assert se3_array.shape[-1] == len(EulerStateSE3Index)
+
+    # Prepare output array
+    abs_se3_array = se3_array.copy()
+
+    # Vectorized absolute position calculation: rotate and translate
+    rel_positions = se3_array[..., EulerStateSE3Index.XYZ]
+    abs_positions = (rel_positions @ R_origin.T) + t_origin
+    abs_se3_array[..., EulerStateSE3Index.XYZ] = abs_positions
+
+    # Convert relative rotation matrices to absolute rotation matrices
+    rel_rotation_matrices = get_rotation_matrices_from_euler_array(se3_array[..., EulerStateSE3Index.EULER_ANGLES])
+    abs_rotation_matrices = np.einsum("ij,...jk->...ik", R_origin, rel_rotation_matrices)
+    if se3_array.shape[0] != 0:
+        abs_se3_array[..., EulerStateSE3Index.EULER_ANGLES] = get_euler_array_from_rotation_matrices(
+            abs_rotation_matrices
+        )
+    return abs_se3_array
+
+
+def convert_absolute_to_relative_points_3d_array(
+    origin: Union[EulerStateSE3, npt.NDArray[np.float64]], points_3d_array: npt.NDArray[np.float64]
+) -> npt.NDArray[np.float64]:
+    """Converts 3D points from the absolute frame to the relative (origin body) frame."""
+    if isinstance(origin, EulerStateSE3):
+        t_origin = origin.point_3d.array
+        R_origin = origin.rotation_matrix
+    elif isinstance(origin, np.ndarray):
+        assert origin.ndim == 1 and origin.shape[-1] == len(EulerStateSE3Index)
+        t_origin = origin[EulerStateSE3Index.XYZ]
+        R_origin = get_rotation_matrix_from_euler_array(origin[EulerStateSE3Index.EULER_ANGLES])
+    else:
+        raise TypeError(f"Expected EulerStateSE3 or np.ndarray, got {type(origin)}")
+
+    assert points_3d_array.ndim >= 1
+    assert points_3d_array.shape[-1] == len(Point3DIndex)
+
+    # Translate points to origin frame, then rotate to body frame
+    relative_points = (points_3d_array - t_origin) @ R_origin
+    return relative_points
+
+
+def convert_relative_to_absolute_points_3d_array(
+    origin: Union[EulerStateSE3, npt.NDArray[np.float64]], points_3d_array: npt.NDArray[np.float64]
+) -> npt.NDArray[np.float64]:
+    """Converts 3D points from the relative (origin body) frame to the absolute frame."""
+    if isinstance(origin, EulerStateSE3):
+        origin_array = origin.array
+    elif isinstance(origin, np.ndarray):
+        assert origin.ndim == 1 and origin.shape[-1] == len(EulerStateSE3Index)
+        origin_array = origin
+    else:
+        raise TypeError(f"Expected EulerStateSE3 or np.ndarray, got {type(origin)}")
+
+    assert points_3d_array.shape[-1] == len(Point3DIndex)
+
+    R = EulerAngles.from_array(origin_array[EulerStateSE3Index.EULER_ANGLES]).rotation_matrix
+    absolute_points = points_3d_array @ R.T + origin_array[EulerStateSE3Index.XYZ]
+    return absolute_points
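The deprecated Euler-based helpers mirror the quaternion API. A round-trip sketch (illustrative, same import assumptions as the earlier examples):

```python
import numpy as np

from py123d.geometry import EulerStateSE3
from py123d.geometry.transform.transform_euler_se3 import (
    convert_absolute_to_relative_euler_se3_array,
    convert_relative_to_absolute_euler_se3_array,
)

origin = EulerStateSE3(1.0, 2.0, 0.5, roll=0.0, pitch=0.0, yaw=np.pi / 3)
poses = np.array([[2.0, 2.0, 0.5, 0.0, 0.0, np.pi / 3]])

relative = convert_absolute_to_relative_euler_se3_array(origin, poses)
recovered = convert_relative_to_absolute_euler_se3_array(origin, relative)
assert np.allclose(recovered, poses)
```
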
diff --git a/src/py123d/geometry/transform/transform_se2.py b/src/py123d/geometry/transform/transform_se2.py
new file mode 100644
index 00000000..0ee2d5b5
--- /dev/null
+++ b/src/py123d/geometry/transform/transform_se2.py
@@ -0,0 +1,206 @@
+from typing import Union
+
+import numpy as np
+import numpy.typing as npt
+
+from py123d.geometry import Point2DIndex, StateSE2, StateSE2Index, Vector2D, Vector2DIndex
+from py123d.geometry.utils.rotation_utils import normalize_angle
+
+
+def convert_absolute_to_relative_se2_array(
+    origin: Union[StateSE2, npt.NDArray[np.float64]], state_se2_array: npt.NDArray[np.float64]
+) -> npt.NDArray[np.float64]:
+    """Converts a StateSE2 array from global to relative coordinates.
+
+    :param origin: origin pose of relative coords system
+    :param state_se2_array: array of SE2 states with (x,y,yaw), indexed by \
+        :class:`~py123d.geometry.geometry_index.StateSE2Index`, in last dim
+    :return: SE2 array in relative coordinates, indexed by \
+        :class:`~py123d.geometry.geometry_index.StateSE2Index`, in last dim
+    """
+    if isinstance(origin, StateSE2):
+        origin_array = origin.array
+    elif isinstance(origin, np.ndarray):
+        assert origin.ndim == 1 and origin.shape[-1] == len(StateSE2Index)
+        origin_array = origin
+    else:
+        raise TypeError(f"Expected StateSE2 or np.ndarray, got {type(origin)}")
+
+    assert len(StateSE2Index) == state_se2_array.shape[-1]
+
+    rotate_rad = -origin_array[StateSE2Index.YAW]
+    cos, sin = np.cos(rotate_rad), np.sin(rotate_rad)
+    R_inv = np.array([[cos, -sin], [sin, cos]])
+
+    state_se2_rel = state_se2_array - origin_array
+    state_se2_rel[..., StateSE2Index.XY] = state_se2_rel[..., StateSE2Index.XY] @ R_inv.T
+    state_se2_rel[..., StateSE2Index.YAW] = normalize_angle(state_se2_rel[..., StateSE2Index.YAW])
+
+    return state_se2_rel
+
+
+def convert_relative_to_absolute_se2_array(
+    origin: Union[StateSE2, npt.NDArray[np.float64]], state_se2_array: npt.NDArray[np.float64]
+) -> npt.NDArray[np.float64]:
+    """
+    Converts a StateSE2 array from relative to global (absolute) coordinates.
+    :param origin: origin pose of relative coords system
+    :param state_se2_array: array of SE2 states with (x,y,yaw) in last dim
+    :return: SE2 coords array in absolute coordinates
+    """
+    if isinstance(origin, StateSE2):
+        origin_array = origin.array
+    elif isinstance(origin, np.ndarray):
+        assert origin.ndim == 1 and origin.shape[-1] == len(StateSE2Index)
+        origin_array = origin
+    else:
+        raise TypeError(f"Expected StateSE2 or np.ndarray, got {type(origin)}")
+
+    assert len(StateSE2Index) == state_se2_array.shape[-1]
+
+    rotate_rad = origin_array[StateSE2Index.YAW]
+    cos, sin = np.cos(rotate_rad), np.sin(rotate_rad)
+    R = np.array([[cos, -sin], [sin, cos]])
+
+    state_se2_abs = np.zeros_like(state_se2_array, dtype=np.float64)
+    state_se2_abs[..., StateSE2Index.XY] = state_se2_array[..., StateSE2Index.XY] @ R.T
+    state_se2_abs[..., StateSE2Index.XY] += origin_array[..., StateSE2Index.XY]
+    state_se2_abs[..., StateSE2Index.YAW] = normalize_angle(
+        state_se2_array[..., StateSE2Index.YAW] + origin_array[..., StateSE2Index.YAW]
+    )
+
+    return state_se2_abs
+
+
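A worked value makes the sign convention concrete: a pose 1 m ahead of an origin facing +y must come out as x=+1 in the origin's body frame (sketch, same assumptions as above):

```python
import numpy as np

from py123d.geometry import StateSE2
from py123d.geometry.transform import convert_absolute_to_relative_se2_array

origin = StateSE2(0.0, 0.0, np.pi / 2)  # origin facing +y
ahead = np.array([[0.0, 1.0, np.pi / 2]])  # 1 m ahead of the origin

relative = convert_absolute_to_relative_se2_array(origin, ahead)
assert np.allclose(relative, [[1.0, 0.0, 0.0]])  # straight ahead, no residual yaw
```
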
+def convert_absolute_to_relative_point_2d_array(
+    origin: Union[StateSE2, npt.NDArray[np.float64]], point_2d_array: npt.NDArray[np.float64]
+) -> npt.NDArray[np.float64]:
+    """Converts a 2D point array from global to relative coordinates.
+
+    :param origin: origin pose of relative coords system
+    :param point_2d_array: array of 2D points with (x,y) in last dim
+    :return: 2D points array in relative coordinates
+    """
+    if isinstance(origin, StateSE2):
+        origin_array = origin.array
+    elif isinstance(origin, np.ndarray):
+        assert origin.ndim == 1 and origin.shape[-1] == len(StateSE2Index)
+        origin_array = origin
+    else:
+        raise TypeError(f"Expected StateSE2 or np.ndarray, got {type(origin)}")
+
+    rotate_rad = -origin_array[StateSE2Index.YAW]
+    cos, sin = np.cos(rotate_rad), np.sin(rotate_rad)
+    R = np.array([[cos, -sin], [sin, cos]], dtype=np.float64)
+
+    point_2d_rel = point_2d_array - origin_array[..., StateSE2Index.XY]
+    point_2d_rel = point_2d_rel @ R.T
+
+    return point_2d_rel
+
+
+def convert_relative_to_absolute_point_2d_array(
+    origin: Union[StateSE2, npt.NDArray[np.float64]], point_2d_array: npt.NDArray[np.float64]
+) -> npt.NDArray[np.float64]:
+    """Converts a 2D point array from relative to global (absolute) coordinates."""
+    if isinstance(origin, StateSE2):
+        origin_array = origin.array
+    elif isinstance(origin, np.ndarray):
+        assert origin.ndim == 1 and origin.shape[-1] == len(StateSE2Index)
+        origin_array = origin
+    else:
+        raise TypeError(f"Expected StateSE2 or np.ndarray, got {type(origin)}")
+
+    rotate_rad = origin_array[StateSE2Index.YAW]
+    cos, sin = np.cos(rotate_rad), np.sin(rotate_rad)
+    R = np.array([[cos, -sin], [sin, cos]], dtype=np.float64)
+
+    point_2d_abs = point_2d_array @ R.T
+    point_2d_abs = point_2d_abs + origin_array[..., StateSE2Index.XY]
+
+    return point_2d_abs
+
+
+def translate_se2_array_along_body_frame(
+    state_se2_array: npt.NDArray[np.float64], translation: Vector2D
+) -> npt.NDArray[np.float64]:
+    """Translate an array of SE2 states along their respective local coordinate frames.
+
+    :param state_se2_array: array of SE2 states with (x,y,yaw) in last dim
+    :param translation: 2D translation in local frame (x: forward, y: left)
+    :return: translated SE2 array
+    """
+    assert len(StateSE2Index) == state_se2_array.shape[-1]
+    result = state_se2_array.copy()
+    yaws = state_se2_array[..., StateSE2Index.YAW]
+    cos_yaws, sin_yaws = np.cos(yaws), np.sin(yaws)
+
+    # Transform translation from local to global frame for each state
+    # Create rotation matrices for each state
+    R = np.stack([cos_yaws, -sin_yaws, sin_yaws, cos_yaws], axis=-1).reshape(*cos_yaws.shape, 2, 2)
+
+    # Transform translation vector from local to global frame
+    translation_vector = translation.array[Vector2DIndex.XY]  # [x, y]
+    global_translation = np.einsum("...ij,...j->...i", R, translation_vector)
+
+    result[..., StateSE2Index.XY] += global_translation
+
+    return result
+
+
+def translate_se2_along_body_frame(state_se2: StateSE2, translation: Vector2D) -> StateSE2:
+    """Translate a single SE2 state along its local coordinate frame.
+
+    :param state_se2: SE2 state to translate
+    :param translation: 2D translation in local frame (x: forward, y: left)
+    :return: translated SE2 state
+    """
+    return StateSE2.from_array(translate_se2_array_along_body_frame(state_se2.array, translation), copy=False)
+
+
+def translate_se2_along_x(state_se2: StateSE2, distance: float) -> StateSE2:
+    """Translate a single SE2 state along its local X-axis.
+ + :param state_se2: SE2 state to translate + :param distance: distance to translate along the local X-axis + :return: translated SE2 state + """ + translation = Vector2D.from_array(np.array([distance, 0.0], dtype=np.float64)) + return StateSE2.from_array(translate_se2_array_along_body_frame(state_se2.array, translation), copy=False) + + +def translate_se2_along_y(state_se2: StateSE2, distance: float) -> StateSE2: + """Translate a single SE2 state along its local Y-axis. + + :param state_se2: SE2 state to translate + :param distance: distance to translate along the local Y-axis + :return: translated SE2 state + """ + translation = Vector2D.from_array(np.array([0.0, distance], dtype=np.float64)) + return StateSE2.from_array(translate_se2_array_along_body_frame(state_se2.array, translation), copy=False) + + +def translate_2d_along_body_frame( + points_2d: npt.NDArray[np.float64], + yaws: npt.NDArray[np.float64], + x_translate: npt.NDArray[np.float64], + y_translate: npt.NDArray[np.float64], +) -> npt.NDArray[np.float64]: + """Translate 2D points along their body frame. + + :param points_2d: Array of 2D points, indexed by :class:`~py123d.geometry.Point2DIndex`. + :param yaws: Array of yaw angles. + :param x_translate: Array of x translation, i.e. forward translation. + :param y_translate: Array of y translation, i.e. left translation. + :return: Array of translated 2D points, indexed by :class:`~py123d.geometry.Point2DIndex`. + """ + assert points_2d.shape[-1] == len(Point2DIndex) + half_pi = np.pi / 2.0 + translation: npt.NDArray[np.float64] = np.stack( + [ + (y_translate * np.cos(yaws + half_pi)) + (x_translate * np.cos(yaws)), + (y_translate * np.sin(yaws + half_pi)) + (x_translate * np.sin(yaws)), + ], + axis=-1, + ) + return points_2d + translation diff --git a/src/py123d/geometry/transform/transform_se3.py b/src/py123d/geometry/transform/transform_se3.py new file mode 100644 index 00000000..8f394772 --- /dev/null +++ b/src/py123d/geometry/transform/transform_se3.py @@ -0,0 +1,294 @@ +from typing import Tuple, Union + +import numpy as np +import numpy.typing as npt + +from py123d.geometry import Point3DIndex, QuaternionIndex, StateSE3, StateSE3Index, Vector3D, Vector3DIndex +from py123d.geometry.utils.rotation_utils import ( + conjugate_quaternion_array, + get_rotation_matrices_from_quaternion_array, + get_rotation_matrix_from_quaternion_array, + multiply_quaternion_arrays, +) + + +def _extract_rotation_translation_pose_arrays( + pose: Union[StateSE3, npt.NDArray[np.float64]], +) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64], npt.NDArray[np.float64]]: + """Helper function to extract rotation matrix and translation vector from a StateSE3 or np.ndarray. + + :param pose: A StateSE3 pose or np.ndarray, indexed by :class:`~py123d.geometry.StateSE3Index`. + :raises TypeError: If the pose is not a StateSE3 or np.ndarray. + :return: A tuple containing the rotation matrix, translation vector, and pose array. 
+ """ + if isinstance(pose, StateSE3): + translation = pose.point_3d.array + rotation = pose.rotation_matrix + pose_array = pose.array + elif isinstance(pose, np.ndarray): + assert pose.ndim == 1 and pose.shape[-1] == len(StateSE3Index) + translation = pose[StateSE3Index.XYZ] + rotation = get_rotation_matrix_from_quaternion_array(pose[StateSE3Index.QUATERNION]) + pose_array = pose + else: + raise TypeError(f"Expected StateSE3 or np.ndarray, got {type(pose)}") + + return rotation, translation, pose_array + + +def convert_absolute_to_relative_points_3d_array( + origin: Union[StateSE3, npt.NDArray[np.float64]], points_3d_array: npt.NDArray[np.float64] +) -> npt.NDArray[np.float64]: + """Converts 3D points from the absolute frame to the relative frame. + + :param origin: The origin state in the absolute frame, as a StateSE3 or np.ndarray. + :param points_3d_array: The 3D points in the absolute frame. + :raises TypeError: If the origin is not a StateSE3 or np.ndarray. + :return: The 3D points in the relative frame, indexed by :class:`~py123d.geometry.Point3DIndex`. + """ + + R_origin, t_origin, _ = _extract_rotation_translation_pose_arrays(origin) + + assert points_3d_array.ndim >= 1 + assert points_3d_array.shape[-1] == len(Point3DIndex) + + # Translate points to origin frame, then rotate to body frame + relative_points = (points_3d_array - t_origin) @ R_origin + return relative_points + + +def convert_absolute_to_relative_se3_array( + origin: Union[StateSE3, npt.NDArray[np.float64]], se3_array: npt.NDArray[np.float64] +) -> npt.NDArray[np.float64]: + """Converts an SE3 array from the absolute frame to the relative frame. + + :param origin: The origin state in the absolute frame, as a StateSE3 or np.ndarray. + :param se3_array: The SE3 array in the absolute frame. + :raises TypeError: If the origin is not a StateSE3 or np.ndarray. + :return: The SE3 array in the relative frame, indexed by :class:`~py123d.geometry.StateSE3Index`. + """ + R_origin, t_origin, origin_array = _extract_rotation_translation_pose_arrays(origin) + + assert se3_array.ndim >= 1 + assert se3_array.shape[-1] == len(StateSE3Index) + + abs_positions = se3_array[..., StateSE3Index.XYZ] + abs_quaternions = se3_array[..., StateSE3Index.QUATERNION] + + rel_se3_array = np.zeros_like(se3_array) + + # 1. Vectorized relative position calculation: translate and rotate + rel_positions = (abs_positions - t_origin) @ R_origin + rel_se3_array[..., StateSE3Index.XYZ] = rel_positions + + # 2. Vectorized relative orientation calculation: quaternion multiplication with conjugate + q_origin_conj = conjugate_quaternion_array(origin_array[StateSE3Index.QUATERNION]) + rel_quaternions = multiply_quaternion_arrays(q_origin_conj, abs_quaternions) + + rel_se3_array[..., StateSE3Index.QUATERNION] = rel_quaternions + + return rel_se3_array + + +def convert_relative_to_absolute_points_3d_array( + origin: Union[StateSE3, npt.NDArray[np.float64]], points_3d_array: npt.NDArray[np.float64] +) -> npt.NDArray[np.float64]: + """Converts 3D points from the relative frame to the absolute frame. + + :param origin: The origin state in the absolute frame, as a StateSE3 or np.ndarray. + :param points_3d_array: The 3D points in the relative frame, indexed by :class:`~py123d.geometry.Point3DIndex`. + :raises TypeError: If the origin is not a StateSE3 or np.ndarray. + :return: The 3D points in the absolute frame, indexed by :class:`~py123d.geometry.Point3DIndex`. 
+ """ + R_origin, t_origin, _ = _extract_rotation_translation_pose_arrays(origin) + + assert points_3d_array.shape[-1] == len(Point3DIndex) + + absolute_points = points_3d_array @ R_origin.T + t_origin + return absolute_points + + +def convert_relative_to_absolute_se3_array( + origin: StateSE3, se3_array: npt.NDArray[np.float64] +) -> npt.NDArray[np.float64]: + """Converts an SE3 array from the relative frame to the absolute frame. + + :param origin: The origin state in the relative frame, as a StateSE3 or np.ndarray. + :param se3_array: The SE3 array in the relative frame. + :raises TypeError: If the origin is not a StateSE3 or np.ndarray. + :return: The SE3 array in the absolute frame, indexed by :class:`~py123d.geometry.StateSE3Index`. + """ + + R_origin, t_origin, origin_array = _extract_rotation_translation_pose_arrays(origin) + + assert se3_array.ndim >= 1 + assert se3_array.shape[-1] == len(StateSE3Index) + + # Extract relative positions and orientations + rel_positions = se3_array[..., StateSE3Index.XYZ] + rel_quaternions = se3_array[..., StateSE3Index.QUATERNION] + + # Vectorized absolute position calculation: rotate and translate + abs_positions = (R_origin @ rel_positions.T).T + t_origin + abs_quaternions = multiply_quaternion_arrays(origin_array[StateSE3Index.QUATERNION], rel_quaternions) + + # Prepare output array + abs_se3_array = se3_array.copy() + abs_se3_array[..., StateSE3Index.XYZ] = abs_positions + abs_se3_array[..., StateSE3Index.QUATERNION] = abs_quaternions + + return abs_se3_array + + +def convert_se3_array_between_origins( + from_origin: Union[StateSE3, npt.NDArray[np.float64]], + to_origin: Union[StateSE3, npt.NDArray[np.float64]], + se3_array: npt.NDArray[np.float64], +) -> npt.NDArray[np.float64]: + """Converts an SE3 array from one origin frame to another origin frame. + + :param from_origin: The source origin state in the absolute frame, as a StateSE3 or np.ndarray. + :param to_origin: The target origin state in the absolute frame, as a StateSE3 or np.ndarray. + :param se3_array: The SE3 array in the source origin frame. + :raises TypeError: If the origins are not StateSE3 or np.ndarray. + :return: The SE3 array in the target origin frame, indexed by :class:`~py123d.geometry.StateSE3Index`. 
+ """ + # Parse from_origin & to_origin + R_from, t_from, from_origin_array = _extract_rotation_translation_pose_arrays(from_origin) + R_to, t_to, to_origin_array = _extract_rotation_translation_pose_arrays(to_origin) + + assert se3_array.ndim >= 1 + assert se3_array.shape[-1] == len(StateSE3Index) + + rel_positions = se3_array[..., StateSE3Index.XYZ] + rel_quaternions = se3_array[..., StateSE3Index.QUATERNION] + + # Compute relative transformation: T_to^-1 * T_from + R_rel = R_to.T @ R_from # Relative rotation matrix + t_rel = R_to.T @ (t_from - t_to) # Relative translation + + q_rel = multiply_quaternion_arrays( + conjugate_quaternion_array(to_origin_array[StateSE3Index.QUATERNION]), + from_origin_array[StateSE3Index.QUATERNION], + ) + + # Transform positions: rotate and translate + new_rel_positions = (R_rel @ rel_positions.T).T + t_rel + + # Transform orientations: quaternion multiplication + new_rel_quaternions = multiply_quaternion_arrays(q_rel, rel_quaternions) + + # Prepare output array + result_se3_array = np.zeros_like(se3_array) + result_se3_array[..., StateSE3Index.XYZ] = new_rel_positions + result_se3_array[..., StateSE3Index.QUATERNION] = new_rel_quaternions + + return result_se3_array + + +def convert_points_3d_array_between_origins( + from_origin: Union[StateSE3, npt.NDArray[np.float64]], + to_origin: Union[StateSE3, npt.NDArray[np.float64]], + points_3d_array: npt.NDArray[np.float64], +) -> npt.NDArray[np.float64]: + """Converts 3D points from one origin frame to another origin frame. + + :param from_origin: The source origin state in the absolute frame, as a StateSE3 or np.ndarray. + :param to_origin: The target origin state in the absolute frame, as a StateSE3 or np.ndarray. + :param points_3d_array: The 3D points in the source origin frame. + :raises TypeError: If the origins are not StateSE3 or np.ndarray. + :return: The 3D points in the target origin frame, indexed by :class:`~py123d.geometry.Point3DIndex`. + """ + # Parse from_origin & to_origin + R_from, t_from, _ = _extract_rotation_translation_pose_arrays(from_origin) + R_to, t_to, _ = _extract_rotation_translation_pose_arrays(to_origin) + + assert points_3d_array.ndim >= 1 + assert points_3d_array.shape[-1] == len(Point3DIndex) + + R_rel = R_to.T @ R_from # Relative rotation matrix + t_rel = R_to.T @ (t_from - t_to) # Relative translation + + conv_points_3d_array = (R_rel @ points_3d_array.T).T + t_rel + return conv_points_3d_array + + +def translate_se3_along_z(state_se3: StateSE3, distance: float) -> StateSE3: + """Translates an SE3 state along the Z-axis. + + :param state_se3: The SE3 state to translate. + :param distance: The distance to translate along the Z-axis. + :return: The translated SE3 state. + """ + R = state_se3.rotation_matrix + z_axis = R[:, 2] + + state_se3_array = state_se3.array.copy() + state_se3_array[StateSE3Index.XYZ] += distance * z_axis[Vector3DIndex.XYZ] + return StateSE3.from_array(state_se3_array, copy=False) + + +def translate_se3_along_y(state_se3: StateSE3, distance: float) -> StateSE3: + """Translates a SE3 state along the Y-axis. + + :param state_se3: The SE3 state to translate. + :param distance: The distance to translate along the Y-axis. + :return: The translated SE3 state. 
+ """ + R = state_se3.rotation_matrix + y_axis = R[:, 1] + + state_se3_array = state_se3.array.copy() + state_se3_array[StateSE3Index.XYZ] += distance * y_axis[Vector3DIndex.XYZ] + return StateSE3.from_array(state_se3_array, copy=False) + + +def translate_se3_along_x(state_se3: StateSE3, distance: float) -> StateSE3: + """Translates a SE3 state along the X-axis. + + :param state_se3: The SE3 state to translate. + :param distance: The distance to translate along the X-axis. + :return: The translated SE3 state. + """ + R = state_se3.rotation_matrix + x_axis = R[:, 0] + + state_se3_array = state_se3.array.copy() + state_se3_array[StateSE3Index.XYZ] += distance * x_axis[Vector3DIndex.XYZ] + return StateSE3.from_array(state_se3_array, copy=False) + + +def translate_se3_along_body_frame(state_se3: StateSE3, vector_3d: Vector3D) -> StateSE3: + """Translates a SE3 state along a vector in the body frame. + + :param state_se3: The SE3 state to translate. + :param vector_3d: The vector to translate along in the body frame. + :return: The translated SE3 state. + """ + R = state_se3.rotation_matrix + world_translation = R @ vector_3d.array + + state_se3_array = state_se3.array.copy() + state_se3_array[StateSE3Index.XYZ] += world_translation + return StateSE3.from_array(state_se3_array, copy=False) + + +def translate_3d_along_body_frame( + points_3d: npt.NDArray[np.float64], + quaternions: npt.NDArray[np.float64], + translation: npt.NDArray[np.float64], +) -> npt.NDArray[np.float64]: + """Translates 3D points along a vector in the body frame defined by quaternions. + + :param points_3d: Array of 3D points, index by :class:`~py123d.geometry.Point3DIndex`. + :param quaternions: Array of quaternions, index by :class:`~py123d.geometry.QuaternionIndex`. + :param translation: Array of translation vectors, index by :class:`~py123d.geometry.Vector3DIndex`. + :return: The translated 3D points in the world frame, index by :class:`~py123d.geometry.Point3DIndex`. + """ + assert points_3d.shape[-1] == len(Point3DIndex) + assert quaternions.shape[-1] == len(QuaternionIndex) + assert translation.shape[-1] == len(Vector3DIndex) + + R = get_rotation_matrices_from_quaternion_array(quaternions) + world_translation = np.einsum("...ij,...j->...i", R, translation) + return points_3d + world_translation diff --git a/d123/script/config/__init__.py b/src/py123d/geometry/utils/__init__.py similarity index 100% rename from d123/script/config/__init__.py rename to src/py123d/geometry/utils/__init__.py diff --git a/src/py123d/geometry/utils/bounding_box_utils.py b/src/py123d/geometry/utils/bounding_box_utils.py new file mode 100644 index 00000000..d31058bd --- /dev/null +++ b/src/py123d/geometry/utils/bounding_box_utils.py @@ -0,0 +1,216 @@ +from typing import Tuple + +import numpy as np +import numpy.typing as npt +import shapely + +from py123d.geometry.geometry_index import ( + BoundingBoxSE2Index, + BoundingBoxSE3Index, + Corners2DIndex, + Corners3DIndex, + Point2DIndex, + Point3DIndex, + Vector2DIndex, + Vector3DIndex, +) +from py123d.geometry.transform.transform_se2 import translate_2d_along_body_frame +from py123d.geometry.transform.transform_se3 import translate_3d_along_body_frame + + +def get_corners_2d_factors() -> npt.NDArray[np.float64]: + """Returns the factors to compute the corners of a SE2 bounding box in the body frame. + + The factors are defined such that multiplying them with the length and width + of the bounding box yields the corner coordinates in the body frame. 
diff --git a/d123/script/config/__init__.py b/src/py123d/geometry/utils/__init__.py
similarity index 100%
rename from d123/script/config/__init__.py
rename to src/py123d/geometry/utils/__init__.py
diff --git a/src/py123d/geometry/utils/bounding_box_utils.py b/src/py123d/geometry/utils/bounding_box_utils.py
new file mode 100644
index 00000000..d31058bd
--- /dev/null
+++ b/src/py123d/geometry/utils/bounding_box_utils.py
@@ -0,0 +1,216 @@
+from typing import Tuple
+
+import numpy as np
+import numpy.typing as npt
+import shapely
+
+from py123d.geometry.geometry_index import (
+    BoundingBoxSE2Index,
+    BoundingBoxSE3Index,
+    Corners2DIndex,
+    Corners3DIndex,
+    Point2DIndex,
+    Point3DIndex,
+    Vector2DIndex,
+    Vector3DIndex,
+)
+from py123d.geometry.transform.transform_se2 import translate_2d_along_body_frame
+from py123d.geometry.transform.transform_se3 import translate_3d_along_body_frame
+
+
+def get_corners_2d_factors() -> npt.NDArray[np.float64]:
+    """Returns the factors to compute the corners of an SE2 bounding box in the body frame.
+
+    The factors are defined such that multiplying them with the length and width
+    of the bounding box yields the corner coordinates in the body frame.
+
+    :return: A (4, 2) array, indexed by :class:`~py123d.geometry.Corners2DIndex` and
+        :class:`~py123d.geometry.Point2DIndex`, respectively.
+    """
+    # NOTE: ISO 8855 convention for rotation
+    factors = np.zeros((len(Corners2DIndex), len(Point2DIndex)), dtype=np.float64)
+    factors.fill(0.5)
+    factors[Corners2DIndex.FRONT_LEFT] *= [+1, +1]
+    factors[Corners2DIndex.FRONT_RIGHT] *= [+1, -1]
+    factors[Corners2DIndex.BACK_RIGHT] *= [-1, -1]
+    factors[Corners2DIndex.BACK_LEFT] *= [-1, +1]
+    return factors
+
+
+def get_corners_3d_factors() -> npt.NDArray[np.float64]:
+    """Returns the factors to compute the corners of an SE3 bounding box in the body frame.
+
+    The factors are defined such that multiplying them with the length, width, and height
+    of the bounding box yields the corner coordinates in the body frame.
+
+    :return: An (8, 3) array, indexed by :class:`~py123d.geometry.Corners3DIndex` and
+        :class:`~py123d.geometry.Vector3DIndex`, respectively.
+    """
+    # NOTE: ISO 8855 convention for rotation
+    factors = np.zeros((len(Corners3DIndex), len(Vector3DIndex)), dtype=np.float64)
+    factors.fill(0.5)
+    factors[Corners3DIndex.FRONT_LEFT_BOTTOM] *= [+1, +1, -1]
+    factors[Corners3DIndex.FRONT_RIGHT_BOTTOM] *= [+1, -1, -1]
+    factors[Corners3DIndex.BACK_RIGHT_BOTTOM] *= [-1, -1, -1]
+    factors[Corners3DIndex.BACK_LEFT_BOTTOM] *= [-1, +1, -1]
+    factors[Corners3DIndex.FRONT_LEFT_TOP] *= [+1, +1, +1]
+    factors[Corners3DIndex.FRONT_RIGHT_TOP] *= [+1, -1, +1]
+    factors[Corners3DIndex.BACK_RIGHT_TOP] *= [-1, -1, +1]
+    factors[Corners3DIndex.BACK_LEFT_TOP] *= [-1, +1, +1]
+    return factors
+
+
+def bbse2_array_to_corners_array(bbse2: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """Converts an array of BoundingBoxSE2 objects to the 2D coordinates array of their corners.
+
+    :param bbse2: Array of SE2 bounding boxes, indexed by :class:`~py123d.geometry.BoundingBoxSE2Index`.
+    :return: Coordinates array of shape (..., 4, 2), indexed by
+        :class:`~py123d.geometry.Corners2DIndex` and :class:`~py123d.geometry.Point2DIndex`, respectively.
+    """
+    assert bbse2.shape[-1] == len(BoundingBoxSE2Index)
+
+    ndim_one: bool = bbse2.ndim == 1
+    if ndim_one:
+        bbse2 = bbse2[None, ...]
+
+    centers = bbse2[..., BoundingBoxSE2Index.XY]
+    yaws = bbse2[..., BoundingBoxSE2Index.YAW]
+    extents = bbse2[..., BoundingBoxSE2Index.EXTENT]  # (..., 2)
+
+    factors = get_corners_2d_factors()  # (4, 2)
+    corner_translation_body = extents[..., None, :] * factors[None, :, :]  # (..., 4, 2)
+
+    corners_array = translate_2d_along_body_frame(  # (..., 4, 2)
+        points_2d=centers[..., None, :],  # (..., 1, 2)
+        yaws=yaws[..., None],  # (..., 1)
+        x_translate=corner_translation_body[..., Vector2DIndex.X],
+        y_translate=corner_translation_body[..., Vector2DIndex.Y],
+    )  # (..., 4, 2)
+
+    return corners_array.squeeze(axis=0) if ndim_one else corners_array
+
+
+def corners_2d_array_to_polygon_array(corners_array: npt.NDArray[np.float64]) -> npt.NDArray[np.object_]:
+    """Converts an array of 2D corners to an array of shapely Polygons.
+    TODO: Consider removing this function?
+
+    :param corners_array: Array of shape (..., 4, 2) where 4 is the number of corners.
+    :return: Array of shapely Polygons.
+    """
+    polygons = shapely.creation.polygons(corners_array)
+    return polygons
+
+
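A sketch of the corner computation for one axis-aligned box. The [x, y, yaw, length, width] field order is an assumption about `BoundingBoxSE2Index`, not confirmed by this diff:

```python
import numpy as np

from py123d.geometry.utils.bounding_box_utils import bbse2_array_to_corners_array

# Hypothetical field order: x=0, y=0, yaw=0, length=4, width=2.
box = np.array([0.0, 0.0, 0.0, 4.0, 2.0])

corners = bbse2_array_to_corners_array(box)  # shape (4, 2)
assert np.allclose(corners.max(axis=0), [2.0, 1.0])
assert np.allclose(corners.min(axis=0), [-2.0, -1.0])
```
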
+def bbse2_array_to_polygon_array(bbse2: npt.NDArray[np.float64]) -> npt.NDArray[np.object_]:
+    """Converts an array of BoundingBoxSE2 objects to an array of shapely Polygons.
+
+    :param bbse2: Array of SE2 bounding boxes, indexed by :class:`~py123d.geometry.BoundingBoxSE2Index`.
+    :return: Array of shapely Polygons.
+    """
+    return corners_2d_array_to_polygon_array(bbse2_array_to_corners_array(bbse2))
+
+
+def bbse3_array_to_corners_array(bbse3_array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """Converts an array of BoundingBoxSE3 objects to the 3D coordinates array of their corners.
+
+    :param bbse3_array: Array of SE3 bounding boxes, indexed by :class:`~py123d.geometry.BoundingBoxSE3Index`.
+    :return: Coordinates array of shape (..., 8, 3), indexed by
+        :class:`~py123d.geometry.Corners3DIndex` and :class:`~py123d.geometry.Point3DIndex`, respectively.
+    """
+    assert bbse3_array.shape[-1] == len(BoundingBoxSE3Index)
+
+    # Flag whether to unsqueeze and squeeze the input dim
+    ndim_one: bool = bbse3_array.ndim == 1
+    if ndim_one:
+        bbse3_array = bbse3_array[None, ...]
+
+    # Extract parameters
+    centers = bbse3_array[..., BoundingBoxSE3Index.XYZ]  # (..., 3)
+    quaternions = bbse3_array[..., BoundingBoxSE3Index.QUATERNION]  # (..., 4)
+
+    # Box extents
+    factors = get_corners_3d_factors()  # (8, 3)
+    extents = bbse3_array[..., BoundingBoxSE3Index.EXTENT]  # (..., 3)
+    corner_translation_body = extents[..., None, :] * factors[None, :, :]  # (..., 8, 3)
+    corners_world = translate_3d_along_body_frame(
+        centers[..., None, :],  # (..., 1, 3)
+        quaternions[..., None, :],  # (..., 1, 4)
+        corner_translation_body,
+    )
+
+    return corners_world.squeeze(axis=0) if ndim_one else corners_world
+
+
+def corners_array_to_3d_mesh(
+    corners_array: npt.NDArray[np.float64],
+) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.int32]]:
+    """Converts an (N, 8, 3) array of box corners to a single triangle mesh as (vertices, faces)."""
+    num_boxes = corners_array.shape[0]
+    vertices = corners_array.reshape(-1, 3)
+
+    # Define the faces for a single box using Corners3DIndex
+    faces_single = np.array(
+        [
+            # Bottom face
+            [Corners3DIndex.FRONT_LEFT_BOTTOM, Corners3DIndex.FRONT_RIGHT_BOTTOM, Corners3DIndex.BACK_RIGHT_BOTTOM],
+            [Corners3DIndex.FRONT_LEFT_BOTTOM, Corners3DIndex.BACK_RIGHT_BOTTOM, Corners3DIndex.BACK_LEFT_BOTTOM],
+            # Top face
+            [Corners3DIndex.BACK_RIGHT_TOP, Corners3DIndex.FRONT_RIGHT_TOP, Corners3DIndex.FRONT_LEFT_TOP],
+            [Corners3DIndex.BACK_LEFT_TOP, Corners3DIndex.BACK_RIGHT_TOP, Corners3DIndex.FRONT_LEFT_TOP],
+            # Left face
+            [Corners3DIndex.FRONT_LEFT_BOTTOM, Corners3DIndex.BACK_LEFT_BOTTOM, Corners3DIndex.BACK_LEFT_TOP],
+            [Corners3DIndex.FRONT_LEFT_BOTTOM, Corners3DIndex.BACK_LEFT_TOP, Corners3DIndex.FRONT_LEFT_TOP],
+            # Right face
+            [Corners3DIndex.BACK_RIGHT_TOP, Corners3DIndex.BACK_RIGHT_BOTTOM, Corners3DIndex.FRONT_RIGHT_BOTTOM],
+            [Corners3DIndex.FRONT_RIGHT_TOP, Corners3DIndex.BACK_RIGHT_TOP, Corners3DIndex.FRONT_RIGHT_BOTTOM],
+            # Front face
+            [Corners3DIndex.FRONT_RIGHT_TOP, Corners3DIndex.FRONT_RIGHT_BOTTOM, Corners3DIndex.FRONT_LEFT_BOTTOM],
+            [Corners3DIndex.FRONT_LEFT_TOP, Corners3DIndex.FRONT_RIGHT_TOP, Corners3DIndex.FRONT_LEFT_BOTTOM],
+            # Back face
+            [Corners3DIndex.BACK_LEFT_TOP, Corners3DIndex.BACK_LEFT_BOTTOM, Corners3DIndex.BACK_RIGHT_BOTTOM],
+            [Corners3DIndex.BACK_RIGHT_TOP, Corners3DIndex.BACK_LEFT_TOP, Corners3DIndex.BACK_RIGHT_BOTTOM],
+        ],
+        dtype=np.int32,
+    )
+
+    # Offset the faces for each box
+    faces = np.vstack([faces_single + i * len(Corners3DIndex) for i in range(num_boxes)])
+
+    return vertices, faces
+
+
+def corners_array_to_edge_lines(corners_array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    assert corners_array.shape[-1] == len(Point3DIndex)
+    assert corners_array.shape[-2] == len(Corners3DIndex)
+    assert corners_array.ndim >= 2
+
+    index_pairs = [
+        (Corners3DIndex.FRONT_LEFT_BOTTOM,
Corners3DIndex.FRONT_RIGHT_BOTTOM), + (Corners3DIndex.FRONT_RIGHT_BOTTOM, Corners3DIndex.BACK_RIGHT_BOTTOM), + (Corners3DIndex.BACK_RIGHT_BOTTOM, Corners3DIndex.BACK_LEFT_BOTTOM), + (Corners3DIndex.BACK_LEFT_BOTTOM, Corners3DIndex.FRONT_LEFT_BOTTOM), + (Corners3DIndex.FRONT_LEFT_TOP, Corners3DIndex.FRONT_RIGHT_TOP), + (Corners3DIndex.FRONT_RIGHT_TOP, Corners3DIndex.BACK_RIGHT_TOP), + (Corners3DIndex.BACK_RIGHT_TOP, Corners3DIndex.BACK_LEFT_TOP), + (Corners3DIndex.BACK_LEFT_TOP, Corners3DIndex.FRONT_LEFT_TOP), + (Corners3DIndex.FRONT_LEFT_BOTTOM, Corners3DIndex.FRONT_LEFT_TOP), + (Corners3DIndex.FRONT_RIGHT_BOTTOM, Corners3DIndex.FRONT_RIGHT_TOP), + (Corners3DIndex.BACK_RIGHT_BOTTOM, Corners3DIndex.BACK_RIGHT_TOP), + (Corners3DIndex.BACK_LEFT_BOTTOM, Corners3DIndex.BACK_LEFT_TOP), + ] + + if corners_array.ndim == 2: + # Single box case: (8, 3) + edge_lines = np.zeros((len(index_pairs), 2, len(Point3DIndex)), dtype=np.float64) + for edge_idx, (start_idx, end_idx) in enumerate(index_pairs): + edge_lines[edge_idx, 0] = corners_array[start_idx] + edge_lines[edge_idx, 1] = corners_array[end_idx] + else: + # Batched case: (..., 8, 3) + batch_shape = corners_array.shape[:-2] + edge_lines = np.zeros(batch_shape + (len(index_pairs), 2, len(Point3DIndex)), dtype=np.float64) + for edge_idx, (start_idx, end_idx) in enumerate(index_pairs): + edge_lines[..., edge_idx, 0, :] = corners_array[..., start_idx, :] + edge_lines[..., edge_idx, 1, :] = corners_array[..., end_idx, :] + + return edge_lines diff --git a/d123/common/geometry/constants.py b/src/py123d/geometry/utils/constants.py similarity index 100% rename from d123/common/geometry/constants.py rename to src/py123d/geometry/utils/constants.py diff --git a/d123/common/geometry/line/helper.py b/src/py123d/geometry/utils/polyline_utils.py similarity index 67% rename from d123/common/geometry/line/helper.py rename to src/py123d/geometry/utils/polyline_utils.py index 99036004..d66628e8 100644 --- a/d123/common/geometry/line/helper.py +++ b/src/py123d/geometry/utils/polyline_utils.py @@ -2,7 +2,8 @@ import numpy.typing as npt from shapely.geometry import LineString -from d123.common.geometry.base import Point2DIndex, StateSE2Index +from py123d.geometry.geometry_index import Point2DIndex, StateSE2Index +from py123d.geometry.transform.transform_se2 import translate_2d_along_body_frame def get_linestring_yaws(linestring: LineString) -> npt.NDArray[np.float64]: @@ -41,3 +42,24 @@ def get_path_progress(points_array: npt.NDArray[np.float64]) -> list[float]: points_diff: npt.NDArray[np.float64] = np.concatenate(([x_diff], [y_diff]), axis=0, dtype=np.float64) progress_diff = np.append(0.0, np.linalg.norm(points_diff, axis=0)) return np.cumsum(progress_diff, dtype=np.float64) # type: ignore + + +def offset_points_perpendicular(points_array: npt.NDArray[np.float64], offset: float) -> npt.NDArray[np.float64]: + if points_array.shape[-1] == len(Point2DIndex): + xy = points_array[..., Point2DIndex.XY] + yaws = get_points_2d_yaws(points_array[..., Point2DIndex.XY]) + elif points_array.shape[-1] == len(StateSE2Index): + xy = points_array[..., StateSE2Index.XY] + yaws = points_array[..., StateSE2Index.YAW] + else: + raise ValueError( + f"Invalid points_array shape: {points_array.shape}. Expected last dimension to be {len(Point2DIndex)} or " + f"{len(StateSE2Index)}." 
+        )
+
+    return translate_2d_along_body_frame(
+        points_2d=xy,
+        yaws=yaws,
+        y_translate=offset,
+        x_translate=0.0,
+    )
diff --git a/src/py123d/geometry/utils/rotation_utils.py b/src/py123d/geometry/utils/rotation_utils.py
new file mode 100644
index 00000000..2429ffda
--- /dev/null
+++ b/src/py123d/geometry/utils/rotation_utils.py
@@ -0,0 +1,470 @@
+from typing import Union
+
+import numpy as np
+import numpy.typing as npt
+
+from py123d.geometry.geometry_index import EulerAnglesIndex, QuaternionIndex
+
+# import pyquaternion
+
+
+def batch_matmul(A: npt.NDArray[np.float64], B: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """Batch matrix multiplication for arrays of matrices.
+    # TODO: move somewhere else
+
+    :param A: Array of shape (..., M, N)
+    :param B: Array of shape (..., N, P)
+    :return: Array of shape (..., M, P) resulting from batch matrix multiplication of A and B.
+    """
+    assert A.ndim >= 2 and B.ndim >= 2
+    assert (
+        A.shape[-1] == B.shape[-2]
+    ), f"Inner dimensions must match for matrix multiplication, got {A.shape} and {B.shape}"
+    return np.einsum("...ij,...jk->...ik", A, B)
+
+
+def normalize_angle(angle: Union[float, npt.NDArray[np.float64]]) -> Union[float, npt.NDArray[np.float64]]:
+    """
+    Maps an angle into the range [-π, π).
+    :param angle: any angle as float or array of floats
+    :return: normalized angle or array of normalized angles
+    """
+    return ((angle + np.pi) % (2 * np.pi)) - np.pi
+
+
+def get_rotation_matrices_from_euler_array(euler_angles_array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """
+    Convert Euler angles to rotation matrices using Tait-Bryan ZYX convention (yaw-pitch-roll).
+
+    Convention: Intrinsic rotations in order Z-Y-X (yaw, pitch, roll)
+    Equivalent to: R = R_z(yaw) @ R_y(pitch) @ R_x(roll)
+    """
+    assert euler_angles_array.ndim >= 1 and euler_angles_array.shape[-1] == len(EulerAnglesIndex)
+
+    # Store original shape for reshaping later
+    original_shape = euler_angles_array.shape[:-1]
+
+    # Flatten to 2D if needed
+    if euler_angles_array.ndim > 2:
+        euler_angles_array_ = euler_angles_array.reshape(-1, len(EulerAnglesIndex))
+    else:
+        euler_angles_array_ = euler_angles_array
+
+    # Extract roll, pitch, yaw for all samples at once
+    roll = euler_angles_array_[:, EulerAnglesIndex.ROLL]
+    pitch = euler_angles_array_[:, EulerAnglesIndex.PITCH]
+    yaw = euler_angles_array_[:, EulerAnglesIndex.YAW]
+
+    # Compute sin/cos for all angles at once
+    # NOTE: (c/s = cos/sin, r/p/y = roll/pitch/yaw)
+    cr, sr = np.cos(roll), np.sin(roll)
+    cp, sp = np.cos(pitch), np.sin(pitch)
+    cy, sy = np.cos(yaw), np.sin(yaw)
+
+    # Build rotation matrices for entire batch
+    batch_size = euler_angles_array_.shape[0]
+    rotation_matrices = np.zeros((batch_size, 3, 3), dtype=np.float64)
+
+    # Formula for ZYX Tait-Bryan rotation matrix:
+    # R = | cy*cp    cy*sp*sr - sy*cr    cy*sp*cr + sy*sr |
+    #     | sy*cp    sy*sp*sr + cy*cr    sy*sp*cr - cy*sr |
+    #     | -sp      cp*sr               cp*cr            |
+
+    # ZYX Tait-Bryan rotation matrix elements
+    rotation_matrices[:, 0, 0] = cy * cp
+    rotation_matrices[:, 1, 0] = sy * cp
+    rotation_matrices[:, 2, 0] = -sp
+
+    rotation_matrices[:, 0, 1] = cy * sp * sr - sy * cr
+    rotation_matrices[:, 1, 1] = sy * sp * sr + cy * cr
+    rotation_matrices[:, 2, 1] = cp * sr
+
+    rotation_matrices[:, 0, 2] = cy * sp * cr + sy * sr
+    rotation_matrices[:, 1, 2] = sy * sp * cr - cy * sr
+    rotation_matrices[:, 2, 2] = cp * cr
+
+    # Reshape back to original batch dimensions + (3, 3)
+    if len(original_shape) > 1:
+        rotation_matrices = rotation_matrices.reshape(original_shape + (3, 3))
+
+    return rotation_matrices
+
+
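The Euler-to-matrix and matrix-to-Euler conversions below are inverses away from the pitch = ±π/2 singularity. A quick round-trip sketch (assuming `EulerAnglesIndex` orders roll, pitch, yaw):

```python
import numpy as np

from py123d.geometry.utils.rotation_utils import (
    get_euler_array_from_rotation_matrices,
    get_rotation_matrices_from_euler_array,
)

# Two (roll, pitch, yaw) triplets; pitch stays clear of +-pi/2 gimbal lock.
euler = np.array([[0.1, -0.2, 1.5], [0.0, 0.3, -2.0]])

R = get_rotation_matrices_from_euler_array(euler)  # shape (2, 3, 3)
recovered = get_euler_array_from_rotation_matrices(R)
assert np.allclose(recovered, euler)
```
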
+def get_euler_array_from_rotation_matrices(rotation_matrices: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """Convert rotation matrices to Euler angles using Tait-Bryan ZYX convention (yaw-pitch-roll).
+
+    :param rotation_matrices: Rotation matrices of shape (..., 3, 3)
+    :return: Euler angles of shape (..., 3), indexed by EulerAnglesIndex
+    """
+    assert rotation_matrices.ndim >= 2 and rotation_matrices.shape[-2:] == (3, 3)
+
+    original_shape = rotation_matrices.shape[:-2]
+
+    # Flatten to 3D if needed, i.e. (N, 3, 3)
+    if rotation_matrices.ndim > 3:
+        R = rotation_matrices.reshape(-1, 3, 3)
+    else:
+        R = rotation_matrices
+
+    batch_size = R.shape[0]
+    euler_angles = np.zeros((batch_size, len(EulerAnglesIndex)), dtype=np.float64)
+
+    # Calculate yaw (rotation around Z-axis)
+    euler_angles[:, EulerAnglesIndex.YAW] = np.arctan2(R[:, 1, 0], R[:, 0, 0])
+
+    # Calculate pitch (rotation around Y-axis)
+    # NOTE: Clip to avoid numerical issues with arcsin
+    sin_pitch = np.clip(-R[:, 2, 0], -1.0, 1.0)
+    euler_angles[:, EulerAnglesIndex.PITCH] = np.arcsin(sin_pitch)
+
+    # Calculate roll (rotation around X-axis)
+    euler_angles[:, EulerAnglesIndex.ROLL] = np.arctan2(R[:, 2, 1], R[:, 2, 2])
+
+    # Reshape back to original batch dimensions + (3,)
+    if len(original_shape) > 1:
+        euler_angles = euler_angles.reshape(original_shape + (len(EulerAnglesIndex),))
+
+    return euler_angles
+
+
+def get_euler_array_from_rotation_matrix(rotation_matrix: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    assert rotation_matrix.ndim == 2 and rotation_matrix.shape == (3, 3)
+    return get_euler_array_from_rotation_matrices(rotation_matrix[None, ...])[0]
+
+
+def get_quaternion_array_from_rotation_matrices(rotation_matrices: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    assert rotation_matrices.ndim >= 2
+    assert rotation_matrices.shape[-1] == rotation_matrices.shape[-2] == 3
+    # http://www.euclideanspace.com/maths/geometry/rotations/conversions/matrixToQuaternion/
+
+    # TODO: Update with:
+    # https://d3cw3dd2w32x2b.cloudfront.net/wp-content/uploads/2015/01/matrix-to-quat.pdf
+
+    original_shape = rotation_matrices.shape[:-2]
+
+    # Extract rotation matrix elements for vectorized operations
+    if rotation_matrices.ndim > 3:
+        R = rotation_matrices.reshape(-1, 3, 3)
+    else:
+        R = rotation_matrices
+
+    N = R.shape[0]
+    quaternions = np.zeros((N, 4), dtype=np.float64)
+
+    # Compute trace for each matrix
+    trace = np.trace(R, axis1=1, axis2=2)
+
+    # Case 1: trace > 0 (most common case)
+    mask1 = trace > 0
+    s1 = np.sqrt(trace[mask1] + 1.0) * 2  # s = 4 * qw
+    quaternions[mask1, QuaternionIndex.QW] = 0.25 * s1
+    quaternions[mask1, QuaternionIndex.QX] = (R[mask1, 2, 1] - R[mask1, 1, 2]) / s1
+    quaternions[mask1, QuaternionIndex.QY] = (R[mask1, 0, 2] - R[mask1, 2, 0]) / s1
+    quaternions[mask1, QuaternionIndex.QZ] = (R[mask1, 1, 0] - R[mask1, 0, 1]) / s1
+
+    # Case 2: R[0,0] > R[1,1] and R[0,0] > R[2,2]
+    mask2 = (~mask1) & (R[:, 0, 0] > R[:, 1, 1]) & (R[:, 0, 0] > R[:, 2, 2])
+    s2 = np.sqrt(1.0 + R[mask2, 0, 0] - R[mask2, 1, 1] - R[mask2, 2, 2]) * 2  # s = 4 * qx
+    quaternions[mask2, QuaternionIndex.QW] = (R[mask2, 2, 1] - R[mask2, 1, 2]) / s2
+    quaternions[mask2, QuaternionIndex.QX] = 0.25 * s2  # x
+    quaternions[mask2, QuaternionIndex.QY] = (R[mask2, 0, 1] + R[mask2, 1, 0]) / s2
+    quaternions[mask2, QuaternionIndex.QZ] = (R[mask2, 0, 2] + R[mask2, 2, 0]) / s2
+
+    # Case 3: R[1,1] > R[2,2]
+    mask3 = (~mask1) & (~mask2) & (R[:, 1, 1] > R[:, 2, 2])
+    s3 = np.sqrt(1.0 + R[mask3, 1,
1] - R[mask3, 0, 0] - R[mask3, 2, 2]) * 2 # s = 4 * qy + quaternions[mask3, QuaternionIndex.QW] = (R[mask3, 0, 2] - R[mask3, 2, 0]) / s3 + quaternions[mask3, QuaternionIndex.QX] = (R[mask3, 0, 1] + R[mask3, 1, 0]) / s3 + quaternions[mask3, QuaternionIndex.QY] = 0.25 * s3 # y + quaternions[mask3, QuaternionIndex.QZ] = (R[mask3, 1, 2] + R[mask3, 2, 1]) / s3 + + # Case 4: R[2,2] is largest + mask4 = (~mask1) & (~mask2) & (~mask3) + s4 = np.sqrt(1.0 + R[mask4, 2, 2] - R[mask4, 0, 0] - R[mask4, 1, 1]) * 2 # s = 4 * qz + quaternions[mask4, QuaternionIndex.QW] = (R[mask4, 1, 0] - R[mask4, 0, 1]) / s4 + quaternions[mask4, QuaternionIndex.QX] = (R[mask4, 0, 2] + R[mask4, 2, 0]) / s4 + quaternions[mask4, QuaternionIndex.QY] = (R[mask4, 1, 2] + R[mask4, 2, 1]) / s4 + quaternions[mask4, QuaternionIndex.QZ] = 0.25 * s4 # z + + assert np.all(mask1 | mask2 | mask3 | mask4), "All matrices should fall into one of the four cases." + + quaternions = normalize_quaternion_array(quaternions) + + # Reshape back to original batch dimensions + (4,) + if len(original_shape) > 1: + quaternions = quaternions.reshape(original_shape + (len(QuaternionIndex),)) + + return quaternions + + +def get_quaternion_array_from_rotation_matrix(rotation_matrix: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: + assert rotation_matrix.ndim == 2 and rotation_matrix.shape == (3, 3) + return get_quaternion_array_from_rotation_matrices(rotation_matrix[None, ...])[0] + + +def get_quaternion_array_from_euler_array(euler_angles: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: + assert euler_angles.ndim >= 1 and euler_angles.shape[-1] == len(EulerAnglesIndex) + + # Store original shape for reshaping later + original_shape = euler_angles.shape[:-1] + + # Flatten to 2D if needed + if euler_angles.ndim > 2: + euler_angles_ = euler_angles.reshape(-1, len(EulerAnglesIndex)) + else: + euler_angles_ = euler_angles + + # Extract roll, pitch, yaw + roll = euler_angles_[..., EulerAnglesIndex.ROLL] + pitch = euler_angles_[..., EulerAnglesIndex.PITCH] + yaw = euler_angles_[..., EulerAnglesIndex.YAW] + + # Half angles + roll_half = roll / 2.0 + pitch_half = pitch / 2.0 + yaw_half = yaw / 2.0 + + # Compute sin/cos for half angles + cos_roll_half = np.cos(roll_half) + sin_roll_half = np.sin(roll_half) + cos_pitch_half = np.cos(pitch_half) + sin_pitch_half = np.sin(pitch_half) + cos_yaw_half = np.cos(yaw_half) + sin_yaw_half = np.sin(yaw_half) + + # Compute quaternion components (ZYX intrinsic rotation order) + qw = cos_roll_half * cos_pitch_half * cos_yaw_half + sin_roll_half * sin_pitch_half * sin_yaw_half + qx = sin_roll_half * cos_pitch_half * cos_yaw_half - cos_roll_half * sin_pitch_half * sin_yaw_half + qy = cos_roll_half * sin_pitch_half * cos_yaw_half + sin_roll_half * cos_pitch_half * sin_yaw_half + qz = cos_roll_half * cos_pitch_half * sin_yaw_half - sin_roll_half * sin_pitch_half * cos_yaw_half + + # Stack into quaternion array + quaternions = np.stack([qw, qx, qy, qz], axis=-1) + + # Reshape back to original batch dimensions + (4,) + if len(original_shape) > 1: + quaternions = quaternions.reshape(original_shape + (len(QuaternionIndex),)) + + return normalize_quaternion_array(quaternions) + + +def get_rotation_matrix_from_euler_array(euler_angles: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: + assert euler_angles.ndim == 1 and euler_angles.shape[0] == len(EulerAnglesIndex) + return get_rotation_matrices_from_euler_array(euler_angles[None, ...])[0] + + +def get_rotation_matrices_from_quaternion_array(quaternion_array: 
npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    assert quaternion_array.ndim >= 1 and quaternion_array.shape[-1] == len(QuaternionIndex)
+
+    # Store original shape for reshaping later
+    original_shape = quaternion_array.shape[:-1]
+
+    # Flatten to 2D if needed
+    if quaternion_array.ndim > 2:
+        quaternion_array_ = quaternion_array.reshape(-1, len(QuaternionIndex))
+    else:
+        quaternion_array_ = quaternion_array
+
+    norm_quaternion = normalize_quaternion_array(quaternion_array_)
+    Q_matrices = get_q_matrices(norm_quaternion)
+    Q_bar_matrices = get_q_bar_matrices(norm_quaternion)
+    rotation_matrix = batch_matmul(Q_matrices, Q_bar_matrices.conj().swapaxes(-1, -2))
+    rotation_matrix = rotation_matrix[:, 1:][:, :, 1:]
+
+    # Reshape back to original batch dimensions + (3, 3)
+    if len(original_shape) > 1:
+        rotation_matrix = rotation_matrix.reshape(original_shape + (3, 3))
+
+    return rotation_matrix
+
+
+def get_rotation_matrix_from_quaternion_array(quaternion_array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    # TODO: Check if this function is necessary or batch-wise function is universally applicable
+    assert quaternion_array.ndim == 1 and quaternion_array.shape[0] == len(QuaternionIndex)
+    return get_rotation_matrices_from_quaternion_array(quaternion_array[None, :])[0]
+
+
+def get_euler_array_from_quaternion_array(quaternion_array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    assert quaternion_array.ndim >= 1 and quaternion_array.shape[-1] == len(QuaternionIndex)
+    norm_quaternion = normalize_quaternion_array(quaternion_array)
+    QW, QX, QY, QZ = (
+        norm_quaternion[..., QuaternionIndex.QW],
+        norm_quaternion[..., QuaternionIndex.QX],
+        norm_quaternion[..., QuaternionIndex.QY],
+        norm_quaternion[..., QuaternionIndex.QZ],
+    )
+
+    # NOTE: Signs follow the standard Hamilton ZYX conversion, consistent with
+    # get_euler_array_from_rotation_matrices applied to the rotation matrix of q,
+    # e.g. yaw = atan2(2(qw*qz + qx*qy), 1 - 2(qy^2 + qz^2)).
+    euler_angles = np.zeros_like(quaternion_array[..., :3])
+    euler_angles[..., EulerAnglesIndex.YAW] = np.arctan2(
+        2 * (QW * QZ + QX * QY),
+        1 - 2 * (QY**2 + QZ**2),
+    )
+    euler_angles[..., EulerAnglesIndex.PITCH] = np.arcsin(
+        np.clip(2 * (QW * QY - QX * QZ), -1.0, 1.0),
+    )
+    euler_angles[..., EulerAnglesIndex.ROLL] = np.arctan2(
+        2 * (QW * QX + QY * QZ),
+        1 - 2 * (QX**2 + QY**2),
+    )
+
+    return euler_angles
+
+
+def conjugate_quaternion_array(quaternion_array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """Computes the conjugate of an array of quaternions in the order [qw, qx, qy, qz].
+
+    :param quaternion_array: Array of quaternions.
+    :return: Array of conjugated quaternions.
+    """
+    assert quaternion_array.ndim >= 1
+    assert quaternion_array.shape[-1] == len(QuaternionIndex)
+    conjugated_quaternions = np.zeros_like(quaternion_array)
+    conjugated_quaternions[..., QuaternionIndex.SCALAR] = quaternion_array[..., QuaternionIndex.SCALAR]
+    conjugated_quaternions[..., QuaternionIndex.VECTOR] = -quaternion_array[..., QuaternionIndex.VECTOR]
+    return conjugated_quaternions
+
+
+def invert_quaternion_array(quaternion_array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """Computes the inverse of an array of quaternions in the order [qw, qx, qy, qz].
+
+    :param quaternion_array: Array of quaternions.
+    :return: Array of inverted quaternions.
+    """
+    assert quaternion_array.ndim >= 1
+    assert quaternion_array.shape[-1] == len(QuaternionIndex)
+    norm_squared = np.sum(quaternion_array**2, axis=-1, keepdims=True)
+    assert np.all(norm_squared > 0), "Cannot invert a quaternion with zero norm."
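+    # NOTE: The inverse follows the identity q^-1 = conj(q) / ||q||^2, so for
+    # unit quaternions the inverse equals the conjugate. Sanity check:
+    # multiply_quaternion_arrays(q, invert_quaternion_array(q)) recovers the
+    # identity quaternion [1, 0, 0, 0] up to floating-point error.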
+    conjugated_quaternions = conjugate_quaternion_array(quaternion_array)
+    inverted_quaternions = conjugated_quaternions / norm_squared
+    return inverted_quaternions
+
+
+def normalize_quaternion_array(quaternion_array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """Normalizes an array of quaternions in the order [qw, qx, qy, qz].
+
+    :param quaternion_array: Array of quaternions.
+    :return: Array of normalized quaternions.
+    """
+    assert quaternion_array.ndim >= 1
+    assert quaternion_array.shape[-1] == len(QuaternionIndex)
+    norm = np.linalg.norm(quaternion_array, axis=-1, keepdims=True)
+    assert np.all(norm > 0), "Cannot normalize a quaternion with zero norm."
+    normalized_quaternions = quaternion_array / norm
+    return normalized_quaternions
+
+
+def multiply_quaternion_arrays(q1: npt.NDArray[np.float64], q2: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """Multiplies two arrays of quaternions element-wise, in the order [qw, qx, qy, qz].
+
+    :param q1: First array of quaternions.
+    :param q2: Second array of quaternions.
+    :return: Array of resulting quaternions after multiplication.
+    """
+    assert q1.ndim >= 1
+    assert q2.ndim >= 1
+    assert q1.shape[-1] == q2.shape[-1] == len(QuaternionIndex)
+
+    # Vectorized quaternion (Hamilton) multiplication
+    qw1, qx1, qy1, qz1 = (
+        q1[..., QuaternionIndex.QW],
+        q1[..., QuaternionIndex.QX],
+        q1[..., QuaternionIndex.QY],
+        q1[..., QuaternionIndex.QZ],
+    )
+    qw2, qx2, qy2, qz2 = (
+        q2[..., QuaternionIndex.QW],
+        q2[..., QuaternionIndex.QX],
+        q2[..., QuaternionIndex.QY],
+        q2[..., QuaternionIndex.QZ],
+    )
+
+    quaternions = np.stack(
+        [
+            qw1 * qw2 - qx1 * qx2 - qy1 * qy2 - qz1 * qz2,
+            qw1 * qx2 + qx1 * qw2 + qy1 * qz2 - qz1 * qy2,
+            qw1 * qy2 - qx1 * qz2 + qy1 * qw2 + qz1 * qx2,
+            qw1 * qz2 + qx1 * qy2 - qy1 * qx2 + qz1 * qw2,
+        ],
+        axis=-1,
+    )
+    return quaternions
+
+
+def get_q_matrices(quaternion_array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """Computes the Q (left-multiplication) matrices for an array of quaternions in the order [qw, qx, qy, qz].
+
+    :param quaternion_array: Array of quaternions.
+    :return: Array of Q matrices.
+    """
+    assert quaternion_array.ndim >= 1
+    assert quaternion_array.shape[-1] == len(QuaternionIndex)
+
+    qw = quaternion_array[..., QuaternionIndex.QW]
+    qx = quaternion_array[..., QuaternionIndex.QX]
+    qy = quaternion_array[..., QuaternionIndex.QY]
+    qz = quaternion_array[..., QuaternionIndex.QZ]
+
+    batch_shape = quaternion_array.shape[:-1]
+    Q_matrices = np.zeros(batch_shape + (4, 4), dtype=np.float64)
+
+    Q_matrices[..., 0, 0] = qw
+    Q_matrices[..., 0, 1] = -qx
+    Q_matrices[..., 0, 2] = -qy
+    Q_matrices[..., 0, 3] = -qz
+
+    Q_matrices[..., 1, 0] = qx
+    Q_matrices[..., 1, 1] = qw
+    Q_matrices[..., 1, 2] = -qz
+    Q_matrices[..., 1, 3] = qy
+
+    Q_matrices[..., 2, 0] = qy
+    Q_matrices[..., 2, 1] = qz
+    Q_matrices[..., 2, 2] = qw
+    Q_matrices[..., 2, 3] = -qx
+
+    Q_matrices[..., 3, 0] = qz
+    Q_matrices[..., 3, 1] = -qy
+    Q_matrices[..., 3, 2] = qx
+    Q_matrices[..., 3, 3] = qw
+
+    return Q_matrices
+
+
+def get_q_bar_matrices(quaternion_array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+    """Computes the Q-bar (right-multiplication) matrices for an array of quaternions in the order [qw, qx, qy, qz].
+
+    :param quaternion_array: Array of quaternions.
+    :return: Array of Q-bar matrices.
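+
+    NOTE: For quaternions q1, q2 viewed as 4-vectors, Q(q1) @ q2 yields the
+    Hamilton product q1 * q2, while Q_bar(q1) @ q2 yields q2 * q1. For a unit
+    quaternion q, the product Q(q) @ Q_bar(q)^T is block-diagonal with the 3x3
+    rotation matrix of q in its lower-right block, which is how
+    get_rotation_matrices_from_quaternion_array recovers rotation matrices.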
+ """ + assert quaternion_array.ndim >= 1 + assert quaternion_array.shape[-1] == len(QuaternionIndex) + + qw = quaternion_array[..., QuaternionIndex.QW] + qx = quaternion_array[..., QuaternionIndex.QX] + qy = quaternion_array[..., QuaternionIndex.QY] + qz = quaternion_array[..., QuaternionIndex.QZ] + + batch_shape = quaternion_array.shape[:-1] + Q_bar_matrices = np.zeros(batch_shape + (4, 4), dtype=np.float64) + + Q_bar_matrices[..., 0, 0] = qw + Q_bar_matrices[..., 0, 1] = -qx + Q_bar_matrices[..., 0, 2] = -qy + Q_bar_matrices[..., 0, 3] = -qz + + Q_bar_matrices[..., 1, 0] = qx + Q_bar_matrices[..., 1, 1] = qw + Q_bar_matrices[..., 1, 2] = qz + Q_bar_matrices[..., 1, 3] = -qy + + Q_bar_matrices[..., 2, 0] = qy + Q_bar_matrices[..., 2, 1] = -qz + Q_bar_matrices[..., 2, 2] = qw + Q_bar_matrices[..., 2, 3] = qx + + Q_bar_matrices[..., 3, 0] = qz + Q_bar_matrices[..., 3, 1] = qy + Q_bar_matrices[..., 3, 2] = -qx + Q_bar_matrices[..., 3, 3] = qw + + return Q_bar_matrices diff --git a/d123/common/geometry/units.py b/src/py123d/geometry/utils/units.py similarity index 100% rename from d123/common/geometry/units.py rename to src/py123d/geometry/utils/units.py diff --git a/src/py123d/geometry/vector.py b/src/py123d/geometry/vector.py new file mode 100644 index 00000000..f2846c1f --- /dev/null +++ b/src/py123d/geometry/vector.py @@ -0,0 +1,267 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Iterable + +import numpy as np +import numpy.typing as npt + +from py123d.common.utils.mixin import ArrayMixin +from py123d.geometry.geometry_index import Vector2DIndex, Vector3DIndex + + +class Vector2D(ArrayMixin): + """ + Class to represents 2D vectors, in x, y direction. + + Example: + >>> v1 = Vector2D(3.0, 4.0) + >>> v2 = Vector2D(1.0, 2.0) + >>> v3 = v1 + v2 + >>> v3 + Vector2D(4.0, 6.0) + >>> v1.array + array([3., 4.]) + >>> v1.magnitude + 5.0 + """ + + _array: npt.NDArray[np.float64] + + def __init__(self, x: float, y: float): + """Initialize Vector2D with x, y components.""" + array = np.zeros(len(Vector2DIndex), dtype=np.float64) + array[Vector2DIndex.X] = x + array[Vector2DIndex.Y] = y + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> Vector2D: + """Constructs a Vector2D from a numpy array. + + :param array: Array of shape (2,) representing the vector components [x, y], indexed by \ + :class:`~py123d.geometry.Vector2DIndex`. + :param copy: Whether to copy the input array. Defaults to True. + :return: A Vector2D instance. + """ + assert array.ndim == 1 + assert array.shape[0] == len(Vector2DIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def x(self) -> float: + """The x component of the vector. + + :return: The x component of the vector. + """ + return self._array[Vector2DIndex.X] + + @property + def y(self) -> float: + """The y component of the vector. + + :return: The y component of the vector. + """ + return self._array[Vector2DIndex.Y] + + @property + def array(self) -> npt.NDArray[np.float64]: + """The array representation of the 2D vector. + + :return: A numpy array of shape (2,) containing the vector components [x, y], indexed by \ + :class:`~py123d.geometry.Vector2DIndex`. 
+ """ + array = np.zeros(len(Vector2DIndex), dtype=np.float64) + array[Vector2DIndex.X] = self.x + array[Vector2DIndex.Y] = self.y + return array + + @property + def magnitude(self) -> float: + """Calculates the magnitude (length) of the 2D vector. + + :return: The magnitude of the vector. + """ + return float(np.linalg.norm(self.array)) + + @property + def vector_2d(self) -> Vector2D: + """The 2D vector itself. Handy for polymorphism. + + :return: A Vector2D instance representing the 2D vector. + """ + return self + + def __add__(self, other: Vector2D) -> Vector2D: + """Adds two 2D vectors. + + :param other: The other vector to add. + :return: A new Vector2D instance representing the sum. + """ + return Vector2D(self.x + other.x, self.y + other.y) + + def __sub__(self, other: Vector2D) -> Vector2D: + """Subtracts two 2D vectors. + + :param other: The other vector to subtract. + :return: A new Vector2D instance representing the difference. + """ + return Vector2D(self.x - other.x, self.y - other.y) + + def __mul__(self, scalar: float) -> Vector2D: + """Multiplies the 2D vector by a scalar. + + :param scalar: The scalar value to multiply with. + :return: A new Vector2D instance representing the scaled vector. + """ + return Vector2D(self.x * scalar, self.y * scalar) + + def __truediv__(self, scalar: float) -> Vector2D: + """Divides the 2D vector by a scalar. + + :param scalar: The scalar value to divide by. + :return: A new Vector2D instance representing the divided vector. + """ + return Vector2D(self.x / scalar, self.y / scalar) + + def __iter__(self) -> Iterable[float]: + """Iterator over vector components.""" + return iter((self.x, self.y)) + + def __hash__(self) -> int: + """Hash method""" + return hash((self.x, self.y)) + + +@dataclass +class Vector3D(ArrayMixin): + """ + Class to represents 3D vectors, in x, y, z direction. + + Example: + >>> v1 = Vector3D(1.0, 2.0, 3.0) + >>> v2 = Vector3D(4.0, 5.0, 6.0) + >>> v3 = v1 + v2 + >>> v3 + Vector3D(5.0, 7.0, 9.0) + >>> v1.array + array([1., 2., 3.]) + >>> v1.magnitude + 3.7416573867739413 + """ + + _array: npt.NDArray[np.float64] + + def __init__(self, x: float, y: float, z: float): + """Initialize Vector3D with x, y, z components.""" + array = np.zeros(len(Vector3DIndex), dtype=np.float64) + array[Vector3DIndex.X] = x + array[Vector3DIndex.Y] = y + array[Vector3DIndex.Z] = z + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> Vector3D: + """Constructs a Vector3D from a numpy array. + + :param array: Array of shape (3,), indexed by :class:`~py123d.geometry.geometry_index.Vector3DIndex`. + :param copy: Whether to copy the input array. Defaults to True. + :return: A Vector3D instance. + """ + assert array.ndim == 1 + assert array.shape[0] == len(Vector3DIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def x(self) -> float: + """The x component of the vector. + + :return: The x component of the vector. + """ + return self._array[Vector3DIndex.X] + + @property + def y(self) -> float: + """The y component of the vector. + + :return: The y component of the vector. + """ + return self._array[Vector3DIndex.Y] + + @property + def z(self) -> float: + """The z component of the vector. + + :return: The z component of the vector. 
+ """ + return self._array[Vector3DIndex.Z] + + @property + def array(self) -> npt.NDArray[np.float64]: + """ + Returns the vector components as a numpy array + + :return: A numpy array representing the vector components [x, y, z], indexed by \ + :class:`~py123d.geometry.geometry_index.Vector3DIndex`. + """ + return self._array + + @property + def magnitude(self) -> float: + """Calculates the magnitude (length) of the 3D vector. + + :return: The magnitude of the vector. + """ + return float(np.linalg.norm(self.array)) + + @property + def vector_2d(self) -> Vector2D: + """Returns the 2D vector projection (x, y) of the 3D vector. + + :return: A Vector2D instance representing the 2D projection. + """ + return Vector2D(self.x, self.y) + + def __add__(self, other: Vector3D) -> Vector3D: + """Adds two 3D vectors. + + :param other: The other vector to add. + :return: A new Vector2D instance representing the sum. + """ + return Vector3D(self.x + other.x, self.y + other.y, self.z + other.z) + + def __sub__(self, other: Vector3D) -> Vector3D: + """Subtracts two 3D vectors. + + :param other: The other vector to subtract. + :return: A new Vector3D instance representing the difference. + """ + return Vector3D(self.x - other.x, self.y - other.y, self.z - other.z) + + def __mul__(self, scalar: float) -> Vector3D: + """Multiplies the 2D vector by a scalar. + + :param scalar: The scalar value to multiply with. + :return: A new Vector3D instance representing the scaled vector. + """ + return Vector3D(self.x * scalar, self.y * scalar, self.z * scalar) + + def __truediv__(self, scalar: float) -> Vector3D: + """Divides the 2D vector by a scalar. + + :param scalar: The scalar value to divide by. + :return: A new Vector3D instance representing the divided vector. + """ + return Vector3D(self.x / scalar, self.y / scalar, self.z / scalar) + + def __iter__(self) -> Iterable[float]: + """Iterator over vector components.""" + return iter((self.x, self.y, self.z)) + + def __hash__(self) -> int: + """Hash method""" + return hash((self.x, self.y, self.z)) diff --git a/d123/script/config/common/__init__.py b/src/py123d/script/__init__.py similarity index 100% rename from d123/script/config/common/__init__.py rename to src/py123d/script/__init__.py diff --git a/d123/script/config/common/worker/__init__.py b/src/py123d/script/builders/__init__.py similarity index 100% rename from d123/script/config/common/worker/__init__.py rename to src/py123d/script/builders/__init__.py diff --git a/src/py123d/script/builders/dataset_converter_builder.py b/src/py123d/script/builders/dataset_converter_builder.py new file mode 100644 index 00000000..53f9531d --- /dev/null +++ b/src/py123d/script/builders/dataset_converter_builder.py @@ -0,0 +1,22 @@ +import logging +from typing import List + +from hydra.utils import instantiate +from omegaconf import DictConfig + +from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter +from py123d.script.builders.utils.utils_type import validate_type + +logger = logging.getLogger(__name__) + + +def build_dataset_converters(cfg: DictConfig) -> List[AbstractDatasetConverter]: + logger.info("Building AbstractDatasetConverter...") + instantiated_datasets: List[AbstractDatasetConverter] = [] + for dataset_type in cfg.values(): + processor: AbstractDatasetConverter = instantiate(dataset_type) + validate_type(processor, AbstractDatasetConverter) + instantiated_datasets.append(processor) + + logger.info("Building AbstractDatasetConverter...DONE!") + return instantiated_datasets diff 
--git a/d123/script/builders/scene_builder_builder.py b/src/py123d/script/builders/scene_builder_builder.py similarity index 90% rename from d123/script/builders/scene_builder_builder.py rename to src/py123d/script/builders/scene_builder_builder.py index 4424b550..3c7523a4 100644 --- a/d123/script/builders/scene_builder_builder.py +++ b/src/py123d/script/builders/scene_builder_builder.py @@ -5,7 +5,7 @@ from hydra.utils import instantiate from omegaconf import DictConfig -from d123.dataset.scene.scene_builder import SceneBuilder +from py123d.datatypes.scene.abstract_scene_builder import SceneBuilder logger = logging.getLogger(__name__) diff --git a/d123/script/builders/scene_filter_builder.py b/src/py123d/script/builders/scene_filter_builder.py similarity index 85% rename from d123/script/builders/scene_filter_builder.py rename to src/py123d/script/builders/scene_filter_builder.py index af3429a2..191af8ea 100644 --- a/d123/script/builders/scene_filter_builder.py +++ b/src/py123d/script/builders/scene_filter_builder.py @@ -4,7 +4,7 @@ from hydra.utils import instantiate from omegaconf import DictConfig -from d123.dataset.scene.scene_filter import SceneFilter +from py123d.datatypes.scene.scene_filter import SceneFilter logger = logging.getLogger(__name__) @@ -31,11 +31,11 @@ def build_scene_filter(cfg: DictConfig) -> SceneFilter: :return: Instance of SceneFilter. """ logger.info("Building SceneFilter...") - if cfg.scene_tokens and not all(map(is_valid_token, cfg.scene_tokens)): + if cfg.scene_uuids and not all(map(is_valid_token, cfg.scene_uuids)): raise RuntimeError( "Expected all scene tokens to be 16-character strings. Your shell may strip quotes " "causing hydra to parse a token as a float, so consider passing them like " - "scene_filter.scene_tokens='[\"595322e649225137\", ...]'" + "scene_filter.scene_uuids='[\"595322e649225137\", ...]'" ) scene_filter: SceneFilter = instantiate(cfg) assert isinstance(scene_filter, SceneFilter) diff --git a/d123/script/builders/utils/utils_type.py b/src/py123d/script/builders/utils/utils_type.py similarity index 100% rename from d123/script/builders/utils/utils_type.py rename to src/py123d/script/builders/utils/utils_type.py diff --git a/src/py123d/script/builders/viser_config_builder.py b/src/py123d/script/builders/viser_config_builder.py new file mode 100644 index 00000000..a94fcf7a --- /dev/null +++ b/src/py123d/script/builders/viser_config_builder.py @@ -0,0 +1,22 @@ +import logging + +from hydra.utils import instantiate +from omegaconf import DictConfig + +from py123d.script.builders.utils.utils_type import validate_type +from py123d.visualization.viser.viser_config import ViserConfig + +logger = logging.getLogger(__name__) + + +def build_viser_config(cfg: DictConfig) -> ViserConfig: + """ + Builds the config dataclass for the viser viewer. + :param cfg: DictConfig. Configuration that is used to run the viewer. + :return: Instance of ViserConfig. 
+ """ + logger.info("Building ViserConfig...") + viser_config: ViserConfig = instantiate(cfg) + validate_type(viser_config, ViserConfig) + logger.info("Building ViserConfig...DONE!") + return viser_config diff --git a/d123/script/builders/worker_pool_builder.py b/src/py123d/script/builders/worker_pool_builder.py similarity index 80% rename from d123/script/builders/worker_pool_builder.py rename to src/py123d/script/builders/worker_pool_builder.py index 123ec3c3..d5607da7 100644 --- a/d123/script/builders/worker_pool_builder.py +++ b/src/py123d/script/builders/worker_pool_builder.py @@ -3,8 +3,8 @@ from hydra.utils import instantiate from omegaconf import DictConfig -from d123.common.multithreading.worker_pool import WorkerPool -from d123.script.builders.utils.utils_type import validate_type +from py123d.common.multithreading.worker_pool import WorkerPool +from py123d.script.builders.utils.utils_type import validate_type logger = logging.getLogger(__name__) diff --git a/src/py123d/script/builders/writer_builder.py b/src/py123d/script/builders/writer_builder.py new file mode 100644 index 00000000..ebee9935 --- /dev/null +++ b/src/py123d/script/builders/writer_builder.py @@ -0,0 +1,26 @@ +import logging + +from hydra.utils import instantiate +from omegaconf import DictConfig + +from py123d.conversion.abstract_dataset_converter import AbstractLogWriter +from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from py123d.script.builders.utils.utils_type import validate_type + +logger = logging.getLogger(__name__) + + +def build_map_writer(cfg: DictConfig) -> AbstractMapWriter: + logger.info("Building AbstractMapWriter...") + map_writer: AbstractLogWriter = instantiate(cfg) + validate_type(map_writer, AbstractMapWriter) + logger.info("Building AbstractMapWriter...DONE!") + return map_writer + + +def build_log_writer(cfg: DictConfig) -> AbstractLogWriter: + logger.info("Building AbstractLogWriter...") + log_writer: AbstractLogWriter = instantiate(cfg) + validate_type(log_writer, AbstractLogWriter) + logger.info("Building AbstractLogWriter...DONE!") + return log_writer diff --git a/d123/script/config/dataset_conversion/__init__.py b/src/py123d/script/config/__init__.py similarity index 100% rename from d123/script/config/dataset_conversion/__init__.py rename to src/py123d/script/config/__init__.py diff --git a/d123/script/config/lightning_training/__init__.py b/src/py123d/script/config/common/__init__.py similarity index 100% rename from d123/script/config/lightning_training/__init__.py rename to src/py123d/script/config/common/__init__.py diff --git a/src/py123d/script/config/common/default_common.yaml b/src/py123d/script/config/common/default_common.yaml new file mode 100644 index 00000000..a43a5c2d --- /dev/null +++ b/src/py123d/script/config/common/default_common.yaml @@ -0,0 +1,8 @@ +# Default common configs +defaults: + - worker: ray_distributed + - scene_filter: all_scenes + - scene_builder: default_scene_builder + - override hydra/job_logging: disabled + - override hydra/hydra_logging: colorlog + - _self_ diff --git a/src/py123d/script/config/common/default_dataset_paths.yaml b/src/py123d/script/config/common/default_dataset_paths.yaml new file mode 100644 index 00000000..81d37227 --- /dev/null +++ b/src/py123d/script/config/common/default_dataset_paths.yaml @@ -0,0 +1,30 @@ +dataset_paths: + + # 123D defaults + py123d_data_root: ${oc.env:PY123D_DATA_ROOT,null} + py123d_logs_root: ${dataset_paths.py123d_data_root}/logs + py123d_maps_root: 
${dataset_paths.py123d_data_root}/maps + py123d_sensors_root: ${dataset_paths.py123d_data_root}/sensors + + # nuPlan defaults + nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT,null} + nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT,null} + nuplan_sensor_root: ${oc.env:NUPLAN_SENSOR_ROOT,null} + + # AV2 defaults + av2_data_root: ${oc.env:AV2_DATA_ROOT,null} + av2_sensor_data_root: ${dataset_paths.av2_data_root}/sensor + + # WOPD defaults + wopd_data_root: ${oc.env:WOPD_DATA_ROOT,null} + + # Pandaset defaults + pandaset_data_root: ${oc.env:PANDASET_DATA_ROOT,null} + + # KITTI360 defaults + kitti360_data_root: ${oc.env:KITTI360_DATA_ROOT,null} + + # nuScenes defaults + nuscenes_data_root: ${oc.env:NUSCENES_DATA_ROOT,null} + nuscenes_map_root: ${dataset_paths.nuscenes_data_root} + nuscenes_sensor_root: ${dataset_paths.nuscenes_data_root} diff --git a/src/py123d/script/config/common/default_experiment.yaml b/src/py123d/script/config/common/default_experiment.yaml new file mode 100644 index 00000000..b6f236c9 --- /dev/null +++ b/src/py123d/script/config/common/default_experiment.yaml @@ -0,0 +1,11 @@ +defaults: + - _self_ + - default_dataset_paths + - override hydra/job_logging: colorlog + - override hydra/hydra_logging: colorlog + +# Cache parameters +experiment_name: ??? +date_format: '%Y.%m.%d.%H.%M.%S' +experiment_uid: ${now:${date_format}} +output_dir: ${oc.env:PY123D_EXP_ROOT}/${experiment_name}/${experiment_uid} # path where output csv is saved diff --git a/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml b/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml new file mode 100644 index 00000000..cf2e553a --- /dev/null +++ b/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml @@ -0,0 +1,5 @@ +_target_: py123d.datatypes.scene.arrow.arrow_scene_builder.ArrowSceneBuilder +_convert_: 'all' + +logs_root: ${dataset_paths.py123d_logs_root} +maps_root: ${dataset_paths.py123d_maps_root} diff --git a/d123/script/config/common/scene_filter/log_scenes.yaml b/src/py123d/script/config/common/scene_filter/log_scenes.yaml similarity index 61% rename from d123/script/config/common/scene_filter/log_scenes.yaml rename to src/py123d/script/config/common/scene_filter/log_scenes.yaml index 68114df9..726b1d73 100644 --- a/d123/script/config/common/scene_filter/log_scenes.yaml +++ b/src/py123d/script/config/common/scene_filter/log_scenes.yaml @@ -1,13 +1,13 @@ -_target_: d123.dataset.scene.scene_filter.SceneFilter +_target_: py123d.datatypes.scene.scene_filter.SceneFilter _convert_: 'all' split_types: null -split_names: ["av2-sensor-mini_train"] +split_names: null log_names: null -map_names: null -scene_tokens: null +locations: null +scene_uuids: null timestamp_threshold_s: null ego_displacement_minimum_m: null diff --git a/src/py123d/script/config/common/scene_filter/nuplan_logs.yaml b/src/py123d/script/config/common/scene_filter/nuplan_logs.yaml new file mode 100644 index 00000000..a9dc8275 --- /dev/null +++ b/src/py123d/script/config/common/scene_filter/nuplan_logs.yaml @@ -0,0 +1,19 @@ +_target_: py123d.datatypes.scene.scene_filter.SceneFilter +_convert_: 'all' + +split_types: null +split_names: + - "nuplan_train" + - "nuplan_val" + - "nuplan_test" +log_names: null + + +locations: null +scene_uuids: null +timestamp_threshold_s: null +ego_displacement_minimum_m: null +max_num_scenes: null + +duration_s: null +history_s: null diff --git a/src/py123d/script/config/common/scene_filter/nuplan_mini_logs.yaml 
b/src/py123d/script/config/common/scene_filter/nuplan_mini_logs.yaml new file mode 100644 index 00000000..9fd43e0d --- /dev/null +++ b/src/py123d/script/config/common/scene_filter/nuplan_mini_logs.yaml @@ -0,0 +1,19 @@ +_target_: py123d.datatypes.scene.scene_filter.SceneFilter +_convert_: 'all' + +split_types: null +split_names: + - "nuplan_mini_train" + - "nuplan_mini_val" + - "nuplan_mini_test" +log_names: null + + +locations: null +scene_uuids: null +timestamp_threshold_s: null +ego_displacement_minimum_m: null +max_num_scenes: null + +duration_s: null +history_s: null diff --git a/src/py123d/script/config/common/scene_filter/viser_scenes.yaml b/src/py123d/script/config/common/scene_filter/viser_scenes.yaml new file mode 100644 index 00000000..1b619af1 --- /dev/null +++ b/src/py123d/script/config/common/scene_filter/viser_scenes.yaml @@ -0,0 +1,20 @@ +_target_: py123d.datatypes.scene.scene_filter.SceneFilter +_convert_: 'all' + +split_types: null +split_names: null +log_names: null + + +locations: null +scene_uuids: null +timestamp_threshold_s: null +ego_displacement_minimum_m: null + +duration_s: null +history_s: null + +pinhole_camera_types: null + +max_num_scenes: null +shuffle: True diff --git a/d123/script/config/viser/__init__.py b/src/py123d/script/config/common/worker/__init__.py similarity index 100% rename from d123/script/config/viser/__init__.py rename to src/py123d/script/config/common/worker/__init__.py diff --git a/d123/script/config/common/worker/ray_distributed.yaml b/src/py123d/script/config/common/worker/ray_distributed.yaml similarity index 89% rename from d123/script/config/common/worker/ray_distributed.yaml rename to src/py123d/script/config/common/worker/ray_distributed.yaml index 2c101f66..bb663462 100644 --- a/d123/script/config/common/worker/ray_distributed.yaml +++ b/src/py123d/script/config/common/worker/ray_distributed.yaml @@ -1,4 +1,4 @@ -_target_: d123.common.multithreading.worker_ray.RayDistributed +_target_: py123d.common.multithreading.worker_ray.RayDistributed _convert_: 'all' master_node_ip: null # Set to a master node IP if you desire to connect to cluster remotely threads_per_node: null # Number of CPU threads to use per node, "null" means all threads available diff --git a/src/py123d/script/config/common/worker/sequential.yaml b/src/py123d/script/config/common/worker/sequential.yaml new file mode 100644 index 00000000..fe3d04f8 --- /dev/null +++ b/src/py123d/script/config/common/worker/sequential.yaml @@ -0,0 +1,2 @@ +_target_: py123d.common.multithreading.worker_sequential.Sequential +_convert_: 'all' diff --git a/d123/script/config/common/worker/single_machine_thread_pool.yaml b/src/py123d/script/config/common/worker/single_machine_thread_pool.yaml similarity index 73% rename from d123/script/config/common/worker/single_machine_thread_pool.yaml rename to src/py123d/script/config/common/worker/single_machine_thread_pool.yaml index ace763f6..1344c762 100644 --- a/d123/script/config/common/worker/single_machine_thread_pool.yaml +++ b/src/py123d/script/config/common/worker/single_machine_thread_pool.yaml @@ -1,4 +1,4 @@ -_target_: d123.common.multithreading.worker_parallel.SingleMachineParallelExecutor +_target_: py123d.common.multithreading.worker_parallel.SingleMachineParallelExecutor _convert_: 'all' use_process_pool: True # If true, use ProcessPoolExecutor as the backend, otherwise uses ThreadPoolExecutor max_workers: 16 # Number of CPU workers (threads/processes) to use per node, "null" means all available diff --git 
a/d123/simulation/__init__.py b/src/py123d/script/config/conversion/__init__.py similarity index 100% rename from d123/simulation/__init__.py rename to src/py123d/script/config/conversion/__init__.py diff --git a/d123/simulation/agents/__init__.py b/src/py123d/script/config/conversion/datasets/__init__.py similarity index 100% rename from d123/simulation/agents/__init__.py rename to src/py123d/script/config/conversion/datasets/__init__.py diff --git a/src/py123d/script/config/conversion/datasets/av2_sensor_dataset.yaml b/src/py123d/script/config/conversion/datasets/av2_sensor_dataset.yaml new file mode 100644 index 00000000..ff8a2433 --- /dev/null +++ b/src/py123d/script/config/conversion/datasets/av2_sensor_dataset.yaml @@ -0,0 +1,36 @@ +av2_sensor_dataset: + _target_: py123d.conversion.datasets.av2.av2_sensor_converter.AV2SensorConverter + _convert_: 'all' + + splits: ["av2-sensor_train"] + av2_data_root: ${dataset_paths.av2_data_root} + + dataset_converter_config: + _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig + _convert_: 'all' + + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + + # Map + include_map: true + + # Ego + include_ego: true + + # Box Detections + include_box_detections: true + + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "binary" # "path", "binary", "mp4" + + # LiDARs + include_lidars: true + lidar_store_option: "binary" # "path", "path_merged", "binary" + + # Not available: + include_traffic_lights: false + include_scenario_tags: false + include_route: false + include_fisheye_mei_cameras: false diff --git a/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml new file mode 100644 index 00000000..544eb879 --- /dev/null +++ b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml @@ -0,0 +1,65 @@ +kitti360_dataset: + _target_: py123d.conversion.datasets.kitti360.kitti360_converter.Kitti360Converter + _convert_: 'all' + + splits: ["kitti360_train", "kitti360_val", "kitti360_test"] + + kitti360_data_root: ${dataset_paths.kitti360_data_root} + + # NOTE: We preprocess detections into cache directory to speed up repeated conversions + # The bounding boxes are preprocessed into a per-frame format based on the ego distance and + # visibility based on the lidar point cloud. 
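+  # Example override (illustration only; assumed Hydra CLI syntax and entrypoint):
+  #   python src/py123d/script/run_conversion.py 'datasets=[kitti360_dataset]' \
+  #     datasets.kitti360_dataset.detection_radius=80.0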
+ detection_cache_root: ${dataset_paths.kitti360_data_root}/preprocessed_detections + detection_radius: 60.0 + + # NOTE: + dataset_converter_config: + _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig + _convert_: 'all' + + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + + # Map + include_map: true + + # Ego + include_ego: true + + # Box Detections + include_box_detections: true + + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "binary" + + # Fisheye Cameras + include_fisheye_mei_cameras: true + fisheye_mei_camera_store_option: "binary" + + # LiDARs + include_lidars: true + lidar_store_option: "path" + + # Not available: + include_traffic_lights: false + include_route: false + include_scenario_tags: false + + train_sequences: + - "2013_05_28_drive_0000_sync" + - "2013_05_28_drive_0002_sync" + - "2013_05_28_drive_0003_sync" + + + val_sequences: + - "2013_05_28_drive_0004_sync" + - "2013_05_28_drive_0005_sync" + - "2013_05_28_drive_0006_sync" + - "2013_05_28_drive_0007_sync" + + test_sequences: + - "2013_05_28_drive_0008_sync" + - "2013_05_28_drive_0009_sync" + - "2013_05_28_drive_0010_sync" + - "2013_05_28_drive_0018_sync" diff --git a/src/py123d/script/config/conversion/datasets/nuplan_dataset.yaml b/src/py123d/script/config/conversion/datasets/nuplan_dataset.yaml new file mode 100644 index 00000000..1241c2ae --- /dev/null +++ b/src/py123d/script/config/conversion/datasets/nuplan_dataset.yaml @@ -0,0 +1,42 @@ +nuplan_dataset: + _target_: py123d.conversion.datasets.nuplan.nuplan_converter.NuPlanConverter + _convert_: 'all' + + splits: ["nuplan_train", "nuplan_val", "nuplan_test"] + nuplan_data_root: ${dataset_paths.nuplan_data_root} + nuplan_maps_root: ${dataset_paths.nuplan_maps_root} + nuplan_sensor_root: ${dataset_paths.nuplan_sensor_root} + + dataset_converter_config: + _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig + _convert_: 'all' + + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + + # Map + include_map: true + + # Ego + include_ego: true + + # Box Detections + include_box_detections: true + + # Traffic Lights + include_traffic_lights: true + + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "binary" # "path", "binary", "mp4" + + # LiDARs + include_lidars: true + lidar_store_option: "binary" # "path", "path_merged", "binary" + + # Scenario tag / Route + include_scenario_tags: true + include_route: true + + # Not available: + include_fisheye_mei_cameras: false diff --git a/src/py123d/script/config/conversion/datasets/nuplan_mini_dataset.yaml b/src/py123d/script/config/conversion/datasets/nuplan_mini_dataset.yaml new file mode 100644 index 00000000..fb7818b2 --- /dev/null +++ b/src/py123d/script/config/conversion/datasets/nuplan_mini_dataset.yaml @@ -0,0 +1,42 @@ +nuplan_mini_dataset: + _target_: py123d.conversion.datasets.nuplan.nuplan_converter.NuPlanConverter + _convert_: 'all' + + splits: ["nuplan-mini_train", "nuplan-mini_val", "nuplan-mini_test"] + nuplan_data_root: ${dataset_paths.nuplan_data_root} + nuplan_maps_root: ${dataset_paths.nuplan_maps_root} + nuplan_sensor_root: ${dataset_paths.nuplan_sensor_root} + + dataset_converter_config: + _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig + _convert_: 'all' + + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + + # Map + include_map: true + + 
# Ego + include_ego: true + + # Box Detections + include_box_detections: true + + # Traffic Lights + include_traffic_lights: true + + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "binary" # "path", "binary", "mp4" + + # LiDARs + include_lidars: true + lidar_store_option: "binary" # "path", "path_merged", "binary" + + # Scenario tag / Route + include_scenario_tags: true + include_route: true + + # Not available: + include_fisheye_mei_cameras: false diff --git a/src/py123d/script/config/conversion/datasets/nuscenes_dataset.yaml b/src/py123d/script/config/conversion/datasets/nuscenes_dataset.yaml new file mode 100644 index 00000000..52ce8207 --- /dev/null +++ b/src/py123d/script/config/conversion/datasets/nuscenes_dataset.yaml @@ -0,0 +1,39 @@ +nuscenes_dataset: + _target_: py123d.conversion.datasets.nuscenes.nuscenes_converter.NuScenesConverter + _convert_: 'all' + + splits: ["nuscenes_train", "nuscenes_val", "nuscenes_test"] + nuscenes_data_root: ${dataset_paths.nuscenes_data_root} + nuscenes_map_root: ${dataset_paths.nuscenes_data_root} + nuscenes_lanelet2_root: ${dataset_paths.nuscenes_data_root}/lanelet2 + use_lanelet2: False + + dataset_converter_config: + _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig + _convert_: 'all' + + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + + # Map + include_map: true + + # Ego + include_ego: true + + # Box Detections + include_box_detections: true + + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "binary" + + # LiDARs + include_lidars: true + lidar_store_option: "binary" + + # Not available: + include_fisheye_mei_cameras: false + include_traffic_lights: false + include_scenario_tags: false + include_route: false diff --git a/src/py123d/script/config/conversion/datasets/nuscenes_mini_dataset.yaml b/src/py123d/script/config/conversion/datasets/nuscenes_mini_dataset.yaml new file mode 100644 index 00000000..9bc7f019 --- /dev/null +++ b/src/py123d/script/config/conversion/datasets/nuscenes_mini_dataset.yaml @@ -0,0 +1,39 @@ +nuscenes_dataset: + _target_: py123d.conversion.datasets.nuscenes.nuscenes_converter.NuScenesConverter + _convert_: 'all' + + splits: ["nuscenes-mini_train", "nuscenes-mini_val"] + nuscenes_data_root: ${dataset_paths.nuscenes_data_root} + nuscenes_map_root: ${dataset_paths.nuscenes_data_root} + nuscenes_lanelet2_root: ${dataset_paths.nuscenes_data_root}/lanelet2 + use_lanelet2: false + + dataset_converter_config: + _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig + _convert_: 'all' + + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + + # Map + include_map: true + + # Ego + include_ego: true + + # Box Detections + include_box_detections: true + + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "binary" + + # LiDARs + include_lidars: true + lidar_store_option: "binary" + + # Not available: + include_fisheye_mei_cameras: false + include_traffic_lights: false + include_scenario_tags: false + include_route: false diff --git a/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml b/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml new file mode 100644 index 00000000..30747b42 --- /dev/null +++ b/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml @@ -0,0 +1,146 @@ +pandaset_dataset: + _target_: 
py123d.conversion.datasets.pandaset.pandaset_converter.PandasetConverter + _convert_: 'all' + + splits: ["pandaset_train", "pandaset_val", "pandaset_test"] + pandaset_data_root: ${dataset_paths.pandaset_data_root} + + dataset_converter_config: + _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig + _convert_: 'all' + + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + + # Ego + include_ego: true + + # Box Detections + include_box_detections: true + + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "binary" + + # LiDARs + include_lidars: true + lidar_store_option: "binary" + + # Not available: + include_map: false + include_fisheye_mei_cameras: false + include_traffic_lights: false + include_scenario_tags: false + include_route: false + + + # NOTE: Pandaset does not have official splits, so we create our own here. + # We use 80% of the logs for training, 10% for validation, and 10% for testing. + train_log_names: + - "001" + - "002" + - "003" + - "006" + - "008" + - "011" + - "012" + - "013" + - "014" + - "016" + - "019" + - "020" + - "023" + - "024" + - "027" + - "028" + - "029" + - "030" + - "032" + - "033" + - "034" + - "039" + - "041" + - "042" + - "043" + - "045" + - "046" + - "047" + - "048" + - "050" + - "051" + - "052" + - "053" + - "054" + - "055" + - "056" + - "057" + - "058" + - "059" + - "062" + - "063" + - "064" + - "065" + - "067" + - "068" + - "069" + - "070" + - "071" + - "072" + - "073" + - "074" + - "077" + - "079" + - "080" + - "084" + - "085" + - "088" + - "089" + - "090" + - "091" + - "092" + - "094" + - "095" + - "097" + - "098" + - "099" + - "101" + - "102" + - "103" + - "104" + - "106" + - "109" + - "110" + - "113" + - "115" + - "116" + - "119" + - "120" + - "122" + - "123" + - "149" + - "158" + + val_log_names: + - "004" + - "017" + - "018" + - "021" + - "037" + - "040" + - "044" + - "100" + - "105" + - "117" + - "124" + + test_log_names: + - "005" + - "015" + - "035" + - "038" + - "066" + - "078" + - "086" + - "093" + - "112" + - "139" diff --git a/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml b/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml new file mode 100644 index 00000000..14a643d8 --- /dev/null +++ b/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml @@ -0,0 +1,39 @@ +wopd_dataset: + _target_: py123d.conversion.datasets.wopd.wopd_converter.WOPDConverter + _convert_: 'all' + + splits: ["wopd_val"] # Which splits to convert. Options: ["wopd_train", "wopd_val", "wopd_test"] + wopd_data_root: "/media/nvme1/waymo_perception" # ${wopd_data_root} + zero_roll_pitch: true # Whether to zero the roll and pitch of the box detections in global frame. + keep_polar_features: false # Add lidar polar features (range, azimuth, elevation) in addition to XYZ. (slow if true) + add_map_pose_offset: true # Whether to add the map pose offset to the ego state and box detections. 
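+  # NOTE: `wopd_data_root` above is pinned to a local path; the commented-out
+  # interpolation hints at the portable default, e.g.
+  #   wopd_data_root: ${dataset_paths.wopd_data_root}
+  # which resolves WOPD_DATA_ROOT from the environment (see default_dataset_paths.yaml).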
+ + + dataset_converter_config: + _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig + _convert_: 'all' + + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + + # Map + include_map: true + + # Ego + include_ego: true + + # Box Detections + include_box_detections: true + + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "binary" # "path", "binary", "mp4" + + # LiDARs + include_lidars: false + lidar_store_option: "binary" # "path", "path_merged", "binary" + + # Not available: + include_traffic_lights: false + include_scenario_tags: false + include_route: false diff --git a/src/py123d/script/config/conversion/default_conversion.yaml b/src/py123d/script/config/conversion/default_conversion.yaml new file mode 100644 index 00000000..48e55dcc --- /dev/null +++ b/src/py123d/script/config/conversion/default_conversion.yaml @@ -0,0 +1,26 @@ +hydra: + output_subdir: null + run: + dir: . + searchpath: + - pkg://py123d.script.config + - pkg://py123d.script.config.common + job: + env_set: + HYDRA_FULL_ERROR: 1 + chdir: False +# +defaults: + - default_common + - default_dataset_paths + - log_writer: arrow_log_writer + - map_writer: gpkg_map_writer + - datasets: + - kitti360_dataset + - _self_ + + +terminate_on_exception: True + +force_map_conversion: True +force_log_conversion: True diff --git a/d123/simulation/controller/__init__.py b/src/py123d/script/config/conversion/log_writer/__init__.py similarity index 100% rename from d123/simulation/controller/__init__.py rename to src/py123d/script/config/conversion/log_writer/__init__.py diff --git a/src/py123d/script/config/conversion/log_writer/arrow_log_writer.yaml b/src/py123d/script/config/conversion/log_writer/arrow_log_writer.yaml new file mode 100644 index 00000000..61a4ead6 --- /dev/null +++ b/src/py123d/script/config/conversion/log_writer/arrow_log_writer.yaml @@ -0,0 +1,7 @@ +_target_: py123d.conversion.log_writer.arrow_log_writer.ArrowLogWriter +_convert_: 'all' + + +logs_root: ${dataset_paths.py123d_logs_root} +ipc_compression: null # Compression method for ipc files. Options: None, 'lz4', 'zstd' +ipc_compression_level: null # Compression level for ipc files. 
Options: None, or depending on compression method diff --git a/d123/simulation/controller/motion_model/__init__.py b/src/py123d/script/config/conversion/map_writer/__init__.py similarity index 100% rename from d123/simulation/controller/motion_model/__init__.py rename to src/py123d/script/config/conversion/map_writer/__init__.py diff --git a/src/py123d/script/config/conversion/map_writer/gpkg_map_writer.yaml b/src/py123d/script/config/conversion/map_writer/gpkg_map_writer.yaml new file mode 100644 index 00000000..86bf8e0b --- /dev/null +++ b/src/py123d/script/config/conversion/map_writer/gpkg_map_writer.yaml @@ -0,0 +1,5 @@ +_target_: py123d.conversion.map_writer.gpkg_map_writer.GPKGMapWriter +_convert_: 'all' + +maps_root: ${dataset_paths.py123d_maps_root} +remap_ids: true diff --git a/d123/simulation/controller/tracker/__init__.py b/src/py123d/script/config/viser/__init__.py similarity index 100% rename from d123/simulation/controller/tracker/__init__.py rename to src/py123d/script/config/viser/__init__.py diff --git a/src/py123d/script/config/viser/default_viser.yaml b/src/py123d/script/config/viser/default_viser.yaml new file mode 100644 index 00000000..b9ff6f9b --- /dev/null +++ b/src/py123d/script/config/viser/default_viser.yaml @@ -0,0 +1,74 @@ +hydra: + output_subdir: null + run: + dir: . + searchpath: + - pkg://py123d.script.config + - pkg://py123d.script.config.common + job: + chdir: False +# +defaults: + - default_common + - default_dataset_paths + - override scene_filter: viser_scenes + - _self_ + +viser_config: + _target_: py123d.visualization.viser.viser_config.ViserConfig + _convert_: 'all' + + # Server + server_host: "localhost" + server_port: 8080 + server_label: "123D Viser Server" + server_verbose: true + + # Theme + theme_control_layout: "floating" # ["floating", "collapsible", "fixed"] + theme_control_width: "large" # ["small", "medium", "large"] + theme_dark_mode: false + theme_show_logo: true + theme_show_share_button: true + # theme_brand_color: # Loads ellis blue by default + + # Play Controls + is_playing: false + playback_speed: 1.0 + + # Map + map_visible: true + map_radius: 200.0 + map_non_road_z_offset: 0.1 + map_requery: true + + # Bounding boxes + bounding_box_visible: true + bounding_box_type: "mesh" # ["mesh", "lines"] + bounding_box_line_width: 4.0 + + # Pinhole Cameras + # -> Frustum + camera_frustum_visible: true + # camera_frustum_types: null # Loads all by default + camera_frustum_scale: 1.0 + camera_frustum_image_scale: 0.25 + + # -> GUI + camera_gui_visible: true + camera_gui_types: [] + camera_gui_image_scale: 0.25 + + # Fisheye MEI Cameras + # -> Frustum + fisheye_frustum_visible: true + fisheye_mei_camera_frustum_visible: true + # fisheye_mei_camera_frustum_types: [] # Loads all by default + fisheye_frustum_scale: 1.0 + fisheye_frustum_image_scale: 0.25 + + # LiDAR + lidar_visible: true + # lidar_types: [] # Loads all by default + lidar_point_size: 0.05 + lidar_point_shape: "circle" # ["square", "diamond", "circle", "rounded", "sparkle"] diff --git a/src/py123d/script/run_conversion.py b/src/py123d/script/run_conversion.py new file mode 100644 index 00000000..52ed5cd6 --- /dev/null +++ b/src/py123d/script/run_conversion.py @@ -0,0 +1,84 @@ +import gc +import logging +from functools import partial +from typing import Dict, List + +import hydra +from omegaconf import DictConfig + +from py123d.common.multithreading.worker_utils import worker_map +from py123d.script.builders.dataset_converter_builder import AbstractDatasetConverter, 
build_dataset_converters
+from py123d.script.builders.worker_pool_builder import build_worker
+from py123d.script.builders.writer_builder import build_log_writer, build_map_writer
+from py123d.script.utils.dataset_path_utils import setup_dataset_paths
+
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger()
+
+CONFIG_PATH = "config/conversion"
+CONFIG_NAME = "default_conversion"
+
+
+@hydra.main(config_path=CONFIG_PATH, config_name=CONFIG_NAME, version_base=None)
+def main(cfg: DictConfig) -> None:
+    """
+    Main entrypoint for dataset conversion.
+    :param cfg: omegaconf dictionary
+    """
+
+    setup_dataset_paths(cfg.dataset_paths)
+
+    logger.info("Starting Dataset Conversion...")
+    dataset_converters: List[AbstractDatasetConverter] = build_dataset_converters(cfg.datasets)
+
+    for dataset_converter in dataset_converters:
+
+        worker = build_worker(cfg)
+        logger.info(f"Processing dataset: {dataset_converter.__class__.__name__}")
+
+        map_args = [{"map_index": i} for i in range(dataset_converter.get_number_of_maps())]
+        logger.info(
+            f"Found maps: {dataset_converter.get_number_of_maps()} for dataset: {dataset_converter.__class__.__name__}"
+        )
+
+        worker_map(worker, partial(_convert_maps, cfg=cfg, dataset_converter=dataset_converter), map_args)
+        logger.info(f"Finished maps: {dataset_converter.__class__.__name__}")
+
+        log_args = [{"log_index": i} for i in range(dataset_converter.get_number_of_logs())]
+        logger.info(
+            f"Found logs: {dataset_converter.get_number_of_logs()} for dataset: {dataset_converter.__class__.__name__}"
+        )
+        worker_map(worker, partial(_convert_logs, cfg=cfg, dataset_converter=dataset_converter), log_args)
+        logger.info(f"Finished logs: {dataset_converter.__class__.__name__}")
+
+        logger.info(f"Finished processing dataset: {dataset_converter.__class__.__name__}")
+
+
+def _convert_maps(args: List[Dict[str, int]], cfg: DictConfig, dataset_converter: AbstractDatasetConverter) -> List:
+    """Converts the maps assigned to this worker and returns an empty result list."""
+    map_writer = build_map_writer(cfg.map_writer)
+    for arg in args:
+        dataset_converter.convert_map(arg["map_index"], map_writer)
+    return []
+
+
+def _convert_logs(args: List[Dict[str, int]], cfg: DictConfig, dataset_converter: AbstractDatasetConverter) -> List:
+    """Converts the logs assigned to this worker and returns an empty result list."""
+    setup_dataset_paths(cfg.dataset_paths)
+
+    def _internal_convert_log(args: List[Dict[str, int]], dataset_converter_: AbstractDatasetConverter) -> None:
+        log_writer = build_log_writer(cfg.log_writer)
+        for arg in args:
+            dataset_converter_.convert_log(arg["log_index"], log_writer)
+        del log_writer
+        gc.collect()
+
+    _internal_convert_log(args, dataset_converter)
+    gc.collect()
+    return []
+
+
+if __name__ == "__main__":
+    main()
diff --git a/src/py123d/script/run_viser.py b/src/py123d/script/run_viser.py
new file mode 100644
index 00000000..df14382c
--- /dev/null
+++ b/src/py123d/script/run_viser.py
@@ -0,0 +1,43 @@
+import logging
+
+import hydra
+from omegaconf import DictConfig
+
+from py123d.script.builders.scene_builder_builder import build_scene_builder
+from py123d.script.builders.scene_filter_builder import build_scene_filter
+from py123d.script.builders.viser_config_builder import build_viser_config
+from py123d.script.run_conversion import build_worker
+from py123d.script.utils.dataset_path_utils import setup_dataset_paths
+from py123d.visualization.viser.viser_viewer import ViserViewer
+
+logger = logging.getLogger(__name__)
+
+CONFIG_PATH = "config/viser"
+CONFIG_NAME = "default_viser"
+
+
+@hydra.main(config_path=CONFIG_PATH, config_name=CONFIG_NAME,
version_base=None) +def main(cfg: DictConfig) -> None: + + # Initialize dataset paths + setup_dataset_paths(cfg.dataset_paths) + + # Build worker + worker = build_worker(cfg) + + # Build scene filter and scene builder + scene_filter = build_scene_filter(cfg.scene_filter) + scene_builder = build_scene_builder(cfg.scene_builder) + + # Get scenes from scene builder + scenes = scene_builder.get_scenes(scene_filter, worker=worker) + + # Build Viser config + viser_config = build_viser_config(cfg.viser_config) + + # Launch Viser viewer with the scenes + ViserViewer(scenes=scenes, viser_config=viser_config) + + +if __name__ == "__main__": + main() diff --git a/d123/simulation/gym/__init__.py b/src/py123d/script/utils/__init__.py similarity index 100% rename from d123/simulation/gym/__init__.py rename to src/py123d/script/utils/__init__.py diff --git a/src/py123d/script/utils/dataset_path_utils.py b/src/py123d/script/utils/dataset_path_utils.py new file mode 100644 index 00000000..393c05f4 --- /dev/null +++ b/src/py123d/script/utils/dataset_path_utils.py @@ -0,0 +1,41 @@ +import logging +from pathlib import Path +from typing import Optional + +from omegaconf import DictConfig, OmegaConf + +logger = logging.getLogger(__name__) +_global_dataset_paths: Optional[DictConfig] = None + + +def setup_dataset_paths(cfg: DictConfig) -> None: + """Setup the global dataset paths. + + :param cfg: The configuration containing dataset paths. + :return: None + """ + + global _global_dataset_paths + + if _global_dataset_paths is None: + # Make it immutable + OmegaConf.set_struct(cfg, True) # Prevents adding new keys + OmegaConf.set_readonly(cfg, True) # Prevents any modifications + _global_dataset_paths = cfg + + return None + + +def get_dataset_paths() -> DictConfig: + """Get the global dataset paths from anywhere in your code. + + :return: global dataset paths configuration + """ + if _global_dataset_paths is None: + dataset_paths_config_yaml = Path(__file__).parent.parent / "config" / "common" / "default_dataset_paths.yaml" + logger.warning(f"Dataset paths not set. 
Using default config: {dataset_paths_config_yaml}") + + cfg = OmegaConf.load(dataset_paths_config_yaml) + setup_dataset_paths(cfg.dataset_paths) + + return _global_dataset_paths diff --git a/d123/simulation/gym/environment/gym_observation/__init__.py b/src/py123d/visualization/__init__.py similarity index 100% rename from d123/simulation/gym/environment/gym_observation/__init__.py rename to src/py123d/visualization/__init__.py diff --git a/d123/dataset/dataset_specific/av2/.gitkeep b/src/py123d/visualization/bokeh/.gitkeep similarity index 100% rename from d123/dataset/dataset_specific/av2/.gitkeep rename to src/py123d/visualization/bokeh/.gitkeep diff --git a/d123/simulation/gym/environment/output_converter/__init__.py b/src/py123d/visualization/color/__init__.py similarity index 100% rename from d123/simulation/gym/environment/output_converter/__init__.py rename to src/py123d/visualization/color/__init__.py diff --git a/d123/common/visualization/color/color.py b/src/py123d/visualization/color/color.py similarity index 100% rename from d123/common/visualization/color/color.py rename to src/py123d/visualization/color/color.py diff --git a/d123/common/visualization/color/config.py b/src/py123d/visualization/color/config.py similarity index 88% rename from d123/common/visualization/color/config.py rename to src/py123d/visualization/color/config.py index 6d610844..1e522f32 100644 --- a/d123/common/visualization/color/config.py +++ b/src/py123d/visualization/color/config.py @@ -1,7 +1,7 @@ from dataclasses import dataclass, field from typing import Optional -from d123.common.visualization.color.color import BLACK, Color +from py123d.visualization.color.color import BLACK, Color @dataclass diff --git a/d123/common/visualization/color/default.py b/src/py123d/visualization/color/default.py similarity index 83% rename from d123/common/visualization/color/default.py rename to src/py123d/visualization/color/default.py index 076a1c81..ed5ccd25 100644 --- a/d123/common/visualization/color/default.py +++ b/src/py123d/visualization/color/default.py @@ -1,8 +1,9 @@ from typing import Dict -from d123.common.datatypes.detection.detection import TrafficLightStatus -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.visualization.color.color import ( +from py123d.conversion.registry.box_detection_label_registry import DefaultBoxDetectionLabel +from py123d.datatypes.detections.traffic_light_detections import TrafficLightStatus +from py123d.datatypes.maps.map_datatypes import MapLayer +from py123d.visualization.color.color import ( BLACK, DARKER_GREY, ELLIS_5, @@ -12,8 +13,7 @@ WHITE, Color, ) -from d123.common.visualization.color.config import PlotConfig -from d123.dataset.maps.map_datatypes import MapLayer +from py123d.visualization.color.config import PlotConfig HEADING_MARKER_STYLE: str = "^" # "^": triangle, "-": line @@ -83,8 +83,8 @@ ), } -BOX_DETECTION_CONFIG: Dict[DetectionType, PlotConfig] = { - DetectionType.VEHICLE: PlotConfig( +BOX_DETECTION_CONFIG: Dict[DefaultBoxDetectionLabel, PlotConfig] = { + DefaultBoxDetectionLabel.VEHICLE: PlotConfig( fill_color=ELLIS_5[4], fill_color_alpha=1.0, line_color=BLACK, @@ -95,7 +95,7 @@ marker_size=1.0, zorder=3, ), - DetectionType.PEDESTRIAN: PlotConfig( + DefaultBoxDetectionLabel.PEDESTRIAN: PlotConfig( fill_color=NEW_TAB_10[6], fill_color_alpha=1.0, line_color=BLACK, @@ -106,7 +106,7 @@ marker_size=1.0, zorder=2, ), - DetectionType.BICYCLE: PlotConfig( + DefaultBoxDetectionLabel.BICYCLE: PlotConfig( 
fill_color=ELLIS_5[3], fill_color_alpha=1.0, line_color=BLACK, @@ -117,7 +117,7 @@ marker_size=1.0, zorder=2, ), - DetectionType.TRAFFIC_CONE: PlotConfig( + DefaultBoxDetectionLabel.TRAFFIC_CONE: PlotConfig( fill_color=NEW_TAB_10[5], fill_color_alpha=1.0, line_color=BLACK, @@ -127,7 +127,7 @@ marker_style=None, zorder=2, ), - DetectionType.BARRIER: PlotConfig( + DefaultBoxDetectionLabel.BARRIER: PlotConfig( fill_color=NEW_TAB_10[5], fill_color_alpha=1.0, line_color=BLACK, @@ -137,7 +137,7 @@ marker_style=None, zorder=2, ), - DetectionType.CZONE_SIGN: PlotConfig( + DefaultBoxDetectionLabel.CZONE_SIGN: PlotConfig( fill_color=NEW_TAB_10[5], fill_color_alpha=1.0, line_color=BLACK, @@ -147,7 +147,7 @@ marker_style=None, zorder=2, ), - DetectionType.GENERIC_OBJECT: PlotConfig( + DefaultBoxDetectionLabel.GENERIC_OBJECT: PlotConfig( fill_color=NEW_TAB_10[5], fill_color_alpha=1.0, line_color=BLACK, @@ -157,7 +157,7 @@ marker_style=None, zorder=2, ), - DetectionType.SIGN: PlotConfig( + DefaultBoxDetectionLabel.SIGN: PlotConfig( fill_color=NEW_TAB_10[8], fill_color_alpha=1.0, line_color=BLACK, @@ -167,6 +167,16 @@ marker_style=None, zorder=2, ), + DefaultBoxDetectionLabel.EGO: PlotConfig( + fill_color=ELLIS_5[0], + fill_color_alpha=1.0, + line_color=BLACK, + line_color_alpha=1.0, + line_width=1.0, + line_style="-", + marker_style=HEADING_MARKER_STYLE, + zorder=4, + ), } EGO_VEHICLE_CONFIG: PlotConfig = PlotConfig( diff --git a/d123/simulation/gym/environment/reward_builder/__init__.py b/src/py123d/visualization/matplotlib/__init__.py similarity index 100% rename from d123/simulation/gym/environment/reward_builder/__init__.py rename to src/py123d/visualization/matplotlib/__init__.py diff --git a/d123/common/visualization/matplotlib/camera.py b/src/py123d/visualization/matplotlib/camera.py similarity index 85% rename from d123/common/visualization/matplotlib/camera.py rename to src/py123d/visualization/matplotlib/camera.py index 52d873ae..5155aae4 100644 --- a/d123/common/visualization/matplotlib/camera.py +++ b/src/py123d/visualization/matplotlib/camera.py @@ -10,13 +10,13 @@ # from PIL import ImageColor from pyquaternion import Quaternion -from d123.common.datatypes.detection.detection import BoxDetectionSE3, BoxDetectionWrapper -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.sensor.camera import Camera -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3 -from d123.common.geometry.bounding_box.bounding_box_index import BoundingBoxSE3Index, Corners3DIndex -from d123.common.geometry.transform.se3 import convert_absolute_to_relative_se3_array -from d123.common.visualization.color.default import BOX_DETECTION_CONFIG +from py123d.conversion.registry.box_detection_label_registry import DefaultBoxDetectionLabel +from py123d.datatypes.detections.box_detections import BoxDetectionSE3, BoxDetectionWrapper +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeIntrinsics +from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 +from py123d.geometry import BoundingBoxSE3Index, Corners3DIndex +from py123d.geometry.transform.transform_se3 import convert_absolute_to_relative_se3_array +from py123d.visualization.color.default import BOX_DETECTION_CONFIG # from navsim.common.dataclasses import Annotations, Camera, Lidar # from navsim.common.enums import BoundingBoxIndex, LidarIndex @@ -25,7 +25,7 @@ # from navsim.visualization.lidar import filter_lidar_pc, get_lidar_pc_color -def add_camera_ax(ax: plt.Axes, camera: 
Camera) -> plt.Axes: +def add_camera_ax(ax: plt.Axes, camera: PinholeCamera) -> plt.Axes: """ Adds camera image to matplotlib ax object :param ax: matplotlib ax object @@ -70,26 +70,15 @@ def add_camera_ax(ax: plt.Axes, camera: Camera) -> plt.Axes: def add_box_detections_to_camera_ax( ax: plt.Axes, - camera: Camera, + camera: PinholeCamera, box_detections: BoxDetectionWrapper, ego_state_se3: EgoStateSE3, + return_image: bool = False, ) -> plt.Axes: - # box_labels = annotations.names - # boxes = _transform_annotations_to_camera( - # annotations.boxes, - # camera.sensor2lidar_rotation, - # camera.sensor2lidar_translation, - # ) - # box_positions, box_dimensions, box_heading = ( - # boxes[:, BoundingBoxIndex.POSITION], - # boxes[:, BoundingBoxIndex.DIMENSION], - # boxes[:, BoundingBoxIndex.HEADING], - # ) - box_detection_array = np.zeros((len(box_detections.box_detections), len(BoundingBoxSE3Index)), dtype=np.float64) - detection_types = np.array( - [detection.metadata.detection_type for detection in box_detections.box_detections], dtype=object + default_labels = np.array( + [detection.metadata.default_label for detection in box_detections.box_detections], dtype=object ) for idx, box_detection in enumerate(box_detections.box_detections): assert isinstance( @@ -103,7 +92,7 @@ def add_box_detections_to_camera_ax( ) # box_detection_array[..., BoundingBoxSE3Index.XYZ] -= ego_state_se3.rear_axle_se3.point_3d.array detection_positions, detection_extents, detection_yaws = _transform_annotations_to_camera( - box_detection_array, camera.extrinsic + box_detection_array, camera.extrinsic.transformation_matrix ) corners_norm = np.stack(np.unravel_index(np.arange(len(Corners3DIndex)), [2] * 3), axis=1) @@ -115,13 +104,17 @@ def add_box_detections_to_camera_ax( corners += detection_positions.reshape(-1, 1, 3) # Then draw project corners to image. 
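# NOTE (editor): a standalone numpy sketch of the corner construction above. It assumes
# only that Corners3DIndex enumerates the 8 box corners; the box values are illustrative.
#
#     import numpy as np
#
#     # Rows are the binary triples 000, 001, ..., 111 -> the 8 corners of the unit cube.
#     corners_norm = np.stack(np.unravel_index(np.arange(8), [2] * 3), axis=1)  # shape (8, 3)
#     corners = np.array([4.6, 1.9, 1.7]) * (corners_norm - 0.5)  # scale to length/height/width
#     corners += np.array([10.0, 2.0, 0.5])                       # translate to the box center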
- box_corners, corners_pc_in_fov = _transform_points_to_image(corners.reshape(-1, 3), camera.metadata.intrinsic) + box_corners, corners_pc_in_fov = _transform_points_to_image(corners.reshape(-1, 3), camera.metadata.intrinsics) box_corners = box_corners.reshape(-1, 8, 2) corners_pc_in_fov = corners_pc_in_fov.reshape(-1, 8) valid_corners = corners_pc_in_fov.any(-1) - box_corners, detection_types = box_corners[valid_corners], detection_types[valid_corners] - image = _plot_rect_3d_on_img(camera.image.copy(), box_corners, detection_types) + box_corners, default_labels = box_corners[valid_corners], default_labels[valid_corners] + image = _plot_rect_3d_on_img(camera.image.copy(), box_corners, default_labels) + + if return_image: + # ax.imshow(image) + return ax, image ax.imshow(image) return ax @@ -131,7 +124,7 @@ def _transform_annotations_to_camera( boxes: npt.NDArray[np.float32], # sensor2lidar_rotation: npt.NDArray[np.float32], # sensor2lidar_translation: npt.NDArray[np.float32], - extrinsic: npt.NDArray[np.float64], + extrinsic: npt.NDArray[np.float32], ) -> npt.NDArray[np.float32]: """ Helper function to transform bounding boxes into camera frame @@ -144,17 +137,17 @@ def _transform_annotations_to_camera( sensor2lidar_rotation = extrinsic[:3, :3] sensor2lidar_translation = extrinsic[:3, 3] - locs, rots = ( + locs, quaternions = ( boxes[:, BoundingBoxSE3Index.XYZ], - boxes[:, BoundingBoxSE3Index.YAW], + boxes[:, BoundingBoxSE3Index.QUATERNION], ) dims_cam = boxes[ :, [BoundingBoxSE3Index.LENGTH, BoundingBoxSE3Index.HEIGHT, BoundingBoxSE3Index.WIDTH] ] # l, w, h -> l, h, w - rots_cam = np.zeros_like(rots) - for idx, rot in enumerate(rots): - rot = Quaternion(axis=[0, 0, 1], radians=rot) + rots_cam = np.zeros_like(quaternions[..., 0]) + for idx, quaternion in enumerate(quaternions): + rot = Quaternion(array=quaternion) rot = Quaternion(matrix=sensor2lidar_rotation).inverse * rot rots_cam[idx] = -rot.yaw_pitch_roll[0] @@ -218,7 +211,7 @@ def _rotation_3d_in_axis(points: npt.NDArray[np.float32], angles: npt.NDArray[np def _plot_rect_3d_on_img( image: npt.NDArray[np.float32], box_corners: npt.NDArray[np.float32], - detection_types: List[DetectionType], + labels: List[DefaultBoxDetectionLabel], thickness: int = 3, ) -> npt.NDArray[np.uint8]: """ @@ -245,7 +238,7 @@ def _plot_rect_3d_on_img( (6, 7), ) for i in range(len(box_corners)): - color = BOX_DETECTION_CONFIG[detection_types[i]].fill_color.rgb + color = BOX_DETECTION_CONFIG[labels[i]].fill_color.rgb corners = box_corners[i].astype(np.int64) for start, end in line_indices: cv2.line( @@ -261,7 +254,7 @@ def _plot_rect_3d_on_img( def _transform_points_to_image( points: npt.NDArray[np.float32], - intrinsic: npt.NDArray[np.float32], + intrinsics: PinholeIntrinsics, image_shape: Optional[Tuple[int, int]] = None, eps: float = 1e-3, ) -> Tuple[npt.NDArray[np.float32], npt.NDArray[np.bool_]]: @@ -276,8 +269,10 @@ def _transform_points_to_image( """ points = points[:, :3] + K = intrinsics.camera_matrix + viewpad = np.eye(4) - viewpad[: intrinsic.shape[0], : intrinsic.shape[1]] = intrinsic + viewpad[: K.shape[0], : K.shape[1]] = K pc_img = np.concatenate([points, np.ones_like(points)[:, :1]], -1) pc_img = viewpad @ pc_img.T diff --git a/d123/common/visualization/matplotlib/lidar.py b/src/py123d/visualization/matplotlib/lidar.py similarity index 100% rename from d123/common/visualization/matplotlib/lidar.py rename to src/py123d/visualization/matplotlib/lidar.py diff --git a/d123/common/visualization/matplotlib/observation.py 
b/src/py123d/visualization/matplotlib/observation.py similarity index 76% rename from d123/common/visualization/matplotlib/observation.py rename to src/py123d/visualization/matplotlib/observation.py index 7e225cc7..e0a826f2 100644 --- a/d123/common/visualization/matplotlib/observation.py +++ b/src/py123d/visualization/matplotlib/observation.py @@ -4,14 +4,18 @@ import numpy as np import shapely.geometry as geom -from d123.common.datatypes.detection.detection import BoxDetectionWrapper, TrafficLightDetectionWrapper -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2, EgoStateSE3 -from d123.common.geometry.base import Point2D -from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2, BoundingBoxSE3 -from d123.common.geometry.transform.tranform_2d import translate_along_yaw -from d123.common.visualization.color.config import PlotConfig -from d123.common.visualization.color.default import ( +from py123d.conversion.registry.box_detection_label_registry import DefaultBoxDetectionLabel +from py123d.datatypes.detections.box_detections import BoxDetectionWrapper +from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper +from py123d.datatypes.maps.abstract_map import AbstractMap +from py123d.datatypes.maps.abstract_map_objects import AbstractLane +from py123d.datatypes.maps.map_datatypes import MapLayer +from py123d.datatypes.scene.abstract_scene import AbstractScene +from py123d.datatypes.vehicle_state.ego_state import EgoStateSE2, EgoStateSE3 +from py123d.geometry import BoundingBoxSE2, BoundingBoxSE3, Point2D, StateSE2Index, Vector2D +from py123d.geometry.transform.transform_se2 import translate_se2_along_body_frame +from py123d.visualization.color.config import PlotConfig +from py123d.visualization.color.default import ( BOX_DETECTION_CONFIG, CENTERLINE_CONFIG, EGO_VEHICLE_CONFIG, @@ -19,16 +23,12 @@ ROUTE_CONFIG, TRAFFIC_LIGHT_CONFIG, ) -from d123.common.visualization.matplotlib.utils import ( +from py123d.visualization.matplotlib.utils import ( add_shapely_linestring_to_ax, add_shapely_polygon_to_ax, get_pose_triangle, shapely_geometry_local_coords, ) -from d123.dataset.maps.abstract_map import AbstractMap -from d123.dataset.maps.abstract_map_objects import AbstractLane -from d123.dataset.maps.map_datatypes import MapLayer -from d123.dataset.scene.abstract_scene import AbstractScene def add_default_map_on_ax( @@ -56,7 +56,7 @@ def add_default_map_on_ax( for map_object in map_objects: try: if layer in [MapLayer.LANE_GROUP]: - if route_lane_group_ids is not None and int(map_object.id) in route_lane_group_ids: + if route_lane_group_ids is not None and int(map_object.object_id) in route_lane_group_ids: add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, ROUTE_CONFIG) else: add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, MAP_SURFACE_CONFIG[layer]) @@ -74,10 +74,10 @@ def add_default_map_on_ax( except Exception: import traceback - print(f"Error adding map object of type {layer.name} and id {map_object.id}") + print(f"Error adding map object of type {layer.name} and id {map_object.object_id}") traceback.print_exc() - ax.set_title(f"Map: {map_api.map_name}") + ax.set_title(f"Map: {map_api.location}") def add_box_detections_to_ax(ax: plt.Axes, box_detections: BoxDetectionWrapper) -> None: @@ -85,7 +85,7 @@ def add_box_detections_to_ax(ax: plt.Axes, box_detections: BoxDetectionWrapper) # TODO: Optionally, continue on boxes outside of plot. 
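# NOTE (editor): a hedged sketch of the skip this TODO describes; `plot_center_xy` and
# `plot_radius` are hypothetical and would need to be passed into this function.
#
#     center_xy = box_detection.bounding_box.center_se2.point_2d.array
#     if np.linalg.norm(center_xy - plot_center_xy) > plot_radius:
#         continue  # box lies outside the plotted radius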
# if box_detection.metadata.detection_type == DetectionType.GENERIC_OBJECT: # continue - plot_config = BOX_DETECTION_CONFIG[box_detection.metadata.detection_type] + plot_config = BOX_DETECTION_CONFIG[box_detection.metadata.default_label] add_bounding_box_to_ax(ax, box_detection.bounding_box, plot_config) @@ -96,10 +96,10 @@ def add_box_future_detections_to_ax(ax: plt.Axes, scene: AbstractScene, iteratio agents_poses = { agent.metadata.track_token: [agent.center_se3] for agent in initial_agents - if agent.metadata.detection_type == DetectionType.VEHICLE + if agent.metadata.default_label == DefaultBoxDetectionLabel.VEHICLE } frequency = 1 - for iteration in range(iteration + frequency, scene.get_number_of_iterations(), frequency): + for iteration in range(iteration + frequency, scene.number_of_iterations, frequency): agents = scene.get_box_detections_at_iteration(iteration) for agent in agents: if agent.metadata.track_token in agents_poses: @@ -115,10 +115,10 @@ def add_box_future_detections_to_ax(ax: plt.Axes, scene: AbstractScene, iteratio ax.plot( poses[i : i + 2, 0], poses[i : i + 2, 1], - color=BOX_DETECTION_CONFIG[DetectionType.VEHICLE].fill_color.hex, + color=BOX_DETECTION_CONFIG[DefaultBoxDetectionLabel.VEHICLE].fill_color.hex, alpha=alphas[i + 1], - linewidth=BOX_DETECTION_CONFIG[DetectionType.VEHICLE].line_width * 5, - zorder=BOX_DETECTION_CONFIG[DetectionType.VEHICLE].zorder, + linewidth=BOX_DETECTION_CONFIG[DefaultBoxDetectionLabel.VEHICLE].line_width * 5, + zorder=BOX_DETECTION_CONFIG[DefaultBoxDetectionLabel.VEHICLE].zorder, ) @@ -138,7 +138,7 @@ def add_traffic_lights_to_ax( TRAFFIC_LIGHT_CONFIG[traffic_light_detection.status], ) else: - raise ValueError(f"Lane with id {traffic_light_detection.lane_id} not found in map {map_api.map_name}.") + raise ValueError(f"Lane with id {traffic_light_detection.lane_id} not found in map {map_api.location}.") def add_bounding_box_to_ax( @@ -152,12 +152,13 @@ def add_bounding_box_to_ax( if plot_config.marker_style is not None: assert plot_config.marker_style in ["-", "^"], f"Unknown marker style: {plot_config.marker_style}" if plot_config.marker_style == "-": - center_se2 = ( - bounding_box.center if isinstance(bounding_box, BoundingBoxSE2) else bounding_box.center.state_se2 - ) + center_se2 = bounding_box.center_se2 arrow = np.zeros((2, 2), dtype=np.float64) arrow[0] = center_se2.point_2d.array - arrow[1] = translate_along_yaw(center_se2, Point2D(bounding_box.length / 2.0 + 0.5, 0.0)).point_2d.array + arrow[1] = translate_se2_along_body_frame( + center_se2, + Vector2D(bounding_box.length / 2.0 + 0.5, 0.0), + ).array[StateSE2Index.XY] ax.plot( arrow[:, 0], arrow[:, 1], diff --git a/d123/common/visualization/matplotlib/plots.py b/src/py123d/visualization/matplotlib/plots.py similarity index 77% rename from d123/common/visualization/matplotlib/plots.py rename to src/py123d/visualization/matplotlib/plots.py index 80e51714..01100f01 100644 --- a/d123/common/visualization/matplotlib/plots.py +++ b/src/py123d/visualization/matplotlib/plots.py @@ -5,13 +5,13 @@ import matplotlib.pyplot as plt from tqdm import tqdm -from d123.common.visualization.matplotlib.observation import ( +from py123d.datatypes.scene.abstract_scene import AbstractScene +from py123d.visualization.matplotlib.observation import ( add_box_detections_to_ax, add_default_map_on_ax, add_ego_vehicle_to_ax, add_traffic_lights_to_ax, ) -from d123.dataset.scene.abstract_scene import AbstractScene def _plot_scene_on_ax(ax: plt.Axes, scene: AbstractScene, iteration: int = 0, radius: float = 
80) -> plt.Axes: @@ -20,11 +20,13 @@ def _plot_scene_on_ax(ax: plt.Axes, scene: AbstractScene, iteration: int = 0, ra box_detections = scene.get_box_detections_at_iteration(iteration) traffic_light_detections = scene.get_traffic_light_detections_at_iteration(iteration) route_lane_group_ids = scene.get_route_lane_group_ids(iteration) - map_api = scene.map_api + map_api = scene.get_map_api() point_2d = ego_vehicle_state.bounding_box.center.state_se2.point_2d - add_default_map_on_ax(ax, map_api, point_2d, radius=radius, route_lane_group_ids=route_lane_group_ids) - add_traffic_lights_to_ax(ax, traffic_light_detections, map_api) + if map_api is not None: + add_default_map_on_ax(ax, map_api, point_2d, radius=radius, route_lane_group_ids=route_lane_group_ids) + if traffic_light_detections is not None: + add_traffic_lights_to_ax(ax, traffic_light_detections, map_api) add_box_detections_to_ax(ax, box_detections) add_ego_vehicle_to_ax(ax, ego_vehicle_state) @@ -60,11 +62,9 @@ def render_scene_animation( output_path = Path(output_path) output_path.mkdir(parents=True, exist_ok=True) - scene.open() - if end_idx is None: - end_idx = scene.get_number_of_iterations() - end_idx = min(end_idx, scene.get_number_of_iterations()) + end_idx = scene.number_of_iterations + end_idx = min(end_idx, scene.number_of_iterations) fig, ax = plt.subplots(figsize=(10, 10)) @@ -78,6 +78,5 @@ def update(i): pbar = tqdm(total=len(frames), desc=f"Rendering {scene.log_name} as {format}") ani = animation.FuncAnimation(fig, update, frames=frames, repeat=False) - ani.save(output_path / f"{scene.log_name}_{scene.token}.{format}", writer="ffmpeg", fps=fps, dpi=dpi) + ani.save(output_path / f"{scene.log_name}_{scene.uuid}.{format}", writer="ffmpeg", fps=fps, dpi=dpi) plt.close(fig) - scene.close() diff --git a/d123/common/visualization/matplotlib/utils.py b/src/py123d/visualization/matplotlib/utils.py similarity index 91% rename from d123/common/visualization/matplotlib/utils.py rename to src/py123d/visualization/matplotlib/utils.py index 144530ab..81c60260 100644 --- a/d123/common/visualization/matplotlib/utils.py +++ b/src/py123d/visualization/matplotlib/utils.py @@ -1,4 +1,3 @@ -from functools import lru_cache from typing import Union import matplotlib.patches as patches @@ -8,8 +7,8 @@ import shapely.geometry as geom from matplotlib.path import Path -from d123.common.geometry.base import StateSE2, StateSE3 -from d123.common.visualization.color.config import PlotConfig +from py123d.geometry import StateSE2, StateSE3 +from py123d.visualization.color.config import PlotConfig def add_shapely_polygon_to_ax( @@ -35,19 +34,20 @@ def _add_element_helper(element: geom.Polygon): # Create path with exterior and interior rings def create_polygon_path(polygon): # Get exterior coordinates - exterior_coords = list(polygon.exterior.coords) + # NOTE: Only take first two dimensions in case of 3D coords + exterior_coords = np.array(polygon.exterior.coords)[:, :2].tolist() # Start with exterior ring - vertices = exterior_coords + vertices_2d = exterior_coords codes = [Path.MOVETO] + [Path.LINETO] * (len(exterior_coords) - 2) + [Path.CLOSEPOLY] # Add interior rings (holes) for interior in polygon.interiors: interior_coords = list(interior.coords) - vertices.extend(interior_coords) + vertices_2d.extend(interior_coords) codes.extend([Path.MOVETO] + [Path.LINETO] * (len(interior_coords) - 2) + [Path.CLOSEPOLY]) - return Path(vertices, codes) + return Path(vertices_2d, codes) path = create_polygon_path(element) @@ -99,7 +99,6 @@ def 
add_shapely_linestring_to_ax(
     return ax
 
 
-@lru_cache(maxsize=32)
 def get_pose_triangle(size: float) -> geom.Polygon:
     """Create a triangle shape for the pose."""
     half_size = size / 2
diff --git a/d123/dataset/dataset_specific/kitti_360/.gitkeep b/src/py123d/visualization/utils/.gitkeep
similarity index 100%
rename from d123/dataset/dataset_specific/kitti_360/.gitkeep
rename to src/py123d/visualization/utils/.gitkeep
diff --git a/d123/simulation/gym/environment/reward_builder/components/__init__.py b/src/py123d/visualization/viser/__init__.py
similarity index 100%
rename from d123/simulation/gym/environment/reward_builder/components/__init__.py
rename to src/py123d/visualization/viser/__init__.py
diff --git a/src/py123d/visualization/viser/elements/__init__.py b/src/py123d/visualization/viser/elements/__init__.py
new file mode 100644
index 00000000..542ec2dc
--- /dev/null
+++ b/src/py123d/visualization/viser/elements/__init__.py
@@ -0,0 +1,7 @@
+from py123d.visualization.viser.elements.detection_elements import add_box_detections_to_viser_server
+from py123d.visualization.viser.elements.map_elements import add_map_to_viser_server
+from py123d.visualization.viser.elements.sensor_elements import (
+    add_camera_frustums_to_viser_server,
+    add_camera_gui_to_viser_server,
+    add_lidar_pc_to_viser_server,
+)
diff --git a/src/py123d/visualization/viser/elements/detection_elements.py b/src/py123d/visualization/viser/elements/detection_elements.py
new file mode 100644
index 00000000..be08021b
--- /dev/null
+++ b/src/py123d/visualization/viser/elements/detection_elements.py
@@ -0,0 +1,154 @@
+from typing import Dict, Optional, Tuple, Union
+
+import numpy as np
+import numpy.typing as npt
+import trimesh
+import viser
+
+from py123d.conversion.registry.box_detection_label_registry import DefaultBoxDetectionLabel
+from py123d.datatypes.scene.abstract_scene import AbstractScene
+from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3
+from py123d.geometry.geometry_index import BoundingBoxSE3Index, Corners3DIndex, StateSE3Index
+from py123d.geometry.utils.bounding_box_utils import (
+    bbse3_array_to_corners_array,
+    corners_array_to_3d_mesh,
+    corners_array_to_edge_lines,
+)
+from py123d.visualization.color.default import BOX_DETECTION_CONFIG
+from py123d.visualization.viser.viser_config import ViserConfig
+
+
+def add_box_detections_to_viser_server(
+    scene: AbstractScene,
+    scene_interation: int,
+    initial_ego_state: EgoStateSE3,
+    viser_server: viser.ViserServer,
+    viser_config: ViserConfig,
+    box_detection_handles: Dict[str, Optional[Union[viser.GlbHandle, viser.LineSegmentsHandle]]],
+) -> None:
+    visible_handle_keys = []
+    if viser_config.bounding_box_visible:
+        if viser_config.bounding_box_type == "mesh":
+            mesh = _get_bounding_box_meshes(scene, scene_interation, initial_ego_state)
+            box_detection_handles["mesh"] = viser_server.scene.add_mesh_trimesh(
+                "box_detections",
+                mesh=mesh,
+                visible=True,
+            )
+            visible_handle_keys.append("mesh")
+        elif viser_config.bounding_box_type == "lines":
+            lines, colors, _ = _get_bounding_box_outlines(scene, scene_interation, initial_ego_state)
+            box_detection_handles["lines"] = viser_server.scene.add_line_segments(
+                "box_detections",
+                points=lines,
+                colors=colors,
+                line_width=viser_config.bounding_box_line_width,
+                visible=True,
+            )
+            visible_handle_keys.append("lines")
+
+        else:
+            raise ValueError(f"Unknown bounding box type: {viser_config.bounding_box_type}")
+
+    for key in box_detection_handles:
+        if key not in visible_handle_keys and box_detection_handles[key] is not None:
+            box_detection_handles[key].visible = False
+
+
+def _get_bounding_box_meshes(scene: AbstractScene, iteration: int, initial_ego_state: EgoStateSE3) -> trimesh.Trimesh:
+
+    ego_vehicle_state = scene.get_ego_state_at_iteration(iteration)
+    box_detections = scene.get_box_detections_at_iteration(iteration)
+
+    # Load boxes to visualize, including ego vehicle at the last position
+    boxes = [bd.bounding_box_se3 for bd in box_detections.box_detections] + [ego_vehicle_state.bounding_box_se3]
+    boxes_labels = [bd.metadata.default_label for bd in box_detections.box_detections] + [DefaultBoxDetectionLabel.EGO]
+
+    # create meshes for all boxes
+    box_se3_array = np.array([box.array for box in boxes])
+    box_se3_array[..., BoundingBoxSE3Index.XYZ] -= initial_ego_state.center_se3.array[StateSE3Index.XYZ]
+    box_corners_array = bbse3_array_to_corners_array(box_se3_array)
+    box_vertices, box_faces = corners_array_to_3d_mesh(box_corners_array)
+
+    # Create colors for each box based on detection type
+    box_colors = []
+    for box_label in boxes_labels:
+        box_colors.append(BOX_DETECTION_CONFIG[box_label].fill_color.rgba)
+
+    # Convert to numpy array and repeat for each vertex
+    box_colors = np.array(box_colors)
+    vertex_colors = np.repeat(box_colors, len(Corners3DIndex), axis=0)
+
+    # Create trimesh object
+    mesh = trimesh.Trimesh(vertices=box_vertices, faces=box_faces)
+    mesh.visual.vertex_colors = vertex_colors
+
+    return mesh
+
+
+def _get_bounding_box_outlines(
+    scene: AbstractScene, iteration: int, initial_ego_state: EgoStateSE3
+) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float32], npt.NDArray[np.float64]]:
+
+    ego_vehicle_state = scene.get_ego_state_at_iteration(iteration)
+    box_detections = scene.get_box_detections_at_iteration(iteration)
+
+    # Load boxes to visualize, including ego vehicle at the last position
+    boxes = [bd.bounding_box_se3 for bd in box_detections.box_detections] + [ego_vehicle_state.bounding_box_se3]
+    boxes_labels = [bd.metadata.default_label for bd in box_detections.box_detections] + [DefaultBoxDetectionLabel.EGO]
+
+    # Create lines for all boxes
+    box_se3_array = np.array([box.array for box in boxes])
+    box_se3_array[..., BoundingBoxSE3Index.XYZ] -= initial_ego_state.center_se3.array[StateSE3Index.XYZ]
+    box_corners_array = bbse3_array_to_corners_array(box_se3_array)
+    box_outlines = corners_array_to_edge_lines(box_corners_array)
+
+    # Create colors for all boxes
+    box_colors = np.zeros(box_outlines.shape, dtype=np.float32)
+    for i, box_label in enumerate(boxes_labels):
+        box_colors[i, ...] = BOX_DETECTION_CONFIG[box_label].fill_color.rgb_norm
+
+    box_outlines = box_outlines.reshape(-1, *box_outlines.shape[2:])
+    box_colors = box_colors.reshape(-1, *box_colors.shape[2:])
+
+    return box_outlines, box_colors, box_se3_array
diff --git a/src/py123d/visualization/viser/elements/map_elements.py b/src/py123d/visualization/viser/elements/map_elements.py
new file mode 100644
index 00000000..6e16726e
--- /dev/null
+++ b/src/py123d/visualization/viser/elements/map_elements.py
@@ -0,0 +1,129 @@
+from typing import Dict, Optional
+
+import numpy as np
+import trimesh
+import viser
+
+from py123d.datatypes.maps.abstract_map import MapLayer
+from py123d.datatypes.maps.abstract_map_objects import AbstractSurfaceMapObject
+from py123d.datatypes.scene.abstract_scene import AbstractScene
+from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3
+from py123d.geometry import Point3D, Point3DIndex
+from py123d.visualization.color.default import MAP_SURFACE_CONFIG
+from py123d.visualization.viser.viser_config import ViserConfig
+
+last_query_position: Optional[Point3D] = None
+
+
+def add_map_to_viser_server(
+    scene: AbstractScene,
+    iteration: int,
+    initial_ego_state: EgoStateSE3,
+    viser_server: viser.ViserServer,
+    viser_config: ViserConfig,
+    map_handles: Dict[MapLayer, viser.GlbHandle],
+) -> None:
+    global last_query_position
+
+    if viser_config.map_visible:
+
+        map_trimesh_dict: Optional[Dict[MapLayer, trimesh.Trimesh]] = None
+
+        if len(map_handles) == 0 or viser_config._force_map_update:
+            current_ego_state = initial_ego_state
+            map_trimesh_dict = _get_map_trimesh_dict(
+                scene,
+                initial_ego_state,
+                current_ego_state,
+                viser_config,
+            )
+            last_query_position = current_ego_state.center_se3.point_3d
+            viser_config._force_map_update = False
+
+        elif viser_config.map_requery:
+            current_ego_state = scene.get_ego_state_at_iteration(iteration)
+            current_position = current_ego_state.center_se3.point_3d
+
+            if np.linalg.norm(current_position.array - last_query_position.array) > viser_config.map_radius / 2:
+                last_query_position = current_position
+                map_trimesh_dict = _get_map_trimesh_dict(
+                    scene,
+                    initial_ego_state,
+                    current_ego_state,
+                    viser_config,
+                )
+
+        if map_trimesh_dict is not None:
+            for map_layer, mesh in
map_trimesh_dict.items(): + # if map_layer in map_handles: + # map_handles[map_layer].mesh = mesh + # else: + map_handles[map_layer] = viser_server.scene.add_mesh_trimesh( + f"/map/{map_layer.serialize()}", + mesh, + visible=True, + ) + + +def _get_map_trimesh_dict( + scene: AbstractScene, + initial_ego_state: EgoStateSE3, + current_ego_state: Optional[EgoStateSE3], + viser_config: ViserConfig, +) -> Dict[MapLayer, trimesh.Trimesh]: + + # Dictionary to hold the output trimesh meshes. + output_trimesh_dict: Dict[MapLayer, trimesh.Trimesh] = {} + + # Unpack scene center for translation of map objects. + scene_center: Point3D = initial_ego_state.center.point_3d + scene_center_array = scene_center.array + scene_query_position = current_ego_state.center.point_3d + + # Load map objects within a certain radius around the scene center. + map_layers = [ + MapLayer.LANE_GROUP, + MapLayer.INTERSECTION, + MapLayer.WALKWAY, + MapLayer.CROSSWALK, + MapLayer.CARPARK, + MapLayer.GENERIC_DRIVABLE, + ] + map_api = scene.get_map_api() + if map_api is not None: + map_objects_dict = map_api.get_proximal_map_objects( + scene_query_position.point_2d, + radius=viser_config.map_radius, + layers=map_layers, + ) + + # Create trimesh meshes for each map layer. + for map_layer in map_objects_dict.keys(): + surface_meshes = [] + for map_surface in map_objects_dict[map_layer]: + map_surface: AbstractSurfaceMapObject + + trimesh_mesh = map_surface.trimesh_mesh + trimesh_mesh.vertices -= scene_center_array + + # Adjust height of non-road surfaces to avoid z-fighting in the visualization. + if map_layer in [ + MapLayer.WALKWAY, + MapLayer.CROSSWALK, + MapLayer.CARPARK, + ]: + trimesh_mesh.vertices[..., Point3DIndex.Z] += viser_config.map_non_road_z_offset + + # If the map does not have z-values, we place the surfaces on the ground level of the ego vehicle. + if not scene.log_metadata.map_metadata.map_has_z: + trimesh_mesh.vertices[..., Point3DIndex.Z] += ( + scene_query_position.z - initial_ego_state.vehicle_parameters.height / 2 + ) + + # Color the mesh based on the map layer type. 
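# NOTE (editor): a minimal standalone trimesh sketch of the coloring and concatenation
# done below; geometry and colors are illustrative only.
#
#     import trimesh
#
#     box_a = trimesh.creation.box(extents=(1.0, 1.0, 1.0))
#     box_a.visual.face_colors = (255, 0, 0, 255)  # RGBA, broadcast to every face
#     box_b = trimesh.creation.box(extents=(2.0, 2.0, 0.2))
#     box_b.visual.face_colors = (0, 0, 255, 100)
#     merged = trimesh.util.concatenate([box_a, box_b])  # one mesh, per-face colors kept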
+                trimesh_mesh.visual.face_colors = MAP_SURFACE_CONFIG[map_layer].fill_color.rgba
+                surface_meshes.append(trimesh_mesh)
+
+            output_trimesh_dict[map_layer] = trimesh.util.concatenate(surface_meshes)
+
+    return output_trimesh_dict
diff --git a/src/py123d/visualization/viser/elements/render_elements.py b/src/py123d/visualization/viser/elements/render_elements.py
new file mode 100644
index 00000000..f807033e
--- /dev/null
+++ b/src/py123d/visualization/viser/elements/render_elements.py
@@ -0,0 +1,74 @@
+import numpy as np
+
+from py123d.conversion.utils.sensor_utils.camera_conventions import convert_camera_convention
+from py123d.datatypes.scene.abstract_scene import AbstractScene
+from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3
+from py123d.geometry.geometry_index import StateSE3Index
+from py123d.geometry.rotation import EulerAngles
+from py123d.geometry.se import StateSE3
+from py123d.geometry.transform.transform_se3 import translate_se3_along_body_frame
+from py123d.geometry.vector import Vector3D
+
+
+def get_ego_3rd_person_view_position(
+    scene: AbstractScene,
+    iteration: int,
+    initial_ego_state: EgoStateSE3,
+) -> StateSE3:
+    scene_center_array = initial_ego_state.center.point_3d.array
+    ego_pose = scene.get_ego_state_at_iteration(iteration).rear_axle_se3.array
+    ego_pose[StateSE3Index.XYZ] -= scene_center_array
+    ego_pose_se3 = StateSE3.from_array(ego_pose)
+    ego_pose_se3 = translate_se3_along_body_frame(ego_pose_se3, Vector3D(-15.0, 0.0, 15.0))
+
+    # Pitch the camera down by 30 degrees for the chase view.
+    ego_pose_se3 = _pitch_se3_by_degrees(ego_pose_se3, 30.0)
+
+    return convert_camera_convention(
+        ego_pose_se3,
+        from_convention="pXpZmY",
+        to_convention="pZmYpX",
+    )
+
+
+def get_ego_bev_view_position(
+    scene: AbstractScene,
+    iteration: int,
+    initial_ego_state: EgoStateSE3,
+) -> StateSE3:
+    scene_center_array = initial_ego_state.center.point_3d.array
+    ego_center = scene.get_ego_state_at_iteration(iteration).center.array
+    ego_center[StateSE3Index.XYZ] -= scene_center_array
+    ego_center_planar = StateSE3.from_array(ego_center)
+
+    planar_euler_angles = EulerAngles(0.0, 0.0, ego_center_planar.euler_angles.yaw)
+    quaternion = planar_euler_angles.quaternion
+    ego_center_planar._array[StateSE3Index.QUATERNION] = quaternion.array
+
+    ego_center_planar = translate_se3_along_body_frame(ego_center_planar, Vector3D(0.0, 0.0, 50.0))
+    ego_center_planar = _pitch_se3_by_degrees(ego_center_planar, 90.0)
+
+    return convert_camera_convention(
+        ego_center_planar,
+        from_convention="pXpZmY",
+        to_convention="pZmYpX",
+    )
+
+
+def _pitch_se3_by_degrees(state_se3: StateSE3, degrees: float) -> StateSE3:
+
+    quaternion = EulerAngles(0.0, np.deg2rad(degrees), state_se3.yaw).quaternion
+
+    return StateSE3(
+        x=state_se3.x,
+        y=state_se3.y,
+        z=state_se3.z,
+        qw=quaternion.qw,
+        qx=quaternion.qx,
+        qy=quaternion.qy,
+        qz=quaternion.qz,
+    )
diff --git a/src/py123d/visualization/viser/elements/sensor_elements.py b/src/py123d/visualization/viser/elements/sensor_elements.py
new file mode 100644
index 00000000..2cc6bacb
--- /dev/null
+++ b/src/py123d/visualization/viser/elements/sensor_elements.py
@@ -0,0 +1,327 @@
+import concurrent.futures
+from typing import Dict, List, Optional, Tuple
+
+import cv2
+import numpy as np
+import numpy.typing as npt
+import viser
+
+from py123d.datatypes.scene.abstract_scene import AbstractScene
+from
py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraMetadata, FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraType +from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 +from py123d.geometry import StateSE3Index +from py123d.geometry.transform.transform_se3 import ( + convert_relative_to_absolute_points_3d_array, + convert_relative_to_absolute_se3_array, +) +from py123d.visualization.color.color import TAB_10 +from py123d.visualization.viser.viser_config import ViserConfig + + +def add_camera_frustums_to_viser_server( + scene: AbstractScene, + scene_interation: int, + initial_ego_state: EgoStateSE3, + viser_server: viser.ViserServer, + viser_config: ViserConfig, + camera_frustum_handles: Dict[PinholeCameraType, viser.CameraFrustumHandle], +) -> None: + + if viser_config.camera_frustum_visible: + scene_center_array = initial_ego_state.center.point_3d.array + ego_pose = scene.get_ego_state_at_iteration(scene_interation).rear_axle_se3.array + ego_pose[StateSE3Index.XYZ] -= scene_center_array + + def _add_camera_frustums_to_viser_server(camera_type: PinholeCameraType) -> None: + camera = scene.get_pinhole_camera_at_iteration(scene_interation, camera_type) + if camera is not None: + camera_position, camera_quaternion, camera_image = _get_camera_values( + camera, + ego_pose.copy(), + viser_config.camera_frustum_image_scale, + ) + if camera_type in camera_frustum_handles: + camera_frustum_handles[camera_type].position = camera_position + camera_frustum_handles[camera_type].wxyz = camera_quaternion + camera_frustum_handles[camera_type].image = camera_image + else: + camera_frustum_handles[camera_type] = viser_server.scene.add_camera_frustum( + f"camera_frustums/{camera_type.serialize()}", + fov=camera.metadata.fov_y, + aspect=camera.metadata.aspect_ratio, + scale=viser_config.camera_frustum_scale, + image=camera_image, + position=camera_position, + wxyz=camera_quaternion, + ) + + return None + + # NOTE; In order to speed up adding camera frustums, we use multithreading and resize the images. + with concurrent.futures.ThreadPoolExecutor(max_workers=len(viser_config.camera_frustum_types)) as executor: + future_to_camera = { + executor.submit(_add_camera_frustums_to_viser_server, camera_type): camera_type + for camera_type in viser_config.camera_frustum_types + } + for future in concurrent.futures.as_completed(future_to_camera): + _ = future.result() + + # TODO: Remove serial implementation, if not needed anymore. 
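# NOTE (editor): the executor block above is the standard stdlib fan-out/fan-in recipe;
# image loading and resizing are I/O- and C-extension-bound, so threads help despite the
# GIL. A minimal sketch of the same pattern (`load_image` and `paths` are hypothetical):
#
#     from concurrent.futures import ThreadPoolExecutor, as_completed
#
#     with ThreadPoolExecutor(max_workers=4) as pool:
#         futures = {pool.submit(load_image, path): path for path in paths}
#         for future in as_completed(futures):
#             future.result()  # re-raises any exception from the worker thread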
+    # for camera_type in viser_config.camera_frustum_types:
+    #     _add_camera_frustums_to_viser_server(camera_type)
+
+    return None
+
+
+def add_fisheye_frustums_to_viser_server(
+    scene: AbstractScene,
+    scene_interation: int,
+    initial_ego_state: EgoStateSE3,
+    viser_server: viser.ViserServer,
+    viser_config: ViserConfig,
+    fisheye_frustum_handles: Dict[FisheyeMEICameraType, viser.CameraFrustumHandle],
+) -> None:
+    if viser_config.fisheye_frustum_visible:
+        scene_center_array = initial_ego_state.center.point_3d.array
+        ego_pose = scene.get_ego_state_at_iteration(scene_interation).rear_axle_se3.array
+        ego_pose[StateSE3Index.XYZ] -= scene_center_array
+
+        def _add_fisheye_frustums_to_viser_server(fisheye_camera_type: FisheyeMEICameraType) -> None:
+            camera = scene.get_fisheye_mei_camera_at_iteration(scene_interation, fisheye_camera_type)
+            if camera is not None:
+                fcam_position, fcam_quaternion, fcam_image = _get_fisheye_camera_values(
+                    camera,
+                    ego_pose.copy(),
+                    viser_config.fisheye_frustum_image_scale,
+                )
+                if fisheye_camera_type in fisheye_frustum_handles:
+                    fisheye_frustum_handles[fisheye_camera_type].position = fcam_position
+                    fisheye_frustum_handles[fisheye_camera_type].wxyz = fcam_quaternion
+                    fisheye_frustum_handles[fisheye_camera_type].image = fcam_image
+                else:
+                    # NOTE @DanielDauner: The FOV is just taken as a static value here.
+                    # The function `calculate_fov` below could estimate it from the camera metadata instead.
+                    fisheye_frustum_handles[fisheye_camera_type] = viser_server.scene.add_camera_frustum(
+                        f"camera_frustums/{fisheye_camera_type.serialize()}",
+                        fov=np.deg2rad(185),  # vertical FOV; viser expects radians
+                        aspect=camera.metadata.aspect_ratio,
+                        scale=viser_config.fisheye_frustum_scale,
+                        image=fcam_image,
+                        position=fcam_position,
+                        wxyz=fcam_quaternion,
+                    )
+
+            return None
+
+        # NOTE: In order to speed up adding camera frustums, we use multithreading and resize the images.
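# NOTE (editor): a standalone sketch of the downscaling used for frustum textures (see
# `_rescale_image` below); cv2.resize expects dsize as (width, height), not (height, width).
#
#     import cv2
#     import numpy as np
#
#     image = np.zeros((1080, 1920, 3), dtype=np.uint8)  # dummy camera frame
#     scale = 0.25
#     small = cv2.resize(image, (int(1920 * scale), int(1080 * scale)), interpolation=cv2.INTER_LINEAR)
#     assert small.shape == (270, 480, 3)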
+        with concurrent.futures.ThreadPoolExecutor(
+            max_workers=len(viser_config.fisheye_mei_camera_frustum_types)
+        ) as executor:
+            future_to_camera = {
+                executor.submit(_add_fisheye_frustums_to_viser_server, fcam_type): fcam_type
+                for fcam_type in viser_config.fisheye_mei_camera_frustum_types
+            }
+            for future in concurrent.futures.as_completed(future_to_camera):
+                _ = future.result()
+
+    return None
+
+
+def add_camera_gui_to_viser_server(
+    scene: AbstractScene,
+    scene_interation: int,
+    viser_server: viser.ViserServer,
+    viser_config: ViserConfig,
+    camera_gui_handles: Dict[PinholeCameraType, viser.GuiImageHandle],
+) -> None:
+    if viser_config.camera_gui_visible:
+        for camera_type in viser_config.camera_gui_types:
+            camera = scene.get_pinhole_camera_at_iteration(scene_interation, camera_type)
+            if camera is not None:
+                if camera_type in camera_gui_handles:
+                    camera_gui_handles[camera_type].image = _rescale_image(
+                        camera.image, viser_config.camera_gui_image_scale
+                    )
+                else:
+                    with viser_server.gui.add_folder(f"Camera {camera_type.serialize()}"):
+                        camera_gui_handles[camera_type] = viser_server.gui.add_image(
+                            image=_rescale_image(camera.image, viser_config.camera_gui_image_scale),
+                            label=camera_type.serialize(),
+                        )
+
+
+def add_lidar_pc_to_viser_server(
+    scene: AbstractScene,
+    scene_interation: int,
+    initial_ego_state: EgoStateSE3,
+    viser_server: viser.ViserServer,
+    viser_config: ViserConfig,
+    lidar_pc_handles: Dict[LiDARType, Optional[viser.PointCloudHandle]],
+) -> None:
+    if viser_config.lidar_visible:
+
+        scene_center_array = initial_ego_state.center.point_3d.array
+        ego_pose = scene.get_ego_state_at_iteration(scene_interation).rear_axle_se3.array
+        ego_pose[StateSE3Index.XYZ] -= scene_center_array
+
+        def _load_lidar_points(lidar_type: LiDARType) -> npt.NDArray[np.float32]:
+            lidar = scene.get_lidar_at_iteration(scene_interation, lidar_type)
+            if lidar is not None:
+                return lidar.xyz
+            else:
+                return np.zeros((0, 3), dtype=np.float32)
+
+        with concurrent.futures.ThreadPoolExecutor(max_workers=len(viser_config.lidar_types)) as executor:
+            future_to_lidar = {
+                executor.submit(_load_lidar_points, lidar_type): lidar_type for lidar_type in viser_config.lidar_types
+            }
+            lidar_points_3d_list: List[npt.NDArray[np.float32]] = []
+            for future in concurrent.futures.as_completed(future_to_lidar):
+                lidar_points_3d_list.append(future.result())
+
+        points_3d_local = (
+            np.concatenate(lidar_points_3d_list, axis=0) if lidar_points_3d_list else np.zeros((0, 3), dtype=np.float32)
+        )
+
+        colors = []
+        for idx, points in enumerate(lidar_points_3d_list):
+            color = np.array(TAB_10[idx % len(TAB_10)].rgb, dtype=np.uint8)
+            colors.append(np.tile(color, (points.shape[0], 1)))
+        colors = np.vstack(colors) if colors else np.zeros((0, 3), dtype=np.uint8)
+
+        points = convert_relative_to_absolute_points_3d_array(ego_pose, points_3d_local)
+
+        if lidar_pc_handles[LiDARType.LIDAR_MERGED] is not None:
+            lidar_pc_handles[LiDARType.LIDAR_MERGED].points = points
+            lidar_pc_handles[LiDARType.LIDAR_MERGED].colors = colors
+        else:
+            lidar_pc_handles[LiDARType.LIDAR_MERGED] =
viser_server.scene.add_point_cloud(
+                "lidar_points",
+                points=points,
+                colors=colors,
+                point_size=viser_config.lidar_point_size,
+                point_shape=viser_config.lidar_point_shape,
+            )
+    elif lidar_pc_handles[LiDARType.LIDAR_MERGED] is not None:
+        lidar_pc_handles[LiDARType.LIDAR_MERGED].visible = False
+
+
+def _get_camera_values(
+    camera: PinholeCamera,
+    ego_pose: npt.NDArray[np.float64],
+    resize_factor: Optional[float] = None,
+) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    assert ego_pose.ndim == 1 and len(ego_pose) == len(StateSE3Index)
+
+    rel_camera_pose = camera.extrinsic.array
+    abs_camera_pose = convert_relative_to_absolute_se3_array(origin=ego_pose, se3_array=rel_camera_pose)
+
+    camera_position = abs_camera_pose[StateSE3Index.XYZ]
+    camera_rotation = abs_camera_pose[StateSE3Index.QUATERNION]
+
+    camera_image = _rescale_image(camera.image, resize_factor)
+    return camera_position, camera_rotation, camera_image
+
+
+def _get_fisheye_camera_values(
+    camera: FisheyeMEICamera,
+    ego_pose: npt.NDArray[np.float64],
+    resize_factor: Optional[float] = None,
+) -> Tuple[npt.NDArray[np.float64], npt.NDArray[np.float64], npt.NDArray[np.uint8]]:
+    assert ego_pose.ndim == 1 and len(ego_pose) == len(StateSE3Index)
+
+    rel_camera_pose = camera.extrinsic.array
+    abs_camera_pose = convert_relative_to_absolute_se3_array(origin=ego_pose, se3_array=rel_camera_pose)
+
+    camera_position = abs_camera_pose[StateSE3Index.XYZ]
+    camera_rotation = abs_camera_pose[StateSE3Index.QUATERNION]
+
+    camera_image = _rescale_image(camera.image, resize_factor)
+    return camera_position, camera_rotation, camera_image
+
+
+def _rescale_image(image: npt.NDArray[np.uint8], scale: Optional[float]) -> npt.NDArray[np.uint8]:
+    if scale is None or scale == 1.0:
+        return image
+    new_width = int(image.shape[1] * scale)
+    new_height = int(image.shape[0] * scale)
+    downscaled_image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)
+    return downscaled_image
+
+
+def calculate_fov(metadata: FisheyeMEICameraMetadata) -> tuple[float, float]:
+    """
+    Calculate horizontal and vertical FOV in degrees.
+
+    Returns:
+        (horizontal_fov, vertical_fov) in degrees
+    """
+    xi = metadata.mirror_parameter
+    gamma1 = metadata.projection.gamma1
+    gamma2 = metadata.projection.gamma2
+    u0 = metadata.projection.u0
+    v0 = metadata.projection.v0
+
+    width = metadata.width
+    height = metadata.height
+
+    # Calculate corner positions (furthest from principal point)
+    corners = np.array([[0, 0], [width, 0], [0, height], [width, height]])
+
+    # Convert to normalized coordinates
+    x_norm = (corners[:, 0] - u0) / gamma1
+    y_norm = (corners[:, 1] - v0) / gamma2
+
+    # For the MEI model, the inverse projection (ignoring distortion for this FOV estimate) is:
+    #   r² = x² + y²
+    #   θ = arctan(r / (1 - ξ·√(1 + r²)))
+    # From the MEI model: r = X / Z_s where Z_s = Z + ξ·√(X² + Y² + Z²).
+    # The simplified approximation below applies this per axis.
+
+    # Calculate horizontal and vertical FOV separately
+    x_max = np.max(np.abs(x_norm))
+    y_max = np.max(np.abs(y_norm))
+
+    if xi < 1e-6:  # Perspective camera
+        h_fov = 2 * np.arctan(x_max)
+        v_fov = 2 * np.arctan(y_max)
+    else:
+        denom_h = 1 - xi * np.sqrt(1 + x_max**2)
+        denom_v = 1 - xi * np.sqrt(1 + y_max**2)
+        h_fov = 2 * np.arctan2(x_max, denom_h)
+        v_fov = 2 * np.arctan2(y_max, denom_v)
+
+    # arctan/arctan2 return radians; the docstring promises degrees.
+    return np.degrees(h_fov), np.degrees(v_fov)
diff --git a/src/py123d/visualization/viser/viser_config.py b/src/py123d/visualization/viser/viser_config.py
new file mode 100644
index 00000000..be22f7f5
--- /dev/null
+++ b/src/py123d/visualization/viser/viser_config.py
@@ -0,0 +1,122 @@
+from dataclasses import dataclass, field
+from typing import List, Literal, Optional, Tuple, Union
+
+from py123d.common.utils.enums import SerialIntEnum
+from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraType
+from py123d.datatypes.sensors.lidar import LiDARType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType
+from py123d.visualization.color.color import ELLIS_5
+
+all_camera_types: List[PinholeCameraType] = [
+    PinholeCameraType.PCAM_F0,
+    PinholeCameraType.PCAM_B0,
+    PinholeCameraType.PCAM_L0,
+    PinholeCameraType.PCAM_L1,
+    PinholeCameraType.PCAM_L2,
+    PinholeCameraType.PCAM_R0,
+    PinholeCameraType.PCAM_R1,
+    PinholeCameraType.PCAM_R2,
+    PinholeCameraType.PCAM_STEREO_L,
+    PinholeCameraType.PCAM_STEREO_R,
+]
+
+all_lidar_types: List[LiDARType] = [
+    LiDARType.LIDAR_MERGED,
+    LiDARType.LIDAR_TOP,
+    LiDARType.LIDAR_FRONT,
+    LiDARType.LIDAR_SIDE_LEFT,
+    LiDARType.LIDAR_SIDE_RIGHT,
+    LiDARType.LIDAR_BACK,
+    LiDARType.LIDAR_DOWN,
+]
+
+
+@dataclass
+class ViserConfig:
+
+    # Server
+    server_host: str = "localhost"
+    server_port: int = 8080
+    server_label: str = "123D Viser Server"
+    server_verbose: bool = True
+
+    # Theme
+    theme_control_layout: Literal["floating", "collapsible", "fixed"] = "floating"
+    theme_control_width: Literal["small", "medium", "large"] = "large"
+    theme_dark_mode: bool = False
+    theme_show_logo: bool = True
+    theme_show_share_button: bool = True
+    theme_brand_color: Optional[Tuple[int, int, int]] = ELLIS_5[4].rgb
+
+    # Play Controls
+    is_playing: bool = False
+    playback_speed: float = 1.0  # Multiplier for real-time speed
+
+    # Map
+    map_visible: bool = True
+    map_radius: float = 200.0  # [m]
+    map_non_road_z_offset: float = 0.1  # small z-translation to place crosswalks, parking, etc. on top of the road
+    map_requery: bool = True  # Re-query map when ego vehicle moves out of current map bounds
+
+    # Bounding boxes
+    bounding_box_visible: bool = True
+    bounding_box_type: Literal["mesh", "lines"] = "mesh"
+    bounding_box_line_width: float = 4.0
+
+    # Pinhole Cameras
+    # -> Frustum
+    camera_frustum_visible: bool = True
+    camera_frustum_types: List[PinholeCameraType] = field(default_factory=lambda: all_camera_types.copy())
+    camera_frustum_scale: float = 1.0
+    camera_frustum_image_scale: float = 0.25  # Resize factor for the camera image shown on the frustum (<1.0 for speed)
+
+    # -> GUI
+    camera_gui_visible: bool = True
+    camera_gui_types: List[PinholeCameraType] = field(default_factory=lambda: [PinholeCameraType.PCAM_F0].copy())
+    camera_gui_image_scale: float = 0.25  # Resize factor for the camera image shown in the GUI (<1.0 for speed)
+
+    # Fisheye MEI Cameras
+    # -> Frustum
+    fisheye_frustum_visible: bool = True
+    fisheye_mei_camera_frustum_visible: bool = True
+    fisheye_mei_camera_frustum_types: List[FisheyeMEICameraType] = field(
+        default_factory=lambda: [fcam for fcam in FisheyeMEICameraType]
+    )
+    fisheye_frustum_scale: float = 1.0
+    fisheye_frustum_image_scale: float = 0.25  # Resize factor for the camera image shown on the frustum
+
+    # LiDAR
+    lidar_visible: bool = True
+    lidar_types: List[LiDARType] = field(default_factory=lambda: all_lidar_types.copy())
+    lidar_point_size: float = 0.05
+    lidar_point_shape: Literal["square", "diamond", "circle", "rounded", "sparkle"] = "circle"
+
+    # internal use
+    _force_map_update: bool = False
+
+    def __post_init__(self):
+        def _resolve_enum_arguments(
+            serial_enum_cls: SerialIntEnum, input: Optional[List[Union[int, str, SerialIntEnum]]]
+        ) -> Optional[List[SerialIntEnum]]:
+
+            if input is None:
+                return None
+            assert isinstance(input, list), f"input must be a list of {serial_enum_cls.__name__}"
+            return [serial_enum_cls.from_arbitrary(value) for value in input]
+
+        self.camera_frustum_types = _resolve_enum_arguments(
+            PinholeCameraType,
+            self.camera_frustum_types,
+        )
+        self.camera_gui_types = _resolve_enum_arguments(
+            PinholeCameraType,
+            self.camera_gui_types,
+        )
+        self.fisheye_mei_camera_frustum_types = _resolve_enum_arguments(
+            FisheyeMEICameraType,
+            self.fisheye_mei_camera_frustum_types,
+        )
+        self.lidar_types = _resolve_enum_arguments(
+            LiDARType,
+            self.lidar_types,
+        )
diff --git a/src/py123d/visualization/viser/viser_viewer.py b/src/py123d/visualization/viser/viser_viewer.py
new file mode 100644
index 00000000..1fb5559e
--- /dev/null
+++ b/src/py123d/visualization/viser/viser_viewer.py
@@ -0,0 +1,399 @@
+import io
+import logging
+import time
+from typing import Dict, List, Optional, Union
+
+import imageio.v3 as iio
+import viser
+from tqdm import tqdm
+from viser.theme import TitlebarButton, TitlebarConfig, TitlebarImage
+
+from py123d.datatypes.maps.map_datatypes import MapLayer
+from py123d.datatypes.scene.abstract_scene import AbstractScene
+from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraType
+from py123d.datatypes.sensors.lidar import LiDARType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType
+from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3
+from py123d.visualization.viser.elements import (
+    add_box_detections_to_viser_server,
+    add_camera_frustums_to_viser_server,
+    add_camera_gui_to_viser_server,
+    add_lidar_pc_to_viser_server,
+    add_map_to_viser_server,
+)
+from
py123d.visualization.viser.elements.render_elements import ( + get_ego_3rd_person_view_position, + get_ego_bev_view_position, +) +from py123d.visualization.viser.elements.sensor_elements import add_fisheye_frustums_to_viser_server +from py123d.visualization.viser.viser_config import ViserConfig + +logger = logging.getLogger(__name__) + + +def _build_viser_server(viser_config: ViserConfig) -> viser.ViserServer: + server = viser.ViserServer( + host=viser_config.server_host, + port=viser_config.server_port, + label=viser_config.server_label, + verbose=viser_config.server_verbose, + ) + + buttons = ( + TitlebarButton( + text="Getting Started", + icon=None, + href="https://danieldauner.github.io/py123d", + ), + TitlebarButton( + text="Github", + icon="GitHub", + href="https://github.com/DanielDauner/py123d", + ), + TitlebarButton( + text="Documentation", + icon="Description", + href="https://danieldauner.github.io/py123d", + ), + ) + image = TitlebarImage( + image_url_light="https://danieldauner.github.io/py123d/_static/logo_black.png", + image_url_dark="https://danieldauner.github.io/py123d/_static/logo_white.png", + image_alt="123D", + href="https://danieldauner.github.io/py123d/", + ) + titlebar_theme = TitlebarConfig(buttons=buttons, image=image) + + server.gui.configure_theme( + titlebar_content=titlebar_theme, + control_layout=viser_config.theme_control_layout, + control_width=viser_config.theme_control_width, + dark_mode=viser_config.theme_dark_mode, + show_logo=viser_config.theme_show_logo, + show_share_button=viser_config.theme_show_share_button, + brand_color=viser_config.theme_brand_color, + ) + return server + + +class ViserViewer: + def __init__( + self, + scenes: List[AbstractScene], + viser_config: ViserConfig = ViserConfig(), + scene_index: int = 0, + ) -> None: + assert len(scenes) > 0, "At least one scene must be provided." 
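# NOTE (editor): typical usage, mirroring run_viser.py above (a sketch; the port is
# illustrative and the scenes would come from a scene builder):
#
#     scenes = scene_builder.get_scenes(scene_filter, worker=worker)
#     viewer = ViserViewer(scenes=scenes, viser_config=ViserConfig(server_port=8080))
#     # __init__ builds the server and wires the GUI for scenes[scene_index];
#     # next() advances to the following scene.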
+ + self._scenes = scenes + self._viser_config = viser_config + self._scene_index = scene_index + + self._viser_server = _build_viser_server(self._viser_config) + self.set_scene(self._scenes[self._scene_index % len(self._scenes)]) + + def next(self) -> None: + self._viser_server.flush() + self._viser_server.gui.reset() + self._viser_server.scene.reset() + self._scene_index = (self._scene_index + 1) % len(self._scenes) + self.set_scene(self._scenes[self._scene_index]) + + def set_scene(self, scene: AbstractScene) -> None: + num_frames = scene.number_of_iterations + initial_ego_state: EgoStateSE3 = scene.get_ego_state_at_iteration(0) + server_playing = True + server_rendering = False + + with self._viser_server.gui.add_folder("Playback"): + gui_info = self._viser_server.gui.add_markdown(content=_get_scene_info_markdown(scene)) + + gui_timestep = self._viser_server.gui.add_slider( + "Timestep", + min=0, + max=num_frames - 1, + step=1, + initial_value=0, + disabled=True, + ) + gui_next_frame = self._viser_server.gui.add_button("Next Frame", disabled=True) + gui_prev_frame = self._viser_server.gui.add_button("Prev Frame", disabled=True) + gui_next_scene = self._viser_server.gui.add_button("Next Scene", disabled=False) + gui_playing = self._viser_server.gui.add_checkbox("Playing", self._viser_config.is_playing) + gui_speed = self._viser_server.gui.add_slider( + "Playback speed", min=0.1, max=10.0, step=0.1, initial_value=self._viser_config.playback_speed + ) + gui_speed_options = self._viser_server.gui.add_button_group( + "Options.", ("0.5", "1.0", "2.0", "5.0", "10.0") + ) + + with self._viser_server.gui.add_folder("Modalities", expand_by_default=True): + modalities_map_visible = self._viser_server.gui.add_checkbox("Map", self._viser_config.map_visible) + modalities_bounding_box_visible = self._viser_server.gui.add_checkbox( + "Bounding Boxes", self._viser_config.bounding_box_visible + ) + modalities_camera_frustum_visible = self._viser_server.gui.add_checkbox( + "Camera Frustums", self._viser_config.camera_frustum_visible + ) + modalities_lidar_visible = self._viser_server.gui.add_checkbox("Lidar", self._viser_config.lidar_visible) + + with self._viser_server.gui.add_folder("Options", expand_by_default=True): + option_bounding_box_type = self._viser_server.gui.add_dropdown( + "Bounding Box Type", ("mesh", "lines"), initial_value=self._viser_config.bounding_box_type + ) + options_map_radius_slider = self._viser_server.gui.add_slider( + "Map Radius", min=10.0, max=1000.0, step=1.0, initial_value=self._viser_config.map_radius + ) + options_map_radius_options = self._viser_server.gui.add_button_group( + "Map Radius Options.", ("25", "50", "100", "500") + ) + + with self._viser_server.gui.add_folder("Render", expand_by_default=False): + render_format = self._viser_server.gui.add_dropdown("Format", ["gif", "mp4"], initial_value="mp4") + render_view = self._viser_server.gui.add_dropdown( + "View", ["3rd Person", "BEV", "Manual"], initial_value="3rd Person" + ) + render_button = self._viser_server.gui.add_button("Render Scene") + + # Options: + @modalities_map_visible.on_update + def _(_) -> None: + for map_handle in map_handles.values(): + map_handle.visible = modalities_map_visible.value + self._viser_config.map_visible = modalities_map_visible.value + + @modalities_bounding_box_visible.on_update + def _(_) -> None: + if box_detection_handles["lines"] is not None: + box_detection_handles["lines"].visible = modalities_bounding_box_visible.value + if box_detection_handles["mesh"] is not None: + 
box_detection_handles["mesh"].visible = modalities_bounding_box_visible.value + self._viser_config.bounding_box_visible = modalities_bounding_box_visible.value + + @modalities_camera_frustum_visible.on_update + def _(_) -> None: + for frustum_handle in camera_frustum_handles.values(): + frustum_handle.visible = modalities_camera_frustum_visible.value + self._viser_config.camera_frustum_visible = modalities_camera_frustum_visible.value + + @modalities_lidar_visible.on_update + def _(_) -> None: + for lidar_pc_handle in lidar_pc_handles.values(): + if lidar_pc_handle is not None: + lidar_pc_handle.visible = modalities_lidar_visible.value + self._viser_config.lidar_visible = modalities_lidar_visible.value + + @option_bounding_box_type.on_update + def _(_) -> None: + self._viser_config.bounding_box_type = option_bounding_box_type.value + + @options_map_radius_slider.on_update + def _(_) -> None: + self._viser_config.map_radius = options_map_radius_slider.value + self._viser_config._force_map_update = True + + @options_map_radius_options.on_click + def _(_) -> None: + options_map_radius_slider.value = float(options_map_radius_options.value) + self._viser_config._force_map_update = True + + # Frame step buttons. + @gui_next_frame.on_click + def _(_) -> None: + gui_timestep.value = (gui_timestep.value + 1) % num_frames + + @gui_prev_frame.on_click + def _(_) -> None: + gui_timestep.value = (gui_timestep.value - 1) % num_frames + + @gui_next_scene.on_click + def _(_) -> None: + nonlocal server_playing + server_playing = False + + # Disable frame controls when we're playing. + @gui_playing.on_update + def _(_) -> None: + gui_timestep.disabled = gui_playing.value + gui_next_frame.disabled = gui_playing.value + gui_prev_frame.disabled = gui_playing.value + self._viser_config.is_playing = gui_playing.value + + # Set the framerate when we click one of the options. + @gui_speed_options.on_click + def _(_) -> None: + gui_speed.value = float(gui_speed_options.value) + + # Toggle frame visibility when the timestep slider changes. 
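+        # NOTE: Rather than toggling cached per-frame nodes, the handler below re-draws boxes, frustums, lidar, and map for the selected timestep.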
+        @gui_timestep.on_update
+        def _(_) -> None:
+            start = time.perf_counter()
+            add_box_detections_to_viser_server(
+                scene,
+                gui_timestep.value,
+                initial_ego_state,
+                self._viser_server,
+                self._viser_config,
+                box_detection_handles,
+            )
+            add_camera_frustums_to_viser_server(
+                scene,
+                gui_timestep.value,
+                initial_ego_state,
+                self._viser_server,
+                self._viser_config,
+                camera_frustum_handles,
+            )
+            add_camera_gui_to_viser_server(
+                scene,
+                gui_timestep.value,
+                self._viser_server,
+                self._viser_config,
+                camera_gui_handles,
+            )
+            add_fisheye_frustums_to_viser_server(
+                scene,
+                gui_timestep.value,
+                initial_ego_state,
+                self._viser_server,
+                self._viser_config,
+                fisheye_frustum_handles,
+            )
+            add_lidar_pc_to_viser_server(
+                scene,
+                gui_timestep.value,
+                initial_ego_state,
+                self._viser_server,
+                self._viser_config,
+                lidar_pc_handles,
+            )
+            add_map_to_viser_server(
+                scene,
+                gui_timestep.value,
+                initial_ego_state,
+                self._viser_server,
+                self._viser_config,
+                map_handles,
+            )
+            rendering_time = time.perf_counter() - start
+
+            # Calculate sleep time based on the playback-speed factor.
+            base_frame_time = scene.log_metadata.timestep_seconds
+            target_frame_time = base_frame_time / gui_speed.value
+            sleep_time = target_frame_time - rendering_time
+
+            if sleep_time > 0 and not server_rendering:
+                time.sleep(sleep_time)
+
+        @render_button.on_click
+        def _(event: viser.GuiEvent) -> None:
+            nonlocal server_rendering
+            client = event.client
+            assert client is not None
+
+            client.scene.reset()
+
+            server_rendering = True
+            images = []
+
+            for i in tqdm(range(scene.number_of_iterations)):
+                gui_timestep.value = i
+                if render_view.value == "BEV":
+                    ego_view = get_ego_bev_view_position(scene, i, initial_ego_state)
+                    client.camera.position = ego_view.point_3d.array
+                    client.camera.wxyz = ego_view.quaternion.array
+                elif render_view.value == "3rd Person":
+                    ego_view = get_ego_3rd_person_view_position(scene, i, initial_ego_state)
+                    client.camera.position = ego_view.point_3d.array
+                    client.camera.wxyz = ego_view.quaternion.array
+                images.append(client.get_render(height=1080, width=1920))
+            file_format = render_format.value
+            buffer = io.BytesIO()
+            if file_format == "gif":
+                iio.imwrite(buffer, images, extension=".gif", loop=False)
+            elif file_format == "mp4":
+                iio.imwrite(buffer, images, extension=".mp4", fps=20)
+            content = buffer.getvalue()
+            scene_name = f"{scene.log_metadata.split}_{scene.uuid}"
+            client.send_file_download(f"{scene_name}.{file_format}", content, save_immediately=True)
+            server_rendering = False
+
+        box_detection_handles: Dict[str, Union[viser.GlbHandle, viser.LineSegmentsHandle]] = {
+            "mesh": None,
+            "lines": None,
+        }
+        camera_frustum_handles: Dict[PinholeCameraType, viser.CameraFrustumHandle] = {}
+        fisheye_frustum_handles: Dict[FisheyeMEICameraType, viser.CameraFrustumHandle] = {}
+        camera_gui_handles: Dict[PinholeCameraType, viser.GuiImageHandle] = {}
+        lidar_pc_handles: Dict[LiDARType, Optional[viser.PointCloudHandle]] = {LiDARType.LIDAR_MERGED: None}
+        map_handles: Dict[MapLayer, viser.MeshHandle] = {}
+
+        add_box_detections_to_viser_server(
+            scene,
+            gui_timestep.value,
+            initial_ego_state,
+            self._viser_server,
+            self._viser_config,
+            box_detection_handles,
+        )
+        add_camera_frustums_to_viser_server(
+            scene,
+            gui_timestep.value,
+            initial_ego_state,
+            self._viser_server,
+            self._viser_config,
+            camera_frustum_handles,
+        )
+        add_camera_gui_to_viser_server(
+            scene,
+            gui_timestep.value,
+            self._viser_server,
+            self._viser_config,
+            
camera_gui_handles,
+        )
+        add_fisheye_frustums_to_viser_server(
+            scene,
+            gui_timestep.value,
+            initial_ego_state,
+            self._viser_server,
+            self._viser_config,
+            fisheye_frustum_handles,
+        )
+        add_lidar_pc_to_viser_server(
+            scene,
+            gui_timestep.value,
+            initial_ego_state,
+            self._viser_server,
+            self._viser_config,
+            lidar_pc_handles,
+        )
+        add_map_to_viser_server(
+            scene,
+            gui_timestep.value,
+            initial_ego_state,
+            self._viser_server,
+            self._viser_config,
+            map_handles,
+        )
+
+        # Playback update loop.
+        while server_playing:
+            if gui_playing.value and not server_rendering:
+                gui_timestep.value = (gui_timestep.value + 1) % num_frames
+            else:
+                # Avoid busy-waiting while playback is paused or a render is in progress.
+                time.sleep(0.05)
+
+            # Keep the config in sync with the GUI.
+            self._viser_config.playback_speed = gui_speed.value
+
+            self._viser_server.flush()
+        self.next()
+
+
+def _get_scene_info_markdown(scene: AbstractScene) -> str:
+    markdown = f"""
+    - Dataset: {scene.log_metadata.split}
+    - Location: {scene.log_metadata.location if scene.log_metadata.location else 'N/A'}
+    - UUID: {scene.uuid}
+    """
+    return markdown
diff --git a/d123/simulation/gym/environment/scenario_sampler/__init__.py b/tests/unit/__init__.py
similarity index 100%
rename from d123/simulation/gym/environment/scenario_sampler/__init__.py
rename to tests/unit/__init__.py
diff --git a/d123/simulation/gym/environment/simulation_builder/__init__.py b/tests/unit/common/__init__.py
similarity index 100%
rename from d123/simulation/gym/environment/simulation_builder/__init__.py
rename to tests/unit/common/__init__.py
diff --git a/d123/simulation/gym/policy/__init__.py b/tests/unit/common/multithreading/__init__.py
similarity index 100%
rename from d123/simulation/gym/policy/__init__.py
rename to tests/unit/common/multithreading/__init__.py
diff --git a/d123/simulation/gym/policy/ppo/__init__.py b/tests/unit/common/utils/__init__.py
similarity index 100%
rename from d123/simulation/gym/policy/ppo/__init__.py
rename to tests/unit/common/utils/__init__.py
diff --git a/d123/simulation/metrics/__init__.py b/tests/unit/conversion/__init__.py
similarity index 100%
rename from d123/simulation/metrics/__init__.py
rename to tests/unit/conversion/__init__.py
diff --git a/d123/dataset/dataset_specific/nuscenes/.gitkeep b/tests/unit/datatypes/.gitkeep
similarity index 100%
rename from d123/dataset/dataset_specific/nuscenes/.gitkeep
rename to tests/unit/datatypes/.gitkeep
diff --git a/d123/simulation/metrics/sim_agents/__init__.py b/tests/unit/datatypes/__init__.py
similarity index 100%
rename from d123/simulation/metrics/sim_agents/__init__.py
rename to tests/unit/datatypes/__init__.py
diff --git a/d123/simulation/observation/__init__.py b/tests/unit/datatypes/detections/__init__.py
similarity index 100%
rename from d123/simulation/observation/__init__.py
rename to tests/unit/datatypes/detections/__init__.py
diff --git a/d123/simulation/planning/__init__.py b/tests/unit/datatypes/maps/__init__.py
similarity index 100%
rename from d123/simulation/planning/__init__.py
rename to tests/unit/datatypes/maps/__init__.py
diff --git a/d123/simulation/planning/planner_output/__init__.py b/tests/unit/datatypes/sensors/__init__.py
similarity index 100%
rename from d123/simulation/planning/planner_output/__init__.py
rename to tests/unit/datatypes/sensors/__init__.py
diff --git a/d123/simulation/time_controller/__init__.py b/tests/unit/datatypes/time/__init__.py
similarity index 100%
rename from d123/simulation/time_controller/__init__.py
rename to tests/unit/datatypes/time/__init__.py
diff --git a/d123/training/__init__.py 
b/tests/unit/datatypes/vehicle_state/__init__.py similarity index 100% rename from d123/training/__init__.py rename to tests/unit/datatypes/vehicle_state/__init__.py diff --git a/d123/training/models/__init__.py b/tests/unit/geometry/__init__.py similarity index 100% rename from d123/training/models/__init__.py rename to tests/unit/geometry/__init__.py diff --git a/tests/unit/geometry/test_bounding_box.py b/tests/unit/geometry/test_bounding_box.py new file mode 100644 index 00000000..34c24fc6 --- /dev/null +++ b/tests/unit/geometry/test_bounding_box.py @@ -0,0 +1,220 @@ +import unittest + +import numpy as np +import shapely.geometry as geom + +from py123d.common.utils.mixin import ArrayMixin +from py123d.geometry import BoundingBoxSE2, BoundingBoxSE3, Point2D, Point3D, StateSE2, StateSE3 +from py123d.geometry.geometry_index import ( + BoundingBoxSE2Index, + BoundingBoxSE3Index, + Corners2DIndex, + Corners3DIndex, + Point2DIndex, +) + + +class TestBoundingBoxSE2(unittest.TestCase): + """Unit tests for BoundingBoxSE2 class.""" + + def setUp(self): + """Set up test fixtures.""" + self.center = StateSE2(1.0, 2.0, 0.5) + self.length = 4.0 + self.width = 2.0 + self.bbox = BoundingBoxSE2(self.center, self.length, self.width) + + def test_init(self): + """Test BoundingBoxSE2 initialization.""" + bbox = BoundingBoxSE2(self.center, self.length, self.width) + self.assertEqual(bbox.length, self.length) + self.assertEqual(bbox.width, self.width) + np.testing.assert_array_equal(bbox.center.array, self.center.array) + + def test_from_array(self): + """Test BoundingBoxSE2.from_array method.""" + array = np.array([1.0, 2.0, 0.5, 4.0, 2.0]) + bbox = BoundingBoxSE2.from_array(array) + np.testing.assert_array_equal(bbox.array, array) + + def test_from_array_copy(self): + """Test BoundingBoxSE2.from_array with copy parameter.""" + array = np.array([1.0, 2.0, 0.5, 4.0, 2.0]) + bbox_copy = BoundingBoxSE2.from_array(array, copy=True) + bbox_no_copy = BoundingBoxSE2.from_array(array, copy=False) + + array[0] = 999.0 + self.assertNotEqual(bbox_copy.array[0], 999.0) + self.assertEqual(bbox_no_copy.array[0], 999.0) + + def test_properties(self): + """Test BoundingBoxSE2 properties.""" + self.assertEqual(self.bbox.length, self.length) + self.assertEqual(self.bbox.width, self.width) + np.testing.assert_array_equal(self.bbox.center.array, self.center.array) + np.testing.assert_array_equal(self.bbox.center_se2.array, self.center.array) + + def test_array_property(self): + """Test array property.""" + expected = np.array([1.0, 2.0, 0.5, 4.0, 2.0]) + np.testing.assert_array_equal(self.bbox.array, expected) + + def test_array_mixin(self): + """Test that BoundingBoxSE2 is an instance of ArrayMixin.""" + self.assertIsInstance(self.bbox, ArrayMixin) + + expected = np.array([1.0, 2.0, 0.5, 4.0, 2.0], dtype=np.float16) + output_array = np.array(self.bbox, dtype=np.float16) + np.testing.assert_array_equal(output_array, expected) + self.assertEqual(output_array.dtype, np.float16) + self.assertEqual(output_array.shape, (len(BoundingBoxSE2Index),)) + + def test_bounding_box_se2_property(self): + """Test bounding_box_se2 property returns self.""" + self.assertIs(self.bbox.bounding_box_se2, self.bbox) + + def test_corners_array(self): + """Test corners_array property.""" + corners = self.bbox.corners_array + self.assertEqual(corners.shape, (len(Corners2DIndex), len(Point2DIndex))) + self.assertIsInstance(corners, np.ndarray) + + def test_corners_dict(self): + """Test corners_dict property.""" + corners_dict = self.bbox.corners_dict + 
self.assertEqual(len(corners_dict), len(Corners2DIndex)) + for index in Corners2DIndex: + self.assertIn(index, corners_dict) + self.assertIsInstance(corners_dict[index], Point2D) + + def test_shapely_polygon(self): + """Test shapely_polygon property.""" + polygon = self.bbox.shapely_polygon + self.assertIsInstance(polygon, geom.Polygon) + self.assertAlmostEqual(polygon.area, self.length * self.width) + + def test_array_assertions(self): + """Test array assertions in from_array.""" + # Test 2D array + with self.assertRaises(AssertionError): + BoundingBoxSE2.from_array(np.array([[1, 2, 3, 4, 5]])) + + # Test wrong size + with self.assertRaises(AssertionError): + BoundingBoxSE2.from_array(np.array([1, 2, 3, 4])) + + +class TestBoundingBoxSE3(unittest.TestCase): + """Unit tests for BoundingBoxSE3 class.""" + + def setUp(self): + """Set up test fixtures.""" + self.array = np.array([1.0, 2.0, 3.0, 0.98185617, 0.06407135, 0.09115755, 0.1534393, 4.0, 2.0, 1.5]) + self.center = StateSE3(1.0, 2.0, 3.0, 0.98185617, 0.06407135, 0.09115755, 0.1534393) + self.length = 4.0 + self.width = 2.0 + self.height = 1.5 + self.bbox = BoundingBoxSE3(self.center, self.length, self.width, self.height) + + def test_init(self): + """Test BoundingBoxSE3 initialization.""" + bbox = BoundingBoxSE3(self.center, self.length, self.width, self.height) + self.assertEqual(bbox.length, self.length) + self.assertEqual(bbox.width, self.width) + self.assertEqual(bbox.height, self.height) + np.testing.assert_array_equal(bbox.center.array, self.center.array) + + def test_from_array(self): + """Test BoundingBoxSE3.from_array method.""" + array = self.array.copy() + bbox = BoundingBoxSE3.from_array(array) + np.testing.assert_array_equal(bbox.array, array) + + def test_from_array_copy(self): + """Test BoundingBoxSE3.from_array with copy parameter.""" + array = self.array.copy() + bbox_copy = BoundingBoxSE3.from_array(array, copy=True) + bbox_no_copy = BoundingBoxSE3.from_array(array, copy=False) + + array[0] = 999.0 + self.assertNotEqual(bbox_copy.array[0], 999.0) + self.assertEqual(bbox_no_copy.array[0], 999.0) + + def test_properties(self): + """Test BoundingBoxSE3 properties.""" + self.assertEqual(self.bbox.length, self.length) + self.assertEqual(self.bbox.width, self.width) + self.assertEqual(self.bbox.height, self.height) + np.testing.assert_array_equal(self.bbox.center.array, self.center.array) + np.testing.assert_array_equal(self.bbox.center_se3.array, self.center.array) + + def test_array_property(self): + """Test array property.""" + expected = self.array.copy() + np.testing.assert_array_equal(self.bbox.array, expected) + + def test_array_mixin(self): + """Test that BoundingBoxSE3 is an instance of ArrayMixin.""" + self.assertIsInstance(self.bbox, ArrayMixin) + + expected = np.array(self.array, dtype=np.float16) + output_array = np.array(self.bbox, dtype=np.float16) + np.testing.assert_array_equal(output_array, expected) + self.assertEqual(output_array.dtype, np.float16) + self.assertEqual(output_array.shape, (len(BoundingBoxSE3Index),)) + + def test_bounding_box_se2_property(self): + """Test bounding_box_se2 property.""" + bbox_2d = self.bbox.bounding_box_se2 + self.assertIsInstance(bbox_2d, BoundingBoxSE2) + self.assertEqual(bbox_2d.length, self.length) + self.assertEqual(bbox_2d.width, self.width) + self.assertEqual(bbox_2d.center.x, self.center.x) + self.assertEqual(bbox_2d.center.y, self.center.y) + self.assertEqual(bbox_2d.center.yaw, self.center.euler_angles.yaw) + + def test_corners_array(self): + """Test corners_array 
property.""" + corners = self.bbox.corners_array + self.assertEqual(corners.shape, (8, 3)) + self.assertIsInstance(corners, np.ndarray) + + def test_corners_dict(self): + """Test corners_dict property.""" + corners_dict = self.bbox.corners_dict + self.assertEqual(len(corners_dict), 8) + for index in Corners3DIndex: + self.assertIn(index, corners_dict) + self.assertIsInstance(corners_dict[index], Point3D) + + def test_shapely_polygon(self): + """Test shapely_polygon property.""" + polygon = self.bbox.shapely_polygon + self.assertIsInstance(polygon, geom.Polygon) + self.assertAlmostEqual(polygon.area, self.length * self.width) + + def test_array_assertions(self): + """Test array assertions in from_array.""" + # Test 2D array + with self.assertRaises(AssertionError): + BoundingBoxSE3.from_array(np.array([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])) + + # Test wrong size, less than required + with self.assertRaises(AssertionError): + BoundingBoxSE3.from_array(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9])) + + # Test wrong size, greater than required + with self.assertRaises(AssertionError): + BoundingBoxSE3.from_array(np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])) + + def test_zero_dimensions(self): + """Test bounding box with zero dimensions.""" + center = StateSE3(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0) + bbox = BoundingBoxSE3(center, 0.0, 0.0, 0.0) + self.assertEqual(bbox.length, 0.0) + self.assertEqual(bbox.width, 0.0) + self.assertEqual(bbox.height, 0.0) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/geometry/test_occupancy_map.py b/tests/unit/geometry/test_occupancy_map.py new file mode 100644 index 00000000..2390300d --- /dev/null +++ b/tests/unit/geometry/test_occupancy_map.py @@ -0,0 +1,285 @@ +import unittest + +import numpy as np +import shapely.geometry as geom + +from py123d.geometry import OccupancyMap2D + + +class TestOccupancyMap2D(unittest.TestCase): + """Unit tests for OccupancyMap2D class.""" + + def setUp(self): + """Set up test fixtures with various geometries.""" + self.square1 = geom.Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]) + self.square2 = geom.Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]) + self.circle = geom.Point(1, 1).buffer(0.5) + self.line = geom.LineString([(0, 0), (1, 1)]) + + self.geometries = [self.square1, self.square2, self.circle, self.line] + self.string_ids = ["square1", "square2", "circle", "line"] + self.int_ids = [1, 2, 3, 4] + + def test_init_with_default_ids(self): + """Test initialization with default string IDs.""" + occ_map = OccupancyMap2D(self.geometries) + + self.assertEqual(len(occ_map), 4) + self.assertEqual(occ_map.ids, ["0", "1", "2", "3"]) + self.assertEqual(len(occ_map.geometries), 4) + + def test_init_with_string_ids(self): + """Test initialization with custom string IDs.""" + occ_map = OccupancyMap2D(self.geometries, ids=self.string_ids) + + self.assertEqual(len(occ_map), 4) + self.assertEqual(occ_map.ids, self.string_ids) + self.assertEqual(occ_map["square1"], self.square1) + + def test_init_with_int_ids(self): + """Test initialization with integer IDs.""" + occ_map = OccupancyMap2D(self.geometries, ids=self.int_ids) + + self.assertEqual(len(occ_map), 4) + self.assertEqual(occ_map.ids, self.int_ids) + self.assertEqual(occ_map[1], self.square1) + + def test_init_with_mismatched_ids_length(self): + """Test that initialization fails with mismatched IDs length.""" + with self.assertRaises(AssertionError): + OccupancyMap2D(self.geometries, ids=["id1", "id2"]) + + def test_init_with_custom_node_capacity(self): + """Test initialization with 
custom node capacity.""" + occ_map = OccupancyMap2D(self.geometries, node_capacity=5) + self.assertEqual(occ_map._node_capacity, 5) + + def test_from_dict_constructor(self): + """Test construction from dictionary.""" + geometry_dict = {"square": self.square1, "circle": self.circle, "line": self.line} + + occ_map = OccupancyMap2D.from_dict(geometry_dict) + + self.assertEqual(len(occ_map), 3) + self.assertEqual(set(occ_map.ids), set(["square", "circle", "line"])) + self.assertEqual(occ_map["square"], self.square1) + + def test_getitem_string_id(self): + """Test geometry retrieval by string ID.""" + occ_map = OccupancyMap2D(self.geometries, ids=self.string_ids) + + self.assertEqual(occ_map["square1"], self.square1) + self.assertEqual(occ_map["circle"], self.circle) + + def test_getitem_int_id(self): + """Test geometry retrieval by integer ID.""" + occ_map = OccupancyMap2D(self.geometries, ids=self.int_ids) + + self.assertEqual(occ_map[1], self.square1) + self.assertEqual(occ_map[3], self.circle) + + def test_getitem_invalid_id(self): + """Test that invalid ID raises KeyError.""" + occ_map = OccupancyMap2D(self.geometries, ids=self.string_ids) + + with self.assertRaises(KeyError): + _ = occ_map["nonexistent"] + + def test_len(self): + """Test length property.""" + occ_map = OccupancyMap2D(self.geometries) + self.assertEqual(len(occ_map), 4) + + empty_map = OccupancyMap2D([]) + self.assertEqual(len(empty_map), 0) + + def test_ids_property(self): + """Test IDs property getter.""" + occ_map = OccupancyMap2D(self.geometries, ids=self.string_ids) + self.assertEqual(occ_map.ids, self.string_ids) + + def test_geometries_property(self): + """Test geometries property getter.""" + occ_map = OccupancyMap2D(self.geometries) + self.assertEqual(list(occ_map.geometries), self.geometries) + + def test_id_to_idx_property(self): + """Test id_to_idx property.""" + occ_map = OccupancyMap2D(self.geometries, ids=self.string_ids) + expected_mapping = {"square1": 0, "square2": 1, "circle": 2, "line": 3} + self.assertEqual(occ_map.id_to_idx, expected_mapping) + + def test_intersects_with_overlapping_geometry(self): + """Test intersects method with overlapping geometry.""" + occ_map = OccupancyMap2D(self.geometries, ids=self.string_ids) + + # Create a geometry that intersects with square1 and circle + query_geom = geom.Polygon([(0.5, 0.5), (1.5, 0.5), (1.5, 1.5), (0.5, 1.5)]) + intersecting_ids = occ_map.intersects(query_geom) + + # NOTE: square2 does not intersect with the query geometry, the rest does. 
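+        # square1 fully contains the query square, the circle around (1, 1) overlaps it, and the line from (0, 0) to (1, 1) passes through it.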
+        self.assertIn("square1", intersecting_ids)
+        self.assertIn("circle", intersecting_ids)
+        self.assertIn("line", intersecting_ids)
+        self.assertEqual(len(intersecting_ids), 3)
+
+    def test_intersects_with_non_overlapping_geometry(self):
+        """Test intersects method with non-overlapping geometry."""
+        occ_map = OccupancyMap2D(self.geometries, ids=self.string_ids)
+
+        # Create a geometry that doesn't intersect with any
+        query_geom = geom.Polygon([(10, 10), (12, 10), (12, 12), (10, 12)])
+        intersecting_ids = occ_map.intersects(query_geom)
+
+        self.assertEqual(len(intersecting_ids), 0)
+
+    def test_query_with_intersects_predicate(self):
+        """Test query method with intersects predicate."""
+        occ_map = OccupancyMap2D(self.geometries, ids=self.string_ids)
+
+        query_geom = geom.Point(1, 1)
+        indices = occ_map.query(query_geom, predicate="intersects")
+        self.assertIsInstance(indices, np.ndarray)
+        self.assertIn(occ_map.id_to_idx["square1"], indices)
+        self.assertIn(occ_map.id_to_idx["circle"], indices)
+        self.assertIn(occ_map.id_to_idx["line"], indices)
+        self.assertNotIn(occ_map.id_to_idx["square2"], indices)
+
+    def test_query_with_within_predicate(self):
+        """Test query method with within predicate."""
+        occ_map = OccupancyMap2D(self.geometries, ids=self.string_ids)
+
+        query_geom = geom.Point(4, 4)
+        indices = occ_map.query(query_geom, predicate="within")
+
+        self.assertIsInstance(indices, np.ndarray)
+        self.assertIn(occ_map.id_to_idx["square2"], indices)
+        self.assertNotIn(occ_map.id_to_idx["square1"], indices)
+        self.assertNotIn(occ_map.id_to_idx["circle"], indices)
+        self.assertNotIn(occ_map.id_to_idx["line"], indices)
+
+    def test_query_with_distance(self):
+        """Test query method with distance parameter."""
+        occ_map = OccupancyMap2D(self.geometries, ids=self.string_ids)
+
+        query_geom = geom.Point(4, 4)
+        indices = occ_map.query(query_geom, predicate="dwithin", distance=3.0)
+
+        self.assertIsInstance(indices, np.ndarray)
+        self.assertIn(occ_map.id_to_idx["square2"], indices)
+        self.assertIn(occ_map.id_to_idx["square1"], indices)
+        self.assertNotIn(occ_map.id_to_idx["circle"], indices)
+        self.assertNotIn(occ_map.id_to_idx["line"], indices)
+
+    def test_query_nearest_basic(self):
+        """Test query_nearest method basic functionality."""
+        occ_map = OccupancyMap2D(self.geometries, ids=self.string_ids)
+
+        query_geom = geom.Point(4, 4)
+        nearest_indices = occ_map.query_nearest(query_geom)
+
+        self.assertIsInstance(nearest_indices, np.ndarray)
+
+    def test_query_nearest_with_distance(self):
+        """Test query_nearest method with return_distance=True."""
+        occ_map = OccupancyMap2D(self.geometries)
+
+        query_geom = geom.Point(1, 1)
+        result = occ_map.query_nearest(query_geom, return_distance=True)
+
+        self.assertIsInstance(result, tuple)
+        self.assertEqual(len(result), 2)
+        indices, distances = result
+        self.assertIsInstance(indices, np.ndarray)
+        self.assertIsInstance(distances, np.ndarray)
+
+    def test_query_nearest_with_max_distance(self):
+        """Test query_nearest method with max_distance."""
+        occ_map = OccupancyMap2D(self.geometries)
+
+        query_geom = geom.Point(10, 10)
+
+        nearest_indices = occ_map.query_nearest(query_geom, max_distance=1.0)
+        self.assertIsInstance(nearest_indices, np.ndarray)
+        self.assertEqual(len(nearest_indices), 0)
+
+        nearest_indices = occ_map.query_nearest(query_geom, max_distance=10.0)
+        self.assertIsInstance(nearest_indices, np.ndarray)
+        self.assertTrue(len(nearest_indices) > 0)
+
+    def test_contains_vectorized_single_point(self):
+        """Test contains_vectorized with a 
single point.""" + occ_map = OccupancyMap2D(self.geometries) + + points = np.array([[1.0, 1.0]]) # Point inside square1 and circle + result = occ_map.contains_vectorized(points) + + self.assertEqual(result.shape, (4, 1)) + self.assertIsInstance(result, np.ndarray) + self.assertEqual(result.dtype, bool) + + def test_contains_vectorized_multiple_points(self): + """Test contains_vectorized with multiple points.""" + occ_map = OccupancyMap2D(self.geometries) + + points = np.array( + [ + [1.0, 1.0], # Inside square1 and circle + [4.0, 4.0], # Inside square2 + [10.0, 10.0], # Outside all geometries + ] + ) + result = occ_map.contains_vectorized(points) + + self.assertEqual(result.shape, (4, 3)) + self.assertIsInstance(result, np.ndarray) + self.assertEqual(result.dtype, bool) + + # Check specific containment results + # Point [1.0, 1.0] should be in square1 (index 0) and circle (index 2) + self.assertTrue(result[0, 0]) # square1 contains point 0 + self.assertFalse(result[1, 0]) # square2 does not contain point 0 + self.assertTrue(result[2, 0]) # circle contains point 0 + self.assertFalse(result[3, 0]) # line does not contain point 0 + + # Point [4.0, 4.0] should be in square2 (index 1) only + self.assertFalse(result[0, 1]) # square1 does not contain point 1 + self.assertTrue(result[1, 1]) # square2 contains point 1 + self.assertFalse(result[2, 1]) # circle does not contain point 1 + self.assertFalse(result[3, 1]) # line does not contain point 1 + + # Point [10.0, 10.0] should not be in any geometry + self.assertFalse(result[0, 2]) # square1 does not contain point 2 + self.assertFalse(result[1, 2]) # square2 does not contain point 2 + self.assertFalse(result[2, 2]) # circle does not contain point 2 + self.assertFalse(result[3, 2]) # line does not contain point 2 + + def test_contains_vectorized_empty_points(self): + """Test contains_vectorized with empty points array.""" + occ_map = OccupancyMap2D(self.geometries) + + points = np.empty((0, 2)) + result = occ_map.contains_vectorized(points) + + self.assertEqual(result.shape, (4, 0)) + + def test_empty_occupancy_map(self): + """Test behavior with empty geometry list.""" + occ_map = OccupancyMap2D([]) + + self.assertEqual(len(occ_map), 0) + self.assertEqual(occ_map.ids, []) + self.assertEqual(len(occ_map.geometries), 0) + + def test_single_geometry_map(self): + """Test behavior with single geometry.""" + occ_map = OccupancyMap2D([self.square1], ids=["single"]) + + self.assertEqual(len(occ_map), 1) + self.assertEqual(occ_map.ids, ["single"]) + self.assertEqual(occ_map["single"], self.square1) + + +if __name__ == "__main__": + + unittest.main() diff --git a/tests/unit/geometry/test_point.py b/tests/unit/geometry/test_point.py new file mode 100644 index 00000000..15932540 --- /dev/null +++ b/tests/unit/geometry/test_point.py @@ -0,0 +1,200 @@ +import unittest +from unittest.mock import MagicMock, patch + +import numpy as np + +from py123d.geometry import Point2D, Point3D +from py123d.geometry.geometry_index import Point2DIndex, Point3DIndex + + +class TestPoint2D(unittest.TestCase): + """Unit tests for Point2D class.""" + + def setUp(self): + """Set up test fixtures.""" + self.x_coord = 3.5 + self.y_coord = 4.2 + self.point = Point2D(x=self.x_coord, y=self.y_coord) + self.test_array = np.zeros([2], dtype=np.float64) + self.test_array[Point2DIndex.X] = self.x_coord + self.test_array[Point2DIndex.Y] = self.y_coord + + def test_init(self): + """Test Point2D initialization.""" + point = Point2D(1.0, 2.0) + self.assertEqual(point.x, 1.0) + 
self.assertEqual(point.y, 2.0)
+
+    def test_from_array_valid(self):
+        """Test from_array class method with valid input."""
+        # Array populated in setUp via Point2DIndex
+        point = Point2D.from_array(self.test_array)
+        self.assertEqual(point.x, self.x_coord)
+        self.assertEqual(point.y, self.y_coord)
+
+    def test_from_array_invalid_dimensions(self):
+        """Test from_array with invalid array dimensions."""
+        # 2D array should raise assertion error
+        array_2d = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float64)
+        with self.assertRaises(AssertionError):
+            Point2D.from_array(array_2d)
+
+        # 3D array should raise assertion error
+        array_3d = np.array([[[1.0]]], dtype=np.float64)
+        with self.assertRaises(AssertionError):
+            Point2D.from_array(array_3d)
+
+    def test_from_array_invalid_shape(self):
+        """Test from_array with invalid array shape."""
+
+        array_wrong_length = np.array([1.0, 2.0, 3.0], dtype=np.float64)
+        with self.assertRaises(AssertionError):
+            Point2D.from_array(array_wrong_length)
+
+        # Empty array
+        empty_array = np.array([], dtype=np.float64)
+        with self.assertRaises(AssertionError):
+            Point2D.from_array(empty_array)
+
+    def test_array_property(self):
+        """Test the array property."""
+        expected_array = np.array([self.x_coord, self.y_coord], dtype=np.float64)
+        np.testing.assert_array_equal(self.point.array, expected_array)
+        self.assertEqual(self.point.array.dtype, np.float64)
+        self.assertEqual(self.point.array.shape, (2,))
+
+    def test_array_like(self):
+        """Test the __array__ behavior."""
+        expected_array = np.array([self.x_coord, self.y_coord], dtype=np.float32)
+        output_array = np.array(self.point, dtype=np.float32)
+        np.testing.assert_array_equal(output_array, expected_array)
+        self.assertEqual(output_array.dtype, np.float32)
+        self.assertEqual(output_array.shape, (2,))
+
+    def test_shapely_point_property(self):
+        """Test the shapely_point property."""
+        with patch("shapely.geometry.Point") as mock_point:
+            mock_point_instance = MagicMock()
+            mock_point.return_value = mock_point_instance
+
+            result = self.point.shapely_point
+
+            mock_point.assert_called_once_with(self.x_coord, self.y_coord)
+            self.assertEqual(result, mock_point_instance)
+
+    def test_iter(self):
+        """Test the __iter__ method."""
+        coords = list(self.point)
+        self.assertEqual(coords, [self.x_coord, self.y_coord])
+
+        # Test that it's actually iterable
+        x, y = self.point
+        self.assertEqual(x, self.x_coord)
+        self.assertEqual(y, self.y_coord)
+
+    def test_hash(self):
+        """Test the __hash__ method."""
+        point_dict = {self.point: "test"}
+        self.assertIn(self.point, point_dict)
+        self.assertEqual(point_dict[self.point], "test")
+
+
+class TestPoint3D(unittest.TestCase):
+    """Unit tests for Point3D class."""
+
+    def setUp(self):
+        """Set up test fixtures."""
+        self.x_coord = 3.5
+        self.y_coord = 4.2
+        self.z_coord = 5.1
+        self.point = Point3D(self.x_coord, self.y_coord, self.z_coord)
+        self.test_array = np.zeros((3,), dtype=np.float64)
+        self.test_array[Point3DIndex.X] = self.x_coord
+        self.test_array[Point3DIndex.Y] = self.y_coord
+        self.test_array[Point3DIndex.Z] = self.z_coord
+
+    def test_init(self):
+        """Test Point3D initialization."""
+        point = Point3D(1.0, 2.0, 3.0)
+        self.assertEqual(point.x, 1.0)
+        self.assertEqual(point.y, 2.0)
+        self.assertEqual(point.z, 3.0)
+
+    def test_from_array_valid(self):
+        """Test from_array class method with valid input."""
+        # Array populated in setUp via Point3DIndex
+        point = Point3D.from_array(self.test_array)
+        self.assertEqual(point.x, self.x_coord)
+        self.assertEqual(point.y, self.y_coord)
+        
self.assertEqual(point.z, self.z_coord) + + def test_from_array_invalid_dimensions(self): + """Test from_array with invalid array dimensions.""" + # 2D array should raise assertion error + array_2d = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float64) + with self.assertRaises(AssertionError): + Point3D.from_array(array_2d) + + # 3D array should raise assertion error + array_3d = np.array([[[1.0]]], dtype=np.float64) + with self.assertRaises(AssertionError): + Point3D.from_array(array_3d) + + def test_from_array_invalid_shape(self): + """Test from_array with invalid array shape.""" + + array_wrong_length = np.array([1.0, 2.0], dtype=np.float64) + with self.assertRaises(AssertionError): + Point3D.from_array(array_wrong_length) + + # Empty array + empty_array = np.array([], dtype=np.float64) + with self.assertRaises(AssertionError): + Point3D.from_array(empty_array) + + def test_array_property(self): + """Test the array property.""" + expected_array = np.array([self.x_coord, self.y_coord, self.z_coord], dtype=np.float64) + np.testing.assert_array_equal(self.point.array, expected_array) + self.assertEqual(self.point.array.dtype, np.float64) + self.assertEqual(self.point.array.shape, (3,)) + + def test_array_like(self): + """Test the __array__ behavior.""" + expected_array = np.array([self.x_coord, self.y_coord, self.z_coord], dtype=np.float32) + output_array = np.array(self.point, dtype=np.float32) + np.testing.assert_array_equal(output_array, expected_array) + self.assertEqual(output_array.dtype, np.float32) + self.assertEqual(output_array.shape, (3,)) + + def test_shapely_point_property(self): + """Test the shapely_point property.""" + with patch("shapely.geometry.Point") as mock_point: + mock_point_instance = MagicMock() + mock_point.return_value = mock_point_instance + + result = self.point.shapely_point + + mock_point.assert_called_once_with(self.x_coord, self.y_coord, self.z_coord) + self.assertEqual(result, mock_point_instance) + + def test_iter(self): + """Test the __iter__ method.""" + coords = list(self.point) + self.assertEqual(coords, [self.x_coord, self.y_coord, self.z_coord]) + + # Test that it's actually iterable + x, y, z = self.point + self.assertEqual(x, self.x_coord) + self.assertEqual(y, self.y_coord) + self.assertEqual(z, self.z_coord) + + def test_hash(self): + """Test the __hash__ method.""" + point_dict = {self.point: "test"} + self.assertIn(self.point, point_dict) + self.assertEqual(point_dict[self.point], "test") + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/geometry/test_polyline.py b/tests/unit/geometry/test_polyline.py new file mode 100644 index 00000000..a2614410 --- /dev/null +++ b/tests/unit/geometry/test_polyline.py @@ -0,0 +1,324 @@ +import unittest + +import numpy as np +import shapely.geometry as geom + +from py123d.geometry import Point2D, Point3D, Polyline2D, Polyline3D, PolylineSE2, StateSE2 + + +class TestPolyline2D(unittest.TestCase): + """Test class for Polyline2D.""" + + def test_from_linestring(self): + """Test creating Polyline2D from LineString.""" + coords = [(0.0, 0.0), (1.0, 1.0), (2.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline2D.from_linestring(linestring) + self.assertIsInstance(polyline, Polyline2D) + self.assertTrue(polyline.linestring.equals(linestring)) + + def test_from_linestring_with_z(self): + """Test creating Polyline2D from LineString with Z coordinates.""" + coords = [(0.0, 0.0, 1.0), (1.0, 1.0, 2.0), (2.0, 0.0, 3.0)] + linestring = geom.LineString(coords) + polyline = 
Polyline2D.from_linestring(linestring) + self.assertIsInstance(polyline, Polyline2D) + self.assertFalse(polyline.linestring.has_z) + + def test_from_array_2d(self): + """Test creating Polyline2D from 2D array.""" + array = np.array([[0.0, 0.0], [1.0, 1.0], [2.0, 0.0]], dtype=np.float32) + polyline = Polyline2D.from_array(array) + self.assertIsInstance(polyline, Polyline2D) + np.testing.assert_array_almost_equal(polyline.array, array) + + def test_from_array_3d(self): + """Test creating Polyline2D from 3D array.""" + array = np.array([[0.0, 0.0, 1.0], [1.0, 1.0, 2.0], [2.0, 0.0, 3.0]], dtype=np.float32) + polyline = Polyline2D.from_array(array) + self.assertIsInstance(polyline, Polyline2D) + expected = array[:, :2] + np.testing.assert_array_almost_equal(polyline.array, expected) + + def test_from_array_invalid_shape(self): + """Test creating Polyline2D from invalid array shape.""" + array = np.array([[0.0], [1.0], [2.0]], dtype=np.float32) + with self.assertRaises(ValueError): + Polyline2D.from_array(array) + + def test_array_property(self): + """Test array property.""" + coords = [(0.0, 0.0), (1.0, 1.0), (2.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline2D.from_linestring(linestring) + array = polyline.array + self.assertEqual(array.shape, (3, 2)) + self.assertEqual(array.dtype, np.float64) + np.testing.assert_array_almost_equal(array, coords) + + def test_length_property(self): + """Test length property.""" + coords = [(0.0, 0.0), (1.0, 0.0), (2.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline2D.from_linestring(linestring) + self.assertEqual(polyline.length, 2.0) + + def test_interpolate_single_distance(self): + """Test interpolation with single distance.""" + coords = [(0.0, 0.0), (2.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline2D.from_linestring(linestring) + point = polyline.interpolate(1.0) + self.assertIsInstance(point, Point2D) + self.assertEqual(point.x, 1.0) + self.assertEqual(point.y, 0.0) + + def test_interpolate_multiple_distances(self): + """Test interpolation with multiple distances.""" + coords = [(0.0, 0.0), (2.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline2D.from_linestring(linestring) + points = polyline.interpolate(np.array([0.0, 1.0, 2.0])) + self.assertIsInstance(points, np.ndarray) + self.assertEqual(points.shape, (3, 2)) + expected = np.array([[0.0, 0.0], [1.0, 0.0], [2.0, 0.0]]) + np.testing.assert_array_almost_equal(points, expected) + + def test_interpolate_normalized(self): + """Test normalized interpolation.""" + coords = [(0.0, 0.0), (2.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline2D.from_linestring(linestring) + point = polyline.interpolate(0.5, normalized=True) + self.assertIsInstance(point, Point2D) + self.assertEqual(point.x, 1.0) + self.assertEqual(point.y, 0.0) + + def test_project_point2d(self): + """Test projecting Point2D onto polyline.""" + coords = [(0.0, 0.0), (2.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline2D.from_linestring(linestring) + point = Point2D(1.0, 1.0) + distance = polyline.project(point) + self.assertEqual(distance, 1.0) + + def test_project_statese2(self): + """Test projecting StateSE2 onto polyline.""" + coords = [(0.0, 0.0), (2.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline2D.from_linestring(linestring) + state = StateSE2(1.0, 1.0, 0.0) + distance = polyline.project(state) + self.assertEqual(distance, 1.0) + + def test_polyline_se2_property(self): + """Test polyline_se2 
property.""" + coords = [(0.0, 0.0), (1.0, 0.0), (2.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline2D.from_linestring(linestring) + polyline_se2 = polyline.polyline_se2 + self.assertIsInstance(polyline_se2, PolylineSE2) + + +class TestPolylineSE2(unittest.TestCase): + """Test class for PolylineSE2.""" + + def test_from_linestring(self): + """Test creating PolylineSE2 from LineString.""" + coords = [(0.0, 0.0), (1.0, 0.0), (2.0, 0.0)] + linestring = geom.LineString(coords) + polyline = PolylineSE2.from_linestring(linestring) + self.assertIsInstance(polyline, PolylineSE2) + self.assertEqual(polyline.array.shape, (3, 3)) + + def test_from_array_2d(self): + """Test creating PolylineSE2 from 2D array.""" + array = np.array([[0.0, 0.0], [1.0, 0.0], [2.0, 0.0]], dtype=np.float32) + polyline = PolylineSE2.from_array(array) + self.assertIsInstance(polyline, PolylineSE2) + self.assertEqual(polyline.array.shape, (3, 3)) + + def test_from_array_se2(self): + """Test creating PolylineSE2 from SE2 array.""" + array = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [2.0, 0.0, 0.0]], dtype=np.float32) + polyline = PolylineSE2.from_array(array) + self.assertIsInstance(polyline, PolylineSE2) + np.testing.assert_array_almost_equal(polyline.array, array) + + def test_from_array_invalid_shape(self): + """Test creating PolylineSE2 from invalid array shape.""" + array = np.array([[0.0], [1.0], [2.0]], dtype=np.float32) + with self.assertRaises(ValueError): + PolylineSE2.from_array(array) + + def test_from_discrete_se2(self): + """Test creating PolylineSE2 from discrete SE2 states.""" + states = [StateSE2(0.0, 0.0, 0.0), StateSE2(1.0, 0.0, 0.0), StateSE2(2.0, 0.0, 0.0)] + polyline = PolylineSE2.from_discrete_se2(states) + self.assertIsInstance(polyline, PolylineSE2) + self.assertEqual(polyline.array.shape, (3, 3)) + + def test_length_property(self): + """Test length property.""" + array = np.array([[0.0, 0.0, 0.0], [1.0, 0.0, 0.0], [2.0, 0.0, 0.0]], dtype=np.float64) + polyline = PolylineSE2.from_array(array) + self.assertEqual(polyline.length, 2.0) + + def test_interpolate_single_distance(self): + """Test interpolation with single distance.""" + array = np.array([[0.0, 0.0, 0.0], [2.0, 0.0, 0.0]], dtype=np.float64) + polyline = PolylineSE2.from_array(array) + state = polyline.interpolate(1.0) + self.assertIsInstance(state, StateSE2) + self.assertEqual(state.x, 1.0) + self.assertEqual(state.y, 0.0) + + def test_interpolate_multiple_distances(self): + """Test interpolation with multiple distances.""" + array = np.array([[0.0, 0.0, 0.0], [2.0, 0.0, 0.0]], dtype=np.float64) + polyline = PolylineSE2.from_array(array) + states = polyline.interpolate(np.array([0.0, 1.0, 2.0])) + self.assertIsInstance(states, np.ndarray) + self.assertEqual(states.shape, (3, 3)) + + def test_interpolate_normalized(self): + """Test normalized interpolation.""" + array = np.array([[0.0, 0.0, 0.0], [2.0, 0.0, 0.0]], dtype=np.float64) + polyline = PolylineSE2.from_array(array) + state = polyline.interpolate(0.5, normalized=True) + self.assertIsInstance(state, StateSE2) + self.assertEqual(state.x, 1.0) + self.assertEqual(state.y, 0.0) + + def test_project_point2d(self): + """Test projecting Point2D onto SE2 polyline.""" + array = np.array([[0.0, 0.0, 0.0], [2.0, 0.0, 0.0]], dtype=np.float64) + polyline = PolylineSE2.from_array(array) + point = Point2D(1.0, 1.0) + distance = polyline.project(point) + self.assertEqual(distance, 1.0) + + def test_project_statese2(self): + """Test projecting StateSE2 onto SE2 polyline.""" + array 
= np.array([[0.0, 0.0, 0.0], [2.0, 0.0, 0.0]], dtype=np.float64) + polyline = PolylineSE2.from_array(array) + state = StateSE2(1.0, 1.0, 0.0) + distance = polyline.project(state) + self.assertEqual(distance, 1.0) + + +class TestPolyline3D(unittest.TestCase): + """Test class for Polyline3D.""" + + def test_from_linestring_with_z(self): + """Test creating Polyline3D from LineString with Z coordinates.""" + coords = [(0.0, 0.0, 1.0), (1.0, 1.0, 2.0), (2.0, 0.0, 3.0)] + linestring = geom.LineString(coords) + polyline = Polyline3D.from_linestring(linestring) + self.assertIsInstance(polyline, Polyline3D) + self.assertTrue(polyline.linestring.has_z) + + def test_from_linestring_without_z(self): + """Test creating Polyline3D from LineString without Z coordinates.""" + coords = [(0.0, 0.0), (1.0, 1.0), (2.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline3D.from_linestring(linestring) + self.assertIsInstance(polyline, Polyline3D) + self.assertTrue(polyline.linestring.has_z) + + def test_from_array(self): + """Test creating Polyline3D from 3D array.""" + array = np.array([[0.0, 0.0, 1.0], [1.0, 1.0, 2.0], [2.0, 0.0, 3.0]], dtype=np.float64) + polyline = Polyline3D.from_array(array) + self.assertIsInstance(polyline, Polyline3D) + np.testing.assert_array_almost_equal(polyline.array, array) + + def test_from_array_invalid_shape(self): + """Test creating Polyline3D from invalid array shape.""" + array = np.array([[0.0, 0.0], [1.0, 1.0]], dtype=np.float64) + with self.assertRaises(AssertionError): + Polyline3D.from_array(array) + + def test_array_property(self): + """Test array property.""" + coords = [(0.0, 0.0, 1.0), (1.0, 1.0, 2.0), (2.0, 0.0, 3.0)] + linestring = geom.LineString(coords) + polyline = Polyline3D.from_linestring(linestring) + array = polyline.array + self.assertEqual(array.shape, (3, 3)) + self.assertEqual(array.dtype, np.float64) + np.testing.assert_array_almost_equal(array, coords) + + def test_polyline_2d_property(self): + """Test polyline_2d property.""" + coords = [(0.0, 0.0, 1.0), (1.0, 1.0, 2.0), (2.0, 0.0, 3.0)] + linestring = geom.LineString(coords) + polyline = Polyline3D.from_linestring(linestring) + polyline_2d = polyline.polyline_2d + self.assertIsInstance(polyline_2d, Polyline2D) + self.assertFalse(polyline_2d.linestring.has_z) + + def test_polyline_se2_property(self): + """Test polyline_se2 property.""" + coords = [(0.0, 0.0, 1.0), (1.0, 0.0, 2.0), (2.0, 0.0, 3.0)] + linestring = geom.LineString(coords) + polyline = Polyline3D.from_linestring(linestring) + polyline_se2 = polyline.polyline_se2 + self.assertIsInstance(polyline_se2, PolylineSE2) + + def test_length_property(self): + """Test length property.""" + coords = [(0.0, 0.0, 0.0), (1.0, 0.0, 0.0), (2.0, 0.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline3D.from_linestring(linestring) + self.assertEqual(polyline.length, 2.0) + + def test_interpolate_single_distance(self): + """Test interpolation with single distance.""" + coords = [(0.0, 0.0, 0.0), (2.0, 0.0, 2.0)] + linestring = geom.LineString(coords) + polyline = Polyline3D.from_linestring(linestring) + point = polyline.interpolate(1.0) + self.assertIsInstance(point, Point3D) + self.assertEqual(point.x, 1.0) + self.assertEqual(point.y, 0.0) + self.assertEqual(point.z, 1.0) + + def test_interpolate_multiple_distances(self): + """Test interpolation with multiple distances.""" + coords = [(0.0, 0.0, 0.0), (2.0, 0.0, 2.0)] + linestring = geom.LineString(coords) + polyline = Polyline3D.from_linestring(linestring) + points = 
polyline.interpolate(np.array([0.0, 1.0, 2.0])) + self.assertIsInstance(points, np.ndarray) + self.assertEqual(points.shape, (3, 3)) + + def test_interpolate_normalized(self): + """Test normalized interpolation.""" + coords = [(0.0, 0.0, 0.0), (2.0, 0.0, 2.0)] + linestring = geom.LineString(coords) + polyline = Polyline3D.from_linestring(linestring) + point = polyline.interpolate(0.5, normalized=True) + self.assertIsInstance(point, Point3D) + self.assertEqual(point.x, 1.0) + self.assertEqual(point.y, 0.0) + self.assertEqual(point.z, 1.0) + + def test_project_point2d(self): + """Test projecting Point2D onto 3D polyline.""" + coords = [(0.0, 0.0, 0.0), (2.0, 0.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline3D.from_linestring(linestring) + point = Point2D(1.0, 1.0) + distance = polyline.project(point) + self.assertEqual(distance, 1.0) + + def test_project_point3d(self): + """Test projecting Point3D onto 3D polyline.""" + coords = [(0.0, 0.0, 0.0), (2.0, 0.0, 0.0)] + linestring = geom.LineString(coords) + polyline = Polyline3D.from_linestring(linestring) + point = Point3D(1.0, 1.0, 1.0) + distance = polyline.project(point) + self.assertEqual(distance, 1.0) diff --git a/tests/unit/geometry/test_rotation.py b/tests/unit/geometry/test_rotation.py new file mode 100644 index 00000000..66f51c49 --- /dev/null +++ b/tests/unit/geometry/test_rotation.py @@ -0,0 +1,211 @@ +import unittest + +import numpy as np + +from py123d.geometry.geometry_index import EulerAnglesIndex, QuaternionIndex +from py123d.geometry.rotation import EulerAngles, Quaternion + + +class TestEulerAngles(unittest.TestCase): + """Unit tests for EulerAngles class.""" + + def setUp(self): + """Set up test fixtures.""" + self.roll = 0.1 + self.pitch = 0.2 + self.yaw = 0.3 + self.euler_angles = EulerAngles(self.roll, self.pitch, self.yaw) + self.test_array = np.zeros([3], dtype=np.float64) + self.test_array[EulerAnglesIndex.ROLL] = self.roll + self.test_array[EulerAnglesIndex.PITCH] = self.pitch + self.test_array[EulerAnglesIndex.YAW] = self.yaw + + def test_init(self): + """Test EulerAngles initialization.""" + euler = EulerAngles(roll=0.1, pitch=0.2, yaw=0.3) + self.assertEqual(euler.roll, 0.1) + self.assertEqual(euler.pitch, 0.2) + self.assertEqual(euler.yaw, 0.3) + + def test_from_array_valid(self): + """Test from_array class method with valid input.""" + euler = EulerAngles.from_array(self.test_array) + self.assertIsInstance(euler, EulerAngles) + self.assertAlmostEqual(euler.roll, self.roll) + self.assertAlmostEqual(euler.pitch, self.pitch) + self.assertAlmostEqual(euler.yaw, self.yaw) + + def test_from_array_invalid_shape(self): + """Test from_array with invalid array shape.""" + with self.assertRaises(AssertionError): + EulerAngles.from_array(np.array([1, 2])) + with self.assertRaises(AssertionError): + EulerAngles.from_array(np.array([[1, 2, 3]])) + + def test_from_array_copy(self): + """Test from_array with copy parameter.""" + original_array = self.test_array.copy() + euler_copy = EulerAngles.from_array(original_array, copy=True) + euler_no_copy = EulerAngles.from_array(original_array, copy=False) + + original_array[0] = 999.0 + self.assertNotEqual(euler_copy.roll, 999.0) + self.assertEqual(euler_no_copy.roll, 999.0) + + def test_from_rotation_matrix(self): + """Test from_rotation_matrix class method.""" + identity_matrix = np.eye(3) + euler = EulerAngles.from_rotation_matrix(identity_matrix) + self.assertAlmostEqual(euler.roll, 0.0, places=10) + self.assertAlmostEqual(euler.pitch, 0.0, places=10) + 
self.assertAlmostEqual(euler.yaw, 0.0, places=10) + + def test_from_rotation_matrix_invalid(self): + """Test from_rotation_matrix with invalid input.""" + with self.assertRaises(AssertionError): + EulerAngles.from_rotation_matrix(np.array([[1, 2]])) + with self.assertRaises(AssertionError): + EulerAngles.from_rotation_matrix(np.array([1, 2, 3])) + + def test_array_property(self): + """Test array property.""" + array = self.euler_angles.array + self.assertEqual(array.shape, (3,)) + self.assertEqual(array[EulerAnglesIndex.ROLL], self.roll) + self.assertEqual(array[EulerAnglesIndex.PITCH], self.pitch) + self.assertEqual(array[EulerAnglesIndex.YAW], self.yaw) + + def test_iterator(self): + """Test iterator functionality.""" + values = list(self.euler_angles) + self.assertEqual(values, [self.roll, self.pitch, self.yaw]) + + def test_hash(self): + """Test hash functionality.""" + euler1 = EulerAngles(0.1, 0.2, 0.3) + euler2 = EulerAngles(0.1, 0.2, 0.3) + euler3 = EulerAngles(0.1, 0.2, 0.4) + + self.assertEqual(hash(euler1), hash(euler2)) + self.assertNotEqual(hash(euler1), hash(euler3)) + + +class TestQuaternion(unittest.TestCase): + """Unit tests for Quaternion class.""" + + def setUp(self): + """Set up test fixtures.""" + self.qw = 1.0 + self.qx = 0.0 + self.qy = 0.0 + self.qz = 0.0 + self.quaternion = Quaternion(self.qw, self.qx, self.qy, self.qz) + self.test_array = np.zeros([4], dtype=np.float64) + self.test_array[QuaternionIndex.QW] = self.qw + self.test_array[QuaternionIndex.QX] = self.qx + self.test_array[QuaternionIndex.QY] = self.qy + self.test_array[QuaternionIndex.QZ] = self.qz + + def test_init(self): + """Test Quaternion initialization.""" + quat = Quaternion(1.0, 0.0, 0.0, 0.0) + self.assertEqual(quat.qw, 1.0) + self.assertEqual(quat.qx, 0.0) + self.assertEqual(quat.qy, 0.0) + self.assertEqual(quat.qz, 0.0) + + def test_from_array_valid(self): + """Test from_array class method with valid input.""" + quat = Quaternion.from_array(self.test_array) + self.assertAlmostEqual(quat.qw, self.qw) + self.assertAlmostEqual(quat.qx, self.qx) + self.assertAlmostEqual(quat.qy, self.qy) + self.assertAlmostEqual(quat.qz, self.qz) + + def test_from_array_invalid_shape(self): + """Test from_array with invalid array shape.""" + with self.assertRaises(AssertionError): + Quaternion.from_array(np.array([1, 2, 3])) + with self.assertRaises(AssertionError): + Quaternion.from_array(np.array([[1, 2, 3, 4]])) + + def test_from_array_copy(self): + """Test from_array with copy parameter.""" + original_array = self.test_array.copy() + quat_copy = Quaternion.from_array(original_array, copy=True) + quat_no_copy = Quaternion.from_array(original_array, copy=False) + + original_array[0] = 999.0 + self.assertNotEqual(quat_copy.qw, 999.0) + self.assertEqual(quat_no_copy.qw, 999.0) + + def test_from_rotation_matrix(self): + """Test from_rotation_matrix class method.""" + identity_matrix = np.eye(3) + quat = Quaternion.from_rotation_matrix(identity_matrix) + self.assertAlmostEqual(quat.qw, 1.0, places=10) + self.assertAlmostEqual(quat.qx, 0.0, places=10) + self.assertAlmostEqual(quat.qy, 0.0, places=10) + self.assertAlmostEqual(quat.qz, 0.0, places=10) + + def test_from_rotation_matrix_invalid(self): + """Test from_rotation_matrix with invalid input.""" + with self.assertRaises(AssertionError): + Quaternion.from_rotation_matrix(np.array([[1, 2]])) + with self.assertRaises(AssertionError): + Quaternion.from_rotation_matrix(np.array([1, 2, 3])) + + def test_from_euler_angles(self): + """Test from_euler_angles class 
method.""" + euler = EulerAngles(0.0, 0.0, 0.0) + quat = Quaternion.from_euler_angles(euler) + self.assertAlmostEqual(quat.qw, 1.0, places=10) + self.assertAlmostEqual(quat.qx, 0.0, places=10) + self.assertAlmostEqual(quat.qy, 0.0, places=10) + self.assertAlmostEqual(quat.qz, 0.0, places=10) + + def test_array_property(self): + """Test array property.""" + array = self.quaternion.array + self.assertEqual(array.shape, (4,)) + np.testing.assert_array_equal(array, self.test_array) + + def test_pyquaternion_property(self): + """Test pyquaternion property.""" + pyquat = self.quaternion.pyquaternion + self.assertEqual(pyquat.w, self.qw) + self.assertEqual(pyquat.x, self.qx) + self.assertEqual(pyquat.y, self.qy) + self.assertEqual(pyquat.z, self.qz) + + def test_euler_angles_property(self): + """Test euler_angles property.""" + euler = self.quaternion.euler_angles + self.assertIsInstance(euler, EulerAngles) + self.assertAlmostEqual(euler.roll, 0.0, places=10) + self.assertAlmostEqual(euler.pitch, 0.0, places=10) + self.assertAlmostEqual(euler.yaw, 0.0, places=10) + + def test_rotation_matrix_property(self): + """Test rotation_matrix property.""" + rot_matrix = self.quaternion.rotation_matrix + self.assertEqual(rot_matrix.shape, (3, 3)) + np.testing.assert_array_almost_equal(rot_matrix, np.eye(3)) + + def test_iterator(self): + """Test iterator functionality.""" + values = list(self.quaternion) + self.assertEqual(values, [self.qw, self.qx, self.qy, self.qz]) + + def test_hash(self): + """Test hash functionality.""" + quat1 = Quaternion(1.0, 0.0, 0.0, 0.0) + quat2 = Quaternion(1.0, 0.0, 0.0, 0.0) + quat3 = Quaternion(0.0, 1.0, 0.0, 0.0) + + self.assertEqual(hash(quat1), hash(quat2)) + self.assertNotEqual(hash(quat1), hash(quat3)) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/geometry/test_vector.py b/tests/unit/geometry/test_vector.py new file mode 100644 index 00000000..4f3b159e --- /dev/null +++ b/tests/unit/geometry/test_vector.py @@ -0,0 +1,172 @@ +import unittest + +import numpy as np + +from py123d.geometry import Vector2D, Vector2DIndex, Vector3D, Vector3DIndex + + +class TestVector2D(unittest.TestCase): + """Unit tests for Vector2D class.""" + + def setUp(self): + """Set up test fixtures.""" + self.x_coord = 3.5 + self.y_coord = 4.2 + self.vector = Vector2D(x=self.x_coord, y=self.y_coord) + self.test_array = np.zeros([2], dtype=np.float64) + self.test_array[Vector2DIndex.X] = self.x_coord + self.test_array[Vector2DIndex.Y] = self.y_coord + + def test_init(self): + """Test Vector2D initialization.""" + vector = Vector2D(1.0, 2.0) + self.assertEqual(vector.x, 1.0) + self.assertEqual(vector.y, 2.0) + + def test_from_array_valid(self): + """Test from_array class method with valid input.""" + vector = Vector2D.from_array(self.test_array) + self.assertEqual(vector.x, self.x_coord) + self.assertEqual(vector.y, self.y_coord) + + def test_from_array_invalid_dimensions(self): + """Test from_array with invalid array dimensions.""" + # 2D array should raise assertion error + array_2d = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float64) + with self.assertRaises(AssertionError): + Vector2D.from_array(array_2d) + + # 3D array should raise assertion error + array_3d = np.array([[[1.0]]], dtype=np.float64) + with self.assertRaises(AssertionError): + Vector2D.from_array(array_3d) + + def test_from_array_invalid_shape(self): + """Test from_array with invalid array shape.""" + array_wrong_length = np.array([1.0, 2.0, 3.0], dtype=np.float64) + with 
self.assertRaises(AssertionError): + Vector2D.from_array(array_wrong_length) + + # Empty array + empty_array = np.array([], dtype=np.float64) + with self.assertRaises(AssertionError): + Vector2D.from_array(empty_array) + + def test_array_property(self): + """Test the array property.""" + expected_array = np.array([self.x_coord, self.y_coord], dtype=np.float64) + np.testing.assert_array_equal(self.vector.array, expected_array) + self.assertEqual(self.vector.array.dtype, np.float64) + self.assertEqual(self.vector.array.shape, (2,)) + + def test_array_like(self): + """Test the __array__ behavior.""" + expected_array = np.array([self.x_coord, self.y_coord], dtype=np.float32) + output_array = np.array(self.vector, dtype=np.float32) + np.testing.assert_array_equal(output_array, expected_array) + self.assertEqual(output_array.dtype, np.float32) + self.assertEqual(output_array.shape, (2,)) + + def test_iter(self): + """Test the __iter__ method.""" + coords = list(self.vector) + self.assertEqual(coords, [self.x_coord, self.y_coord]) + + # Test that it's actually iterable + x, y = self.vector + self.assertEqual(x, self.x_coord) + self.assertEqual(y, self.y_coord) + + def test_hash(self): + """Test the __hash__ method.""" + vector_dict = {self.vector: "test"} + self.assertIn(self.vector, vector_dict) + self.assertEqual(vector_dict[self.vector], "test") + + +class TestVector3D(unittest.TestCase): + """Unit tests for Vector3D class.""" + + def setUp(self): + """Set up test fixtures.""" + self.x_coord = 3.5 + self.y_coord = 4.2 + self.z_coord = 5.1 + self.vector = Vector3D(self.x_coord, self.y_coord, self.z_coord) + self.test_array = np.zeros((3,), dtype=np.float64) + self.test_array[Vector3DIndex.X] = self.x_coord + self.test_array[Vector3DIndex.Y] = self.y_coord + self.test_array[Vector3DIndex.Z] = self.z_coord + + def test_init(self): + """Test Vector3D initialization.""" + vector = Vector3D(1.0, 2.0, 3.0) + self.assertEqual(vector.x, 1.0) + self.assertEqual(vector.y, 2.0) + self.assertEqual(vector.z, 3.0) + + def test_from_array_valid(self): + """Test from_array class method with valid input.""" + vector = Vector3D.from_array(self.test_array) + self.assertEqual(vector.x, self.x_coord) + self.assertEqual(vector.y, self.y_coord) + self.assertEqual(vector.z, self.z_coord) + + def test_from_array_invalid_dimensions(self): + """Test from_array with invalid array dimensions.""" + # 2D array should raise assertion error + array_2d = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=np.float64) + with self.assertRaises(AssertionError): + Vector3D.from_array(array_2d) + + # 3D array should raise assertion error + array_3d = np.array([[[1.0]]], dtype=np.float64) + with self.assertRaises(AssertionError): + Vector3D.from_array(array_3d) + + def test_from_array_invalid_shape(self): + """Test from_array with invalid array shape.""" + array_wrong_length = np.array([1.0, 2.0], dtype=np.float64) + with self.assertRaises(AssertionError): + Vector3D.from_array(array_wrong_length) + + # Empty array + empty_array = np.array([], dtype=np.float64) + with self.assertRaises(AssertionError): + Vector3D.from_array(empty_array) + + def test_array_property(self): + """Test the array property.""" + expected_array = np.array([self.x_coord, self.y_coord, self.z_coord], dtype=np.float64) + np.testing.assert_array_equal(self.vector.array, expected_array) + self.assertEqual(self.vector.array.dtype, np.float64) + self.assertEqual(self.vector.array.shape, (3,)) + + def test_array_like(self): + """Test the __array__ behavior.""" + 
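# np.array(self.vector, dtype=...) goes through Vector3D.__array__, which is presumably expected to honor the requested dtype rather than always returning float64. +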
expected_array = np.array([self.x_coord, self.y_coord, self.z_coord], dtype=np.float32) + output_array = np.array(self.vector, dtype=np.float32) + np.testing.assert_array_equal(output_array, expected_array) + self.assertEqual(output_array.dtype, np.float32) + self.assertEqual(output_array.shape, (3,)) + + def test_iter(self): + """Test the __iter__ method.""" + coords = list(self.vector) + self.assertEqual(coords, [self.x_coord, self.y_coord, self.z_coord]) + + # Test that it's actually iterable + x, y, z = self.vector + self.assertEqual(x, self.x_coord) + self.assertEqual(y, self.y_coord) + self.assertEqual(z, self.z_coord) + + def test_hash(self): + """Test the __hash__ method.""" + vector_dict = {self.vector: "test"} + self.assertIn(self.vector, vector_dict) + self.assertEqual(vector_dict[self.vector], "test") + + +if __name__ == "__main__": + unittest.main() diff --git a/d123/training/models/motion_forecasting/__init__.py b/tests/unit/geometry/transform/__init__.py similarity index 100% rename from d123/training/models/motion_forecasting/__init__.py rename to tests/unit/geometry/transform/__init__.py diff --git a/tests/unit/geometry/transform/test_transform_consistency.py b/tests/unit/geometry/transform/test_transform_consistency.py new file mode 100644 index 00000000..a798fabd --- /dev/null +++ b/tests/unit/geometry/transform/test_transform_consistency.py @@ -0,0 +1,492 @@ +import unittest + +import numpy as np +import numpy.typing as npt + +from py123d.geometry import EulerStateSE3, StateSE2, Vector2D, Vector3D +from py123d.geometry.geometry_index import EulerStateSE3Index, Point2DIndex, Point3DIndex, StateSE2Index +from py123d.geometry.transform.transform_euler_se3 import ( + convert_absolute_to_relative_euler_se3_array, + convert_absolute_to_relative_points_3d_array, + convert_relative_to_absolute_euler_se3_array, + convert_relative_to_absolute_points_3d_array, + translate_euler_se3_along_body_frame, + translate_euler_se3_along_x, + translate_euler_se3_along_y, +) +from py123d.geometry.transform.transform_se2 import ( + convert_absolute_to_relative_point_2d_array, + convert_absolute_to_relative_se2_array, + convert_relative_to_absolute_point_2d_array, + convert_relative_to_absolute_se2_array, + translate_se2_along_body_frame, + translate_se2_along_x, + translate_se2_along_y, + translate_se2_array_along_body_frame, +) +from py123d.geometry.utils.rotation_utils import get_rotation_matrices_from_euler_array + + +class TestTransformConsistency(unittest.TestCase): + """Tests to ensure consistency between different transformation functions.""" + + def setUp(self): + self.decimal = 4 # Decimal places for np.testing.assert_array_almost_equal + self.num_consistency_tests = 10 # Number of random test cases for consistency checks + + self.max_pose_xyz = 100.0 + self.min_random_poses = 1 + self.max_random_poses = 20 + + def _get_random_se2_array(self, size: int) -> npt.NDArray[np.float64]: + """Generate an array of random SE2 poses""" + random_se2_array = np.random.uniform(-self.max_pose_xyz, self.max_pose_xyz, (size, len(StateSE2Index))) + random_se2_array[:, StateSE2Index.YAW] = np.random.uniform(-np.pi, np.pi, size) # yaw angles + return random_se2_array + + def _get_random_se3_array(self, size: int) -> npt.NDArray[np.float64]: + """Generate an array of random SE3 poses""" + random_se3_array = np.zeros((size, len(EulerStateSE3Index)), dtype=np.float64) + random_se3_array[:, EulerStateSE3Index.XYZ] = np.random.uniform( + -self.max_pose_xyz, self.max_pose_xyz, (size, 3) + ) + random_se3_array[:,
EulerStateSE3Index.YAW] = np.random.uniform(-np.pi, np.pi, size) + random_se3_array[:, EulerStateSE3Index.PITCH] = np.random.uniform(-np.pi / 2, np.pi / 2, size) + random_se3_array[:, EulerStateSE3Index.ROLL] = np.random.uniform(-np.pi, np.pi, size) + + return random_se3_array + + def test_se2_absolute_relative_conversion_consistency(self) -> None: + """Test that converting absolute->relative->absolute returns original poses""" + for _ in range(self.num_consistency_tests): + # Generate random reference pose + reference = StateSE2.from_array(self._get_random_se2_array(1)[0]) + + # Generate random absolute poses + num_poses = np.random.randint(self.min_random_poses, self.max_random_poses) + absolute_poses = self._get_random_se2_array(num_poses) + + # Convert absolute -> relative -> absolute + relative_poses = convert_absolute_to_relative_se2_array(reference, absolute_poses) + recovered_absolute = convert_relative_to_absolute_se2_array(reference, relative_poses) + + np.testing.assert_array_almost_equal(absolute_poses, recovered_absolute, decimal=self.decimal) + + def test_se2_points_absolute_relative_conversion_consistency(self) -> None: + """Test that converting absolute->relative->absolute returns original points""" + for _ in range(self.num_consistency_tests): + # Generate random reference pose + reference = StateSE2.from_array(self._get_random_se2_array(1)[0]) + + # Generate random absolute points + num_points = np.random.randint(self.min_random_poses, self.max_random_poses) + absolute_points = self._get_random_se2_array(num_points)[:, StateSE2Index.XY] + + # Convert absolute -> relative -> absolute + relative_points = convert_absolute_to_relative_point_2d_array(reference, absolute_points) + recovered_absolute = convert_relative_to_absolute_point_2d_array(reference, relative_points) + + np.testing.assert_array_almost_equal(absolute_points, recovered_absolute, decimal=self.decimal) + + def test_se2_points_consistency(self) -> None: + """Test whether SE2 point and pose conversions are consistent""" + for _ in range(self.num_consistency_tests): + # Generate random reference pose + reference = StateSE2.from_array(self._get_random_se2_array(1)[0]) + + # Generate random absolute poses + num_poses = np.random.randint(self.min_random_poses, self.max_random_poses) + absolute_se2 = self._get_random_se2_array(num_poses) + + # Convert absolute -> relative -> absolute + relative_se2 = convert_absolute_to_relative_se2_array(reference, absolute_se2) + relative_points = convert_absolute_to_relative_point_2d_array( + reference, absolute_se2[..., StateSE2Index.XY] + ) + np.testing.assert_array_almost_equal( + relative_se2[..., StateSE2Index.XY], relative_points, decimal=self.decimal + ) + + recovered_absolute_se2 = convert_relative_to_absolute_se2_array(reference, relative_se2) + absolute_points = convert_relative_to_absolute_point_2d_array(reference, relative_points) + np.testing.assert_array_almost_equal( + recovered_absolute_se2[..., StateSE2Index.XY], absolute_points, decimal=self.decimal + ) + + def test_se2_translation_consistency(self) -> None: + """Test that SE2 translations are consistent between different methods""" + for _ in range(self.num_consistency_tests): + # Generate random pose + pose = StateSE2.from_array(self._get_random_se2_array(1)[0]) + + # Generate random distances + dx = np.random.uniform(-10.0, 10.0) + dy = np.random.uniform(-10.0, 10.0) + + # Test x-translation consistency + result_x_direct = translate_se2_along_x(pose, dx) + result_x_body = translate_se2_along_body_frame(pose,
Vector2D(dx, 0.0)) + np.testing.assert_array_almost_equal(result_x_direct.array, result_x_body.array, decimal=self.decimal) + + # Test y-translation consistency + result_y_direct = translate_se2_along_y(pose, dy) + result_y_body = translate_se2_along_body_frame(pose, Vector2D(0.0, dy)) + np.testing.assert_array_almost_equal(result_y_direct.array, result_y_body.array, decimal=self.decimal) + + # Test combined translation + result_xy_body = translate_se2_along_body_frame(pose, Vector2D(dx, dy)) + result_xy_sequential = translate_se2_along_y(translate_se2_along_x(pose, dx), dy) + np.testing.assert_array_almost_equal(result_xy_body.array, result_xy_sequential.array, decimal=self.decimal) + + def test_se3_absolute_relative_conversion_consistency(self) -> None: + """Test that converting absolute->relative->absolute returns original poses""" + for _ in range(self.num_consistency_tests): + # Generate random reference pose + reference = EulerStateSE3.from_array(self._get_random_se3_array(1)[0]) + + # Generate random absolute poses + num_poses = np.random.randint(self.min_random_poses, self.max_random_poses) + absolute_poses = self._get_random_se3_array(num_poses) + + # Convert absolute -> relative -> absolute + relative_poses = convert_absolute_to_relative_euler_se3_array(reference, absolute_poses) + recovered_absolute = convert_relative_to_absolute_euler_se3_array(reference, relative_poses) + + np.testing.assert_array_almost_equal( + absolute_poses[..., EulerStateSE3Index.XYZ], + recovered_absolute[..., EulerStateSE3Index.XYZ], + decimal=self.decimal, + ) + + absolute_rotation_matrices = get_rotation_matrices_from_euler_array( + absolute_poses[..., EulerStateSE3Index.EULER_ANGLES] + ) + recovered_rotation_matrices = get_rotation_matrices_from_euler_array( + recovered_absolute[..., EulerStateSE3Index.EULER_ANGLES] + ) + + np.testing.assert_array_almost_equal( + absolute_rotation_matrices, + recovered_rotation_matrices, + decimal=self.decimal, + ) + + def test_se3_points_absolute_relative_conversion_consistency(self) -> None: + """Test that converting absolute->relative->absolute returns original points""" + for _ in range(self.num_consistency_tests): + # Generate random reference pose + reference = EulerStateSE3.from_array(self._get_random_se3_array(1)[0]) + + # Generate random absolute points + num_points = np.random.randint(self.min_random_poses, self.max_random_poses) + absolute_points = self._get_random_se3_array(num_points)[:, EulerStateSE3Index.XYZ] + + # Convert absolute -> relative -> absolute + relative_points = convert_absolute_to_relative_points_3d_array(reference, absolute_points) + recovered_absolute = convert_relative_to_absolute_points_3d_array(reference, relative_points) + + np.testing.assert_array_almost_equal(absolute_points, recovered_absolute, decimal=self.decimal) + + def test_se3_points_consistency(self) -> None: + """Test whether SE3 point and pose conversions are consistent""" + for _ in range(self.num_consistency_tests): + # Generate random reference pose + reference = EulerStateSE3.from_array(self._get_random_se3_array(1)[0]) + + # Generate random absolute poses + num_poses = np.random.randint(self.min_random_poses, self.max_random_poses) + absolute_se3 = self._get_random_se3_array(num_poses) + + # Convert absolute -> relative -> absolute + relative_se3 = convert_absolute_to_relative_euler_se3_array(reference, absolute_se3) + relative_points = convert_absolute_to_relative_points_3d_array( + reference, absolute_se3[..., EulerStateSE3Index.XYZ] + ) +
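# The pose and point conversions presumably share the same rigid transform, R_ref^T @ (p - t_ref), so the XYZ part of the relative poses should match the independently converted points. +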
np.testing.assert_array_almost_equal( + relative_se3[..., EulerStateSE3Index.XYZ], relative_points, decimal=self.decimal + ) + + recovered_absolute_se3 = convert_relative_to_absolute_euler_se3_array(reference, relative_se3) + absolute_points = convert_relative_to_absolute_points_3d_array(reference, relative_points) + np.testing.assert_array_almost_equal( + recovered_absolute_se3[..., EulerStateSE3Index.XYZ], absolute_points, decimal=self.decimal + ) + + def test_se2_se3_translation_along_body_consistency(self) -> None: + """Test that SE2 and SE3 translations are consistent when SE3 has no z-component or rotation""" + for _ in range(self.num_consistency_tests): + # Create equivalent SE2 and SE3 poses (SE3 with z=0 and no rotations except yaw) + + pose_se2 = StateSE2.from_array(self._get_random_se2_array(1)[0]) + pose_se3 = EulerStateSE3.from_array( + np.array([pose_se2.x, pose_se2.y, 0.0, 0.0, 0.0, pose_se2.yaw], dtype=np.float64) + ) + + # Test translation along x-axis + dx = np.random.uniform(-5.0, 5.0) + translated_se2_x = translate_se2_along_body_frame(pose_se2, Vector2D(dx, 0.0)) + translated_se3_x = translate_euler_se3_along_x(pose_se3, dx) + + np.testing.assert_array_almost_equal( + translated_se2_x.array[StateSE2Index.XY], + translated_se3_x.array[EulerStateSE3Index.XY], + decimal=self.decimal, + ) + np.testing.assert_almost_equal( + translated_se2_x.array[StateSE2Index.YAW], + translated_se3_x.array[EulerStateSE3Index.YAW], + decimal=self.decimal, + ) + + # Test translation along y-axis + dy = np.random.uniform(-5.0, 5.0) + translated_se2_y = translate_se2_along_body_frame(pose_se2, Vector2D(0.0, dy)) + translated_se3_y = translate_euler_se3_along_y(pose_se3, dy) + + np.testing.assert_array_almost_equal( + translated_se2_y.array[StateSE2Index.XY], + translated_se3_y.array[EulerStateSE3Index.XY], + decimal=self.decimal, + ) + np.testing.assert_almost_equal( + translated_se2_y.array[StateSE2Index.YAW], + translated_se3_y.array[EulerStateSE3Index.YAW], + decimal=self.decimal, + ) + + # Test translation along x- and y-axis + dx = np.random.uniform(-5.0, 5.0) + dy = np.random.uniform(-5.0, 5.0) + translated_se2_xy = translate_se2_along_body_frame(pose_se2, Vector2D(dx, dy)) + translated_se3_xy = translate_euler_se3_along_body_frame(pose_se3, Vector3D(dx, dy, 0.0)) + np.testing.assert_array_almost_equal( + translated_se2_xy.array[StateSE2Index.XY], + translated_se3_xy.array[EulerStateSE3Index.XY], + decimal=self.decimal, + ) + np.testing.assert_almost_equal( + translated_se2_xy.array[StateSE2Index.YAW], + translated_se3_xy.array[EulerStateSE3Index.YAW], + decimal=self.decimal, + ) + + def test_se2_se3_point_conversion_consistency(self) -> None: + """Test that SE2 and SE3 point conversions are consistent for 2D points embedded in 3D""" + for _ in range(self.num_consistency_tests): + # Create equivalent SE2 and SE3 reference poses + x = np.random.uniform(-10.0, 10.0) + y = np.random.uniform(-10.0, 10.0) + yaw = np.random.uniform(-np.pi, np.pi) + + reference_se2 = StateSE2.from_array(np.array([x, y, yaw], dtype=np.float64)) + reference_se3 = EulerStateSE3.from_array(np.array([x, y, 0.0, 0.0, 0.0, yaw], dtype=np.float64)) + + # Generate 2D points and embed them in 3D with z=0 + num_points = np.random.randint(1, 8) + points_2d = np.random.uniform(-20.0, 20.0, (num_points, len(Point2DIndex))) + points_3d = np.column_stack([points_2d, np.zeros(num_points)]) + + # Convert using SE2 functions + relative_2d = convert_absolute_to_relative_point_2d_array(reference_se2, points_2d) + 
absolute_2d_recovered = convert_relative_to_absolute_point_2d_array(reference_se2, relative_2d) + + # Convert using SE3 functions + relative_3d = convert_absolute_to_relative_points_3d_array(reference_se3, points_3d) + absolute_3d_recovered = convert_relative_to_absolute_points_3d_array(reference_se3, relative_3d) + + # Check that SE2 and SE3 conversions are consistent for the x,y components + np.testing.assert_array_almost_equal(relative_2d, relative_3d[:, Point3DIndex.XY], decimal=self.decimal) + np.testing.assert_array_almost_equal( + absolute_2d_recovered, absolute_3d_recovered[:, Point3DIndex.XY], decimal=self.decimal + ) + + # Check that z-components remain zero + np.testing.assert_array_almost_equal( + relative_3d[:, Point3DIndex.Z], np.zeros(num_points), decimal=self.decimal + ) + np.testing.assert_array_almost_equal( + absolute_3d_recovered[:, Point3DIndex.Z], np.zeros(num_points), decimal=self.decimal + ) + + def test_se2_se3_pose_conversion_consistency(self) -> None: + """Test that SE2 and SE3 pose conversions are consistent for 2D poses embedded in 3D""" + for _ in range(self.num_consistency_tests): + # Create equivalent SE2 and SE3 reference poses + x = np.random.uniform(-10.0, 10.0) + y = np.random.uniform(-10.0, 10.0) + yaw = np.random.uniform(-np.pi, np.pi) + + reference_se2 = StateSE2.from_array(np.array([x, y, yaw], dtype=np.float64)) + reference_se3 = EulerStateSE3.from_array(np.array([x, y, 0.0, 0.0, 0.0, yaw], dtype=np.float64)) + + # Generate 2D poses and embed them in 3D with z=0 and zero roll/pitch + num_poses = np.random.randint(1, 8) + pose_2d = self._get_random_se2_array(num_poses) + pose_3d = np.zeros((num_poses, len(EulerStateSE3Index)), dtype=np.float64) + pose_3d[:, EulerStateSE3Index.XY] = pose_2d[:, StateSE2Index.XY] + pose_3d[:, EulerStateSE3Index.YAW] = pose_2d[:, StateSE2Index.YAW] + + # Convert using SE2 functions + relative_se2 = convert_absolute_to_relative_se2_array(reference_se2, pose_2d) + absolute_se2_recovered = convert_relative_to_absolute_se2_array(reference_se2, relative_se2) + + # Convert using SE3 functions + relative_se3 = convert_absolute_to_relative_euler_se3_array(reference_se3, pose_3d) + absolute_se3_recovered = convert_relative_to_absolute_euler_se3_array(reference_se3, relative_se3) + + # Check that SE2 and SE3 conversions are consistent for the x,y components + np.testing.assert_array_almost_equal( + relative_se2[:, StateSE2Index.XY], relative_se3[:, EulerStateSE3Index.XY], decimal=self.decimal + ) + np.testing.assert_array_almost_equal( + absolute_se2_recovered[:, StateSE2Index.XY], + absolute_se3_recovered[:, EulerStateSE3Index.XY], + decimal=self.decimal, + ) + # Check that SE2 and SE3 conversions are consistent for the yaw component + np.testing.assert_array_almost_equal( + relative_se2[:, StateSE2Index.YAW], relative_se3[:, EulerStateSE3Index.YAW], decimal=self.decimal + ) + np.testing.assert_array_almost_equal( + absolute_se2_recovered[:, StateSE2Index.YAW], + absolute_se3_recovered[:, EulerStateSE3Index.YAW], + decimal=self.decimal, + ) + + # Check that z-components remain zero + np.testing.assert_array_almost_equal( + relative_se3[:, Point3DIndex.Z], np.zeros(num_poses), decimal=self.decimal + ) + np.testing.assert_array_almost_equal( + absolute_se3_recovered[:, Point3DIndex.Z], np.zeros(num_poses), decimal=self.decimal + ) + + def test_se2_array_translation_consistency(self) -> None: + """Test that SE2 array translation is consistent with single pose translation""" + for _ in range(self.num_consistency_tests): + # Generate
random poses + num_poses = np.random.randint(self.min_random_poses, self.max_random_poses) + poses_array = self._get_random_se2_array(num_poses) + + # Generate random translation + dx = np.random.uniform(-5.0, 5.0) + dy = np.random.uniform(-5.0, 5.0) + translation = Vector2D(dx, dy) + + # Translate using array function + result_array = translate_se2_array_along_body_frame(poses_array, translation) + + # Translate each pose individually + result_individual = np.zeros_like(poses_array) + for i in range(num_poses): + pose = StateSE2.from_array(poses_array[i]) + translated = translate_se2_along_body_frame(pose, translation) + result_individual[i] = translated.array + + np.testing.assert_array_almost_equal(result_array, result_individual, decimal=self.decimal) + + def test_transform_empty_arrays(self) -> None: + """Test that transform functions handle empty arrays correctly""" + reference_se2 = StateSE2.from_array(np.array([1.0, 2.0, np.pi / 4], dtype=np.float64)) + reference_se3 = EulerStateSE3.from_array(np.array([1.0, 2.0, 3.0, 0.1, 0.2, 0.3], dtype=np.float64)) + + # Test SE2 empty arrays + empty_se2_poses = np.array([], dtype=np.float64).reshape(0, len(StateSE2Index)) + empty_2d_points = np.array([], dtype=np.float64).reshape(0, len(Point2DIndex)) + + result_se2_poses = convert_absolute_to_relative_se2_array(reference_se2, empty_se2_poses) + result_2d_points = convert_absolute_to_relative_point_2d_array(reference_se2, empty_2d_points) + + self.assertEqual(result_se2_poses.shape, (0, len(StateSE2Index))) + self.assertEqual(result_2d_points.shape, (0, len(Point2DIndex))) + + # Test SE3 empty arrays + empty_se3_poses = np.array([], dtype=np.float64).reshape(0, len(EulerStateSE3Index)) + empty_3d_points = np.array([], dtype=np.float64).reshape(0, len(Point3DIndex)) + + result_se3_poses = convert_absolute_to_relative_euler_se3_array(reference_se3, empty_se3_poses) + result_3d_points = convert_absolute_to_relative_points_3d_array(reference_se3, empty_3d_points) + + self.assertEqual(result_se3_poses.shape, (0, len(EulerStateSE3Index))) + self.assertEqual(result_3d_points.shape, (0, len(Point3DIndex))) + + def test_transform_identity_operations(self) -> None: + """Test that transforms with identity reference frames work correctly""" + # Identity SE2 pose + identity_se2 = StateSE2.from_array(np.array([0.0, 0.0, 0.0], dtype=np.float64)) + identity_se3 = EulerStateSE3.from_array(np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float64)) + + for _ in range(self.num_consistency_tests): + # Test SE2 identity transforms + num_poses = np.random.randint(1, 10) + se2_poses = self._get_random_se2_array(num_poses) + se2_points = se2_poses[:, StateSE2Index.XY] + + relative_se2_poses = convert_absolute_to_relative_se2_array(identity_se2, se2_poses) + relative_se2_points = convert_absolute_to_relative_point_2d_array(identity_se2, se2_points) + + np.testing.assert_array_almost_equal(se2_poses, relative_se2_poses, decimal=self.decimal) + np.testing.assert_array_almost_equal(se2_points, relative_se2_points, decimal=self.decimal) + + # Test SE3 identity transforms + se3_poses = self._get_random_se3_array(num_poses) + se3_points = se3_poses[:, EulerStateSE3Index.XYZ] + + relative_se3_poses = convert_absolute_to_relative_euler_se3_array(identity_se3, se3_poses) + relative_se3_points = convert_absolute_to_relative_points_3d_array(identity_se3, se3_points) + + np.testing.assert_array_almost_equal( + se3_poses[..., EulerStateSE3Index.EULER_ANGLES], + relative_se3_poses[..., EulerStateSE3Index.EULER_ANGLES], + 
decimal=self.decimal, + ) + np.testing.assert_array_almost_equal(se3_points, relative_se3_points, decimal=self.decimal) + + def test_transform_large_rotations(self) -> None: + """Test transforms with large rotation angles beyond [-π, π]""" + for _ in range(self.num_consistency_tests): + # Create poses with large rotation angles + large_yaw_se2 = np.random.uniform(-4 * np.pi, 4 * np.pi) + large_euler_se3 = np.random.uniform(-4 * np.pi, 4 * np.pi, 3) + + reference_se2 = StateSE2.from_array(np.array([0.0, 0.0, large_yaw_se2], dtype=np.float64)) + reference_se3 = EulerStateSE3.from_array( + np.array([0.0, 0.0, 0.0, large_euler_se3[0], large_euler_se3[1], large_euler_se3[2]], dtype=np.float64) + ) + + # Generate test poses/points + test_se2_poses = self._get_random_se2_array(5) + test_se3_poses = self._get_random_se3_array(5) + test_2d_points = test_se2_poses[:, StateSE2Index.XY] + test_3d_points = test_se3_poses[:, EulerStateSE3Index.XYZ] + + # Test round-trip conversions should still work + relative_se2 = convert_absolute_to_relative_se2_array(reference_se2, test_se2_poses) + recovered_se2 = convert_relative_to_absolute_se2_array(reference_se2, relative_se2) + + relative_se3 = convert_absolute_to_relative_euler_se3_array(reference_se3, test_se3_poses) + recovered_se3 = convert_relative_to_absolute_euler_se3_array(reference_se3, relative_se3) + + relative_2d_points = convert_absolute_to_relative_point_2d_array(reference_se2, test_2d_points) + recovered_2d_points = convert_relative_to_absolute_point_2d_array(reference_se2, relative_2d_points) + + relative_3d_points = convert_absolute_to_relative_points_3d_array(reference_se3, test_3d_points) + recovered_3d_points = convert_relative_to_absolute_points_3d_array(reference_se3, relative_3d_points) + + # Check consistency (allowing for angle wrapping) + np.testing.assert_array_almost_equal( + test_se2_poses[:, StateSE2Index.XY], + recovered_se2[:, StateSE2Index.XY], + decimal=self.decimal, + ) + np.testing.assert_array_almost_equal( + test_se3_poses[:, EulerStateSE3Index.XYZ], + recovered_se3[:, EulerStateSE3Index.XYZ], + decimal=self.decimal, + ) + np.testing.assert_array_almost_equal(test_2d_points, recovered_2d_points, decimal=self.decimal) + np.testing.assert_array_almost_equal(test_3d_points, recovered_3d_points, decimal=self.decimal) + + +if __name__ == "__main__": + unittest.main() diff --git a/tests/unit/geometry/transform/test_transform_euler_se3.py b/tests/unit/geometry/transform/test_transform_euler_se3.py new file mode 100644 index 00000000..8b146fd1 --- /dev/null +++ b/tests/unit/geometry/transform/test_transform_euler_se3.py @@ -0,0 +1,334 @@ +import unittest + +import numpy as np +import numpy.typing as npt + +from py123d.geometry import EulerStateSE3, Vector3D +from py123d.geometry.transform.transform_euler_se3 import ( + convert_absolute_to_relative_euler_se3_array, + convert_absolute_to_relative_points_3d_array, + convert_relative_to_absolute_euler_se3_array, + convert_relative_to_absolute_points_3d_array, + translate_euler_se3_along_body_frame, + translate_euler_se3_along_x, + translate_euler_se3_along_y, + translate_euler_se3_along_z, +) + + +class TestTransformEulerSE3(unittest.TestCase): + + def setUp(self): + self.decimal = 6 # Decimal places for np.testing.assert_array_almost_equal + self.num_consistency_tests = 10 # Number of random test cases for consistency checks + + def test_translate_se3_along_x(self) -> None: + """Tests translating a SE3 state along the body frame forward direction.""" + pose: EulerStateSE3 = 
EulerStateSE3.from_array(np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float64)) + distance: float = 1.0 + result: EulerStateSE3 = translate_euler_se3_along_x(pose, distance) + expected: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_x_negative(self) -> None: + """Tests translating a SE3 state along the body frame backward direction.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 2.0, 3.0, 0.0, 0.0, 0.0], dtype=np.float64)) + distance: float = -0.5 + result: EulerStateSE3 = translate_euler_se3_along_x(pose, distance) + expected: EulerStateSE3 = EulerStateSE3.from_array(np.array([0.5, 2.0, 3.0, 0.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_x_with_rotation(self) -> None: + """Tests translating a SE3 state along the body frame forward direction with yaw rotation.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([0.0, 0.0, 0.0, 0.0, 0.0, np.pi / 2], dtype=np.float64)) + distance: float = 2.5 + result: EulerStateSE3 = translate_euler_se3_along_x(pose, distance) + expected: EulerStateSE3 = EulerStateSE3.from_array( + np.array([0.0, 2.5, 0.0, 0.0, 0.0, np.pi / 2], dtype=np.float64) + ) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_y(self) -> None: + """Tests translating a SE3 state along the body frame lateral direction.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float64)) + distance: float = 1.0 + result: EulerStateSE3 = translate_euler_se3_along_y(pose, distance) + expected: EulerStateSE3 = EulerStateSE3.from_array(np.array([0.0, 1.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_y_with_existing_position(self) -> None: + """Tests translating a SE3 state along the body frame lateral direction with existing position.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 2.0, 3.0, 0.0, 0.0, 0.0], dtype=np.float64)) + distance: float = 2.5 + result: EulerStateSE3 = translate_euler_se3_along_y(pose, distance) + expected: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 4.5, 3.0, 0.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_y_negative(self) -> None: + """Tests translating a SE3 state along the body frame lateral direction in the negative direction.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 2.0, 3.0, 0.0, 0.0, 0.0], dtype=np.float64)) + distance: float = -1.0 + result: EulerStateSE3 = translate_euler_se3_along_y(pose, distance) + expected: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 1.0, 3.0, 0.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_y_with_rotation(self) -> None: + """Tests translating a SE3 state along the body frame lateral direction with roll rotation.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 2.0, 3.0, np.pi / 2, 0.0, 0.0], dtype=np.float64)) + distance: float = -1.0 + result: EulerStateSE3 = translate_euler_se3_along_y(pose, distance) + expected: EulerStateSE3 = EulerStateSE3.from_array( + np.array([1.0, 2.0, 2.0, np.pi / 2, 0.0, 0.0], 
dtype=np.float64) + ) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_z(self) -> None: + """Tests translating a SE3 state along the body frame vertical direction.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float64)) + distance: float = 1.0 + result: EulerStateSE3 = translate_euler_se3_along_z(pose, distance) + expected: EulerStateSE3 = EulerStateSE3.from_array(np.array([0.0, 0.0, 1.0, 0.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_z_large_distance(self) -> None: + """Tests translating a SE3 state along the body frame vertical direction with a large distance.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([0.0, 0.0, 5.0, 0.0, 0.0, 0.0], dtype=np.float64)) + distance: float = 10.0 + result: EulerStateSE3 = translate_euler_se3_along_z(pose, distance) + expected: EulerStateSE3 = EulerStateSE3.from_array(np.array([0.0, 0.0, 15.0, 0.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_z_negative(self) -> None: + """Tests translating a SE3 state along the body frame vertical direction in the negative direction.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 2.0, 5.0, 0.0, 0.0, 0.0], dtype=np.float64)) + distance: float = -2.0 + result: EulerStateSE3 = translate_euler_se3_along_z(pose, distance) + expected: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 2.0, 3.0, 0.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_z_with_rotation(self) -> None: + """Tests translating a SE3 state along the body frame vertical direction with pitch rotation.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 2.0, 3.0, 0.0, np.pi / 2, 0.0], dtype=np.float64)) + distance: float = 2.0 + result: EulerStateSE3 = translate_euler_se3_along_z(pose, distance) + expected: EulerStateSE3 = EulerStateSE3.from_array( + np.array([3.0, 2.0, 3.0, 0.0, np.pi / 2, 0.0], dtype=np.float64) + ) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_body_frame(self) -> None: + """Tests translating a SE3 state along the body frame forward direction.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float64)) + translation: Vector3D = Vector3D.from_array(np.array([1.0, 0.0, 0.0], dtype=np.float64)) + result: EulerStateSE3 = translate_euler_se3_along_body_frame(pose, translation) + expected: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_body_frame_multiple_axes(self) -> None: + """Tests translating a SE3 state along the body frame in multiple axes.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 2.0, 3.0, 0.0, 0.0, 0.0], dtype=np.float64)) + translation: Vector3D = Vector3D.from_array(np.array([0.5, -1.0, 2.0], dtype=np.float64)) + result: EulerStateSE3 = translate_euler_se3_along_body_frame(pose, translation) + expected: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.5, 1.0, 5.0, 0.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def 
test_translate_se3_along_body_frame_zero_translation(self) -> None: + """Tests translating a SE3 state along the body frame with zero translation.""" + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 2.0, 3.0, 0.0, 0.0, 0.0], dtype=np.float64)) + translation: Vector3D = Vector3D.from_array(np.array([0.0, 0.0, 0.0], dtype=np.float64)) + result: EulerStateSE3 = translate_euler_se3_along_body_frame(pose, translation) + expected: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 2.0, 3.0, 0.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array) + + def test_translate_se3_along_body_frame_with_rotation(self) -> None: + """Tests translating a SE3 state along the body frame forward direction with yaw rotation.""" + # Rotate 90 degrees around z-axis, then translate 1 unit along body x-axis + pose: EulerStateSE3 = EulerStateSE3.from_array(np.array([0.0, 0.0, 0.0, 0.0, 0.0, np.pi / 2], dtype=np.float64)) + translation: Vector3D = Vector3D.from_array(np.array([1.0, 0.0, 0.0], dtype=np.float64)) + result: EulerStateSE3 = translate_euler_se3_along_body_frame(pose, translation) + # Should move in +Y direction in world frame + expected: EulerStateSE3 = EulerStateSE3.from_array( + np.array([0.0, 1.0, 0.0, 0.0, 0.0, np.pi / 2], dtype=np.float64) + ) + np.testing.assert_array_almost_equal(result.array, expected.array, decimal=self.decimal) + + def test_translate_se3_along_body_frame_consistency(self) -> None: + """Tests consistency between translate_se3_along_body_frame and axis-specific translation functions.""" + + for _ in range(self.num_consistency_tests): + # Generate random parameters + x_distance: float = np.random.uniform(-10.0, 10.0) + y_distance: float = np.random.uniform(-10.0, 10.0) + z_distance: float = np.random.uniform(-10.0, 10.0) + + start_x: float = np.random.uniform(-5.0, 5.0) + start_y: float = np.random.uniform(-5.0, 5.0) + start_z: float = np.random.uniform(-5.0, 5.0) + + start_roll: float = np.random.uniform(-np.pi, np.pi) + start_pitch: float = np.random.uniform(-np.pi, np.pi) + start_yaw: float = np.random.uniform(-np.pi, np.pi) + + original_pose: EulerStateSE3 = EulerStateSE3.from_array( + np.array( + [ + start_x, + start_y, + start_z, + start_roll, + start_pitch, + start_yaw, + ], + dtype=np.float64, + ) + ) + + # x-axis translation + translation_x: Vector3D = Vector3D.from_array(np.array([x_distance, 0.0, 0.0], dtype=np.float64)) + result_body_frame_x: EulerStateSE3 = translate_euler_se3_along_body_frame(original_pose, translation_x) + result_axis_x: EulerStateSE3 = translate_euler_se3_along_x(original_pose, x_distance) + np.testing.assert_array_almost_equal(result_body_frame_x.array, result_axis_x.array, decimal=self.decimal) + + # y-axis translation + translation_y: Vector3D = Vector3D.from_array(np.array([0.0, y_distance, 0.0], dtype=np.float64)) + result_body_frame_y: EulerStateSE3 = translate_euler_se3_along_body_frame(original_pose, translation_y) + result_axis_y: EulerStateSE3 = translate_euler_se3_along_y(original_pose, y_distance) + np.testing.assert_array_almost_equal(result_body_frame_y.array, result_axis_y.array, decimal=self.decimal) + + # z-axis translation + translation_z: Vector3D = Vector3D.from_array(np.array([0.0, 0.0, z_distance], dtype=np.float64)) + result_body_frame_z: EulerStateSE3 = translate_euler_se3_along_body_frame(original_pose, translation_z) + result_axis_z: EulerStateSE3 = translate_euler_se3_along_z(original_pose, z_distance) + 
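# Translating never changes the orientation, so body-frame translations along individual axes commute; the combined-translation check below relies on this. +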
np.testing.assert_array_almost_equal(result_body_frame_z.array, result_axis_z.array, decimal=self.decimal) + + # all axes translation + translation_all: Vector3D = Vector3D.from_array( + np.array([x_distance, y_distance, z_distance], dtype=np.float64) + ) + result_body_frame_all: EulerStateSE3 = translate_euler_se3_along_body_frame(original_pose, translation_all) + intermediate_pose: EulerStateSE3 = translate_euler_se3_along_x(original_pose, x_distance) + intermediate_pose = translate_euler_se3_along_y(intermediate_pose, y_distance) + result_axis_all: EulerStateSE3 = translate_euler_se3_along_z(intermediate_pose, z_distance) + np.testing.assert_array_almost_equal( + result_body_frame_all.array, result_axis_all.array, decimal=self.decimal + ) + + def test_convert_absolute_to_relative_se3_array(self) -> None: + """Tests converting absolute SE3 poses to relative SE3 poses.""" + reference: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0], dtype=np.float64)) + absolute_poses: npt.NDArray[np.float64] = np.array( + [ + [2.0, 2.0, 2.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0, 0.0, 0.0], + ], + dtype=np.float64, + ) + result: npt.NDArray[np.float64] = convert_absolute_to_relative_euler_se3_array(reference, absolute_poses) + expected: npt.NDArray[np.float64] = np.array( + [ + [1.0, 1.0, 1.0, 0.0, 0.0, 0.0], + [-1.0, 0.0, -1.0, 0.0, 0.0, 0.0], + ], + dtype=np.float64, + ) + np.testing.assert_array_almost_equal(result, expected) + + def test_convert_absolute_to_relative_se3_array_single_pose(self) -> None: + """Tests converting a single absolute SE3 pose to a relative SE3 pose.""" + reference: EulerStateSE3 = EulerStateSE3.from_array(np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float64)) + absolute_poses: npt.NDArray[np.float64] = np.array([[1.0, 2.0, 3.0, 0.0, 0.0, 0.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_absolute_to_relative_euler_se3_array(reference, absolute_poses) + expected: npt.NDArray[np.float64] = np.array([[1.0, 2.0, 3.0, 0.0, 0.0, 0.0]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected) + + def test_convert_absolute_to_relative_se3_array_with_rotation(self) -> None: + """Tests converting absolute SE3 poses to relative SE3 poses with 90 degree yaw rotation.""" + reference: EulerStateSE3 = EulerStateSE3.from_array( + np.array([0.0, 0.0, 0.0, 0.0, 0.0, np.pi / 2], dtype=np.float64) + ) + absolute_poses: npt.NDArray[np.float64] = np.array([[1.0, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_absolute_to_relative_euler_se3_array(reference, absolute_poses) + expected: npt.NDArray[np.float64] = np.array([[0.0, -1.0, 0.0, 0.0, 0.0, -np.pi / 2]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_relative_to_absolute_se3_array(self) -> None: + """Tests converting relative SE3 poses to absolute SE3 poses.""" + reference: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0], dtype=np.float64)) + relative_poses: npt.NDArray[np.float64] = np.array( + [ + [1.0, 1.0, 1.0, 0.0, 0.0, 0.0], + [-1.0, 0.0, -1.0, 0.0, 0.0, 0.0], + ], + dtype=np.float64, + ) + result: npt.NDArray[np.float64] = convert_relative_to_absolute_euler_se3_array(reference, relative_poses) + expected: npt.NDArray[np.float64] = np.array( + [ + [2.0, 2.0, 2.0, 0.0, 0.0, 0.0], + [0.0, 1.0, 0.0, 0.0, 0.0, 0.0], + ], + dtype=np.float64, + ) + np.testing.assert_array_almost_equal(result, expected) + + def 
test_convert_relative_to_absolute_se3_array_with_rotation(self) -> None: + """Tests converting relative SE3 poses to absolute SE3 poses with 90 degree yaw rotation.""" + reference: EulerStateSE3 = EulerStateSE3.from_array( + np.array([1.0, 0.0, 0.0, 0.0, 0.0, np.pi / 2], dtype=np.float64) + ) + relative_poses: npt.NDArray[np.float64] = np.array([[1.0, 0.0, 0.0, 0.0, 0.0, 0.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_relative_to_absolute_euler_se3_array(reference, relative_poses) + expected: npt.NDArray[np.float64] = np.array([[1.0, 1.0, 0.0, 0.0, 0.0, np.pi / 2]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_absolute_to_relative_points_3d_array(self) -> None: + """Tests converting absolute 3D points to relative 3D points.""" + reference: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0], dtype=np.float64)) + absolute_points: npt.NDArray[np.float64] = np.array([[2.0, 2.0, 2.0], [0.0, 1.0, 0.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_absolute_to_relative_points_3d_array(reference, absolute_points) + expected: npt.NDArray[np.float64] = np.array([[1.0, 1.0, 1.0], [-1.0, 0.0, -1.0]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected) + + def test_convert_absolute_to_relative_points_3d_array_origin_reference(self) -> None: + """Tests converting absolute 3D points to relative 3D points with origin reference.""" + reference: EulerStateSE3 = EulerStateSE3.from_array(np.array([0.0, 0.0, 0.0, 0.0, 0.0, 0.0], dtype=np.float64)) + absolute_points: npt.NDArray[np.float64] = np.array([[1.0, 2.0, 3.0], [-1.0, -2.0, -3.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_absolute_to_relative_points_3d_array(reference, absolute_points) + expected: npt.NDArray[np.float64] = np.array([[1.0, 2.0, 3.0], [-1.0, -2.0, -3.0]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected) + + def test_convert_absolute_to_relative_points_3d_array_with_rotation(self) -> None: + """Tests converting absolute 3D points to relative 3D points with 90 degree yaw rotation.""" + reference: EulerStateSE3 = EulerStateSE3.from_array( + np.array([0.0, 0.0, 0.0, 0.0, 0.0, np.pi / 2], dtype=np.float64) + ) + absolute_points: npt.NDArray[np.float64] = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 1.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_absolute_to_relative_points_3d_array(reference, absolute_points) + expected: npt.NDArray[np.float64] = np.array([[0.0, -1.0, 0.0], [1.0, 0.0, 1.0]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_relative_to_absolute_points_3d_array(self) -> None: + """Tests converting relative 3D points to absolute 3D points.""" + reference: EulerStateSE3 = EulerStateSE3.from_array(np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0], dtype=np.float64)) + relative_points: npt.NDArray[np.float64] = np.array([[1.0, 1.0, 1.0], [-1.0, 0.0, -1.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_relative_to_absolute_points_3d_array(reference, relative_points) + expected: npt.NDArray[np.float64] = np.array([[2.0, 2.0, 2.0], [0.0, 1.0, 0.0]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected) + + def test_convert_relative_to_absolute_points_3d_array_empty(self) -> None: + """Tests converting an empty array of relative 3D points to absolute 3D points.""" + reference: EulerStateSE3 = 
EulerStateSE3.from_array(np.array([1.0, 1.0, 1.0, 0.0, 0.0, 0.0], dtype=np.float64)) + relative_points: npt.NDArray[np.float64] = np.array([], dtype=np.float64).reshape(0, 3) + result: npt.NDArray[np.float64] = convert_relative_to_absolute_points_3d_array(reference, relative_points) + expected: npt.NDArray[np.float64] = np.array([], dtype=np.float64).reshape(0, 3) + np.testing.assert_array_almost_equal(result, expected) + + def test_convert_relative_to_absolute_points_3d_array_with_rotation(self) -> None: + """Tests converting relative 3D points to absolute 3D points with 90 degree yaw rotation.""" + reference: EulerStateSE3 = EulerStateSE3.from_array( + np.array([1.0, 0.0, 0.0, 0.0, 0.0, np.pi / 2], dtype=np.float64) + ) + relative_points: npt.NDArray[np.float64] = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, 1.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_relative_to_absolute_points_3d_array(reference, relative_points) + expected: npt.NDArray[np.float64] = np.array([[1.0, 1.0, 0.0], [0.0, 0.0, 1.0]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) diff --git a/tests/unit/geometry/transform/test_transform_se2.py b/tests/unit/geometry/transform/test_transform_se2.py new file mode 100644 index 00000000..60af633e --- /dev/null +++ b/tests/unit/geometry/transform/test_transform_se2.py @@ -0,0 +1,219 @@ +import unittest + +import numpy as np +import numpy.typing as npt + +from py123d.geometry import StateSE2, Vector2D +from py123d.geometry.transform.transform_se2 import ( + convert_absolute_to_relative_point_2d_array, + convert_absolute_to_relative_se2_array, + convert_relative_to_absolute_point_2d_array, + convert_relative_to_absolute_se2_array, + translate_se2_along_body_frame, + translate_se2_along_x, + translate_se2_along_y, + translate_se2_array_along_body_frame, +) + + +class TestTransformSE2(unittest.TestCase): + + def setUp(self): + self.decimal = 6 # Decimal places for np.testing.assert_array_almost_equal + + def test_translate_se2_along_x(self) -> None: + """Tests translating a SE2 state along the X-axis.""" + pose: StateSE2 = StateSE2.from_array(np.array([0.0, 0.0, 0.0], dtype=np.float64)) + distance: float = 1.0 + result: StateSE2 = translate_se2_along_x(pose, distance) + expected: StateSE2 = StateSE2.from_array(np.array([1.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array, decimal=self.decimal) + + def test_translate_se2_along_x_negative(self) -> None: + """Tests translating a SE2 state along the X-axis in the negative direction.""" + pose: StateSE2 = StateSE2.from_array(np.array([1.0, 2.0, 0.0], dtype=np.float64)) + distance: float = -0.5 + result: StateSE2 = translate_se2_along_x(pose, distance) + expected: StateSE2 = StateSE2.from_array(np.array([0.5, 2.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array, decimal=self.decimal) + + def test_translate_se2_along_x_with_rotation(self) -> None: + """Tests translating a SE2 state along the X-axis with 90 degree rotation.""" + pose: StateSE2 = StateSE2.from_array(np.array([0.0, 0.0, np.pi / 2], dtype=np.float64)) + distance: float = 1.0 + result: StateSE2 = translate_se2_along_x(pose, distance) + expected: StateSE2 = StateSE2.from_array(np.array([0.0, 1.0, np.pi / 2], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array, decimal=self.decimal) + + def test_translate_se2_along_y(self) -> None: + """Tests translating a SE2 state along the Y-axis.""" 
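+ # With yaw = 0 the body frame coincides with the world frame, so a pure y-translation should move the pose straight along world +Y.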
+ pose: StateSE2 = StateSE2.from_array(np.array([0.0, 0.0, 0.0], dtype=np.float64)) + distance: float = 1.0 + result: StateSE2 = translate_se2_along_y(pose, distance) + expected: StateSE2 = StateSE2.from_array(np.array([0.0, 1.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array, decimal=self.decimal) + + def test_translate_se2_along_y_negative(self) -> None: + """Tests translating a SE2 state along the Y-axis in the negative direction.""" + pose: StateSE2 = StateSE2.from_array(np.array([1.0, 2.0, 0.0], dtype=np.float64)) + distance: float = -1.5 + result: StateSE2 = translate_se2_along_y(pose, distance) + expected: StateSE2 = StateSE2.from_array(np.array([1.0, 0.5, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array, decimal=self.decimal) + + def test_translate_se2_along_y_with_rotation(self) -> None: + """Tests translating a SE2 state along the Y-axis with -90 degree rotation.""" + pose: StateSE2 = StateSE2.from_array(np.array([0.0, 0.0, -np.pi / 2], dtype=np.float64)) + distance: float = 2.0 + result: StateSE2 = translate_se2_along_y(pose, distance) + expected: StateSE2 = StateSE2.from_array(np.array([2.0, 0.0, -np.pi / 2], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array, decimal=self.decimal) + + def test_translate_se2_along_body_frame_forward(self) -> None: + """Tests translating a SE2 state along the body frame forward direction, with 90 degree rotation.""" + # Move 1 unit forward in the direction of yaw (pi/2 = 90 degrees = +Y direction) + pose: StateSE2 = StateSE2.from_array(np.array([0.0, 0.0, np.pi / 2], dtype=np.float64)) + vector: Vector2D = Vector2D(1.0, 0.0) + result: StateSE2 = translate_se2_along_body_frame(pose, vector) + expected: StateSE2 = StateSE2.from_array(np.array([0.0, 1.0, np.pi / 2], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array, decimal=self.decimal) + + def test_translate_se2_along_body_frame_backward(self) -> None: + """Tests translating a SE2 state along the body frame backward direction.""" + pose: StateSE2 = StateSE2.from_array(np.array([0.0, 0.0, 0.0], dtype=np.float64)) + vector: Vector2D = Vector2D(-1.0, 0.0) + result: StateSE2 = translate_se2_along_body_frame(pose, vector) + expected: StateSE2 = StateSE2.from_array(np.array([-1.0, 0.0, 0.0], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array, decimal=self.decimal) + + def test_translate_se2_along_body_frame_diagonal(self) -> None: + """Tests translating a SE2 state along the body frame diagonal direction.""" + pose: StateSE2 = StateSE2.from_array(np.array([1.0, 0.0, np.deg2rad(45)], dtype=np.float64)) + vector: Vector2D = Vector2D(1.0, 0.0) + result: StateSE2 = translate_se2_along_body_frame(pose, vector) + expected: StateSE2 = StateSE2.from_array( + np.array([1.0 + np.sqrt(2.0) / 2, 0.0 + np.sqrt(2.0) / 2, np.deg2rad(45)], dtype=np.float64) + ) + np.testing.assert_array_almost_equal(result.array, expected.array, decimal=self.decimal) + + def test_translate_se2_along_body_frame_lateral(self) -> None: + """Tests translating a SE2 state along the body frame lateral direction.""" + # Move 1 unit to the left (positive y in body frame) + pose: StateSE2 = StateSE2.from_array(np.array([0.0, 0.0, 0.0], dtype=np.float64)) + vector: Vector2D = Vector2D(0.0, 1.0) + result: StateSE2 = translate_se2_along_body_frame(pose, vector) + expected: StateSE2 = StateSE2.from_array(np.array([0.0, 1.0, 0.0], dtype=np.float64)) +
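# Assuming the conventional right-handed frame (x forward, +y to the left), body-frame +y at zero yaw maps directly to world +Y. +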
np.testing.assert_array_almost_equal(result.array, expected.array, decimal=self.decimal) + + def test_translate_se2_along_body_frame_lateral_with_rotation(self) -> None: + """Tests translating a SE2 state along the body frame lateral direction with 90 degree rotation.""" + # Move 1 unit to the left when facing 90 degrees + pose: StateSE2 = StateSE2.from_array(np.array([0.0, 0.0, np.pi / 2], dtype=np.float64)) + vector: Vector2D = Vector2D(0.0, 1.0) + result: StateSE2 = translate_se2_along_body_frame(pose, vector) + expected: StateSE2 = StateSE2.from_array(np.array([-1.0, 0.0, np.pi / 2], dtype=np.float64)) + np.testing.assert_array_almost_equal(result.array, expected.array, decimal=self.decimal) + + def test_translate_se2_array_along_body_frame_single_distance(self) -> None: + """Tests translating a SE2 state array along the body frame forward direction.""" + poses: npt.NDArray[np.float64] = np.array([[0.0, 0.0, 0.0], [0.0, 0.0, np.pi / 2]], dtype=np.float64) + distance: Vector2D = Vector2D(1.0, 0.0) + result: npt.NDArray[np.float64] = translate_se2_array_along_body_frame(poses, distance) + expected: npt.NDArray[np.float64] = np.array([[1.0, 0.0, 0.0], [0.0, 1.0, np.pi / 2]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_translate_se2_array_along_body_frame_multiple_distances(self) -> None: + """Tests translating a SE2 state array along the body frame forward direction for poses with different headings.""" + poses: npt.NDArray[np.float64] = np.array([[0.0, 0.0, 0.0], [0.0, 0.0, np.pi]], dtype=np.float64) + distance: Vector2D = Vector2D(2.0, 0.0) + result: npt.NDArray[np.float64] = translate_se2_array_along_body_frame(poses, distance) + expected: npt.NDArray[np.float64] = np.array([[2.0, 0.0, 0.0], [-2.0, 0.0, np.pi]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_translate_se2_array_along_body_frame_lateral(self) -> None: + """Tests translating a SE2 state array along the body frame lateral direction with 90 degree rotation.""" + poses: npt.NDArray[np.float64] = np.array([[0.0, 0.0, 0.0], [0.0, 0.0, np.pi / 2]], dtype=np.float64) + distance: Vector2D = Vector2D(0.0, 1.0) + result: npt.NDArray[np.float64] = translate_se2_array_along_body_frame(poses, distance) + expected: npt.NDArray[np.float64] = np.array([[0.0, 1.0, 0.0], [-1.0, 0.0, np.pi / 2]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_absolute_to_relative_se2_array(self) -> None: + """Tests converting absolute SE2 poses to relative SE2 poses.""" + origin: StateSE2 = StateSE2.from_array(np.array([1.0, 1.0, 0.0], dtype=np.float64)) + absolute_poses: npt.NDArray[np.float64] = np.array([[2.0, 2.0, 0.0], [0.0, 1.0, np.pi / 2]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_absolute_to_relative_se2_array(origin, absolute_poses) + expected: npt.NDArray[np.float64] = np.array([[1.0, 1.0, 0.0], [-1.0, 0.0, np.pi / 2]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_absolute_to_relative_se2_array_with_rotation(self) -> None: + """Tests converting absolute SE2 poses to relative SE2 poses with 90 degree rotation.""" + reference: StateSE2 = StateSE2.from_array(np.array([0.0, 0.0, np.pi / 2], dtype=np.float64)) + absolute_poses: npt.NDArray[np.float64] = np.array([[1.0, 0.0, np.pi / 2]], dtype=np.float64) + result: npt.NDArray[np.float64] =
convert_absolute_to_relative_se2_array(reference, absolute_poses) + expected: npt.NDArray[np.float64] = np.array([[0.0, -1.0, 0.0]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_absolute_to_relative_se2_array_identity(self) -> None: + """Tests converting absolute SE2 poses to relative SE2 poses with identity transformation.""" + reference: StateSE2 = StateSE2.from_array(np.array([0.0, 0.0, 0.0], dtype=np.float64)) + absolute_poses: npt.NDArray[np.float64] = np.array([[1.0, 2.0, np.pi / 4]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_absolute_to_relative_se2_array(reference, absolute_poses) + expected: npt.NDArray[np.float64] = np.array([[1.0, 2.0, np.pi / 4]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_relative_to_absolute_se2_array(self) -> None: + """Tests converting relative SE2 poses to absolute SE2 poses.""" + reference: StateSE2 = StateSE2.from_array(np.array([1.0, 1.0, 0.0], dtype=np.float64)) + relative_poses: npt.NDArray[np.float64] = np.array([[1.0, 1.0, 0.0], [-1.0, 0.0, np.pi / 2]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_relative_to_absolute_se2_array(reference, relative_poses) + expected: npt.NDArray[np.float64] = np.array([[2.0, 2.0, 0.0], [0.0, 1.0, np.pi / 2]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_relative_to_absolute_se2_array_with_rotation(self) -> None: + """Tests converting relative SE2 poses to absolute SE2 poses with rotation.""" + reference: StateSE2 = StateSE2.from_array(np.array([1.0, 0.0, np.pi / 2], dtype=np.float64)) + relative_poses: npt.NDArray[np.float64] = np.array([[1.0, 0.0, 0.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_relative_to_absolute_se2_array(reference, relative_poses) + expected: npt.NDArray[np.float64] = np.array([[1.0, 1.0, np.pi / 2]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_absolute_to_relative_point_2d_array(self) -> None: + """Tests converting absolute 2D points to relative 2D points.""" + reference: StateSE2 = StateSE2.from_array(np.array([1.0, 1.0, 0.0], dtype=np.float64)) + absolute_points: npt.NDArray[np.float64] = np.array([[2.0, 2.0], [0.0, 1.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_absolute_to_relative_point_2d_array(reference, absolute_points) + expected: npt.NDArray[np.float64] = np.array([[1.0, 1.0], [-1.0, 0.0]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_absolute_to_relative_point_2d_array_with_rotation(self) -> None: + """Tests converting absolute 2D points to relative 2D points with 90 degree rotation.""" + reference: StateSE2 = StateSE2.from_array(np.array([0.0, 0.0, np.pi / 2], dtype=np.float64)) + absolute_points: npt.NDArray[np.float64] = np.array([[0.0, 1.0], [1.0, 0.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_absolute_to_relative_point_2d_array(reference, absolute_points) + expected: npt.NDArray[np.float64] = np.array([[1.0, 0.0], [0.0, -1.0]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_absolute_to_relative_point_2d_array_empty(self) -> None: + """Tests converting an empty array of absolute 2D points to relative 2D points.""" + reference: StateSE2 = 
StateSE2.from_array(np.array([1.0, 1.0, 0.0], dtype=np.float64)) + absolute_points: npt.NDArray[np.float64] = np.array([], dtype=np.float64).reshape(0, 2) + result: npt.NDArray[np.float64] = convert_absolute_to_relative_point_2d_array(reference, absolute_points) + expected: npt.NDArray[np.float64] = np.array([], dtype=np.float64).reshape(0, 2) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_relative_to_absolute_point_2d_array(self) -> None: + """Tests converting relative 2D points to absolute 2D points.""" + reference: StateSE2 = StateSE2.from_array(np.array([1.0, 1.0, 0.0], dtype=np.float64)) + relative_points: npt.NDArray[np.float64] = np.array([[1.0, 1.0], [-1.0, 0.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_relative_to_absolute_point_2d_array(reference, relative_points) + expected: npt.NDArray[np.float64] = np.array([[2.0, 2.0], [0.0, 1.0]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) + + def test_convert_relative_to_absolute_point_2d_array_with_rotation(self) -> None: + """Tests converting relative 2D points to absolute 2D points with 90 degree rotation.""" + reference: StateSE2 = StateSE2.from_array(np.array([1.0, 0.0, np.pi / 2], dtype=np.float64)) + relative_points: npt.NDArray[np.float64] = np.array([[1.0, 0.0], [0.0, 1.0]], dtype=np.float64) + result: npt.NDArray[np.float64] = convert_relative_to_absolute_point_2d_array(reference, relative_points) + expected: npt.NDArray[np.float64] = np.array([[1.0, 1.0], [0.0, 0.0]], dtype=np.float64) + np.testing.assert_array_almost_equal(result, expected, decimal=self.decimal) diff --git a/tests/unit/geometry/transform/test_transform_se3.py b/tests/unit/geometry/transform/test_transform_se3.py new file mode 100644 index 00000000..44e86504 --- /dev/null +++ b/tests/unit/geometry/transform/test_transform_se3.py @@ -0,0 +1,306 @@ +import unittest + +import numpy as np +import numpy.typing as npt + +import py123d.geometry.transform.transform_euler_se3 as euler_transform_se3 +from py123d.geometry import EulerStateSE3, EulerStateSE3Index, Point3D, StateSE3, StateSE3Index +from py123d.geometry.transform.transform_se3 import ( + convert_absolute_to_relative_points_3d_array, + convert_absolute_to_relative_se3_array, + convert_points_3d_array_between_origins, + convert_relative_to_absolute_points_3d_array, + convert_relative_to_absolute_se3_array, + convert_se3_array_between_origins, + translate_se3_along_body_frame, + translate_se3_along_x, + translate_se3_along_y, + translate_se3_along_z, +) +from py123d.geometry.utils.rotation_utils import ( + get_quaternion_array_from_euler_array, + get_rotation_matrices_from_euler_array, + get_rotation_matrices_from_quaternion_array, +) + + +class TestTransformSE3(unittest.TestCase): + + def setUp(self): + euler_se3_a = EulerStateSE3( + x=1.0, + y=2.0, + z=3.0, + roll=np.deg2rad(90), + pitch=0.0, + yaw=0.0, + ) + euler_se3_b = EulerStateSE3( + x=1.0, + y=-2.0, + z=3.0, + roll=0.0, + pitch=np.deg2rad(90), + yaw=0.0, + ) + euler_se3_c = EulerStateSE3( + x=-1.0, + y=2.0, + z=-3.0, + roll=0.0, + pitch=0.0, + yaw=np.deg2rad(90), + ) + + quat_se3_a: StateSE3 = euler_se3_a.state_se3 + quat_se3_b: StateSE3 = euler_se3_b.state_se3 + quat_se3_c: StateSE3 = euler_se3_c.state_se3 + + self.euler_se3 = [euler_se3_a, euler_se3_b, euler_se3_c] + self.quat_se3 = [quat_se3_a, quat_se3_b, quat_se3_c] + + self.max_pose_xyz = 100.0 + + def _get_random_euler_se3_array(self, size: int) -> 
npt.NDArray[np.float64]:
+        """Generate random SE3 poses."""
+        random_se3_array = np.zeros((size, len(EulerStateSE3Index)), dtype=np.float64)
+        random_se3_array[:, EulerStateSE3Index.XYZ] = np.random.uniform(
+            -self.max_pose_xyz, self.max_pose_xyz, (size, 3)
+        )
+        random_se3_array[:, EulerStateSE3Index.YAW] = np.random.uniform(-np.pi, np.pi, size)
+        random_se3_array[:, EulerStateSE3Index.PITCH] = np.random.uniform(-np.pi / 2, np.pi / 2, size)
+        random_se3_array[:, EulerStateSE3Index.ROLL] = np.random.uniform(-np.pi, np.pi, size)
+
+        return random_se3_array
+
+    def _convert_euler_se3_array_to_quat_se3_array(
+        self, euler_se3_array: npt.NDArray[np.float64]
+    ) -> npt.NDArray[np.float64]:
+        """Convert an array of SE3 poses from Euler angles to quaternion representation."""
+        quat_se3_array = np.zeros((euler_se3_array.shape[0], len(StateSE3Index)), dtype=np.float64)
+        quat_se3_array[:, StateSE3Index.XYZ] = euler_se3_array[:, EulerStateSE3Index.XYZ]
+        quat_se3_array[:, StateSE3Index.QUATERNION] = get_quaternion_array_from_euler_array(
+            euler_se3_array[:, EulerStateSE3Index.EULER_ANGLES]
+        )
+        return quat_se3_array
+
+    def _get_random_quat_se3_array(self, size: int) -> npt.NDArray[np.float64]:
+        """Generate random SE3 poses in quaternion representation."""
+        random_euler_se3_array = self._get_random_euler_se3_array(size)
+        random_quat_se3_array = self._convert_euler_se3_array_to_quat_se3_array(random_euler_se3_array)
+        return random_quat_se3_array
+
+    def test_sanity(self):
+        """Checks that the fixture poses agree between quaternion and Euler representations."""
+        for quat_se3, euler_se3 in zip(self.quat_se3, self.euler_se3):
+            np.testing.assert_allclose(
+                quat_se3.point_3d.array,
+                euler_se3.point_3d.array,
+                atol=1e-6,
+            )
+            np.testing.assert_allclose(
+                quat_se3.rotation_matrix,
+                euler_se3.rotation_matrix,
+                atol=1e-6,
+            )
+
+    def test_random_sanity(self):
+        """Checks that random Euler poses and their quaternion conversions agree."""
+        for _ in range(10):
+            random_euler_se3_array = self._get_random_euler_se3_array(np.random.randint(1, 10))
+            random_quat_se3_array = self._convert_euler_se3_array_to_quat_se3_array(random_euler_se3_array)
+
+            np.testing.assert_allclose(
+                random_euler_se3_array[:, EulerStateSE3Index.XYZ],
+                random_quat_se3_array[:, StateSE3Index.XYZ],
+                atol=1e-6,
+            )
+            quat_rotation_matrices = get_rotation_matrices_from_quaternion_array(
+                random_quat_se3_array[:, StateSE3Index.QUATERNION]
+            )
+            euler_rotation_matrices = get_rotation_matrices_from_euler_array(
+                random_euler_se3_array[:, EulerStateSE3Index.EULER_ANGLES]
+            )
+            np.testing.assert_allclose(euler_rotation_matrices, quat_rotation_matrices, atol=1e-6)
+
+    def test_convert_absolute_to_relative_points_3d_array(self):
+        """Tests absolute-to-relative 3D point conversion against the Euler-based implementation."""
+        random_points_3d = np.random.rand(10, 3)
+        for quat_se3, euler_se3 in zip(self.quat_se3, self.euler_se3):
+            rel_points_quat = convert_absolute_to_relative_points_3d_array(quat_se3, random_points_3d)
+            rel_points_euler = euler_transform_se3.convert_absolute_to_relative_points_3d_array(
+                euler_se3, random_points_3d
+            )
+            np.testing.assert_allclose(rel_points_quat, rel_points_euler, atol=1e-6)
+
+    def test_convert_absolute_to_relative_se3_array(self):
+        """Tests absolute-to-relative SE3 conversion against the Euler-based implementation."""
+        for quat_se3, euler_se3 in zip(self.quat_se3, self.euler_se3):
+            random_euler_se3_array = self._get_random_euler_se3_array(np.random.randint(1, 10))
+            random_quat_se3_array = self._convert_euler_se3_array_to_quat_se3_array(random_euler_se3_array)
+
+            rel_se3_quat = convert_absolute_to_relative_se3_array(quat_se3, random_quat_se3_array)
+            rel_se3_euler = euler_transform_se3.convert_absolute_to_relative_euler_se3_array(
+                euler_se3, random_euler_se3_array
+            )
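+            # Both representations should realise the same transform; schematically (a sketch,
+            # assuming the usual pose-composition and Hamilton quaternion conventions):
+            #   p_rel = R_origin^T @ (p_abs - p_origin)  and  q_rel = q_origin^-1 * q_abs.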
+            np.testing.assert_allclose(
+                rel_se3_euler[..., EulerStateSE3Index.XYZ], rel_se3_quat[..., StateSE3Index.XYZ], atol=1e-6
+            )
+            # We compare rotation matrices to avoid issues with quaternion sign ambiguity
+            quat_rotation_matrices = get_rotation_matrices_from_quaternion_array(
+                rel_se3_quat[..., StateSE3Index.QUATERNION]
+            )
+            euler_rotation_matrices = get_rotation_matrices_from_euler_array(
+                rel_se3_euler[..., EulerStateSE3Index.EULER_ANGLES]
+            )
+            np.testing.assert_allclose(quat_rotation_matrices, euler_rotation_matrices, atol=1e-6)
+
+    def test_convert_relative_to_absolute_points_3d_array(self):
+        """Tests relative-to-absolute 3D point conversion against the Euler-based implementation."""
+        random_points_3d = np.random.rand(10, 3)
+        for quat_se3, euler_se3 in zip(self.quat_se3, self.euler_se3):
+            abs_points_quat = convert_relative_to_absolute_points_3d_array(quat_se3, random_points_3d)
+            abs_points_euler = euler_transform_se3.convert_relative_to_absolute_points_3d_array(
+                euler_se3, random_points_3d
+            )
+            np.testing.assert_allclose(abs_points_quat, abs_points_euler, atol=1e-6)
+
+    def test_convert_relative_to_absolute_se3_array(self):
+        """Tests relative-to-absolute SE3 conversion against the Euler-based implementation."""
+        for quat_se3, euler_se3 in zip(self.quat_se3, self.euler_se3):
+            random_euler_se3_array = self._get_random_euler_se3_array(np.random.randint(1, 10))
+            random_quat_se3_array = self._convert_euler_se3_array_to_quat_se3_array(random_euler_se3_array)
+
+            abs_se3_quat = convert_relative_to_absolute_se3_array(quat_se3, random_quat_se3_array)
+            abs_se3_euler = euler_transform_se3.convert_relative_to_absolute_euler_se3_array(
+                euler_se3, random_euler_se3_array
+            )
+            np.testing.assert_allclose(
+                abs_se3_euler[..., EulerStateSE3Index.XYZ], abs_se3_quat[..., StateSE3Index.XYZ], atol=1e-6
+            )
+
+            # We compare rotation matrices to avoid issues with quaternion sign ambiguity
+            quat_rotation_matrices = get_rotation_matrices_from_quaternion_array(
+                abs_se3_quat[..., StateSE3Index.QUATERNION]
+            )
+            euler_rotation_matrices = get_rotation_matrices_from_euler_array(
+                abs_se3_euler[..., EulerStateSE3Index.EULER_ANGLES]
+            )
+            np.testing.assert_allclose(quat_rotation_matrices, euler_rotation_matrices, atol=1e-6)
+
+    def test_convert_se3_array_between_origins(self):
+        """Tests converting SE3 poses between origins against chained conversions."""
+        for _ in range(10):
+            random_quat_se3_array = self._get_random_quat_se3_array(np.random.randint(1, 10))
+
+            from_se3 = StateSE3.from_array(self._get_random_quat_se3_array(1)[0])
+            to_se3 = StateSE3.from_array(self._get_random_quat_se3_array(1)[0])
+            identity_se3_array = np.zeros(len(StateSE3Index), dtype=np.float64)
+            identity_se3_array[StateSE3Index.QW] = 1.0
+            identity_se3 = StateSE3.from_array(identity_se3_array)
+
+            # Check if consistent with absolute-relative-absolute conversion
+            converted_se3_quat = convert_se3_array_between_origins(from_se3, to_se3, random_quat_se3_array)
+
+            abs_from_se3_quat = convert_relative_to_absolute_se3_array(from_se3, random_quat_se3_array)
+            rel_to_se3_quat = convert_absolute_to_relative_se3_array(to_se3, abs_from_se3_quat)
+
+            np.testing.assert_allclose(
+                converted_se3_quat[..., StateSE3Index.XYZ],
+                rel_to_se3_quat[..., StateSE3Index.XYZ],
+                atol=1e-6,
+            )
+            np.testing.assert_allclose(
+                converted_se3_quat[..., StateSE3Index.QUATERNION],
+                rel_to_se3_quat[..., StateSE3Index.QUATERNION],
+                atol=1e-6,
+            )
+
+            # Check if consistent with absolute conversion to identity origin
+            absolute_se3_quat = convert_se3_array_between_origins(from_se3, identity_se3, random_quat_se3_array)
+            np.testing.assert_allclose(
+                absolute_se3_quat[..., StateSE3Index.XYZ],
abs_from_se3_quat[..., StateSE3Index.XYZ], + atol=1e-6, + ) + + def test_convert_points_3d_array_between_origins(self): + random_points_3d = np.random.rand(10, 3) + for _ in range(10): + from_se3 = StateSE3.from_array(self._get_random_quat_se3_array(1)[0]) + to_se3 = StateSE3.from_array(self._get_random_quat_se3_array(1)[0]) + identity_se3_array = np.zeros(len(StateSE3Index), dtype=np.float64) + identity_se3_array[StateSE3Index.QW] = 1.0 + identity_se3 = StateSE3.from_array(identity_se3_array) + + # Check if consistent with absolute-relative-absolute conversion + converted_points_quat = convert_points_3d_array_between_origins(from_se3, to_se3, random_points_3d) + abs_from_se3_quat = convert_relative_to_absolute_points_3d_array(from_se3, random_points_3d) + rel_to_se3_quat = convert_absolute_to_relative_points_3d_array(to_se3, abs_from_se3_quat) + np.testing.assert_allclose(converted_points_quat, rel_to_se3_quat, atol=1e-6) + + # Check if consistent with se3 array conversion + random_se3_poses = np.zeros((random_points_3d.shape[0], len(StateSE3Index)), dtype=np.float64) + random_se3_poses[:, StateSE3Index.XYZ] = random_points_3d + random_se3_poses[:, StateSE3Index.QUATERNION] = np.array([1.0, 0.0, 0.0, 0.0]) # Identity rotation + converted_se3_quat_poses = convert_se3_array_between_origins(from_se3, to_se3, random_se3_poses) + np.testing.assert_allclose( + converted_se3_quat_poses[:, StateSE3Index.XYZ], + converted_points_quat, + atol=1e-6, + ) + + # Check if consistent with absolute conversion to identity origin + absolute_se3_quat = convert_points_3d_array_between_origins(from_se3, identity_se3, random_points_3d) + np.testing.assert_allclose( + absolute_se3_quat[..., StateSE3Index.XYZ], + abs_from_se3_quat[..., StateSE3Index.XYZ], + atol=1e-6, + ) + + def test_translate_se3_along_x(self): + for _ in range(10): + distance = np.random.uniform(-self.max_pose_xyz, self.max_pose_xyz) + for quat_se3, euler_se3 in zip(self.quat_se3, self.euler_se3): + translated_quat = translate_se3_along_x(quat_se3, distance) + translated_euler = euler_transform_se3.translate_euler_se3_along_x(euler_se3, distance) + np.testing.assert_allclose(translated_quat.point_3d.array, translated_euler.point_3d.array, atol=1e-6) + np.testing.assert_allclose(translated_quat.rotation_matrix, translated_euler.rotation_matrix, atol=1e-6) + np.testing.assert_allclose(quat_se3.quaternion.array, translated_quat.quaternion.array, atol=1e-6) + + def test_translate_se3_along_y(self): + for _ in range(10): + distance = np.random.uniform(-self.max_pose_xyz, self.max_pose_xyz) + for quat_se3, euler_se3 in zip(self.quat_se3, self.euler_se3): + translated_quat = translate_se3_along_y(quat_se3, distance) + translated_euler = euler_transform_se3.translate_euler_se3_along_y(euler_se3, distance) + np.testing.assert_allclose(translated_quat.point_3d.array, translated_euler.point_3d.array, atol=1e-6) + np.testing.assert_allclose(translated_quat.rotation_matrix, translated_euler.rotation_matrix, atol=1e-6) + np.testing.assert_allclose(quat_se3.quaternion.array, translated_quat.quaternion.array, atol=1e-6) + + def test_translate_se3_along_z(self): + for _ in range(10): + distance = np.random.uniform(-self.max_pose_xyz, self.max_pose_xyz) + for quat_se3, euler_se3 in zip(self.quat_se3, self.euler_se3): + translated_quat = translate_se3_along_z(quat_se3, distance) + translated_euler = euler_transform_se3.translate_euler_se3_along_z(euler_se3, distance) + np.testing.assert_allclose(translated_quat.point_3d.array, translated_euler.point_3d.array, 
atol=1e-6) + np.testing.assert_allclose(translated_quat.rotation_matrix, translated_euler.rotation_matrix, atol=1e-6) + np.testing.assert_allclose(quat_se3.quaternion.array, translated_quat.quaternion.array, atol=1e-6) + + def test_translate_se3_along_body_frame(self): + for _ in range(10): + vector_3d = Point3D( + x=np.random.uniform(-self.max_pose_xyz, self.max_pose_xyz), + y=np.random.uniform(-self.max_pose_xyz, self.max_pose_xyz), + z=np.random.uniform(-self.max_pose_xyz, self.max_pose_xyz), + ) + for quat_se3, euler_se3 in zip(self.quat_se3, self.euler_se3): + translated_quat = translate_se3_along_body_frame(quat_se3, vector_3d) + translated_euler = euler_transform_se3.translate_euler_se3_along_body_frame(euler_se3, vector_3d) + np.testing.assert_allclose(translated_quat.point_3d.array, translated_euler.point_3d.array, atol=1e-6) + np.testing.assert_allclose(translated_quat.rotation_matrix, translated_euler.rotation_matrix, atol=1e-6) + np.testing.assert_allclose(quat_se3.quaternion.array, translated_quat.quaternion.array, atol=1e-6) + + +if __name__ == "__main__": + unittest.main() diff --git a/d123/training/models/sim_agent/__init__.py b/tests/unit/geometry/utils/__init__.py similarity index 100% rename from d123/training/models/sim_agent/__init__.py rename to tests/unit/geometry/utils/__init__.py diff --git a/tests/unit/geometry/utils/test_bounding_box_utils.py b/tests/unit/geometry/utils/test_bounding_box_utils.py new file mode 100644 index 00000000..3b5718ca --- /dev/null +++ b/tests/unit/geometry/utils/test_bounding_box_utils.py @@ -0,0 +1,266 @@ +import unittest + +import numpy as np +import numpy.typing as npt +import shapely + +from py123d.geometry.geometry_index import ( + BoundingBoxSE3Index, + Corners2DIndex, + Corners3DIndex, + EulerStateSE3Index, + Point2DIndex, + Point3DIndex, +) +from py123d.geometry.se import EulerStateSE3, StateSE3 +from py123d.geometry.transform.transform_se3 import translate_se3_along_body_frame +from py123d.geometry.utils.bounding_box_utils import ( + bbse2_array_to_corners_array, + bbse2_array_to_polygon_array, + bbse3_array_to_corners_array, + corners_2d_array_to_polygon_array, + get_corners_3d_factors, +) +from py123d.geometry.vector import Vector3D + + +class TestBoundingBoxUtils(unittest.TestCase): + + def setUp(self): + self._num_consistency_checks = 10 + self._max_pose_xyz = 100.0 + self._max_extent = 200.0 + + def _get_random_euler_se3_array(self, size: int) -> npt.NDArray[np.float64]: + """Generate random SE3 poses""" + random_se3_array = np.zeros((size, len(EulerStateSE3Index)), dtype=np.float64) + random_se3_array[:, EulerStateSE3Index.XYZ] = np.random.uniform( + -self._max_pose_xyz, + self._max_pose_xyz, + (size, len(Point3DIndex)), + ) + random_se3_array[:, EulerStateSE3Index.YAW] = np.random.uniform(-np.pi, np.pi, size) + random_se3_array[:, EulerStateSE3Index.PITCH] = np.random.uniform(-np.pi / 2, np.pi / 2, size) + random_se3_array[:, EulerStateSE3Index.ROLL] = np.random.uniform(-np.pi, np.pi, size) + + return random_se3_array + + def test_bbse2_array_to_corners_array_one_dim(self): + bounding_box_se2_array = np.array([1.0, 2.0, 0.0, 4.0, 2.0]) + corners_array = bbse2_array_to_corners_array(bounding_box_se2_array) + + # fill expected + expected_corners = np.zeros((4, 2), dtype=np.float64) + expected_corners[Corners2DIndex.FRONT_LEFT] = [1.0 + 2.0, 2.0 + 1.0] + expected_corners[Corners2DIndex.FRONT_RIGHT] = [1.0 + 2.0, 2.0 - 1.0] + expected_corners[Corners2DIndex.BACK_RIGHT] = [1.0 - 2.0, 2.0 - 1.0] + 
expected_corners[Corners2DIndex.BACK_LEFT] = [1.0 - 2.0, 2.0 + 1.0] + + np.testing.assert_allclose(corners_array, expected_corners, atol=1e-6) + + def test_bbse2_array_to_corners_array_n_dim(self): + bounding_box_se2_array = np.array([1.0, 2.0, 0.0, 4.0, 2.0]) + bounding_box_se2_array = np.tile(bounding_box_se2_array, (3, 1)) + + corners_array = bbse2_array_to_corners_array(bounding_box_se2_array) + + # fill expected + expected_corners = np.zeros((4, 2), dtype=np.float64) + expected_corners[Corners2DIndex.FRONT_LEFT] = [1.0 + 2.0, 2.0 + 1.0] + expected_corners[Corners2DIndex.FRONT_RIGHT] = [1.0 + 2.0, 2.0 - 1.0] + expected_corners[Corners2DIndex.BACK_RIGHT] = [1.0 - 2.0, 2.0 - 1.0] + expected_corners[Corners2DIndex.BACK_LEFT] = [1.0 - 2.0, 2.0 + 1.0] + expected_corners = np.tile(expected_corners, (3, 1, 1)) + + np.testing.assert_allclose(corners_array, expected_corners, atol=1e-6) + + def test_bbse2_array_to_corners_array_zero_dim(self): + bounding_box_se2_array = np.zeros((0, 5), dtype=np.float64) + corners_array = bbse2_array_to_corners_array(bounding_box_se2_array) + expected_corners = np.zeros((0, 4, 2), dtype=np.float64) + np.testing.assert_allclose(corners_array, expected_corners, atol=1e-6) + + def test_bbse2_array_to_corners_array_rotation(self): + bounding_box_se2_array = np.array([1.0, 2.0, np.pi / 2, 4.0, 2.0]) + corners_array = bbse2_array_to_corners_array(bounding_box_se2_array) + + # fill expected + expected_corners = np.zeros((len(Corners2DIndex), len(Point2DIndex)), dtype=np.float64) + expected_corners[Corners2DIndex.FRONT_LEFT] = [1.0 - 1.0, 2.0 + 2.0] + expected_corners[Corners2DIndex.FRONT_RIGHT] = [1.0 + 1.0, 2.0 + 2.0] + expected_corners[Corners2DIndex.BACK_RIGHT] = [1.0 + 1.0, 2.0 - 2.0] + expected_corners[Corners2DIndex.BACK_LEFT] = [1.0 - 1.0, 2.0 - 2.0] + + np.testing.assert_allclose(corners_array, expected_corners, atol=1e-6) + + def test_corners_2d_array_to_polygon_array_one_dim(self): + corners_array = np.array( + [ + [3.0, 3.0], + [3.0, 1.0], + [-1.0, 1.0], + [-1.0, 3.0], + ] + ) + polygon = corners_2d_array_to_polygon_array(corners_array) + + expected_polygon = shapely.geometry.Polygon(corners_array) + np.testing.assert_allclose(polygon.area, expected_polygon.area, atol=1e-6) + self.assertTrue(polygon.equals(expected_polygon)) + + def test_corners_2d_array_to_polygon_array_n_dim(self): + corners_array = np.array( + [ + [ + [3.0, 3.0], + [3.0, 1.0], + [-1.0, 1.0], + [-1.0, 3.0], + ], + [ + [4.0, 4.0], + [4.0, 2.0], + [0.0, 2.0], + [0.0, 4.0], + ], + ] + ) + polygons = corners_2d_array_to_polygon_array(corners_array) + + expected_polygon_1 = shapely.geometry.Polygon(corners_array[0]) + expected_polygon_2 = shapely.geometry.Polygon(corners_array[1]) + + np.testing.assert_allclose(polygons[0].area, expected_polygon_1.area, atol=1e-6) + self.assertTrue(polygons[0].equals(expected_polygon_1)) + + np.testing.assert_allclose(polygons[1].area, expected_polygon_2.area, atol=1e-6) + self.assertTrue(polygons[1].equals(expected_polygon_2)) + + def test_corners_2d_array_to_polygon_array_zero_dim(self): + corners_array = np.zeros((0, 4, 2), dtype=np.float64) + polygons = corners_2d_array_to_polygon_array(corners_array) + expected_polygons = np.zeros((0,), dtype=np.object_) + np.testing.assert_array_equal(polygons, expected_polygons) + + def test_bbse2_array_to_polygon_array_one_dim(self): + bounding_box_se2_array = np.array([1.0, 2.0, 0.0, 4.0, 2.0]) + polygon = bbse2_array_to_polygon_array(bounding_box_se2_array) + + expected_corners = np.zeros((4, 2), dtype=np.float64) + 
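+        # Half-extents are (length / 2, width / 2) = (2.0, 1.0) around the centre (1.0, 2.0);
+        # the expected corners below follow the Corners2DIndex ordering.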
expected_corners[Corners2DIndex.FRONT_LEFT] = [1.0 + 2.0, 2.0 + 1.0] + expected_corners[Corners2DIndex.FRONT_RIGHT] = [1.0 + 2.0, 2.0 - 1.0] + expected_corners[Corners2DIndex.BACK_RIGHT] = [1.0 - 2.0, 2.0 - 1.0] + expected_corners[Corners2DIndex.BACK_LEFT] = [1.0 - 2.0, 2.0 + 1.0] + expected_polygon = shapely.geometry.Polygon(expected_corners) + + np.testing.assert_allclose(polygon.area, expected_polygon.area, atol=1e-6) + self.assertTrue(polygon.equals(expected_polygon)) + + def test_bbse2_array_to_polygon_array_n_dim(self): + bounding_box_se2_array = np.array([1.0, 2.0, 0.0, 4.0, 2.0]) + bounding_box_se2_array = np.tile(bounding_box_se2_array, (3, 1)) + + polygons = bbse2_array_to_polygon_array(bounding_box_se2_array) + + expected_corners = np.zeros((4, 2), dtype=np.float64) + expected_corners[Corners2DIndex.FRONT_LEFT] = [1.0 + 2.0, 2.0 + 1.0] + expected_corners[Corners2DIndex.FRONT_RIGHT] = [1.0 + 2.0, 2.0 - 1.0] + expected_corners[Corners2DIndex.BACK_RIGHT] = [1.0 - 2.0, 2.0 - 1.0] + expected_corners[Corners2DIndex.BACK_LEFT] = [1.0 - 2.0, 2.0 + 1.0] + expected_polygon = shapely.geometry.Polygon(expected_corners) + + for polygon in polygons: + np.testing.assert_allclose(polygon.area, expected_polygon.area, atol=1e-6) + self.assertTrue(polygon.equals(expected_polygon)) + + def test_bbse2_array_to_polygon_array_zero_dim(self): + bounding_box_se2_array = np.zeros((0, 5), dtype=np.float64) + polygons = bbse2_array_to_polygon_array(bounding_box_se2_array) + expected_polygons = np.zeros((0,), dtype=np.object_) + np.testing.assert_array_equal(polygons, expected_polygons) + + def test_bbse3_array_to_corners_array_one_dim(self): + bounding_box_se3_array = np.array([1.0, 2.0, 3.0, 1.0, 0.0, 0.0, 0.0, 4.0, 2.0, 6.0]) + corners_array = bbse3_array_to_corners_array(bounding_box_se3_array) + + # fill expected + expected_corners = np.zeros((8, 3), dtype=np.float64) + expected_corners[Corners3DIndex.FRONT_LEFT_BOTTOM] = [1.0 + 2.0, 2.0 + 1.0, 3.0 - 3.0] + expected_corners[Corners3DIndex.FRONT_RIGHT_BOTTOM] = [1.0 + 2.0, 2.0 - 1.0, 3.0 - 3.0] + expected_corners[Corners3DIndex.BACK_RIGHT_BOTTOM] = [1.0 - 2.0, 2.0 - 1.0, 3.0 - 3.0] + expected_corners[Corners3DIndex.BACK_LEFT_BOTTOM] = [1.0 - 2.0, 2.0 + 1.0, 3.0 - 3.0] + expected_corners[Corners3DIndex.FRONT_LEFT_TOP] = [1.0 + 2.0, 2.0 + 1.0, 3.0 + 3.0] + expected_corners[Corners3DIndex.FRONT_RIGHT_TOP] = [1.0 + 2.0, 2.0 - 1.0, 3.0 + 3.0] + expected_corners[Corners3DIndex.BACK_RIGHT_TOP] = [1.0 - 2.0, 2.0 - 1.0, 3.0 + 3.0] + expected_corners[Corners3DIndex.BACK_LEFT_TOP] = [1.0 - 2.0, 2.0 + 1.0, 3.0 + 3.0] + + np.testing.assert_allclose(corners_array, expected_corners, atol=1e-6) + + def test_bbse3_array_to_corners_array_one_dim_rotation(self): + for _ in range(self._num_consistency_checks): + se3_state = EulerStateSE3.from_array(self._get_random_euler_se3_array(1)[0]).state_se3 + se3_array = se3_state.array + + # construct a bounding box + bounding_box_se3_array = np.zeros((len(BoundingBoxSE3Index),), dtype=np.float64) + length, width, height = np.random.uniform(0.0, self._max_extent, size=3) + + bounding_box_se3_array[BoundingBoxSE3Index.STATE_SE3] = se3_array + bounding_box_se3_array[BoundingBoxSE3Index.LENGTH] = length + bounding_box_se3_array[BoundingBoxSE3Index.WIDTH] = width + bounding_box_se3_array[BoundingBoxSE3Index.HEIGHT] = height + + corners_array = bbse3_array_to_corners_array(bounding_box_se3_array) + + corners_3d_factors = get_corners_3d_factors() + for corner_idx in Corners3DIndex: + body_translate_vector = Vector3D.from_array( + 
corners_3d_factors[corner_idx] * bounding_box_se3_array[BoundingBoxSE3Index.EXTENT] + ) + np.testing.assert_allclose( + corners_array[corner_idx], + translate_se3_along_body_frame(se3_state, body_translate_vector).point_3d.array, + atol=1e-6, + ) + + def test_bbse3_array_to_corners_array_n_dim(self): + for _ in range(self._num_consistency_checks): + N = np.random.randint(1, 20) + se3_array = self._get_random_euler_se3_array(N) + se3_state_array = np.array([EulerStateSE3.from_array(arr).state_se3.array for arr in se3_array]) + + # construct a bounding box + bounding_box_se3_array = np.zeros((N, len(BoundingBoxSE3Index)), dtype=np.float64) + lengths, widths, heights = np.random.uniform(0.0, self._max_extent, size=(3, N)) + + bounding_box_se3_array[:, BoundingBoxSE3Index.STATE_SE3] = se3_state_array + bounding_box_se3_array[:, BoundingBoxSE3Index.LENGTH] = lengths + bounding_box_se3_array[:, BoundingBoxSE3Index.WIDTH] = widths + bounding_box_se3_array[:, BoundingBoxSE3Index.HEIGHT] = heights + + corners_array = bbse3_array_to_corners_array(bounding_box_se3_array) + + corners_3d_factors = get_corners_3d_factors() + for obj_idx in range(N): + for corner_idx in Corners3DIndex: + body_translate_vector = Vector3D.from_array( + corners_3d_factors[corner_idx] * bounding_box_se3_array[obj_idx, BoundingBoxSE3Index.EXTENT] + ) + np.testing.assert_allclose( + corners_array[obj_idx, corner_idx], + translate_se3_along_body_frame( + StateSE3.from_array(bounding_box_se3_array[obj_idx, BoundingBoxSE3Index.STATE_SE3]), + body_translate_vector, + ).point_3d.array, + atol=1e-6, + ) + + def test_bbse3_array_to_corners_array_zero_dim(self): + bounding_box_se3_array = np.zeros((0, len(BoundingBoxSE3Index)), dtype=np.float64) + corners_array = bbse3_array_to_corners_array(bounding_box_se3_array) + expected_corners = np.zeros((0, 8, 3), dtype=np.float64) + np.testing.assert_allclose(corners_array, expected_corners, atol=1e-6) + + +if __name__ == "__main__": + unittest.main() diff --git a/d123/dataset/conversion/map/opendrive/opendrive_converter.py b/tests/unit/geometry/utils/test_polyline_utils.py similarity index 100% rename from d123/dataset/conversion/map/opendrive/opendrive_converter.py rename to tests/unit/geometry/utils/test_polyline_utils.py diff --git a/tests/unit/geometry/utils/test_rotation_utils.py b/tests/unit/geometry/utils/test_rotation_utils.py new file mode 100644 index 00000000..f298af7a --- /dev/null +++ b/tests/unit/geometry/utils/test_rotation_utils.py @@ -0,0 +1,787 @@ +import unittest +from typing import Tuple + +import numpy as np +import numpy.typing as npt +from pyquaternion import Quaternion as PyQuaternion + +from py123d.geometry.geometry_index import EulerAnglesIndex, QuaternionIndex +from py123d.geometry.utils.rotation_utils import ( + conjugate_quaternion_array, + get_euler_array_from_quaternion_array, + get_euler_array_from_rotation_matrices, + get_euler_array_from_rotation_matrix, + get_q_bar_matrices, + get_q_matrices, + get_quaternion_array_from_euler_array, + get_quaternion_array_from_rotation_matrices, + get_quaternion_array_from_rotation_matrix, + get_rotation_matrices_from_euler_array, + get_rotation_matrices_from_quaternion_array, + get_rotation_matrix_from_euler_array, + get_rotation_matrix_from_quaternion_array, + invert_quaternion_array, + multiply_quaternion_arrays, + normalize_angle, + normalize_quaternion_array, +) + + +def _get_rotation_matrix_helper(euler_array: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: + """Helper function to ensure ZYX (Yaw-Pitch-Roll) 
intrinsic Euler angle convention, aka Tait-Bryan angles. + + :param euler_array: Array of Euler angles [roll, pitch, yaw] in radians. + :type euler_array: npt.NDArray[np.float64] + :return: Rotation matrix corresponding to the given Euler angles. + """ + + R_x = np.array( + [ + [1, 0, 0], + [0, np.cos(euler_array[EulerAnglesIndex.ROLL]), -np.sin(euler_array[EulerAnglesIndex.ROLL])], + [0, np.sin(euler_array[EulerAnglesIndex.ROLL]), np.cos(euler_array[EulerAnglesIndex.ROLL])], + ], + dtype=np.float64, + ) + R_y = np.array( + [ + [np.cos(euler_array[EulerAnglesIndex.PITCH]), 0, np.sin(euler_array[EulerAnglesIndex.PITCH])], + [0, 1, 0], + [-np.sin(euler_array[EulerAnglesIndex.PITCH]), 0, np.cos(euler_array[EulerAnglesIndex.PITCH])], + ], + dtype=np.float64, + ) + R_z = np.array( + [ + [np.cos(euler_array[EulerAnglesIndex.YAW]), -np.sin(euler_array[EulerAnglesIndex.YAW]), 0], + [np.sin(euler_array[EulerAnglesIndex.YAW]), np.cos(euler_array[EulerAnglesIndex.YAW]), 0], + [0, 0, 1], + ], + dtype=np.float64, + ) + return R_z @ R_y @ R_x + + +class TestRotationUtils(unittest.TestCase): + + def setUp(self): + pass + + def _get_random_quaternion(self) -> npt.NDArray[np.float64]: + return PyQuaternion.random().q + + def _get_random_quaternion_array(self, n: int) -> npt.NDArray[np.float64]: + random_quat_array = np.zeros((n, len(QuaternionIndex)), dtype=np.float64) + for i in range(n): + random_quat_array[i] = self._get_random_quaternion() + return random_quat_array + + def _get_random_euler_array(self, n: int) -> npt.NDArray[np.float64]: + random_euler_array: npt.NDArray[np.float64] = np.zeros((n, 3), dtype=np.float64) + for i in range(n): + random_euler_array[i] = PyQuaternion.random().yaw_pitch_roll[ + ::-1 + ] # Convert (yaw, pitch, roll) to (roll, pitch, yaw) + return random_euler_array + + def test_conjugate_quaternion_array(self): + """Test the conjugate_quaternion_array function.""" + + def _test_by_shape(shape: Tuple[int, ...]) -> None: + for _ in range(10): + N = np.prod(shape) + random_quat_flat = self._get_random_quaternion_array(N) + + random_quat = random_quat_flat.reshape(shape + (len(QuaternionIndex),)) + conj_quat = conjugate_quaternion_array(random_quat) + + np.testing.assert_allclose( + conj_quat[..., QuaternionIndex.QW], + random_quat[..., QuaternionIndex.QW], + atol=1e-8, + ) + np.testing.assert_allclose( + conj_quat[..., QuaternionIndex.QX], + -random_quat[..., QuaternionIndex.QX], + atol=1e-8, + ) + np.testing.assert_allclose( + conj_quat[..., QuaternionIndex.QY], + -random_quat[..., QuaternionIndex.QY], + atol=1e-8, + ) + np.testing.assert_allclose( + conj_quat[..., QuaternionIndex.QZ], + -random_quat[..., QuaternionIndex.QZ], + atol=1e-8, + ) + + # Check if double conjugation returns original quaternion + double_conj_quat = conjugate_quaternion_array(conj_quat) + np.testing.assert_allclose( + double_conj_quat, + random_quat, + atol=1e-8, + ) + + # Test single-dim shape + _test_by_shape((1,)) + + # Test multi-dim shape + _test_by_shape((2, 3)) + _test_by_shape((1, 2, 3)) + + # Test zero-dim shape + _test_by_shape((0,)) + + # Test invalid input + with self.assertRaises(AssertionError): + invalid_quat = np.zeros((0,)) # Zero quaternion (invalid) + conjugate_quaternion_array(invalid_quat) + + with self.assertRaises(AssertionError): + invalid_quat = np.zeros((len(QuaternionIndex), 8)) # Zero quaternion (invalid) + conjugate_quaternion_array(invalid_quat) + + def test_get_euler_array_from_quaternion_array(self): + """Test the get_euler_array_from_quaternion_array function.""" + + def 
_test_by_shape(shape: Tuple[int, ...]) -> None:
+            for _ in range(10):
+                N = np.prod(shape)
+                random_quat_array_flat = self._get_random_quaternion_array(N)
+                random_quat_array = random_quat_array_flat.reshape(shape + (len(QuaternionIndex),))
+
+                # Convert to Euler angles using our function
+                euler_array = get_euler_array_from_quaternion_array(random_quat_array)
+
+                euler_array_flat = euler_array.reshape((N, 3))
+                # Test against pyquaternion results
+                for i, q in enumerate(random_quat_array_flat):
+                    pyq = PyQuaternion(array=q)
+                    # Convert to Euler angles using pyquaternion for comparison
+                    yaw, pitch, roll = pyq.yaw_pitch_roll
+                    euler_from_pyq = np.array([roll, pitch, yaw], dtype=np.float64)
+
+                    # Check if conversion is correct
+                    np.testing.assert_allclose(euler_array_flat[i], euler_from_pyq, atol=1e-6)
+
+        # Test single-dim shape
+        _test_by_shape((1,))
+
+        # Test multi-dim shape
+        _test_by_shape((2, 3))
+        _test_by_shape((1, 2, 3))
+
+        # Test zero-dim shape
+        _test_by_shape((0,))
+
+        # Test invalid input
+        with self.assertRaises(AssertionError):
+            invalid_quat = np.zeros((0,))  # Zero quaternion (invalid)
+            get_euler_array_from_quaternion_array(invalid_quat)
+
+        with self.assertRaises(AssertionError):
+            invalid_quat = np.zeros((len(QuaternionIndex), 8))  # Zero quaternion (invalid)
+            get_euler_array_from_quaternion_array(invalid_quat)
+
+    def test_get_euler_array_from_rotation_matrices(self):
+        """Test the get_euler_array_from_rotation_matrices function."""
+
+        def _test_by_shape(shape: Tuple[int, ...]) -> None:
+            for _ in range(10):
+                N = np.prod(shape)
+                rotation_matrices_flat: npt.NDArray[np.float64] = np.zeros((N, 3, 3), dtype=np.float64)
+                for i in range(N):
+                    random_euler = self._get_random_euler_array(1)[0]
+                    rotation_matrices_flat[i] = _get_rotation_matrix_helper(random_euler)
+
+                rotation_matrices = rotation_matrices_flat.reshape(shape + (3, 3))
+
+                # Convert to Euler angles using our function
+                euler_array = get_euler_array_from_rotation_matrices(rotation_matrices)
+
+                # Test against helper function results
+                euler_array_flat = euler_array.reshape((N, 3))
+                for i in range(N):
+                    expected_rotation_matrix = _get_rotation_matrix_helper(euler_array_flat[i])
+                    np.testing.assert_allclose(
+                        rotation_matrices_flat[i],
+                        expected_rotation_matrix,
+                        atol=1e-8,
+                    )
+
+        # Test single-dim shape
+        _test_by_shape((1,))
+
+        # Test multi-dim shape
+        _test_by_shape((2, 1))
+
+        # Test zero-dim shape
+        _test_by_shape((0,))
+
+        # Test invalid input
+        with self.assertRaises(AssertionError):
+            invalid_rot = np.zeros((0, 3))  # (0, 3) rotation matrix shape (invalid)
+            get_euler_array_from_rotation_matrices(invalid_rot)
+
+        with self.assertRaises(AssertionError):
+            invalid_rot = np.zeros((3, 3, 8))  # (3, 3, 8) rotation matrix shape (invalid)
+            get_euler_array_from_rotation_matrices(invalid_rot)
+
+    def test_get_euler_array_from_rotation_matrix(self):
+        """Test the get_euler_array_from_rotation_matrix function."""
+        for _ in range(10):
+            random_euler = self._get_random_euler_array(1)[0]
+            rotation_matrix = _get_rotation_matrix_helper(random_euler)
+
+            # Convert to Euler angles using our function
+            euler_array = get_euler_array_from_rotation_matrix(rotation_matrix)
+
+            # Check if conversion is correct
+            np.testing.assert_allclose(
+                euler_array,
+                random_euler,
+                atol=1e-8,
+            )
+
+        # Test invalid input
+        with self.assertRaises(AssertionError):
+            invalid_rot = np.zeros((3,))  # (3,) rotation matrix shape (invalid)
+            get_euler_array_from_rotation_matrix(invalid_rot)
+
+        with self.assertRaises(AssertionError):
+            invalid_rot = np.zeros((3, 8))  # (3, 8) rotation matrix shape (invalid)
+            get_euler_array_from_rotation_matrix(invalid_rot)
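+
+    # The two tests below cover the 4x4 quaternion multiplication matrices. Schematically,
+    # assuming the Hamilton convention used by pyquaternion: q1 * q2 == Q(q1) @ q2 (left
+    # multiplication) and q1 * q2 == Q_bar(q2) @ q1 (right multiplication).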
+    def test_get_q_bar_matrices(self):
+        """Test the get_q_bar_matrices function."""
+
+        def _test_by_shape(shape: Tuple[int, ...]) -> None:
+            for _ in range(10):
+                N = np.prod(shape)
+                random_quat_array_flat = self._get_random_quaternion_array(N)
+                random_quat_array = random_quat_array_flat.reshape(shape + (len(QuaternionIndex),))
+
+                # Compute Q_bar matrices using our function
+                q_bar_matrices = get_q_bar_matrices(random_quat_array)
+
+                q_bar_matrices_flat = q_bar_matrices.reshape((N, 4, 4))
+
+                # Test against pyquaternion results
+                for i, q in enumerate(random_quat_array_flat):
+                    expected_q_bar = PyQuaternion(array=q)._q_bar_matrix()
+
+                    # Check if Q_bar matrix is correct
+                    np.testing.assert_allclose(
+                        q_bar_matrices_flat[i],
+                        expected_q_bar,
+                        atol=1e-8,
+                    )
+
+        # Test single-dim shape
+        _test_by_shape((1,))
+
+        # Test multi-dim shape
+        _test_by_shape((3, 2))
+        _test_by_shape((1, 2))
+
+        # Test zero-dim shape
+        _test_by_shape((0,))
+
+        # Test invalid input
+        with self.assertRaises(AssertionError):
+            invalid_quat = np.zeros((0,))  # Zero quaternion (invalid)
+            get_q_bar_matrices(invalid_quat)
+
+        with self.assertRaises(AssertionError):
+            invalid_quat = np.zeros((len(QuaternionIndex), 8))  # Zero quaternion (invalid)
+            get_q_bar_matrices(invalid_quat)
+
+    def test_get_q_matrices(self):
+        """Test the get_q_matrices function."""
+
+        def _test_by_shape(shape: Tuple[int, ...]) -> None:
+            for _ in range(10):
+                N = np.prod(shape)
+                random_quat_array_flat = self._get_random_quaternion_array(N)
+                random_quat_array = random_quat_array_flat.reshape(shape + (len(QuaternionIndex),))
+
+                # Compute Q matrices using our function
+                q_matrices = get_q_matrices(random_quat_array)
+
+                q_matrices_flat = q_matrices.reshape((N, 4, 4))
+
+                # Test against pyquaternion results
+                for i, q in enumerate(random_quat_array_flat):
+                    expected_q = PyQuaternion(array=q)._q_matrix()
+
+                    # Check if Q matrix is correct
+                    np.testing.assert_allclose(
+                        q_matrices_flat[i],
+                        expected_q,
+                        atol=1e-8,
+                    )
+
+        # Test single-dim shape
+        _test_by_shape((1,))
+
+        # Test multi-dim shape
+        _test_by_shape((3, 2))
+        _test_by_shape((1, 2))
+
+        # Test zero-dim shape
+        _test_by_shape((0,))
+
+        # Test invalid input
+        with self.assertRaises(AssertionError):
+            invalid_quat = np.zeros((0,))  # Zero quaternion (invalid)
+            get_q_matrices(invalid_quat)
+
+        with self.assertRaises(AssertionError):
+            invalid_quat = np.zeros((len(QuaternionIndex), 8))  # Zero quaternion (invalid)
+            get_q_matrices(invalid_quat)
+
+    def test_get_quaternion_array_from_euler_array(self):
+        """Test the get_quaternion_array_from_euler_array function."""
+
+        def _test_by_shape(shape: Tuple[int, ...]) -> None:
+            for _ in range(10):
+                N = np.prod(shape)
+                random_euler_array_flat = self._get_random_euler_array(N)
+                random_euler_array = random_euler_array_flat.reshape(shape + (3,))
+
+                # Convert to quaternion array using our function
+                quat_array = get_quaternion_array_from_euler_array(random_euler_array)
+
+                quat_array_flat = quat_array.reshape((N, len(QuaternionIndex)))
+
+                # Test against pyquaternion results
+                for i in range(N):
+                    roll = random_euler_array_flat[i][EulerAnglesIndex.ROLL]
+                    pitch = random_euler_array_flat[i][EulerAnglesIndex.PITCH]
+                    yaw = random_euler_array_flat[i][EulerAnglesIndex.YAW]
+
+                    pyquaternion = (
+                        PyQuaternion(axis=[0, 0, 1], angle=yaw)
+                        * PyQuaternion(axis=[0, 1, 0], angle=pitch)
+                        * PyQuaternion(axis=[1, 0, 0], angle=roll)
+                    )
+
+                    expected_quat = pyquaternion.q
+
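+                    # The composition above (yaw about Z, then pitch about Y, then roll about X)
+                    # mirrors the intrinsic ZYX / Tait-Bryan convention of _get_rotation_matrix_helper,
+                    # i.e. R = R_z(yaw) @ R_y(pitch) @ R_x(roll).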
+                    # Check if conversion is correct
+                    np.testing.assert_allclose(
+                        quat_array_flat[i],
+                        expected_quat,
+                        atol=1e-8,
+                    )
+
+        # Test single-dim shape
+        _test_by_shape((1,))
+
+        # Test multi-dim shape
+        _test_by_shape((3, 5))
+        _test_by_shape((1, 0, 2))
+
+        # Test zero-dim shape
+        _test_by_shape((0,))
+
+        # Test invalid input
+        with self.assertRaises(AssertionError):
+            invalid_euler = np.zeros((0,))  # Zero euler angles (invalid)
+            get_quaternion_array_from_euler_array(invalid_euler)
+
+        with self.assertRaises(AssertionError):
+            invalid_euler = np.zeros((3, 8))  # Zero euler angles (invalid)
+            get_quaternion_array_from_euler_array(invalid_euler)
+
+    def test_get_quaternion_array_from_rotation_matrices(self):
+        """Test the get_quaternion_array_from_rotation_matrices function."""
+
+        def _test_by_shape(shape: Tuple[int, ...]) -> None:
+            for _ in range(10):
+                N = np.prod(shape)
+                rotation_matrices_flat: npt.NDArray[np.float64] = np.zeros((N, 3, 3), dtype=np.float64)
+                for i in range(N):
+                    random_euler = self._get_random_euler_array(1)[0]
+                    rotation_matrices_flat[i] = _get_rotation_matrix_helper(random_euler)
+
+                rotation_matrices = rotation_matrices_flat.reshape(shape + (3, 3))
+
+                # Convert to quaternion array using our function
+                quat_array = get_quaternion_array_from_rotation_matrices(rotation_matrices)
+
+                quat_array_flat = quat_array.reshape((N, len(QuaternionIndex)))
+
+                # Test against pyquaternion results
+                for i in range(N):
+                    expected_quaternion = PyQuaternion(matrix=rotation_matrices_flat[i]).q
+                    actual_quaternion = quat_array_flat[i]
+
+                    # Check if quaternions are equivalent (considering sign ambiguity)
+                    # Quaternions q and -q represent the same rotation
+                    np.testing.assert_equal(
+                        np.allclose(actual_quaternion, expected_quaternion, atol=1e-8)
+                        or np.allclose(actual_quaternion, -expected_quaternion, atol=1e-8),
+                        True,
+                    )
+
+        # Test single-dim shape
+        _test_by_shape((1,))
+
+        # Test multi-dim shape
+        _test_by_shape((3, 5))
+        _test_by_shape((1, 0, 2))
+
+        # Test zero-dim shape
+        _test_by_shape((0,))
+
+        # Test invalid input
+        with self.assertRaises(AssertionError):
+            invalid_rot = np.zeros((0, 3))  # (0, 3) rotation matrix shape (invalid)
+            get_quaternion_array_from_rotation_matrices(invalid_rot)
+
+        with self.assertRaises(AssertionError):
+            invalid_rot = np.zeros((3, 3, 8))  # (3, 3, 8) rotation matrix shape (invalid)
+            get_quaternion_array_from_rotation_matrices(invalid_rot)
+
+    def test_get_quaternion_array_from_rotation_matrix(self):
+        """Test the get_quaternion_array_from_rotation_matrix function."""
+        for _ in range(10):
+            random_euler = self._get_random_euler_array(1)[0]
+            rotation_matrix = _get_rotation_matrix_helper(random_euler)
+
+            # Convert to quaternion array using our function
+            quat_array = get_quaternion_array_from_rotation_matrix(rotation_matrix)
+
+            expected_quaternion = PyQuaternion(matrix=rotation_matrix).q
+            actual_quaternion = quat_array
+
+            # Check if quaternions are equivalent (considering sign ambiguity)
+            # Quaternions q and -q represent the same rotation
+            np.testing.assert_equal(
+                np.allclose(actual_quaternion, expected_quaternion, atol=1e-8)
+                or np.allclose(actual_quaternion, -expected_quaternion, atol=1e-8),
+                True,
+            )
+
+        # Test invalid input
+        with self.assertRaises(AssertionError):
+            invalid_rot = np.zeros((3,))  # (3,) rotation matrix shape (invalid)
+            get_quaternion_array_from_rotation_matrix(invalid_rot)
+
+        with self.assertRaises(AssertionError):
+            invalid_rot = np.zeros((3, 8))  # (3, 8) rotation matrix shape (invalid)
+            get_quaternion_array_from_rotation_matrix(invalid_rot)
+
+    def
test_normalize_quaternion_array(self): + """Test the normalize_quaternion_array function.""" + + def _test_by_shape(shape: Tuple[int, ...]) -> None: + for _ in range(10): + N = np.prod(shape) + scale = np.random.uniform(0.1, 10.0) + random_quat_array_flat = self._get_random_quaternion_array(N) * scale # Scale to ensure non-unit norm + random_quat_array = random_quat_array_flat.reshape(shape + (len(QuaternionIndex),)) + + # Normalize using our function + normalized_quat_array = normalize_quaternion_array(random_quat_array) + + normalized_quat_array_flat = normalized_quat_array.reshape((N, len(QuaternionIndex))) + + # Check if each quaternion is normalized + for i in range(N): + norm = np.linalg.norm(normalized_quat_array_flat[i]) + np.testing.assert_allclose(norm, 1.0, atol=1e-8) + + # Test single-dim shape + _test_by_shape((1,)) + + # Test multi-dim shape + _test_by_shape((2, 3)) + _test_by_shape((1, 5, 3)) + + # Test zero-dim shape + _test_by_shape((0,)) + + # Test invalid input + with self.assertRaises(AssertionError): + invalid_quat = np.zeros((0,)) # Zero quaternion (invalid) + normalize_quaternion_array(invalid_quat) + + with self.assertRaises(AssertionError): + invalid_quat = np.zeros((len(QuaternionIndex), 8)) # Zero quaternion (invalid) + normalize_quaternion_array(invalid_quat) + + def test_get_rotation_matrices_from_euler_array(self): + """Test the get_rotation_matrices_from_euler_array function.""" + + def _test_by_shape(shape: Tuple[int, ...]) -> None: + for _ in range(10): + N = np.prod(shape) + random_euler_array_flat = self._get_random_euler_array(N) + random_euler_array = random_euler_array_flat.reshape(shape + (3,)) + + # Convert to rotation matrices using our function + rotation_matrices = get_rotation_matrices_from_euler_array(random_euler_array) + + rotation_matrices_flat = rotation_matrices.reshape((N, 3, 3)) + + # Test against helper function results + for i in range(N): + expected_rotation_matrix = _get_rotation_matrix_helper(random_euler_array_flat[i]) + np.testing.assert_allclose( + rotation_matrices_flat[i], + expected_rotation_matrix, + atol=1e-8, + ) + + # Test single-dim shape + _test_by_shape((1,)) + + # Test multi-dim shape + _test_by_shape((2, 1)) + + # Test zero-dim shape + _test_by_shape((0,)) + + # Test invalid input + with self.assertRaises(AssertionError): + invalid_euler = np.zeros((0, 5)) # Zero euler angles (invalid) + get_rotation_matrices_from_euler_array(invalid_euler) + + with self.assertRaises(AssertionError): + invalid_euler = np.zeros((3, 8)) # Zero euler angles (invalid) + get_rotation_matrices_from_euler_array(invalid_euler) + + def test_get_rotation_matrices_from_quaternion_array(self): + """Test the get_rotation_matrices_from_quaternion_array function.""" + + def _test_by_shape(shape: Tuple[int, ...]) -> None: + for _ in range(10): + N = np.prod(shape) + random_quat_array_flat = self._get_random_quaternion_array(N) + random_quat_array = random_quat_array_flat.reshape(shape + (len(QuaternionIndex),)) + + # Convert to rotation matrices using our function + rotation_matrices = get_rotation_matrices_from_quaternion_array(random_quat_array) + + rotation_matrices_flat = rotation_matrices.reshape((N, 3, 3)) + + # Test against pyquaternion results + for i, q in enumerate(random_quat_array_flat): + expected_rotation_matrix = PyQuaternion(array=q).rotation_matrix + + # Check if rotation matrix is correct + np.testing.assert_allclose( + rotation_matrices_flat[i], + expected_rotation_matrix, + atol=1e-8, + ) + + # Test single-dim shape + 
_test_by_shape((1,)) + + # Test multi-dim shape + _test_by_shape((2, 3)) + _test_by_shape((1, 5, 3)) + + # Test zero-dim shape + _test_by_shape((0,)) + + # Test invalid input + with self.assertRaises(AssertionError): + invalid_quat = np.zeros((0,)) # Zero quaternion (invalid) + get_rotation_matrices_from_quaternion_array(invalid_quat) + + with self.assertRaises(AssertionError): + invalid_quat = np.zeros((len(QuaternionIndex), 8)) # Zero quaternion (invalid) + get_rotation_matrices_from_quaternion_array(invalid_quat) + + def test_get_rotation_matrix_from_euler_array(self): + """Test the get_rotation_matrix_from_euler_array function.""" + for _ in range(10): + random_euler = self._get_random_euler_array(1)[0] + + # Convert to rotation matrix using our function + rotation_matrix = get_rotation_matrix_from_euler_array(random_euler) + + expected_rotation_matrix = _get_rotation_matrix_helper(random_euler) + + # Check if conversion is correct + np.testing.assert_allclose( + rotation_matrix, + expected_rotation_matrix, + atol=1e-8, + ) + + # Test invalid input + with self.assertRaises(AssertionError): + invalid_euler = np.zeros((0,)) # Zero euler angles (invalid) + get_rotation_matrix_from_euler_array(invalid_euler) + + with self.assertRaises(AssertionError): + invalid_euler = np.zeros((8,)) # Zero euler angles (invalid) + get_rotation_matrix_from_euler_array(invalid_euler) + + def test_get_rotation_matrix_from_quaternion_array(self): + """Test the get_rotation_matrix_from_quaternion_array function.""" + for _ in range(10): + random_quat = self._get_random_quaternion() + + # Convert to rotation matrix using our function + rotation_matrix = get_rotation_matrix_from_quaternion_array(random_quat) + + expected_rotation_matrix = PyQuaternion(array=random_quat).rotation_matrix + + # Check if conversion is correct + np.testing.assert_allclose( + rotation_matrix, + expected_rotation_matrix, + atol=1e-8, + ) + + # Test invalid input + with self.assertRaises(AssertionError): + invalid_quat = np.zeros((0,)) # Zero quaternion (invalid) + get_rotation_matrix_from_quaternion_array(invalid_quat) + + with self.assertRaises(AssertionError): + invalid_quat = np.zeros((8,)) # Zero quaternion (invalid) + get_rotation_matrix_from_quaternion_array(invalid_quat) + + def test_invert_quaternion_array(self): + """Test the invert_quaternion_array function.""" + + def _test_by_shape(shape: Tuple[int, ...]) -> None: + for _ in range(10): + N = np.prod(shape) + random_quat_array_flat = self._get_random_quaternion_array(N) + random_quat_array = random_quat_array_flat.reshape(shape + (len(QuaternionIndex),)) + + # Invert using our function + inverted_quat_array = invert_quaternion_array(random_quat_array) + + inverted_quat_array_flat = inverted_quat_array.reshape((N, len(QuaternionIndex))) + + # Test against pyquaternion results + for i, q in enumerate(random_quat_array_flat): + pyq = PyQuaternion(array=q) + expected_inverse = pyq.inverse.q + + # Check if inversion is correct + np.testing.assert_allclose( + inverted_quat_array_flat[i], + expected_inverse, + atol=1e-8, + ) + + # Test single-dim shape + _test_by_shape((1,)) + + # Test multi-dim shape + _test_by_shape((2, 3)) + _test_by_shape((1, 5, 3)) + + # Test zero-dim shape + _test_by_shape((0,)) + + # Test invalid input + with self.assertRaises(AssertionError): + invalid_quat = np.zeros((0,)) # Zero quaternion (invalid) + invert_quaternion_array(invalid_quat) + + with self.assertRaises(AssertionError): + invalid_quat = np.zeros((len(QuaternionIndex), 8)) # Zero quaternion 
(invalid) + invert_quaternion_array(invalid_quat) + + def test_multiply_quaternion_arrays(self): + """Test the multiply_quaternion_arrays function.""" + + def _test_by_shape(shape: Tuple[int, ...]) -> None: + for _ in range(10): + N = np.prod(shape) + quat_array1_flat = self._get_random_quaternion_array(N) + quat_array2_flat = self._get_random_quaternion_array(N) + + quat_array1 = quat_array1_flat.reshape(shape + (len(QuaternionIndex),)) + quat_array2 = quat_array2_flat.reshape(shape + (len(QuaternionIndex),)) + + # Multiply using our function + multiplied_quat_array = multiply_quaternion_arrays(quat_array1, quat_array2) + + multiplied_quat_array_flat = multiplied_quat_array.reshape((N, len(QuaternionIndex))) + + # Test against pyquaternion results + for i in range(N): + pyq1 = PyQuaternion(array=quat_array1_flat[i]) + pyq2 = PyQuaternion(array=quat_array2_flat[i]) + expected_product = (pyq1 * pyq2).q + + # Check if multiplication is correct + np.testing.assert_allclose( + multiplied_quat_array_flat[i], + expected_product, + atol=1e-8, + ) + + # Test single-dim shape + _test_by_shape((1,)) + + # Test multi-dim shape + _test_by_shape((2, 3)) + _test_by_shape((1, 5, 3)) + + # Test zero-dim shape + _test_by_shape((0,)) + + # Test invalid input + with self.assertRaises(AssertionError): + invalid_quat1 = np.zeros((0,)) # Zero quaternion (invalid) + invalid_quat2 = np.zeros((0,)) # Zero quaternion (invalid) + multiply_quaternion_arrays(invalid_quat1, invalid_quat2) + + with self.assertRaises(AssertionError): + invalid_quat1 = np.zeros((len(QuaternionIndex), 8)) # Zero quaternion (invalid) + invalid_quat2 = np.zeros((len(QuaternionIndex), 4)) # Zero quaternion (invalid) + multiply_quaternion_arrays(invalid_quat1, invalid_quat2) + + def test_normalize_angle(self): + """Test the normalize_angle function.""" + + def _test_by_shape(shape: Tuple[int, ...]) -> None: + for _ in range(10): + N = np.prod(shape) + random_angles_flat = np.random.uniform(-10 * np.pi, 10 * np.pi, size=N) + random_angles = random_angles_flat.reshape(shape) + + # Normalize using our function + normalized_angles = normalize_angle(random_angles) + + normalized_angles_flat = normalized_angles.reshape((N,)) + + # Check if each angle is within [-pi, pi] + for i in range(N): + angle = normalized_angles_flat[i] + self.assertGreaterEqual(angle, -np.pi - 1e-8) + self.assertLessEqual(angle, np.pi + 1e-8) + + # Test single-dim shape + _test_by_shape((1,)) + + # Test multi-dim shape + _test_by_shape((2, 3)) + _test_by_shape((1, 5, 3)) + + # Test zero-dim shape + _test_by_shape((0,)) + + # Test float + with self.subTest("Test float input"): + angle = 4 * np.pi + normalized_angle = normalize_angle(angle) + self.assertGreaterEqual(normalized_angle, -np.pi - 1e-8) + self.assertLessEqual(normalized_angle, np.pi + 1e-8) + + +if __name__ == "__main__": + unittest.main() diff --git a/d123/simulation/planning/.gitkeep b/tests/unit/visualization/.gitkeep similarity index 100% rename from d123/simulation/planning/.gitkeep rename to tests/unit/visualization/.gitkeep