From d7cd500d29f1dd05655726542678234c5e5d5b4c Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Wed, 13 Aug 2025 10:38:38 +0800 Subject: [PATCH 01/32] mini --- .../vehicle_state/vehicle_parameters.py | 11 + .../dataset_specific/kitti_360/__init__ .py | 0 .../kitti_360/kitti_360_data_converter.py | 456 ++++++++++++++++++ .../default_dataset_conversion.yaml | 3 +- .../config/datasets/kitti360_dataset.yaml | 16 + .../code/hydra/config.yaml | 60 +++ .../2025.08.11.15.45.36/code/hydra/hydra.yaml | 177 +++++++ .../code/hydra/overrides.yaml | 1 + exp/my_run/2025.08.11.15.45.36/log.txt | 10 + jbwang_test.py | 68 +++ notebooks/dataset/jbwang_test.py | 86 ++++ notebooks/jbwang_viz_test.py | 252 ++++++++++ notebooks/nuplan/nuplan_sensor_loading.ipynb | 27 +- requirements.txt | 2 +- 14 files changed, 1165 insertions(+), 4 deletions(-) create mode 100644 d123/dataset/dataset_specific/kitti_360/__init__ .py create mode 100644 d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py create mode 100644 d123/script/config/datasets/kitti360_dataset.yaml create mode 100644 exp/my_run/2025.08.11.15.45.36/code/hydra/config.yaml create mode 100644 exp/my_run/2025.08.11.15.45.36/code/hydra/hydra.yaml create mode 100644 exp/my_run/2025.08.11.15.45.36/code/hydra/overrides.yaml create mode 100644 exp/my_run/2025.08.11.15.45.36/log.txt create mode 100644 jbwang_test.py create mode 100644 notebooks/dataset/jbwang_test.py create mode 100644 notebooks/jbwang_viz_test.py diff --git a/d123/common/datatypes/vehicle_state/vehicle_parameters.py b/d123/common/datatypes/vehicle_state/vehicle_parameters.py index 8fe4d048..17480042 100644 --- a/d123/common/datatypes/vehicle_state/vehicle_parameters.py +++ b/d123/common/datatypes/vehicle_state/vehicle_parameters.py @@ -60,6 +60,17 @@ def get_wopd_pacifica_parameters() -> VehicleParameters: rear_axle_to_center_longitudinal=1.461, ) +def get_kitti360_station_wagon_parameters() -> VehicleParameters: + #TODO except wheel_base, all need to be checked + return VehicleParameters( + vehicle_name="kitti360_station_wagon", + width=2.297, + length=5.176, + height=1.400, + wheel_base=2.710, + rear_axle_to_center_vertical=0.45, + rear_axle_to_center_longitudinal=1.461, + ) def center_se3_to_rear_axle_se3(center_se3: StateSE3, vehicle_parameters: VehicleParameters) -> StateSE3: """ diff --git a/d123/dataset/dataset_specific/kitti_360/__init__ .py b/d123/dataset/dataset_specific/kitti_360/__init__ .py new file mode 100644 index 00000000..e69de29b diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py new file mode 100644 index 00000000..b6e97d8c --- /dev/null +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py @@ -0,0 +1,456 @@ +import gc +import json +import os +from dataclasses import asdict +from functools import partial +from pathlib import Path +from typing import Any, Dict, Final, List, Optional, Tuple, Union + +import numpy as np +import datetime +import hashlib +import pyarrow as pa +from PIL import Image +from nuplan.planning.utils.multithreading.worker_utils import WorkerPool, worker_map + +from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json +from d123.common.datatypes.time.time_point import TimePoint +from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3Index +from d123.common.datatypes.vehicle_state.vehicle_parameters import get_kitti360_station_wagon_parameters +from 
d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3Index
+from d123.common.geometry.vector import Vector3DIndex
+from d123.dataset.arrow.helper import open_arrow_table, write_arrow_table
+from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig, RawDataConverter
+from d123.dataset.logs.log_metadata import LogMetadata
+
+KITTI360_DT: Final[float] = 0.1
+SORT_BY_TIMESTAMP: Final[bool] = True
+
+KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"])
+
+#TODO camera mismatch
+KITTI360_CAMERA_TYPES: Final[Dict[CameraType, str]] = {
+    CameraType.CAM_L0: "image_00",
+    CameraType.CAM_R0: "image_01",
+    # TODO fisheye camera
+    # CameraType.CAM_L1: "image_02",
+    # CameraType.CAM_R1: "image_03",
+}
+
+DIR_2D_RAW = "data_2d_raw"
+DIR_2D_SMT = "data_2d_semantics"
+DIR_3D_RAW = "data_3d_raw"
+DIR_3D_SMT = "data_3d_semantics"
+DIR_3D_BBOX = "data_3d_bboxes"
+DIR_POSES = "data_poses"
+DIR_CALIB = "calibration"
+
+#TODO PATH_2D_RAW_ROOT
+PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT
+PATH_2D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_SMT
+PATH_3D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_RAW
+PATH_3D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_SMT
+PATH_3D_BBOX_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_BBOX
+PATH_POSES_ROOT: Path = KITTI360_DATA_ROOT / DIR_POSES
+PATH_CALIB_ROOT: Path = KITTI360_DATA_ROOT / DIR_CALIB
+
+KITTI360_REQUIRED_MODALITY_ROOTS: Dict[str, Path] = {
+    DIR_2D_RAW: PATH_2D_RAW_ROOT,
+    # DIR_2D_SMT: PATH_2D_SMT_ROOT,
+    # DIR_3D_RAW: PATH_3D_RAW_ROOT,
+    # DIR_3D_SMT: PATH_3D_SMT_ROOT,
+    # DIR_3D_BBOX: PATH_3D_BBOX_ROOT,
+    # DIR_POSES: PATH_POSES_ROOT,
+}
+
+
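For orientation, the on-disk layout the constants above assume, as the code currently resolves it (only the entries this converter touches; sequence folders are named like 2013_05_28_drive_0000_sync; note the PATH_2D_RAW_ROOT TODO):

    $KITTI360_DATA_ROOT/
        <sequence>/image_0{0,1}/timestamps.txt        # read by _read_timestamps below
        calibration/perspective.txt                   # P_rect_0x / S_rect_0x / D_0x entries
        calibration/cam_to_pose.txt                   # Tr_cam0x camera-to-vehicle transforms
        data_3d_raw/<sequence>/velodyne_points/data/  # raw velodyne scans (.bin)
        data_3d_semantics/train/<sequence>/*.ply      # accumulated point clouds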
+def create_token(input_data: str) -> str:
+    # TODO: Refactor this function.
+    # TODO: Add a general function to create tokens from arbitrary data.
+    if isinstance(input_data, str):
+        input_data = input_data.encode("utf-8")
+
+    hash_obj = hashlib.sha256(input_data)
+    return hash_obj.hexdigest()[:16]
+
+def _load_calibration() -> Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray]]:
+    """
+    Read the KITTI-360 global calibration files and return:
+    - intrinsics[image_02] = 3x3
+    - c2e[image_02] = 4x4 (camera->ego/body); here cam_to_pose is treated as camera->vehicle (simplified)
+    """
+    calib_dir = KITTI360_DATA_ROOT / DIR_CALIB
+    intrinsics: Dict[str, np.ndarray] = {}
+    c2e: Dict[str, np.ndarray] = {}
+
+    # Intrinsics: P_rect_0{0..3} from perspective.txt
+    persp = calib_dir / "perspective.txt"
+    if persp.exists():
+        with open(persp, "r") as f:
+            lines = [ln.strip() for ln in f if ln.strip()]
+        for ln in lines:
+            if ln.startswith("P_rect_02"):
+                intrinsics["image_02"] = _read_projection_matrix(ln)
+            elif ln.startswith("P_rect_03"):
+                intrinsics["image_03"] = _read_projection_matrix(ln)
+
+    # Extrinsics: Tr_cam02 from cam_to_pose.txt (camera -> vehicle/pose)
+    c2p = calib_dir / "cam_to_pose.txt"
+    if c2p.exists():
+        with open(c2p, "r") as f:
+            lines = [ln.strip() for ln in f if ln.strip()]
+        for ln in lines:
+            if ln.startswith("Tr_cam02"):
+                vals = [float(x) for x in ln.split(":")[1].strip().split()]
+                T = np.array(vals, dtype=np.float64).reshape(4, 4)
+                c2e["image_02"] = T
+            elif ln.startswith("Tr_cam03"):
+                vals = [float(x) for x in ln.split(":")[1].strip().split()]
+                T = np.array(vals, dtype=np.float64).reshape(4, 4)
+                c2e["image_03"] = T
+
+    return intrinsics, c2e
+
+class Kitti360DataConverter(RawDataConverter):
+    def __init__(
+        self,
+        splits: List[str],
+        log_path: Union[Path, str],
+        data_converter_config: DataConverterConfig,
+    ) -> None:
+        super().__init__(data_converter_config)
+        for split in splits:
+            assert (
+                split in self.get_available_splits()
+            ), f"Split {split} is not available. Available splits: {self.get_available_splits()}"
+
+        self._splits: List[str] = splits
+        self._log_path: Path = Path(log_path)
+        self._log_paths_per_split: Dict[str, List[Path]] = self._collect_log_paths()
+
+    def _collect_log_paths(self) -> Dict[str, List[Path]]:
+        """
+        Collect candidate sequence folders under data_2d_raw that end with '_sync',
+        and keep only those sequences that are present in ALL required modality roots
+        (e.g., data_2d_semantics, data_3d_raw, etc.).
+        """
+        missing_roots = [str(p) for p in KITTI360_REQUIRED_MODALITY_ROOTS.values() if not p.exists()]
+        if missing_roots:
+            raise FileNotFoundError(f"KITTI-360 required roots missing: {missing_roots}")
+
+        # Enumerate candidate sequences from data_2d_raw
+        candidates = sorted(p for p in PATH_2D_RAW_ROOT.iterdir() if p.is_dir() and p.name.endswith("_sync"))
+
+        valid_seqs: List[Path] = []
+        for seq_dir in candidates:
+            seq_name = seq_dir.name
+            missing_modalities = [
+                modality_name
+                for modality_name, root in KITTI360_REQUIRED_MODALITY_ROOTS.items()
+                if not (root / seq_name).exists()
+            ]
+            if not missing_modalities:
+                valid_seqs.append(seq_dir)  # KITTI360_DATA_ROOT / DIR_2D_RAW / seq_name
+            #TODO warnings
+            # else:
+            #     warnings.warn(
+            #         f"Sequence '{seq_name}' skipped: missing modalities {missing_modalities}. "
+            #         f"Root: {KITTI360_DATA_ROOT}"
+            #     )
+        return {"kitti360": valid_seqs}
+
+    def get_available_splits(self) -> List[str]:
+        """Returns a list of available raw data types."""
+        return ["kitti360"]
+
+    def convert_maps(self, worker: WorkerPool) -> None:
+        print("KITTI-360 does not provide standard maps.
Skipping map conversion.") + return None + + def convert_logs(self, worker: WorkerPool) -> None: + log_args = [ + { + "log_path": log_path, + "split": split, + } + for split, log_paths in self._log_paths_per_split.items() + for log_path in log_paths + ] + + worker_map( + worker, + partial( + convert_kitti360_log_to_arrow, + data_converter_config=self.data_converter_config, + ), + log_args, + ) + +def convert_kitti360_log_to_arrow( + args: List[Dict[str, Union[List[str], List[Path]]]], data_converter_config: DataConverterConfig +) -> List[Any]: + + for log_info in args: + log_path: Path = log_info["log_path"] + split: str = log_info["split"] + log_name = log_path.stem + + if not log_path.exists(): + raise FileNotFoundError(f"Log path {log_path} does not exist.") + log_file_path = data_converter_config.output_path / split / f"{log_name}.arrow" + + if data_converter_config.force_log_conversion or not log_file_path.exists(): + log_file_path.unlink(missing_ok=True) + if not log_file_path.parent.exists(): + log_file_path.parent.mkdir(parents=True, exist_ok=True) + + schema_column_list = [ + ("token", pa.string()), + ("timestamp", pa.int64()), + ("detections_state", pa.list_(pa.list_(pa.float64(), len(BoundingBoxSE3Index)))), + ("detections_velocity", pa.list_(pa.list_(pa.float64(), len(Vector3DIndex)))), + ("detections_token", pa.list_(pa.string())), + ("detections_type", pa.list_(pa.int16())), + ("ego_states", pa.list_(pa.float64(), len(EgoStateSE3Index))), + ("traffic_light_ids", pa.list_(pa.int64())), + ("traffic_light_types", pa.list_(pa.int16())), + ("scenario_tag", pa.list_(pa.string())), + ("route_lane_group_ids", pa.list_(pa.int64())), + ] + if data_converter_config.lidar_store_option is not None: + if data_converter_config.lidar_store_option == "path": + schema_column_list.append(("lidar", pa.string())) + elif data_converter_config.lidar_store_option == "binary": + raise NotImplementedError("Binary lidar storage is not implemented.") + + # TODO: Adjust how cameras are added + if data_converter_config.camera_store_option is not None: + for cam_type in KITTI360_CAMERA_TYPES.keys(): + if data_converter_config.camera_store_option == "path": + schema_column_list.append((cam_type.serialize(), pa.string())) + schema_column_list.append((f"{cam_type.serialize()}_extrinsic", pa.list_(pa.float64(), 16))) + elif data_converter_config.camera_store_option == "binary": + raise NotImplementedError("Binary camera storage is not implemented.") + + recording_schema = pa.schema(schema_column_list) + #TODO location + metadata = LogMetadata( + dataset="kitti360", + log_name=log_name, + location="None", + timestep_seconds=KITTI360_DT, + map_has_z=False, + ) + + #TODO vehicle parameters + vehicle_parameters = get_kitti360_station_wagon_parameters() + camera_metadata = get_kitti360_camera_metadata() + recording_schema = recording_schema.with_metadata( + { + "log_metadata": json.dumps(asdict(metadata)), + "vehicle_parameters": json.dumps(asdict(vehicle_parameters)), + "camera_metadata": camera_metadata_dict_to_json(camera_metadata), + } + ) + + _write_recording_table(log_name, recording_schema, log_file_path, data_converter_config) + + gc.collect() + return [] + + +def get_kitti360_camera_metadata() -> Dict[str, CameraMetadata]: + + persp = PATH_CALIB_ROOT / "perspective.txt" + + assert persp.exists() + result = {"image_00": {}, "image_01": {}} + + with open(persp, "r") as f: + lines = [ln.strip() for ln in f if ln.strip()] + for ln in lines: + key, value = ln.split(" ", 1) + cam_id = key.split("_")[-1][:2] + if 
key.startswith("P_rect_"): + result[f"image_{cam_id}"]["intrinsic"] = _read_projection_matrix(ln) + elif key.startswith("S_rect_"): + result[f"image_{cam_id}"]["wh"] = [int(round(float(x))) for x in value.split()] + elif key.startswith("D_"): + result[f"image_{cam_id}"]["distortion"] = [float(x) for x in value.split()] + + log_cam_infos: Dict[str, CameraMetadata] = {} + for cam_type, cam_name in KITTI360_CAMERA_TYPES.items(): + log_cam_infos[cam_type.serialize()] = CameraMetadata( + camera_type=cam_type, + width=result[cam_name]["wh"][0], + height=result[cam_name]["wh"][1], + intrinsic=np.array(result[cam_name]["intrinsic"]), + distortion=np.array(result[cam_name]["distortion"]), + ) + return log_cam_infos + +def _read_projection_matrix(p_line: str) -> np.ndarray: + parts = p_line.split(" ", 1) + if len(parts) != 2: + raise ValueError(f"Bad projection line: {p_line}") + vals = [float(x) for x in parts[1].strip().split()] + P = np.array(vals, dtype=np.float64).reshape(3, 4) + K = P[:, :3] + return K + +def _write_recording_table( + log_name: str, + recording_schema: pa.Schema, + log_file_path: Path, + data_converter_config: DataConverterConfig +) -> None: + + ts_list = _read_timestamps(log_name) + + with pa.OSFile(str(log_file_path), "wb") as sink: + with pa.ipc.new_file(sink, recording_schema) as writer: + for i, tp in enumerate(ts_list): + row_data = { + "token": [create_token(f"{log_name}_{i}")], + "timestamp": [tp.time_us], + "detections_state": [], + "detections_velocity": [], + "detections_token": [], + "detections_type": [], + "ego_states": [], + "traffic_light_ids": [], + "traffic_light_types": [], + "scenario_tag": [], + "route_lane_group_ids": [], + } + + if data_converter_config.lidar_store_option is not None: + row_data["lidar"] = [] + # row_data["lidar"] = [_extract_lidar(log_name, data_converter_config)] + + if data_converter_config.camera_store_option is not None: + # camera_data_dict = _extract_camera(log_db, lidar_pc, source_log_path, data_converter_config) + camera_data_dict = {} + for camera_type, camera_data in camera_data_dict.items(): + if camera_data is not None: + row_data[camera_type.serialize()] = [camera_data[0]] + row_data[f"{camera_type.serialize()}_extrinsic"] = [camera_data[1]] + else: + row_data[camera_type.serialize()] = [None] + row_data[f"{camera_type.serialize()}_extrinsic"] = [None] + + batch = pa.record_batch(row_data, schema=recording_schema) + writer.write_batch(batch) + + if SORT_BY_TIMESTAMP: + recording_table = open_arrow_table(log_file_path) + recording_table = recording_table.sort_by([("timestamp", "ascending")]) + write_arrow_table(recording_table, log_file_path) + +#TODO default timestamps +# If timestamps are not provided, we can generate them based on the KITTI-360 DT +def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]: + + ts_file = PATH_2D_RAW_ROOT / log_name / "image_01" / "timestamps.txt" + if ts_file.exists(): + tps: List[TimePoint] = [] + with open(ts_file, "r") as f: + for line in f: + s = line.strip() + if not s: + continue + dt_str, ns_str = s.split('.') + dt_obj = datetime.datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S") + dt_obj = dt_obj.replace(tzinfo=datetime.timezone.utc) + unix_epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc) + + total_seconds = (dt_obj - unix_epoch).total_seconds() + + ns_value = int(ns_str) + us_from_ns = ns_value // 1000 + + total_us = int(total_seconds * 1_000_000) + us_from_ns + + tps.append(TimePoint.from_us(total_us)) + return tps + return None + +#TODO lidar extraction 
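For orientation, a minimal sketch of loading one raw velodyne scan (assumes the standard KITTI-360 float32 x/y/z/intensity binary layout; the sequence and frame names are examples only, taken from the commented-out draft below):

    import os
    from pathlib import Path
    import numpy as np

    scan = (
        Path(os.environ["KITTI360_DATA_ROOT"]) / "data_3d_raw"
        / "2013_05_28_drive_0000_sync" / "velodyne_points" / "data" / "0000000000.bin"
    )
    points = np.fromfile(scan, dtype=np.float32).reshape(-1, 4)  # columns: x, y, z, intensity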
+def _extract_lidar(log_name: str, data_converter_config: DataConverterConfig) -> Optional[str]:
+    lidar: Optional[str] = None
+    lidar_full_path = PATH_3D_SMT_ROOT / "train" / log_name / "0000000002_0000000385.ply"
+    if lidar_full_path.exists():
+        if data_converter_config.lidar_store_option == "path":
+            lidar = f"{log_name}/lidar/{sample_name}.npy"
+        elif data_converter_config.lidar_store_option == "binary":
+            raise NotImplementedError("Binary lidar storage is not implemented.")
+    else:
+        raise FileNotFoundError(f"LiDAR file not found: {lidar_full_path}")
+    return lidar
+
+def _extract_camera():
+    pass
+
+
+
+# for idx in range(n_frames):
+#     token = f"{seq_name}_{idx:06d}"
+#     t_us = ts_list[idx].time_us
+
+#     row = {
+#         "token": [token],
+#         "timestamp": [t_us],
+#         # Leave the fields below empty / as placeholders for now, to be replaced with real annotations later
+#         "detections_state": [[]],
+#         "detections_velocity": [[]],
+#         "detections_token": [[]],
+#         "detections_type": [[]],
+#         "ego_states": [([0.0] * len(EgoStateSE3Index))],  # placeholder
+#         "traffic_light_ids": [[]],
+#         "traffic_light_types": [[]],
+#         "scenario_tag": [["unknown"]],
+#         "route_lane_group_ids": [[]],
+#     }
+
+#     # lidar path (if present)
+#     if data_converter_config.lidar_store_option is not None:
+#         # velodyne bin: KITTI-360/data_3d_raw/<seq>/velodyne_points/data/0000000000.bin
+#         velodyne_dir = (
+#             KITTI360_DATA_ROOT / DIR_3D / seq_name / "velodyne_points" / "data"
+#         )
+#         # Filenames may be zero-padded to 10 digits; try several widths here
+#         bin_path = None
+#         for fmt in [f"{idx:010d}.bin", f"{idx:06d}.bin", f"{idx:08d}.bin"]:
+#             cand = velodyne_dir / fmt
+#             if cand.exists():
+#                 bin_path = cand
+#                 break
+#         row["lidar"] = [str(bin_path.relative_to(KITTI360_DATA_ROOT)) if bin_path else None]
+
+#     # camera paths and extrinsics
+#     if data_converter_config.camera_store_option is not None:
+#         for cam_type, cam_dir_name in KITTI360_CAMERA_TYPES.items():
+#             img_dir = seq_dir_2d / cam_dir_name / "data"
+#             # try different filename widths
+#             img_path = None
+#             for ext in (".png", ".jpg", ".jpeg"):
+#                 for fmt in [f"{idx:010d}{ext}", f"{idx:06d}{ext}", f"{idx:08d}{ext}"]:
+#                     cand = img_dir / fmt
+#                     if cand.exists():
+#                         img_path = cand
+#                         break
+#                 if img_path:
+#                     break
+#             if img_path is not None:
+#                 rel = str(img_path.relative_to(KITTI360_DATA_ROOT))
+#                 row[cam_type.serialize()] = [rel]
+#                 # extrinsics: fixed cam->ego (global calibration), constant across frames (replace here if rolling shutter / per-frame poses are needed)
+#                 T = c2e.get(KITTI360_CAMERA_TYPES[cam_type], np.eye(4, dtype=np.float64))
+#                 row[f"{cam_type.serialize()}_extrinsic"] = [T.astype(np.float64).reshape(-1).tolist()]
+#             else:
+#                 row[cam_type.serialize()] = [None]
+#                 row[f"{cam_type.serialize()}_extrinsic"] = [None]
+
+#     batch = pa.record_batch(row, schema=recording_schema)
+#     writer.write_batch(batch)
+#     del batch, row
\ No newline at end of file
diff --git a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml
index cceb2911..bc48ed00 100644
--- a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml
+++ b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml
@@ -15,9 +15,10 @@ defaults:
   - default_dataset_paths
   - _self_
   - datasets:
+    - kitti360_dataset
     # - nuplan_private_dataset
     # - carla_dataset
-    - wopd_dataset
+    # - wopd_dataset

 force_log_conversion: False
 force_map_conversion: True
diff --git a/d123/script/config/datasets/kitti360_dataset.yaml b/d123/script/config/datasets/kitti360_dataset.yaml
new file mode 100644
index 00000000..418d36a4
--- /dev/null
+++ b/d123/script/config/datasets/kitti360_dataset.yaml
@@ -0,0 +1,16 @@
+kitti360_dataset:
+  _target_: 
d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter + _convert_: 'all' + + splits: ["kitti360"] + log_path: ${oc.env:KITTI360_DATA_ROOT} + + data_converter_config: + _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig + _convert_: 'all' + + output_path: ${d123_data_root} + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + camera_store_option: "path" + lidar_store_option: "path" diff --git a/exp/my_run/2025.08.11.15.45.36/code/hydra/config.yaml b/exp/my_run/2025.08.11.15.45.36/code/hydra/config.yaml new file mode 100644 index 00000000..86d05e7b --- /dev/null +++ b/exp/my_run/2025.08.11.15.45.36/code/hydra/config.yaml @@ -0,0 +1,60 @@ +worker: + _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed + _convert_: all + master_node_ip: null + threads_per_node: null + debug_mode: false + log_to_driver: true + logs_subdir: logs + use_distributed: false +scene_filter: + _target_: d123.dataset.scene.scene_filter.SceneFilter + _convert_: all + split_types: null + split_names: null + log_names: null + map_names: null + scene_tokens: null + timestamp_threshold_s: null + ego_displacement_minimum_m: null + duration_s: 9.2 + history_s: 3.0 +scene_builder: + _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder + _convert_: all + dataset_path: ${d123_data_root} +distributed_timeout_seconds: 7200 +selected_simulation_metrics: null +verbose: false +logger_level: info +logger_format_string: null +max_number_of_workers: null +gpu: true +seed: 42 +d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} +d123_maps_root: ${oc.env:D123_MAPS_ROOT} +d123_data_root: ${oc.env:D123_DATA_ROOT} +nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} +nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} +nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} +experiment_name: my_run +date_format: '%Y.%m.%d.%H.%M.%S' +experiment_uid: ${now:${date_format}} +output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} +force_log_conversion: false +force_map_conversion: true +datasets: + nuplan_private_dataset: + _target_: d123.dataset.dataset_specific.nuplan.nuplan_data_converter.NuplanDataConverter + _convert_: all + splits: + - nuplan_private_test + log_path: ${oc.env:NUPLAN_DATA_ROOT}/nuplan-v1.1/splits + data_converter_config: + _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig + _convert_: all + output_path: ${d123_data_root} + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + camera_store_option: path + lidar_store_option: path diff --git a/exp/my_run/2025.08.11.15.45.36/code/hydra/hydra.yaml b/exp/my_run/2025.08.11.15.45.36/code/hydra/hydra.yaml new file mode 100644 index 00000000..bf09b447 --- /dev/null +++ b/exp/my_run/2025.08.11.15.45.36/code/hydra/hydra.yaml @@ -0,0 +1,177 @@ +hydra: + run: + dir: ${output_dir} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. 
+ + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] + - %(message)s' + log_colors: + DEBUG: purple + INFO: green + WARNING: yellow + ERROR: red + CRITICAL: red + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: + - pkg://d123.script.config + - pkg://d123.script.config.common + callbacks: {} + output_subdir: ${output_dir}/code/hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - experiment_name=my_run + job: + name: run_dataset_conversion + chdir: false + override_dirname: experiment_name=my_run + id: ??? + num: ??? 
+ config_name: default_dataset_conversion + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/jbwang/d123/d123/script + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/jbwang/d123/d123/script/config/dataset_conversion + schema: file + provider: main + - path: hydra_plugins.hydra_colorlog.conf + schema: pkg + provider: hydra-colorlog + - path: d123.script.config + schema: pkg + provider: hydra.searchpath in main + - path: d123.script.config.common + schema: pkg + provider: hydra.searchpath in main + - path: '' + schema: structured + provider: schema + output_dir: /home/jbwang/d123/exp/my_run/2025.08.11.15.45.36 + choices: + scene_builder: default_scene_builder + scene_filter: all_scenes + worker: ray_distributed + hydra/env: default + hydra/callbacks: null + hydra/job_logging: colorlog + hydra/hydra_logging: colorlog + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/exp/my_run/2025.08.11.15.45.36/code/hydra/overrides.yaml b/exp/my_run/2025.08.11.15.45.36/code/hydra/overrides.yaml new file mode 100644 index 00000000..373bde0c --- /dev/null +++ b/exp/my_run/2025.08.11.15.45.36/code/hydra/overrides.yaml @@ -0,0 +1 @@ +- experiment_name=my_run diff --git a/exp/my_run/2025.08.11.15.45.36/log.txt b/exp/my_run/2025.08.11.15.45.36/log.txt new file mode 100644 index 00000000..2bdc0b60 --- /dev/null +++ b/exp/my_run/2025.08.11.15.45.36/log.txt @@ -0,0 +1,10 @@ +2025-08-11 15:45:36,813 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... +2025-08-11 15:46:10,300 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! +2025-08-11 15:46:34,960 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed +2025-08-11 15:46:34,962 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 +Number of CPUs per node: 64 +Number of GPUs per node: 8 +Number of threads across all nodes: 64 +2025-08-11 15:46:34,962 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! +2025-08-11 15:46:34,963 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... +2025-08-11 15:46:34,964 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... 
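For reference, the run captured above corresponds to a Hydra invocation along these lines (script path and working directory taken from log.txt and hydra.yaml; the exact flags are an assumption based on overrides.yaml):

    cd /home/jbwang/d123/d123/script
    python run_dataset_conversion.py experiment_name=my_run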
diff --git a/jbwang_test.py b/jbwang_test.py
new file mode 100644
index 00000000..ac3afac5
--- /dev/null
+++ b/jbwang_test.py
@@ -0,0 +1,68 @@
+# from nuplan.database.nuplan_db_orm.nuplandb import NuPlanDB
+
+# # # Open the database file
+# # db = NuPlanDB(db_path="/nas/datasets/nuplan/nuplan-v1.1/splits/mini/2021.05.12.22.00.38_veh-35_01008_01518.db")
+# NUPLAN_DATA_ROOT = "/nas/datasets/nuplan/nuplan-v1.1/splits/mini"
+# log_path
+# log_db = NuPlanDB(NUPLAN_DATA_ROOT, str(log_path), None)
+
+# # Get the data of frame 1050
+# frame = db.get_frame(1050)
+# img_front = frame.camera_front  # front-view image
+# point_cloud = frame.lidar  # point cloud
+
+# # Get all vehicle states of this segment
+# status_data = db.get_vehicle_status()  # returns a DataFrame
+# print(status_data)
+
+
+
+# from d123.dataset.dataset_specific.nuplan.nuplan_data_converter import NuplanDataConverter, DataConverterConfig
+# splits = ["nuplan_mini_train"]
+# log_path = "/nas/datasets/nuplan/nuplan-v1.1/splits/mini/"
+# converter = NuplanDataConverter(
+#     log_path=log_path,
+#     splits=splits,
+#     data_converter_config=DataConverterConfig(output_path="data/jbwang/d123"),
+# )
+# # converter.convert_logs()
+from pathlib import Path
+log_paths_per_split = {
+    "nuplan_mini_train": [
+        "2021", "2022"]
+}
+log_args = [
+    {
+        "log_path": log_path,
+        "split": split,
+    }
+    for split, log_paths in log_paths_per_split.items()
+    for log_path in log_paths
+]
+PATH_2D_RAW_ROOT = Path("/nas/datasets/KITTI-360/data_3d_raw/")
+candidates = sorted(p for p in PATH_2D_RAW_ROOT.iterdir() if p.is_dir() and p.name.endswith("_sync"))
+# print(log_args)
+# print(candidates)
+# print(candidates[0].name)
+# print(candidates[0].stem)
+# print(type(candidates[0].name))
+# print(type(candidates[0].stem))
+# PATH_2D_RAW_ROOT_new = PATH_2D_RAW_ROOT/"123"/candidates[0].name
+# print(PATH_2D_RAW_ROOT_new)
+
+
+
+# import hashlib
+# def create_token(input_data: str) -> str:
+#     # TODO: Refactor this function.
+#     # TODO: Add a general function to create tokens from arbitrary data.
+#     if isinstance(input_data, str):
+#         input_data = input_data.encode("utf-8")
+
+#     hash_obj = hashlib.sha256(input_data)
+#     return hash_obj.hexdigest()[:16]
+
+# log_name = "1230_asd_"
+# for i in range(20):
+#     a = create_token(f"{log_name}_{i}")
+#     print(a)
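For reference, a minimal read-back sketch for the Arrow logs this converter writes (the kitti360 output path is an assumption following the ${d123_data_root}/<split>/<log_name>.arrow convention; pa.ipc.open_file mirrors the pa.ipc.new_file writer used in the converter):

    import pyarrow as pa

    with pa.OSFile("/data/jbwang/d123/data/kitti360/2013_05_28_drive_0000_sync.arrow", "rb") as source:
        table = pa.ipc.open_file(source).read_all()
    print(table.schema.names)  # token, timestamp, detections_*, ego_states, ...
    print(table.column("timestamp").slice(0, 4).to_pylist())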
Columns filtered") + +table = dataset.to_table(columns=all_columns) +# print("table",table) +# print(table["token"]) +for col in table.column_names: + if col == "lidar": + continue + print(f"Column: {col}, Type: {table.schema.field(col).type}") + tokens = table[col] # 或 table.column("token") + # print(len(tokens)) + print(tokens.slice(0, 4).to_pylist()) +# print(table["traffic_light_ids"]) +timer.log("3. Table created") +# Save locally +# with pa.ipc.new_file("filtered_file.arrow", table.schema) as writer: +# writer.write_table(table) +timer.log("4. Table saved locally") + +timer.end() +timer.stats(verbose=False) + +# 查看nuplan数据库的表结构和内容 + +# from pathlib import Path +# from nuplan.database.nuplan_db_orm.nuplandb import NuPlanDB +# from nuplan.database.nuplan_db_orm.lidar_pc import LidarPc +# from sqlalchemy import inspect, select +# from sqlalchemy.orm import Session +# from sqlalchemy import func +# from nuplan.database.nuplan_db_orm.ego_pose import EgoPose + +# NUPLAN_DATA_ROOT = Path("/nas/datasets/nuplan/") # 按你实际路径 +# log_path = "/nas/datasets/nuplan/nuplan-v1.1/splits/mini/2021.05.12.22.00.38_veh-35_01008_01518.db" + +# db = NuPlanDB(NUPLAN_DATA_ROOT, log_path, None) +# # print(db.log) +# print(db.log.map_version) +# # print("log.cameras",db.log.cameras) +# # print("Log name:", db.log_name) +# # print("lidar",db.lidar_pc) +# # print("scenario_tags", db.scenario_tag) +# # print(db.log._session.query(EgoPose).order_by(func.abs(EgoPose.timestamp)).first()) + +# # persp = Path("/nas/datasets/KITTI-360/calibration/perspective.txt") +# # with open(persp, "r") as f: +# # lines = [ln.strip() for ln in f if ln.strip()] +# # print(lines) + +# from d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter import get_kitti360_camera_metadata + +# print(get_kitti360_camera_metadata()) + + + +# from d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter import _read_timestamps +# result = _read_timestamps("2013_05_28_drive_0000_sync") +# print(len(result)) +# print([result[0].time_us]) \ No newline at end of file diff --git a/notebooks/jbwang_viz_test.py b/notebooks/jbwang_viz_test.py new file mode 100644 index 00000000..73f05dbf --- /dev/null +++ b/notebooks/jbwang_viz_test.py @@ -0,0 +1,252 @@ +# from typing import Tuple + +# import matplotlib.pyplot as plt + +# from nuplan.planning.utils.multithreading.worker_sequential import Sequential + +# from d123.dataset.scene.scene_builder import ArrowSceneBuilder +# from d123.dataset.scene.scene_filter import SceneFilter +# from d123.dataset.scene.abstract_scene import AbstractScene + +# from typing import Dict +# from d123.common.datatypes.sensor.camera import CameraType +# from d123.common.visualization.matplotlib.camera import add_camera_ax +# from d123.common.visualization.matplotlib.camera import add_box_detections_to_camera_ax + +# # split = "nuplan_private_test" +# # log_names = ["2021.09.29.17.35.58_veh-44_00066_00432"] + + + + +# # splits = ["carla"] +# splits = ["nuplan_private_test"] +# # splits = ["wopd_train"] +# # log_names = None + + + +# # splits = ["nuplan_private_test"] +# log_names = None + +# scene_tokens = None + +# scene_filter = SceneFilter( +# split_names=splits, +# log_names=log_names, +# scene_tokens=scene_tokens, +# duration_s=19, +# history_s=0.0, +# timestamp_threshold_s=20, +# shuffle=False, +# camera_types=[CameraType.CAM_F0], +# ) +# scene_builder = ArrowSceneBuilder("/data/jbwang/d123/data/") +# worker = Sequential() +# # worker = RayDistributed() +# scenes = scene_builder.get_scenes(scene_filter, 
worker) + +# print(f"Found {len(scenes)} scenes") + + +# from typing import List, Optional, Tuple +# import matplotlib.pyplot as plt +# import numpy as np +# from d123.common.geometry.base import Point2D +# from d123.common.visualization.color.color import BLACK, DARK_GREY, DARKER_GREY, LIGHT_GREY, NEW_TAB_10, TAB_10 +# from d123.common.visualization.color.config import PlotConfig +# from d123.common.visualization.color.default import CENTERLINE_CONFIG, MAP_SURFACE_CONFIG, ROUTE_CONFIG +# from d123.common.visualization.matplotlib.observation import ( +# add_box_detections_to_ax, +# add_default_map_on_ax, +# add_ego_vehicle_to_ax, +# add_traffic_lights_to_ax, +# ) +# from d123.common.visualization.matplotlib.utils import add_shapely_linestring_to_ax, add_shapely_polygon_to_ax +# from d123.dataset.maps.abstract_map import AbstractMap +# from d123.dataset.maps.abstract_map_objects import AbstractLane +# from d123.dataset.maps.map_datatypes import MapLayer +# from d123.dataset.scene.abstract_scene import AbstractScene + + +# import shapely.geometry as geom + +# LEFT_CONFIG: PlotConfig = PlotConfig( +# fill_color=TAB_10[2], +# fill_color_alpha=1.0, +# line_color=TAB_10[2], +# line_color_alpha=0.5, +# line_width=1.0, +# line_style="-", +# zorder=3, +# ) + +# RIGHT_CONFIG: PlotConfig = PlotConfig( +# fill_color=TAB_10[3], +# fill_color_alpha=1.0, +# line_color=TAB_10[3], +# line_color_alpha=0.5, +# line_width=1.0, +# line_style="-", +# zorder=3, +# ) + + +# LANE_CONFIG: PlotConfig = PlotConfig( +# fill_color=BLACK, +# fill_color_alpha=1.0, +# line_color=BLACK, +# line_color_alpha=0.0, +# line_width=0.0, +# line_style="-", +# zorder=5, +# ) + +# ROAD_EDGE_CONFIG: PlotConfig = PlotConfig( +# fill_color=DARKER_GREY.set_brightness(0.0), +# fill_color_alpha=1.0, +# line_color=DARKER_GREY.set_brightness(0.0), +# line_color_alpha=1.0, +# line_width=1.0, +# line_style="-", +# zorder=3, +# ) + +# ROAD_LINE_CONFIG: PlotConfig = PlotConfig( +# fill_color=DARKER_GREY, +# fill_color_alpha=1.0, +# line_color=NEW_TAB_10[5], +# line_color_alpha=1.0, +# line_width=1.5, +# line_style="-", +# zorder=3, +# ) + + +# def add_debug_map_on_ax( +# ax: plt.Axes, +# map_api: AbstractMap, +# point_2d: Point2D, +# radius: float, +# route_lane_group_ids: Optional[List[int]] = None, +# ) -> None: +# layers: List[MapLayer] = [ +# MapLayer.LANE, +# MapLayer.LANE_GROUP, +# MapLayer.GENERIC_DRIVABLE, +# MapLayer.CARPARK, +# MapLayer.CROSSWALK, +# MapLayer.INTERSECTION, +# MapLayer.WALKWAY, +# MapLayer.ROAD_EDGE, +# MapLayer.ROAD_LINE, +# ] +# x_min, x_max = point_2d.x - radius, point_2d.x + radius +# y_min, y_max = point_2d.y - radius, point_2d.y + radius +# patch = geom.box(x_min, y_min, x_max, y_max) +# map_objects_dict = map_api.query(geometry=patch, layers=layers, predicate="intersects") + +# done = False +# for layer, map_objects in map_objects_dict.items(): +# for map_object in map_objects: +# try: +# if layer in [ +# # MapLayer.GENERIC_DRIVABLE, +# # MapLayer.CARPARK, +# # MapLayer.CROSSWALK, +# # MapLayer.INTERSECTION, +# # MapLayer.WALKWAY, +# ]: +# add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, MAP_SURFACE_CONFIG[layer]) + +# # if layer in [MapLayer.LANE_GROUP]: +# # add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, MAP_SURFACE_CONFIG[layer]) + +# if layer in [MapLayer.LANE]: +# map_object: AbstractLane +# if map_object.right_lane is not None and map_object.left_lane is not None and not done: +# add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, LANE_CONFIG) +# add_shapely_polygon_to_ax(ax, 
map_object.right_lane.shapely_polygon, RIGHT_CONFIG) +# add_shapely_polygon_to_ax(ax, map_object.left_lane.shapely_polygon, LEFT_CONFIG) +# done = True +# else: +# add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, MAP_SURFACE_CONFIG[layer]) + + +# # add_shapely_linestring_to_ax(ax, map_object.right_boundary.linestring, RIGHT_CONFIG) +# # add_shapely_linestring_to_ax(ax, map_object.left_boundary.linestring, LEFT_CONFIG) +# # add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, LANE_CONFIG) + +# # centroid = map_object.shapely_polygon.centroid +# # ax.text( +# # centroid.x, +# # centroid.y, +# # str(map_object.id), +# # horizontalalignment="center", +# # verticalalignment="center", +# # fontsize=8, +# # bbox=dict(facecolor="white", alpha=0.7, boxstyle="round,pad=0.2"), +# # ) +# # if layer in [MapLayer.ROAD_EDGE]: +# # add_shapely_linestring_to_ax(ax, map_object.polyline_3d.linestring, ROAD_EDGE_CONFIG) +# # edge_lengths.append(map_object.polyline_3d.linestring.length) + +# if layer in [MapLayer.ROAD_LINE]: +# line_type = int(map_object.road_line_type) +# plt_config = PlotConfig( +# fill_color=NEW_TAB_10[line_type % len(NEW_TAB_10)], +# fill_color_alpha=1.0, +# line_color=NEW_TAB_10[line_type % len(NEW_TAB_10)], +# line_color_alpha=1.0, +# line_width=1.5, +# line_style="-", +# zorder=3, +# ) +# add_shapely_linestring_to_ax(ax, map_object.polyline_3d.linestring, plt_config) + +# except Exception: +# import traceback + +# print(f"Error adding map object of type {layer.name} and id {map_object.id}") +# traceback.print_exc() + +# ax.set_title(f"Map: {map_api.map_name}") + + +# def _plot_scene_on_ax(ax: plt.Axes, scene: AbstractScene, iteration: int = 0, radius: float = 80) -> plt.Axes: + +# ego_vehicle_state = scene.get_ego_state_at_iteration(iteration) +# box_detections = scene.get_box_detections_at_iteration(iteration) + +# point_2d = ego_vehicle_state.bounding_box.center.state_se2.point_2d +# add_debug_map_on_ax(ax, scene.map_api, point_2d, radius=radius, route_lane_group_ids=None) +# # add_default_map_on_ax(ax, scene.map_api, point_2d, radius=radius, route_lane_group_ids=None) +# # add_traffic_lights_to_ax(ax, traffic_light_detections, scene.map_api) + +# add_box_detections_to_ax(ax, box_detections) +# add_ego_vehicle_to_ax(ax, ego_vehicle_state) + +# zoom = 1.0 +# ax.set_xlim(point_2d.x - radius * zoom, point_2d.x + radius * zoom) +# ax.set_ylim(point_2d.y - radius * zoom, point_2d.y + radius * zoom) + +# ax.set_aspect("equal", adjustable="box") +# return ax + + +# def plot_scene_at_iteration( +# scene: AbstractScene, iteration: int = 0, radius: float = 80 +# ) -> Tuple[plt.Figure, plt.Axes]: + +# size = 15 + +# fig, ax = plt.subplots(figsize=(size, size)) +# _plot_scene_on_ax(ax, scene, iteration, radius) +# return fig, ax + + +# scene_index = 1 +# fig, ax = plot_scene_at_iteration(scenes[scene_index], iteration=100, radius=100) + +# # fig.savefig(f"/home/daniel/scene_{scene_index}_iteration_1.pdf", dpi=300, bbox_inches="tight") + diff --git a/notebooks/nuplan/nuplan_sensor_loading.ipynb b/notebooks/nuplan/nuplan_sensor_loading.ipynb index 0dd69b4e..8291f265 100644 --- a/notebooks/nuplan/nuplan_sensor_loading.ipynb +++ b/notebooks/nuplan/nuplan_sensor_loading.ipynb @@ -21,7 +21,18 @@ "execution_count": null, "id": "1", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[[0.0, 0.0, 0.0], [0.0, 0.0, 0.0], [0.0, 0.0, 0.0]]" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "import numpy as 
np\n", "\n", @@ -33,7 +44,19 @@ "execution_count": null, "id": "2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "ModuleNotFoundError", + "evalue": "No module named 'd123'", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mModuleNotFoundError\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[3]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34;01md123\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdataset\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mdataset_specific\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mnuplan\u001b[39;00m\u001b[34;01m.\u001b[39;00m\u001b[34;01mnuplan_data_converter\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mimport\u001b[39;00m NuplanDataConverter\n", + "\u001b[31mModuleNotFoundError\u001b[39m: No module named 'd123'" + ] + } + ], "source": [ "from d123.dataset.dataset_specific.nuplan.nuplan_data_converter import NuplanDataConverter" ] diff --git a/requirements.txt b/requirements.txt index f0c697e2..b022f008 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -nuplan-devkit @ git+https://github.com/motional/nuplan-devkit/@nuplan-devkit-v1.2 +# nuplan-devkit @ git+https://github.com/motional/nuplan-devkit/@nuplan-devkit-v1.2 # nuplan requirements aioboto3 From a4f664ea5fe560c5195228ab390fd2bde1ccf457 Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Fri, 15 Aug 2025 14:59:52 +0800 Subject: [PATCH 02/32] finish kitti360v0.0.1 --- d123/common/datatypes/sensor/lidar_index.py | 7 + .../kitti_360/{__init__ .py => __init__.py} | 0 .../dataset_specific/kitti_360/jbwang_test.py | 154 +++++++ .../kitti_360/kitti_360_data_converter.py | 391 +++++++++++------- .../kitti_360/kitti_360_helper.py | 102 +++++ .../dataset_specific/kitti_360/labels.py | 168 ++++++++ .../default_dataset_conversion.yaml | 4 - .../config/datasets/kitti360_dataset.yaml | 2 +- .../code/hydra/config.yaml | 60 +++ .../2025.08.15.14.31.57/code/hydra/hydra.yaml | 177 ++++++++ .../code/hydra/overrides.yaml | 1 + exp/kitti360_test/2025.08.15.14.31.57/log.txt | 10 + .../code/hydra/config.yaml | 60 +++ .../2025.08.15.14.36.40/code/hydra/hydra.yaml | 177 ++++++++ .../code/hydra/overrides.yaml | 1 + exp/kitti360_test/2025.08.15.14.36.40/log.txt | 10 + .../code/hydra/config.yaml | 60 +++ .../2025.08.15.14.40.29/code/hydra/hydra.yaml | 177 ++++++++ .../code/hydra/overrides.yaml | 1 + exp/kitti_test2/2025.08.15.14.40.29/log.txt | 10 + .../code/hydra/config.yaml | 60 +++ .../2025.08.15.14.43.13/code/hydra/hydra.yaml | 177 ++++++++ .../code/hydra/overrides.yaml | 1 + exp/kitti_test2/2025.08.15.14.43.13/log.txt | 12 + .../code/hydra/config.yaml | 60 +++ .../2025.08.15.14.46.49/code/hydra/hydra.yaml | 177 ++++++++ .../code/hydra/overrides.yaml | 1 + exp/kitti_test2/2025.08.15.14.46.49/log.txt | 10 + .../code/hydra/config.yaml | 60 +++ .../2025.08.15.14.50.55/code/hydra/hydra.yaml | 177 ++++++++ .../code/hydra/overrides.yaml | 1 + exp/kitti_test2/2025.08.15.14.50.55/log.txt | 11 + .../code/hydra/config.yaml | 60 +++ .../2025.08.15.14.52.39/code/hydra/hydra.yaml | 177 ++++++++ .../code/hydra/overrides.yaml | 1 + exp/kitti_test2/2025.08.15.14.52.39/log.txt | 11 + jbwang_test.py | 19 +- jbwang_test2.py | 70 ++++ notebooks/dataset/jbwang_test.py | 11 +- 39 files changed, 2508 
insertions(+), 160 deletions(-) rename d123/dataset/dataset_specific/kitti_360/{__init__ .py => __init__.py} (100%) create mode 100644 d123/dataset/dataset_specific/kitti_360/jbwang_test.py create mode 100644 d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py create mode 100644 d123/dataset/dataset_specific/kitti_360/labels.py create mode 100644 exp/kitti360_test/2025.08.15.14.31.57/code/hydra/config.yaml create mode 100644 exp/kitti360_test/2025.08.15.14.31.57/code/hydra/hydra.yaml create mode 100644 exp/kitti360_test/2025.08.15.14.31.57/code/hydra/overrides.yaml create mode 100644 exp/kitti360_test/2025.08.15.14.31.57/log.txt create mode 100644 exp/kitti360_test/2025.08.15.14.36.40/code/hydra/config.yaml create mode 100644 exp/kitti360_test/2025.08.15.14.36.40/code/hydra/hydra.yaml create mode 100644 exp/kitti360_test/2025.08.15.14.36.40/code/hydra/overrides.yaml create mode 100644 exp/kitti360_test/2025.08.15.14.36.40/log.txt create mode 100644 exp/kitti_test2/2025.08.15.14.40.29/code/hydra/config.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.40.29/code/hydra/hydra.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.40.29/code/hydra/overrides.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.40.29/log.txt create mode 100644 exp/kitti_test2/2025.08.15.14.43.13/code/hydra/config.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.43.13/code/hydra/hydra.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.43.13/code/hydra/overrides.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.43.13/log.txt create mode 100644 exp/kitti_test2/2025.08.15.14.46.49/code/hydra/config.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.46.49/code/hydra/hydra.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.46.49/code/hydra/overrides.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.46.49/log.txt create mode 100644 exp/kitti_test2/2025.08.15.14.50.55/code/hydra/config.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.50.55/code/hydra/hydra.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.50.55/code/hydra/overrides.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.50.55/log.txt create mode 100644 exp/kitti_test2/2025.08.15.14.52.39/code/hydra/config.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.52.39/code/hydra/hydra.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.52.39/code/hydra/overrides.yaml create mode 100644 exp/kitti_test2/2025.08.15.14.52.39/log.txt create mode 100644 jbwang_test2.py diff --git a/d123/common/datatypes/sensor/lidar_index.py b/d123/common/datatypes/sensor/lidar_index.py index 0df92cff..4e7ad133 100644 --- a/d123/common/datatypes/sensor/lidar_index.py +++ b/d123/common/datatypes/sensor/lidar_index.py @@ -60,3 +60,10 @@ class WopdLidarIndex(LiDARIndex): X = 3 Y = 4 Z = 5 + +@register_lidar_index +class Kitti360LidarIndex(LiDARIndex): + X = 0 + Y = 1 + Z = 2 + INTENSITY = 3 \ No newline at end of file diff --git a/d123/dataset/dataset_specific/kitti_360/__init__ .py b/d123/dataset/dataset_specific/kitti_360/__init__.py similarity index 100% rename from d123/dataset/dataset_specific/kitti_360/__init__ .py rename to d123/dataset/dataset_specific/kitti_360/__init__.py diff --git a/d123/dataset/dataset_specific/kitti_360/jbwang_test.py b/d123/dataset/dataset_specific/kitti_360/jbwang_test.py new file mode 100644 index 00000000..6f0bdbd9 --- /dev/null +++ b/d123/dataset/dataset_specific/kitti_360/jbwang_test.py @@ -0,0 +1,154 @@ +import gc +import json +import os +import pickle +from dataclasses import asdict +from functools 
import partial +from pathlib import Path +from typing import Any, Dict, Final, List, Optional, Tuple, Union + +import numpy as np +import pyarrow as pa +import yaml +from nuplan.database.nuplan_db.nuplan_scenario_queries import get_cameras, get_images_from_lidar_tokens +from nuplan.database.nuplan_db_orm.ego_pose import EgoPose +from nuplan.database.nuplan_db_orm.lidar_box import LidarBox +from nuplan.database.nuplan_db_orm.lidar_pc import LidarPc +from nuplan.database.nuplan_db_orm.nuplandb import NuPlanDB +from nuplan.planning.simulation.observation.observation_type import CameraChannel +from nuplan.planning.utils.multithreading.worker_utils import WorkerPool, worker_map +from pyquaternion import Quaternion +from sqlalchemy import func + + +from kitti_360_data_converter import _extract_ego_state_all,get_kitti360_lidar_metadata,_extract_cameras,_extract_detections + +# a = _extract_ego_state_all("2013_05_28_drive_0000_sync") +# print(a[0]) +# print(a[1]) +# print(a[10]) +from d123.common.datatypes.time.time_point import TimePoint +from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json + +NUPLAN_CAMERA_TYPES = { + CameraType.CAM_F0: CameraChannel.CAM_F0, + CameraType.CAM_B0: CameraChannel.CAM_B0, + CameraType.CAM_L0: CameraChannel.CAM_L0, + CameraType.CAM_L1: CameraChannel.CAM_L1, + CameraType.CAM_L2: CameraChannel.CAM_L2, + CameraType.CAM_R0: CameraChannel.CAM_R0, + CameraType.CAM_R1: CameraChannel.CAM_R1, + CameraType.CAM_R2: CameraChannel.CAM_R2, +} + +NUPLAN_DATA_ROOT = Path(os.environ["NUPLAN_DATA_ROOT"]) +NUPLAN_ROLLING_SHUTTER_S: Final[TimePoint] = TimePoint.from_s(1 / 60) + +def _extract_camera( + log_db: NuPlanDB, + lidar_pc: LidarPc, + source_log_path: Path, +) -> Dict[CameraType, Union[str, bytes]]: + + camera_dict: Dict[str, Union[str, bytes]] = {} + sensor_root = NUPLAN_DATA_ROOT / "nuplan-v1.1" / "sensor_blobs" + + log_cam_infos = {camera.token: camera for camera in log_db.log.cameras} + for camera_type, camera_channel in NUPLAN_CAMERA_TYPES.items(): + camera_data: Optional[Union[str, bytes]] = None + c2e: Optional[List[float]] = None + image_class = list(get_images_from_lidar_tokens(source_log_path, [lidar_pc.token], [str(camera_channel.value)])) + # print("image_class",image_class) + if len(image_class) != 0: + image = image_class[0] + filename_jpg = sensor_root / image.filename_jpg + + timestamp = image.timestamp + NUPLAN_ROLLING_SHUTTER_S.time_us + img_ego_pose: EgoPose = ( + log_db.log._session.query(EgoPose).order_by(func.abs(EgoPose.timestamp - timestamp)).first() + ) + img_e2g = img_ego_pose.trans_matrix + g2e = lidar_pc.ego_pose.trans_matrix_inv + img_e2e = g2e @ img_e2g + cam_info = log_cam_infos[image.camera_token] + c2img_e = cam_info.trans_matrix + c2e = img_e2e @ c2img_e + # print(f"Camera {camera_type} found for lidar {lidar_pc.token} at timestamp {timestamp}") + print(camera_type,"c2e:", c2e) + camera_dict[camera_type] = camera_data + + return camera_dict + + +def get_cam_info_from_lidar_pc(log,log_file, lidar_pc, rolling_shutter_s=1/60): + + retrieved_images = get_images_from_lidar_tokens( + log_file, [lidar_pc.token], [str(channel.value) for channel in CameraChannel] + ) + + # if interp_trans: + # neighbours = [] + # ego_poses_dict = {} + # for ego_pose in log.ego_poses: + # ego_poses_dict[ego_pose.token] = ego_pose + # if abs(ego_pose.timestamp - lidar_pc.ego_pose.timestamp) / 1e6 < 0.5: + # neighbours.append(ego_pose) + # timestamps = [pose.timestamp for pose in neighbours] + # translations = 
[pose.translation_np for pose in neighbours] + # splines = [CubicSpline(timestamps, [translation[i] for translation in translations]) for i in range(2)] + + log_cam_infos = {camera.token : camera for camera in log.camera} + cams = {} + for img in retrieved_images: + channel = img.channel + filename = img.filename_jpg + + # if interp_trans: + # img_ego_pose = ego_poses_dict[img.ego_pose_token] + # interpolated_translation = np.array([splines[0](timestamp), splines[1](timestamp), img_ego_pose.z]) + # delta = interpolated_translation - lidar_pc.ego_pose.translation_np + # delta = np.dot(lidar_pc.ego_pose.quaternion.rotation_matrix.T, delta) + if channel == "CAM_F0": + timestamp = img.timestamp + (rolling_shutter_s * 1e6) + img_ego_pose = log.session.query(EgoPose).order_by(func.abs(EgoPose.timestamp - timestamp)).first() + img_e2g = img_ego_pose.trans_matrix + # print("img_e2g:", img_e2g) + + g2e = lidar_pc.ego_pose.trans_matrix_inv + # print("g2e:", g2e) #change obviously + img_e2e = g2e @ img_e2g + # print("img_e2e:", img_e2e) + cam_info = log_cam_infos[img.camera_token] + c2img_e = cam_info.trans_matrix + # print("c2img_e:", c2img_e) + c2e = img_e2e @ c2img_e + # print("channel:", channel, "c2e:", c2e) + + cams[channel] = dict( + data_path = filename, + timestamp = img.timestamp, + token=img.token, + sensor2ego_rotation = Quaternion(matrix=c2e[:3, :3]), + sensor2ego_translation = c2e[:3, 3], + cam_intrinsic = cam_info.intrinsic_np, + distortion = cam_info.distortion_np, + ) + + + if len(cams) != 8: + return None + # print(cams) + return cams + +if __name__ == "__main__": + # Example usage + # data_converter_config: DataConverterConfig + # log_path = Path("/nas/datasets/nuplan/nuplan-v1.1/splits/mini/2021.10.11.07.12.18_veh-50_00211_00304.db") + # log_path = Path("/nas/datasets/nuplan/nuplan-v1.1/splits/mini/2021.09.16.15.12.03_veh-42_01037_01434.db") + # log_db = NuPlanDB(NUPLAN_DATA_ROOT, str(log_path), None) + + # for lidar_pc in log_db.lidar_pc: # Replace with actual token + # # camera_data = _extract_camera(log_db, lidar_pc, log_path) + # camera_data = get_cam_info_from_lidar_pc(log_db,log_path, lidar_pc, rolling_shutter_s=1/60) + # print(_extract_cameras("2013_05_28_drive_0000_sync",0)) + _extract_detections("2013_05_28_drive_0000_sync", 0) \ No newline at end of file diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py index b6e97d8c..c79ce0b2 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py @@ -9,27 +9,35 @@ import numpy as np import datetime import hashlib +import xml.etree.ElementTree as ET import pyarrow as pa from PIL import Image + from nuplan.planning.utils.multithreading.worker_utils import WorkerPool, worker_map +from d123.common.datatypes.detection.detection_types import DetectionType from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json +from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json +from d123.common.datatypes.sensor.lidar_index import Kitti360LidarIndex from d123.common.datatypes.time.time_point import TimePoint -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3Index -from d123.common.datatypes.vehicle_state.vehicle_parameters import get_kitti360_station_wagon_parameters +from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE3, 
EgoStateSE3, EgoStateSE3Index
+from d123.common.datatypes.vehicle_state.vehicle_parameters import get_kitti360_station_wagon_parameters, rear_axle_se3_to_center_se3
+from d123.common.geometry.base import StateSE3
 from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3Index
-from d123.common.geometry.vector import Vector3DIndex
+from d123.common.geometry.vector import Vector3D, Vector3DIndex
 from d123.dataset.arrow.helper import open_arrow_table, write_arrow_table
 from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig, RawDataConverter
 from d123.dataset.logs.log_metadata import LogMetadata
+from kitti_360_helper import KITTI360Bbox3D
+
 
 KITTI360_DT: Final[float] = 0.1
 SORT_BY_TIMESTAMP: Final[bool] = True
 
 KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"])
 
 #TODO camera mismatch
-KITTI360_CAMERA_TYPES: Final[Dict[CameraType, str]] = {
+KITTI360_CAMERA_TYPES = {
     CameraType.CAM_L0: "image_00",
     CameraType.CAM_R0: "image_01",
     # TODO fisheye camera
@@ -63,6 +71,16 @@
 #     DIR_POSES: PATH_POSES_ROOT,
 }
 
+#TODO
+KITTI360_DETECTION_NAME_DICT = {
+    "truck": DetectionType.VEHICLE,
+    "bus": DetectionType.VEHICLE,
+    "car": DetectionType.VEHICLE,
+    "motorcycle": DetectionType.BICYCLE,
+    "bicycle": DetectionType.BICYCLE,
+    "pedestrian": DetectionType.PEDESTRIAN,
+}
+
 
 def create_token(input_data: str) -> str:
     # TODO: Refactor this function.
@@ -73,43 +91,6 @@ def create_token(input_data: str) -> str:
     hash_obj = hashlib.sha256(input_data)
     return hash_obj.hexdigest()[:16]
 
-def _load_calibration() -> Tuple[Dict[str, np.ndarray], Dict[str, np.ndarray]]:
-    """
-    Read the KITTI-360 global calibration files and return:
-    - intrinsics[image_02] = 3x3
-    - c2e[image_02] = 4x4 (camera->ego/body); here cam_to_pose is treated as camera->vehicle (simplified)
-    """
-    calib_dir = KITTI360_DATA_ROOT / DIR_CALIB
-    intrinsics: Dict[str, np.ndarray] = {}
-    c2e: Dict[str, np.ndarray] = {}
-
-    # Intrinsics: P_rect_0{0..3} from perspective.txt
-    persp = calib_dir / "perspective.txt"
-    if persp.exists():
-        with open(persp, "r") as f:
-            lines = [ln.strip() for ln in f if ln.strip()]
-        for ln in lines:
-            if ln.startswith("P_rect_02"):
-                intrinsics["image_02"] = _read_projection_matrix(ln)
-            elif ln.startswith("P_rect_03"):
-                intrinsics["image_03"] = _read_projection_matrix(ln)
-
-    # Extrinsics: Tr_cam02 from cam_to_pose.txt (camera -> vehicle/pose)
-    c2p = calib_dir / "cam_to_pose.txt"
-    if c2p.exists():
-        with open(c2p, "r") as f:
-            lines = [ln.strip() for ln in f if ln.strip()]
-        for ln in lines:
-            if ln.startswith("Tr_cam02"):
-                vals = [float(x) for x in ln.split(":")[1].strip().split()]
-                T = np.array(vals, dtype=np.float64).reshape(4, 4)
-                c2e["image_02"] = T
-            elif ln.startswith("Tr_cam03"):
-                vals = [float(x) for x in ln.split(":")[1].strip().split()]
-                T = np.array(vals, dtype=np.float64).reshape(4, 4)
-                c2e["image_03"] = T
-
-    return intrinsics, c2e
 
 class Kitti360DataConverter(RawDataConverter):
     def __init__(
@@ -204,6 +185,19 @@
         if not log_file_path.parent.exists():
             log_file_path.parent.mkdir(parents=True, exist_ok=True)
 
+        metadata = LogMetadata(
+            dataset="kitti360",
+            log_name=log_name,
+            location="None",
+            timestep_seconds=KITTI360_DT,
+            map_has_z=False,
+        )
+
+        vehicle_parameters = get_kitti360_station_wagon_parameters()
+        camera_metadata = get_kitti360_camera_metadata()
+        #TODO now only velodyne lidar
+        lidar_metadata = get_kitti360_lidar_metadata()
+
         schema_column_list = [
             ("token", pa.string()),
             ("timestamp", pa.int64()),
@@ -218,38 +212,29 @@ def convert_kitti360_log_to_arrow(
             ("route_lane_group_ids", pa.list_(pa.int64())),
     ]
 
     if data_converter_config.lidar_store_option is not None:
-        if data_converter_config.lidar_store_option == "path":
-            schema_column_list.append(("lidar", pa.string()))
-        elif data_converter_config.lidar_store_option == "binary":
-            raise NotImplementedError("Binary lidar storage is not implemented.")
+        for lidar_type in lidar_metadata.keys():
+            if data_converter_config.lidar_store_option == "path":
+                schema_column_list.append((lidar_type.serialize(), pa.string()))
+            elif data_converter_config.lidar_store_option == "binary":
+                raise NotImplementedError("Binary lidar storage is not implemented.")
 
-    # TODO: Adjust how cameras are added
     if data_converter_config.camera_store_option is not None:
-        for cam_type in KITTI360_CAMERA_TYPES.keys():
+        for camera_type in camera_metadata.keys():
             if data_converter_config.camera_store_option == "path":
-                schema_column_list.append((cam_type.serialize(), pa.string()))
-                schema_column_list.append((f"{cam_type.serialize()}_extrinsic", pa.list_(pa.float64(), 16)))
+                schema_column_list.append((camera_type.serialize(), pa.string()))
+                schema_column_list.append(
+                    (f"{camera_type.serialize()}_extrinsic", pa.list_(pa.float64(), 4 * 4))
+                )
             elif data_converter_config.camera_store_option == "binary":
                 raise NotImplementedError("Binary camera storage is not implemented.")
 
     recording_schema = pa.schema(schema_column_list)
 
-    #TODO location
-    metadata = LogMetadata(
-        dataset="kitti360",
-        log_name=log_name,
-        location="None",
-        timestep_seconds=KITTI360_DT,
-        map_has_z=False,
-    )
-
-    #TODO vehicle parameters
-    vehicle_parameters = get_kitti360_station_wagon_parameters()
-    camera_metadata = get_kitti360_camera_metadata()
     recording_schema = recording_schema.with_metadata(
         {
             "log_metadata": json.dumps(asdict(metadata)),
             "vehicle_parameters": json.dumps(asdict(vehicle_parameters)),
             "camera_metadata": camera_metadata_dict_to_json(camera_metadata),
+            "lidar_metadata": lidar_metadata_dict_to_json(lidar_metadata),
         }
     )
 
@@ -298,6 +283,35 @@ def _read_projection_matrix(p_line: str) -> np.ndarray:
     K = P[:, :3]
     return K
 
+def get_kitti360_lidar_metadata() -> Dict[LiDARType, LiDARMetadata]:
+    metadata: Dict[LiDARType, LiDARMetadata] = {}
+
+    cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt"
+    if not cam2pose_txt.exists():
+        raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}")
+
+    cam2velo_txt = PATH_CALIB_ROOT / "calib_cam_to_velo.txt"
+    if not cam2velo_txt.exists():
+        raise FileNotFoundError(f"calib_cam_to_velo.txt file not found: {cam2velo_txt}")
+
+    lastrow = np.array([0, 0, 0, 1]).reshape(1, 4)
+
+    with open(cam2pose_txt, "r") as f:
+        image_00 = next(f)
+        values = list(map(float, image_00.strip().split()[1:]))
+        matrix = np.array(values).reshape(3, 4)
+        cam2pose = np.concatenate((matrix, lastrow))
+
+    cam2velo = np.concatenate((np.loadtxt(cam2velo_txt).reshape(3, 4), lastrow))
+    # pose -> cam_00 -> velodyne, i.e. the ego/pose frame expressed in the velodyne frame.
+    # TODO: confirm this matches the extrinsic convention expected by LiDARMetadata.
+    extrinsic = cam2velo @ np.linalg.inv(cam2pose)
+
+    metadata[LiDARType.LIDAR_TOP] = LiDARMetadata(
+        lidar_type=LiDARType.LIDAR_TOP,
+        lidar_index=Kitti360LidarIndex,
+        extrinsic=extrinsic,
+    )
+    return metadata
+
 def _write_recording_table(
     log_name: str,
     recording_schema: pa.Schema,
@@ -306,31 +320,33 @@ def _write_recording_table(
 ) -> None:
     ts_list = _read_timestamps(log_name)
+    ego_state_all = _extract_ego_state_all(log_name)
+    detections_states, detections_velocity, detections_tokens, detections_types = _extract_detections(
+        log_name, len(ts_list)
+    )
 
     with pa.OSFile(str(log_file_path), "wb") as sink:
         with pa.ipc.new_file(sink, recording_schema) as writer:
-            for i, tp in enumerate(ts_list):
+            for idx, tp in enumerate(ts_list):
+
                 row_data = {
-                    "token": [create_token(f"{log_name}_{i}")],
+                    "token": [create_token(f"{log_name}_{idx}")],
                     "timestamp": [tp.time_us],
-                    "detections_state": [],
-                    "detections_velocity": [],
-                    "detections_token": [],
-                    "detections_type": [],
-                    "ego_states": [],
-                    "traffic_light_ids": [],
-                    "traffic_light_types": [],
-                    "scenario_tag": [],
-                    "route_lane_group_ids": [],
+                    "detections_state": [detections_states[idx]],
+                    "detections_velocity": [detections_velocity[idx]],
+                    "detections_token": [detections_tokens[idx]],
+                    "detections_type": [detections_types[idx]],
+                    "ego_states": [ego_state_all[idx]],
+                    "traffic_light_ids": [[]],
+                    #TODO traffic light types, if any can be derived
+                    "traffic_light_types": [[]],
+                    "scenario_tag": [["unknown"]],
+                    "route_lane_group_ids": [[]],
                 }
 
                 if data_converter_config.lidar_store_option is not None:
-                    row_data["lidar"] = []
-                    # row_data["lidar"] = [_extract_lidar(log_name, data_converter_config)]
+                    # The schema stores one column per lidar type, so map the returned dict
+                    # onto the serialized column names (mirrors the camera handling below).
+                    lidar_data_dict = _extract_lidar(log_name, idx, data_converter_config)
+                    for lidar_type, lidar_data in (lidar_data_dict or {}).items():
+                        row_data[lidar_type.serialize()] = [lidar_data]
 
                 if data_converter_config.camera_store_option is not None:
-                    # camera_data_dict = _extract_camera(log_db, lidar_pc, source_log_path, data_converter_config)
-                    camera_data_dict = {}
+                    camera_data_dict = _extract_cameras(log_name, idx, data_converter_config)
                     for camera_type, camera_data in camera_data_dict.items():
                         if camera_data is not None:
                             row_data[camera_type.serialize()] = [camera_data[0]]
@@ -348,9 +364,8 @@ def _write_recording_table(
     write_arrow_table(recording_table, log_file_path)
 
 #TODO default timestamps
-# If timestamps are not provided, we can generate them based on the KITTI-360 DT
+# If timestamps are not provided, they could be generated from KITTI360_DT instead.
 def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]:
-
+    # timestamps.txt holds absolute datetimes; they are converted to UNIX microseconds.
     ts_file = PATH_2D_RAW_ROOT / log_name / "image_01" / "timestamps.txt"
     if ts_file.exists():
         tps: List[TimePoint] = []
@@ -375,82 +390,160 @@ def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]:
         return tps
     return None
 
+def _extract_ego_state_all(log_name: str) -> List[List[float]]:
+
+    ego_state_all: List[List[float]] = []
+
+    pose_file = PATH_POSES_ROOT / log_name / "poses.txt"
+    if not pose_file.exists():
+        raise FileNotFoundError(f"Pose file not found: {pose_file}")
+    poses = np.loadtxt(pose_file)
+    poses_time = poses[:, 0] - 1  # adjust frame indices to start from 0
+
+    oxts_path = PATH_POSES_ROOT / log_name / "oxts" / "data"
+
+    for idx in range(len(list(oxts_path.glob("*.txt")))):
+        oxts_path_file = oxts_path / f"{int(idx):010d}.txt"
+        oxts_data = np.loadtxt(oxts_path_file)
+
+        roll, pitch, yaw = oxts_data[3:6]
+        vehicle_parameters = get_kitti360_station_wagon_parameters()
+
+        # poses.txt is sparse; pick the latest pose at or before this frame.
+        pos = np.searchsorted(poses_time, idx, side="right") - 1
+
+        rear_axle_pose = StateSE3(
+            x=poses[pos, 4],
+            y=poses[pos, 8],
+            z=poses[pos, 12],
+            roll=roll,
+            pitch=pitch,
+            yaw=yaw,
+        )
+        # NOTE: The height to the rear axle is not provided by the dataset and is merely approximated.
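+        # Sketch of what rear_axle_se3_to_center_se3 is assumed to do here: shift the
+        # rear-axle pose by the vehicle's center offsets, rotated into the world frame,
+        #   center = rear_axle + R(roll, pitch, yaw) @ [rear_axle_to_center_longitudinal,
+        #                                               0.0,
+        #                                               rear_axle_to_center_vertical]
+        # with the offsets taken from get_kitti360_station_wagon_parameters()
+        # (1.461 m longitudinal, 0.45 m vertical).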
+        center = rear_axle_se3_to_center_se3(rear_axle_se3=rear_axle_pose, vehicle_parameters=vehicle_parameters)
+        dynamic_state = DynamicStateSE3(
+            velocity=Vector3D(
+                x=oxts_data[8],
+                y=oxts_data[9],
+                z=oxts_data[10],
+            ),
+            acceleration=Vector3D(
+                x=oxts_data[14],
+                y=oxts_data[15],
+                z=oxts_data[16],
+            ),
+            angular_velocity=Vector3D(
+                x=oxts_data[20],
+                y=oxts_data[21],
+                z=oxts_data[22],
+            ),
+        )
+        ego_state_all.append(
+            EgoStateSE3(
+                center_se3=center,
+                dynamic_state_se3=dynamic_state,
+                vehicle_parameters=vehicle_parameters,
+                timepoint=None,
+            ).array.tolist()
+        )
+    return ego_state_all
+
+#TODO detections currently come only from the data_3d_bboxes annotations.
+# We may later distinguish between image and lidar detections; for now visibility
+# is based only on each object's start and end frame.
+def _extract_detections(
+    log_name: str,
+    ts_len: int,
+) -> Tuple[List[List[List[float]]], List[List[List[float]]], List[List[str]], List[List[int]]]:
+
+    detections_states: List[List[List[float]]] = [[] for _ in range(ts_len)]
+    detections_velocity: List[List[List[float]]] = [[] for _ in range(ts_len)]
+    detections_tokens: List[List[str]] = [[] for _ in range(ts_len)]
+    detections_types: List[List[int]] = [[] for _ in range(ts_len)]
+
+    bbox_3d_path = PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml"
+    if not bbox_3d_path.exists():
+        raise FileNotFoundError(f"BBox 3D file not found: {bbox_3d_path}")
+
+    tree = ET.parse(bbox_3d_path)
+    root = tree.getroot()
+
+    for child in root:
+        label = child.find("label").text
+        if child.find("transform") is None or label not in KITTI360_DETECTION_NAME_DICT:
+            continue
+        obj = KITTI360Bbox3D()
+        obj.parseBbox(child)
+
+        # static objects are valid over their whole [start_frame, end_frame] window
+        if obj.timestamp == -1:
+            start_frame = obj.start_frame
+            end_frame = obj.end_frame
+            for frame in range(start_frame, end_frame + 1):
+                #TODO check whether the object is actually visible in each frame
                if frame < 0 or frame >= ts_len:
+                    continue
+                #TODO check yaw
+                detections_states[frame].append(obj.get_state_array())
+                detections_velocity[frame].append([0.0, 0.0, 0.0])
+                detections_tokens[frame].append(str(obj.globalID))
+                detections_types[frame].append(int(KITTI360_DETECTION_NAME_DICT[label]))
+        # dynamic objects carry one annotation per timestamp
+        else:
+            frame = obj.timestamp
+            detections_states[frame].append(obj.get_state_array())
+            #TODO velocity is not provided by the annotations
+            detections_velocity[frame].append([0.0, 0.0, 0.0])
+            detections_tokens[frame].append(str(obj.globalID))
+            detections_types[frame].append(int(KITTI360_DETECTION_NAME_DICT[label]))
+
+    return detections_states, detections_velocity, detections_tokens, detections_types
+
 #TODO lidar extraction
-def _extract_lidar(log_name: str, data_converter_config: DataConverterConfig) -> Optional[str]:
+def _extract_lidar(
+    log_name: str, idx: int, data_converter_config: DataConverterConfig
+) -> Optional[Dict[LiDARType, str]]:
     lidar: Optional[str] = None
-    lidar_full_path = DIR_3D_SMT / "train" / log_name / "0000000002_0000000385.ply"
+    lidar_full_path = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{idx:010d}.bin"
     if lidar_full_path.exists():
         if data_converter_config.lidar_store_option == "path":
-            lidar = f"{log_name}/lidar/{sample_name}.npy"
+            lidar = f"{DIR_3D_RAW}/{log_name}/velodyne_points/data/{idx:010d}.bin"
         elif data_converter_config.lidar_store_option == "binary":
             raise NotImplementedError("Binary lidar storage is not implemented.")
     else:
         raise FileNotFoundError(f"LiDAR file not found: {lidar_full_path}")
-    return lidar
-
-def _extract_camera():
-    pass
-
-
-
-# for idx in range(n_frames):
-#     token = f"{seq_name}_{idx:06d}"
-#     t_us = ts_list[idx].time_us
-
-#     row = {
-#         "token": [token],
-#         "timestamp": [t_us],
-#         # placeholders for now, to be replaced with real annotations later
-#         "detections_state": [[]],
-#         "detections_velocity": [[]],
-#         "detections_token": [[]],
-#         "detections_type": [[]],
-#         "ego_states": [([0.0] * len(EgoStateSE3Index))],  # placeholder
-#         "traffic_light_ids": [[]],
-#         "traffic_light_types": [[]],
-#         "scenario_tag": [["unknown"]],
-#         "route_lane_group_ids": [[]],
-#     }
-
-#     # lidar path (if it exists)
-#     if data_converter_config.lidar_store_option is not None:
-#         # velodyne bin: KITTI-360/data_3d_raw/<seq>/velodyne_points/data/0000000000.bin
-#         velodyne_dir = (
-#             KITTI360_DATA_ROOT / DIR_3D / seq_name / "velodyne_points" / "data"
-#         )
-#         # file names may be zero-padded to 10 digits; try several widths
-#         bin_path = None
-#         for fmt in [f"{idx:010d}.bin", f"{idx:06d}.bin", f"{idx:08d}.bin"]:
-#             cand = velodyne_dir / fmt
-#             if cand.exists():
-#                 bin_path = cand
-#                 break
-#         row["lidar"] = [str(bin_path.relative_to(KITTI360_DATA_ROOT)) if bin_path else None]
-
-#     # camera paths and extrinsics
-#     if data_converter_config.camera_store_option is not None:
-#         for cam_type, cam_dir_name in KITTI360_CAMERA_TYPES.items():
-#             img_dir = seq_dir_2d / cam_dir_name / "data"
-#             # try several file-name widths
-#             img_path = None
-#             for ext in (".png", ".jpg", ".jpeg"):
-#                 for fmt in [f"{idx:010d}{ext}", f"{idx:06d}{ext}", f"{idx:08d}{ext}"]:
-#                     cand = img_dir / fmt
-#                     if cand.exists():
-#                         img_path = cand
-#                         break
-#                 if img_path:
-#                     break
-#             if img_path is not None:
-#                 rel = str(img_path.relative_to(KITTI360_DATA_ROOT))
-#                 row[cam_type.serialize()] = [rel]
-#                 # extrinsic: fixed cam->ego (global calibration), constant per frame
-#                 # (replace here if per-frame/rolling poses are needed)
-#                 T = c2e.get(KITTI360_CAMERA_TYPES[cam_type], np.eye(4, dtype=np.float64))
-#                 row[f"{cam_type.serialize()}_extrinsic"] = [T.astype(np.float64).reshape(-1).tolist()]
-#             else:
-#                 row[cam_type.serialize()] = [None]
-#                 row[f"{cam_type.serialize()}_extrinsic"] = [None]
-
-#     batch = pa.record_batch(row, schema=recording_schema)
-#     writer.write_batch(batch)
-#     del batch, row
\ No newline at end of file
+    return {LiDARType.LIDAR_TOP: lidar} if lidar else None
+
+#TODO check camera extrinsic: currently this is the camera-to-pose transform
+def _extract_cameras(
+    log_name: str, idx: int, data_converter_config: DataConverterConfig
+) -> Dict[CameraType, Optional[Tuple[Union[str, bytes], List[float]]]]:
+
+    camera_dict: Dict[CameraType, Optional[Tuple[Union[str, bytes], List[float]]]] = {}
+    for camera_type, cam_dir_name in KITTI360_CAMERA_TYPES.items():
+        img_path_png = PATH_2D_RAW_ROOT / log_name / cam_dir_name / "data_rect" / f"{idx:010d}.png"
+        if not img_path_png.exists():
+            raise FileNotFoundError(f"Camera image not found: {img_path_png}")
+
+        cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt"
+        if not cam2pose_txt.exists():
+            raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}")
+
+        lastrow = np.array([0, 0, 0, 1]).reshape(1, 4)
+
+        cam2pose = None
+        with open(cam2pose_txt, "r") as f:
+            for line in f:
+                parts = line.strip().split()
+                key = parts[0][:-1]
+                if key == cam_dir_name:
+                    values = list(map(float, parts[1:]))
+                    matrix = np.array(values).reshape(3, 4)
+                    cam2pose = np.concatenate((matrix, lastrow))
+        if cam2pose is None:
+            raise ValueError(f"No {cam_dir_name} entry found in {cam2pose_txt}")
+
+        if data_converter_config.camera_store_option == "path":
+            camera_data = str(img_path_png), cam2pose.flatten().tolist()
+        elif data_converter_config.camera_store_option == "binary":
+            with open(img_path_png, "rb") as f:
+                camera_data = f.read(), cam2pose.flatten().tolist()
+        else:
+            raise NotImplementedError(
+                f"Unknown camera_store_option: {data_converter_config.camera_store_option}"
+            )
+        camera_dict[camera_type] = camera_data
+    return camera_dict
diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
new file mode 100644
index 00000000..da79cf3e
--- /dev/null
+++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
@@ -0,0 +1,102 @@
+import numpy as np
+
+from d123.dataset.dataset_specific.kitti_360.labels import kittiId2label
+
+from scipy.linalg import polar
+from scipy.spatial.transform import Rotation as R
+
+from d123.common.geometry.base import StateSE3
+from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3
+
+DEFAULT_ROLL = 0.0
+DEFAULT_PITCH = 0.0
+
+MAX_N = 1000
+
+def local2global(semanticId, instanceId):
+    globalId = semanticId * MAX_N + instanceId
+    if isinstance(globalId, np.ndarray):
+        return globalId.astype(np.int32)
+    else:
+        return int(globalId)
+
+def global2local(globalId):
+    semanticId = globalId // MAX_N
+    instanceId = globalId % MAX_N
+    if isinstance(globalId, np.ndarray):
+        return semanticId.astype(np.int32), instanceId.astype(np.int32)
+    else:
+        return int(semanticId), int(instanceId)
+
+class KITTI360Bbox3D():
+    # Constructor
+    def __init__(self):
+
+        # the ID of the corresponding object
+        self.semanticId = -1
+        self.instanceId = -1
+        self.annotationId = -1
+        self.globalID = -1
+
+        # the window that contains the bbox
+        self.start_frame = -1
+        self.end_frame = -1
+
+        # timestamp of the bbox (-1 if static)
+        self.timestamp = -1
+
+        # name
+        self.name = ''
+
+    def parseOpencvMatrix(self, node):
+        rows = int(node.find('rows').text)
+        cols = int(node.find('cols').text)
+        data = node.find('data').text.split(' ')
+
+        mat = []
+        for d in data:
+            d = d.replace('\n', '')
+            if len(d) < 1:
+                continue
+            mat.append(float(d))
+        mat = np.reshape(mat, [rows, cols])
+        return mat
+
+    def parseBbox(self, child):
+        semanticIdKITTI = int(child.find('semanticId').text)
+        self.semanticId = kittiId2label[semanticIdKITTI].id
+        self.instanceId = int(child.find('instanceId').text)
+        self.name = kittiId2label[semanticIdKITTI].name
+
+        self.start_frame = int(child.find('start_frame').text)
+        self.end_frame = int(child.find('end_frame').text)
+
+        self.timestamp = int(child.find('timestamp').text)
+
+        self.annotationId = int(child.find('index').text) + 1
+
+        self.globalID = local2global(self.semanticId, self.instanceId)
+        transform = self.parseOpencvMatrix(child.find('transform'))
+        self.R = transform[:3, :3]
+        self.T = transform[:3, 3]
+
+    def polar_decompose_rotation_scale(self):
+        # The annotation transform mixes rotation and scale; a polar decomposition
+        # splits it into a pure rotation and a symmetric scale matrix, whose diagonal
+        # gives the box extents.
+        Rm, Sm = polar(self.R)
+        scale = np.diag(Sm)
+        yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', degrees=False)
+
+        return scale, (yaw, pitch, roll)
+
+    def get_state_array(self):
+        scale, (yaw, pitch, roll) = self.polar_decompose_rotation_scale()
+        center = StateSE3(
+            x=self.T[0],
+            y=self.T[1],
+            z=self.T[2],
+            roll=DEFAULT_ROLL,
+            pitch=DEFAULT_PITCH,
+            yaw=yaw,
+        )
+        bounding_box_se3 = BoundingBoxSE3(center, scale[0], scale[1], scale[2])
+
+        return bounding_box_se3.array
\ No newline at end of file
diff --git a/d123/dataset/dataset_specific/kitti_360/labels.py b/d123/dataset/dataset_specific/kitti_360/labels.py
new file mode 100644
index 00000000..38f8a91c
--- /dev/null
+++ b/d123/dataset/dataset_specific/kitti_360/labels.py
@@ -0,0 +1,168 @@
+#!/usr/bin/python
+#
+# KITTI-360 labels
+#
+
+from collections import namedtuple
+
+
+#--------------------------------------------------------------------------------
+# Definitions
+#--------------------------------------------------------------------------------
+
+# a label and all meta information
+Label = namedtuple( 'Label' , [
+
+    'name'        , # The identifier of this label, e.g. 'car', 'person', ... .
+                    # We use them to uniquely name a class
+
+    'id'          , # An integer ID that is associated with this label.
+ # The IDs are used to represent the label in ground truth images + # An ID of -1 means that this label does not have an ID and thus + # is ignored when creating ground truth images (e.g. license plate). + # Do not modify these IDs, since exactly these IDs are expected by the + # evaluation server. + + 'kittiId' , # An integer ID that is associated with this label for KITTI-360 + # NOT FOR RELEASING + + 'trainId' , # Feel free to modify these IDs as suitable for your method. Then create + # ground truth images with train IDs, using the tools provided in the + # 'preparation' folder. However, make sure to validate or submit results + # to our evaluation server using the regular IDs above! + # For trainIds, multiple labels might have the same ID. Then, these labels + # are mapped to the same class in the ground truth images. For the inverse + # mapping, we use the label that is defined first in the list below. + # For example, mapping all void-type classes to the same ID in training, + # might make sense for some approaches. + # Max value is 255! + + 'category' , # The name of the category that this label belongs to + + 'categoryId' , # The ID of this category. Used to create ground truth images + # on category level. + + 'hasInstances', # Whether this label distinguishes between single instances or not + + 'ignoreInEval', # Whether pixels having this class as ground truth label are ignored + # during evaluations or not + + 'ignoreInInst', # Whether pixels having this class as ground truth label are ignored + # during evaluations of instance segmentation or not + + 'color' , # The color of this label + ] ) + + +#-------------------------------------------------------------------------------- +# A list of all labels +#-------------------------------------------------------------------------------- + +# Please adapt the train IDs as appropriate for your approach. +# Note that you might want to ignore labels with ID 255 during training. +# Further note that the current train IDs are only a suggestion. You can use whatever you like. +# Make sure to provide your results using the original IDs and not the training IDs. +# Note that many IDs are ignored in evaluation and thus you never need to predict these! 
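+#
+# Quick sanity examples against the lookup tables defined below (the values can be
+# read off the table): name2label['car'].id == 26 and kittiId2label[13].name == 'car'.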
+ +labels = [ + # name id kittiId, trainId category catId hasInstances ignoreInEval ignoreInInst color + Label( 'unlabeled' , 0 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ), + Label( 'ego vehicle' , 1 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ), + Label( 'rectification border' , 2 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ), + Label( 'out of roi' , 3 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ), + Label( 'static' , 4 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ), + Label( 'dynamic' , 5 , -1 , 255 , 'void' , 0 , False , True , True , (111, 74, 0) ), + Label( 'ground' , 6 , -1 , 255 , 'void' , 0 , False , True , True , ( 81, 0, 81) ), + Label( 'road' , 7 , 1 , 0 , 'flat' , 1 , False , False , False , (128, 64,128) ), + Label( 'sidewalk' , 8 , 3 , 1 , 'flat' , 1 , False , False , False , (244, 35,232) ), + Label( 'parking' , 9 , 2 , 255 , 'flat' , 1 , False , True , True , (250,170,160) ), + Label( 'rail track' , 10 , 10, 255 , 'flat' , 1 , False , True , True , (230,150,140) ), + Label( 'building' , 11 , 11, 2 , 'construction' , 2 , True , False , False , ( 70, 70, 70) ), + Label( 'wall' , 12 , 7 , 3 , 'construction' , 2 , False , False , False , (102,102,156) ), + Label( 'fence' , 13 , 8 , 4 , 'construction' , 2 , False , False , False , (190,153,153) ), + Label( 'guard rail' , 14 , 30, 255 , 'construction' , 2 , False , True , True , (180,165,180) ), + Label( 'bridge' , 15 , 31, 255 , 'construction' , 2 , False , True , True , (150,100,100) ), + Label( 'tunnel' , 16 , 32, 255 , 'construction' , 2 , False , True , True , (150,120, 90) ), + Label( 'pole' , 17 , 21, 5 , 'object' , 3 , True , False , True , (153,153,153) ), + Label( 'polegroup' , 18 , -1 , 255 , 'object' , 3 , False , True , True , (153,153,153) ), + Label( 'traffic light' , 19 , 23, 6 , 'object' , 3 , True , False , True , (250,170, 30) ), + Label( 'traffic sign' , 20 , 24, 7 , 'object' , 3 , True , False , True , (220,220, 0) ), + Label( 'vegetation' , 21 , 5 , 8 , 'nature' , 4 , False , False , False , (107,142, 35) ), + Label( 'terrain' , 22 , 4 , 9 , 'nature' , 4 , False , False , False , (152,251,152) ), + Label( 'sky' , 23 , 9 , 10 , 'sky' , 5 , False , False , False , ( 70,130,180) ), + Label( 'person' , 24 , 19, 11 , 'human' , 6 , True , False , False , (220, 20, 60) ), + Label( 'rider' , 25 , 20, 12 , 'human' , 6 , True , False , False , (255, 0, 0) ), + Label( 'car' , 26 , 13, 13 , 'vehicle' , 7 , True , False , False , ( 0, 0,142) ), + Label( 'truck' , 27 , 14, 14 , 'vehicle' , 7 , True , False , False , ( 0, 0, 70) ), + Label( 'bus' , 28 , 34, 15 , 'vehicle' , 7 , True , False , False , ( 0, 60,100) ), + Label( 'caravan' , 29 , 16, 255 , 'vehicle' , 7 , True , True , True , ( 0, 0, 90) ), + Label( 'trailer' , 30 , 15, 255 , 'vehicle' , 7 , True , True , True , ( 0, 0,110) ), + Label( 'train' , 31 , 33, 16 , 'vehicle' , 7 , True , False , False , ( 0, 80,100) ), + Label( 'motorcycle' , 32 , 17, 17 , 'vehicle' , 7 , True , False , False , ( 0, 0,230) ), + Label( 'bicycle' , 33 , 18, 18 , 'vehicle' , 7 , True , False , False , (119, 11, 32) ), + Label( 'garage' , 34 , 12, 2 , 'construction' , 2 , True , True , True , ( 64,128,128) ), + Label( 'gate' , 35 , 6 , 4 , 'construction' , 2 , False , True , True , (190,153,153) ), + Label( 'stop' , 36 , 29, 255 , 'construction' , 2 , True , True , True , (150,120, 90) ), + Label( 'smallpole' , 37 , 22, 5 , 'object' , 3 , True , True , True , (153,153,153) ), + Label( 'lamp' , 38 , 25, 255 , 
'object' , 3 , True , True , True , (0, 64, 64) ), + Label( 'trash bin' , 39 , 26, 255 , 'object' , 3 , True , True , True , (0, 128,192) ), + Label( 'vending machine' , 40 , 27, 255 , 'object' , 3 , True , True , True , (128, 64, 0) ), + Label( 'box' , 41 , 28, 255 , 'object' , 3 , True , True , True , (64, 64,128) ), + Label( 'unknown construction' , 42 , 35, 255 , 'void' , 0 , False , True , True , (102, 0, 0) ), + Label( 'unknown vehicle' , 43 , 36, 255 , 'void' , 0 , False , True , True , ( 51, 0, 51) ), + Label( 'unknown object' , 44 , 37, 255 , 'void' , 0 , False , True , True , ( 32, 32, 32) ), + Label( 'license plate' , -1 , -1, -1 , 'vehicle' , 7 , False , True , True , ( 0, 0,142) ), +] + +#-------------------------------------------------------------------------------- +# Create dictionaries for a fast lookup +#-------------------------------------------------------------------------------- + +# Please refer to the main method below for example usages! + +# name to label object +name2label = { label.name : label for label in labels } +# id to label object +id2label = { label.id : label for label in labels } +# trainId to label object +trainId2label = { label.trainId : label for label in reversed(labels) } +# KITTI-360 ID to cityscapes ID +kittiId2label = { label.kittiId : label for label in labels } +# category to list of label objects +category2labels = {} +for label in labels: + category = label.category + if category in category2labels: + category2labels[category].append(label) + else: + category2labels[category] = [label] + +#-------------------------------------------------------------------------------- +# Assure single instance name +#-------------------------------------------------------------------------------- + +# returns the label name that describes a single instance (if possible) +# e.g. 
input | output +# ---------------------- +# car | car +# cargroup | car +# foo | None +# foogroup | None +# skygroup | None +def assureSingleInstanceName( name ): + # if the name is known, it is not a group + if name in name2label: + return name + # test if the name actually denotes a group + if not name.endswith("group"): + return None + # remove group + name = name[:-len("group")] + # test if the new name exists + if not name in name2label: + return None + # test if the new name denotes a label that actually has instances + if not name2label[name].hasInstances: + return None + # all good then + return name diff --git a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml index b844fa03..e1c76c60 100644 --- a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml +++ b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml @@ -17,11 +17,7 @@ defaults: - datasets: - kitti360_dataset # - nuplan_private_dataset -<<<<<<< HEAD # - carla_dataset -======= - - carla_dataset ->>>>>>> dev_v0.0.6 # - wopd_dataset force_log_conversion: True diff --git a/d123/script/config/datasets/kitti360_dataset.yaml b/d123/script/config/datasets/kitti360_dataset.yaml index 418d36a4..17b9e863 100644 --- a/d123/script/config/datasets/kitti360_dataset.yaml +++ b/d123/script/config/datasets/kitti360_dataset.yaml @@ -1,4 +1,4 @@ -nuplan_dataset: +kitti360_dataset: _target_: d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter _convert_: 'all' diff --git a/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/config.yaml b/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/config.yaml new file mode 100644 index 00000000..a505c4d2 --- /dev/null +++ b/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/config.yaml @@ -0,0 +1,60 @@ +worker: + _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed + _convert_: all + master_node_ip: null + threads_per_node: null + debug_mode: false + log_to_driver: true + logs_subdir: logs + use_distributed: false +scene_filter: + _target_: d123.dataset.scene.scene_filter.SceneFilter + _convert_: all + split_types: null + split_names: null + log_names: null + map_names: null + scene_tokens: null + timestamp_threshold_s: null + ego_displacement_minimum_m: null + duration_s: 9.2 + history_s: 3.0 +scene_builder: + _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder + _convert_: all + dataset_path: ${d123_data_root} +distributed_timeout_seconds: 7200 +selected_simulation_metrics: null +verbose: false +logger_level: info +logger_format_string: null +max_number_of_workers: null +gpu: true +seed: 42 +d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} +d123_maps_root: ${oc.env:D123_MAPS_ROOT} +d123_data_root: ${oc.env:D123_DATA_ROOT} +nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} +nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} +nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} +experiment_name: kitti360_test +date_format: '%Y.%m.%d.%H.%M.%S' +experiment_uid: ${now:${date_format}} +output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} +force_log_conversion: true +force_map_conversion: false +datasets: + nuplan_dataset: + _target_: d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter + _convert_: all + splits: + - kitti360 + log_path: ${oc.env:KITTI360_DATA_ROOT} + data_converter_config: + _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig + _convert_: all + 
output_path: ${d123_data_root} + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + camera_store_option: path + lidar_store_option: path diff --git a/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/hydra.yaml b/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/hydra.yaml new file mode 100644 index 00000000..406ccbe7 --- /dev/null +++ b/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/hydra.yaml @@ -0,0 +1,177 @@ +hydra: + run: + dir: ${output_dir} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] + - %(message)s' + log_colors: + DEBUG: purple + INFO: green + WARNING: yellow + ERROR: red + CRITICAL: red + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: + - pkg://d123.script.config + - pkg://d123.script.config.common + callbacks: {} + output_subdir: ${output_dir}/code/hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - experiment_name=kitti360_test + job: + name: run_dataset_conversion + chdir: false + override_dirname: experiment_name=kitti360_test + id: ??? + num: ??? 
+ config_name: default_dataset_conversion + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/jbwang/d123/d123/script + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/jbwang/d123/d123/script/config/dataset_conversion + schema: file + provider: main + - path: hydra_plugins.hydra_colorlog.conf + schema: pkg + provider: hydra-colorlog + - path: d123.script.config + schema: pkg + provider: hydra.searchpath in main + - path: d123.script.config.common + schema: pkg + provider: hydra.searchpath in main + - path: '' + schema: structured + provider: schema + output_dir: /home/jbwang/d123/exp/kitti360_test/2025.08.15.14.31.57 + choices: + scene_builder: default_scene_builder + scene_filter: all_scenes + worker: ray_distributed + hydra/env: default + hydra/callbacks: null + hydra/job_logging: colorlog + hydra/hydra_logging: colorlog + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/overrides.yaml b/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/overrides.yaml new file mode 100644 index 00000000..6c8e6217 --- /dev/null +++ b/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/overrides.yaml @@ -0,0 +1 @@ +- experiment_name=kitti360_test diff --git a/exp/kitti360_test/2025.08.15.14.31.57/log.txt b/exp/kitti360_test/2025.08.15.14.31.57/log.txt new file mode 100644 index 00000000..984f705a --- /dev/null +++ b/exp/kitti360_test/2025.08.15.14.31.57/log.txt @@ -0,0 +1,10 @@ +2025-08-15 14:31:57,385 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... +2025-08-15 14:32:14,105 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! +2025-08-15 14:32:35,603 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed +2025-08-15 14:32:35,604 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 +Number of CPUs per node: 64 +Number of GPUs per node: 8 +Number of threads across all nodes: 64 +2025-08-15 14:32:35,604 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! +2025-08-15 14:32:35,604 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... +2025-08-15 14:32:35,605 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... 
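
For a quick sanity check of a converted log, the Arrow IPC file written by _write_recording_table can be read back with plain pyarrow. A minimal sketch, assuming a log was converted to an .arrow file under the d123 data root (the exact path and log name below are illustrative, not taken from this patch):

import json
import pyarrow as pa

# Illustrative path; point this at whatever the converter actually wrote.
with pa.OSFile("/path/to/d123_data/2013_05_28_drive_0000_sync.arrow", "rb") as source:
    table = pa.ipc.open_file(source).read_all()

# Log-level metadata travels in the schema metadata (keys are bytes), mirroring
# recording_schema.with_metadata(...) above.
log_metadata = json.loads(table.schema.metadata[b"log_metadata"])
print(log_metadata["log_name"], table.num_rows, table.column_names)
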
diff --git a/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/config.yaml b/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/config.yaml new file mode 100644 index 00000000..0fd6120d --- /dev/null +++ b/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/config.yaml @@ -0,0 +1,60 @@ +worker: + _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed + _convert_: all + master_node_ip: null + threads_per_node: null + debug_mode: false + log_to_driver: true + logs_subdir: logs + use_distributed: false +scene_filter: + _target_: d123.dataset.scene.scene_filter.SceneFilter + _convert_: all + split_types: null + split_names: null + log_names: null + map_names: null + scene_tokens: null + timestamp_threshold_s: null + ego_displacement_minimum_m: null + duration_s: 9.2 + history_s: 3.0 +scene_builder: + _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder + _convert_: all + dataset_path: ${d123_data_root} +distributed_timeout_seconds: 7200 +selected_simulation_metrics: null +verbose: false +logger_level: info +logger_format_string: null +max_number_of_workers: null +gpu: true +seed: 42 +d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} +d123_maps_root: ${oc.env:D123_MAPS_ROOT} +d123_data_root: ${oc.env:D123_DATA_ROOT} +nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} +nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} +nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} +experiment_name: kitti360_test +date_format: '%Y.%m.%d.%H.%M.%S' +experiment_uid: ${now:${date_format}} +output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} +force_log_conversion: true +force_map_conversion: false +datasets: + kitti360_dataset: + _target_: d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter + _convert_: all + splits: + - kitti360 + log_path: ${oc.env:KITTI360_DATA_ROOT} + data_converter_config: + _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig + _convert_: all + output_path: ${d123_data_root} + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + camera_store_option: path + lidar_store_option: path diff --git a/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/hydra.yaml b/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/hydra.yaml new file mode 100644 index 00000000..4eee2c65 --- /dev/null +++ b/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/hydra.yaml @@ -0,0 +1,177 @@ +hydra: + run: + dir: ${output_dir} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. 
+ + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] + - %(message)s' + log_colors: + DEBUG: purple + INFO: green + WARNING: yellow + ERROR: red + CRITICAL: red + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: + - pkg://d123.script.config + - pkg://d123.script.config.common + callbacks: {} + output_subdir: ${output_dir}/code/hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - experiment_name=kitti360_test + job: + name: run_dataset_conversion + chdir: false + override_dirname: experiment_name=kitti360_test + id: ??? + num: ??? + config_name: default_dataset_conversion + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/jbwang/d123/d123/script + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/jbwang/d123/d123/script/config/dataset_conversion + schema: file + provider: main + - path: hydra_plugins.hydra_colorlog.conf + schema: pkg + provider: hydra-colorlog + - path: d123.script.config + schema: pkg + provider: hydra.searchpath in main + - path: d123.script.config.common + schema: pkg + provider: hydra.searchpath in main + - path: '' + schema: structured + provider: schema + output_dir: /home/jbwang/d123/exp/kitti360_test/2025.08.15.14.36.40 + choices: + scene_builder: default_scene_builder + scene_filter: all_scenes + worker: ray_distributed + hydra/env: default + hydra/callbacks: null + hydra/job_logging: colorlog + hydra/hydra_logging: colorlog + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/overrides.yaml b/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/overrides.yaml new file mode 100644 index 00000000..6c8e6217 --- /dev/null +++ b/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/overrides.yaml @@ -0,0 +1 @@ +- experiment_name=kitti360_test diff --git a/exp/kitti360_test/2025.08.15.14.36.40/log.txt b/exp/kitti360_test/2025.08.15.14.36.40/log.txt new file mode 100644 index 00000000..5f939dac --- /dev/null +++ b/exp/kitti360_test/2025.08.15.14.36.40/log.txt @@ -0,0 +1,10 @@ +2025-08-15 14:36:40,989 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... 
+2025-08-15 14:36:56,167 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! +2025-08-15 14:37:18,685 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed +2025-08-15 14:37:18,686 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 +Number of CPUs per node: 64 +Number of GPUs per node: 8 +Number of threads across all nodes: 64 +2025-08-15 14:37:18,686 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! +2025-08-15 14:37:18,686 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... +2025-08-15 14:37:18,687 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... diff --git a/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/config.yaml b/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/config.yaml new file mode 100644 index 00000000..5ce47ba9 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/config.yaml @@ -0,0 +1,60 @@ +worker: + _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed + _convert_: all + master_node_ip: null + threads_per_node: null + debug_mode: false + log_to_driver: true + logs_subdir: logs + use_distributed: false +scene_filter: + _target_: d123.dataset.scene.scene_filter.SceneFilter + _convert_: all + split_types: null + split_names: null + log_names: null + map_names: null + scene_tokens: null + timestamp_threshold_s: null + ego_displacement_minimum_m: null + duration_s: 9.2 + history_s: 3.0 +scene_builder: + _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder + _convert_: all + dataset_path: ${d123_data_root} +distributed_timeout_seconds: 7200 +selected_simulation_metrics: null +verbose: false +logger_level: info +logger_format_string: null +max_number_of_workers: null +gpu: true +seed: 42 +d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} +d123_maps_root: ${oc.env:D123_MAPS_ROOT} +d123_data_root: ${oc.env:D123_DATA_ROOT} +nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} +nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} +nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} +experiment_name: kitti_test2 +date_format: '%Y.%m.%d.%H.%M.%S' +experiment_uid: ${now:${date_format}} +output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} +force_log_conversion: true +force_map_conversion: false +datasets: + kitti360_dataset: + _target_: d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter + _convert_: all + splits: + - kitti360 + log_path: ${oc.env:KITTI360_DATA_ROOT} + data_converter_config: + _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig + _convert_: all + output_path: ${d123_data_root} + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + camera_store_option: path + lidar_store_option: path diff --git a/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/hydra.yaml b/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/hydra.yaml new file mode 100644 index 00000000..2d1c615a --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/hydra.yaml @@ -0,0 +1,177 @@ +hydra: + run: + dir: ${output_dir} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + 
_target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] + - %(message)s' + log_colors: + DEBUG: purple + INFO: green + WARNING: yellow + ERROR: red + CRITICAL: red + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: + - pkg://d123.script.config + - pkg://d123.script.config.common + callbacks: {} + output_subdir: ${output_dir}/code/hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - experiment_name=kitti_test2 + job: + name: run_dataset_conversion + chdir: false + override_dirname: experiment_name=kitti_test2 + id: ??? + num: ??? 
+ config_name: default_dataset_conversion + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/jbwang/d123 + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/jbwang/d123/d123/script/config/dataset_conversion + schema: file + provider: main + - path: hydra_plugins.hydra_colorlog.conf + schema: pkg + provider: hydra-colorlog + - path: d123.script.config + schema: pkg + provider: hydra.searchpath in main + - path: d123.script.config.common + schema: pkg + provider: hydra.searchpath in main + - path: '' + schema: structured + provider: schema + output_dir: /home/jbwang/d123/exp/kitti_test2/2025.08.15.14.40.29 + choices: + scene_builder: default_scene_builder + scene_filter: all_scenes + worker: ray_distributed + hydra/env: default + hydra/callbacks: null + hydra/job_logging: colorlog + hydra/hydra_logging: colorlog + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/overrides.yaml b/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/overrides.yaml new file mode 100644 index 00000000..676c1042 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/overrides.yaml @@ -0,0 +1 @@ +- experiment_name=kitti_test2 diff --git a/exp/kitti_test2/2025.08.15.14.40.29/log.txt b/exp/kitti_test2/2025.08.15.14.40.29/log.txt new file mode 100644 index 00000000..8437d38e --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.40.29/log.txt @@ -0,0 +1,10 @@ +2025-08-15 14:40:29,427 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... +2025-08-15 14:40:42,538 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! +2025-08-15 14:41:00,324 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed +2025-08-15 14:41:00,325 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 +Number of CPUs per node: 64 +Number of GPUs per node: 8 +Number of threads across all nodes: 64 +2025-08-15 14:41:00,325 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! +2025-08-15 14:41:00,325 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... +2025-08-15 14:41:00,326 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... 
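
Aside: the detection tokens written by _extract_detections are stringified global IDs from kitti_360_helper, which pack semanticId * 1000 + instanceId. A small round-trip sketch (the import path follows this patch's package layout):

from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import global2local, local2global

# 'car' has semanticId 26 in labels.py; instance 7 is an arbitrary example.
token = local2global(26, 7)
assert token == 26007
assert global2local(token) == (26, 7)
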
diff --git a/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/config.yaml b/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/config.yaml new file mode 100644 index 00000000..de70bfa3 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/config.yaml @@ -0,0 +1,60 @@ +worker: + _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed + _convert_: all + master_node_ip: null + threads_per_node: null + debug_mode: false + log_to_driver: true + logs_subdir: logs + use_distributed: false +scene_filter: + _target_: d123.dataset.scene.scene_filter.SceneFilter + _convert_: all + split_types: null + split_names: null + log_names: null + map_names: null + scene_tokens: null + timestamp_threshold_s: null + ego_displacement_minimum_m: null + duration_s: 9.2 + history_s: 3.0 +scene_builder: + _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder + _convert_: all + dataset_path: ${d123_data_root} +distributed_timeout_seconds: 7200 +selected_simulation_metrics: null +verbose: false +logger_level: info +logger_format_string: null +max_number_of_workers: null +gpu: true +seed: 42 +d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} +d123_maps_root: ${oc.env:D123_MAPS_ROOT} +d123_data_root: ${oc.env:D123_DATA_ROOT} +nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} +nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} +nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} +experiment_name: kitti_test2 +date_format: '%Y.%m.%d.%H.%M.%S' +experiment_uid: ${now:${date_format}} +output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} +force_log_conversion: true +force_map_conversion: false +datasets: + nuplan_private_dataset: + _target_: d123.dataset.dataset_specific.nuplan.nuplan_data_converter.NuplanDataConverter + _convert_: all + splits: + - nuplan_private_test + log_path: ${oc.env:NUPLAN_DATA_ROOT}/nuplan-v1.1/splits + data_converter_config: + _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig + _convert_: all + output_path: ${d123_data_root} + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + camera_store_option: path + lidar_store_option: path diff --git a/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/hydra.yaml b/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/hydra.yaml new file mode 100644 index 00000000..cca44d29 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/hydra.yaml @@ -0,0 +1,177 @@ +hydra: + run: + dir: ${output_dir} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. 
+ + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] + - %(message)s' + log_colors: + DEBUG: purple + INFO: green + WARNING: yellow + ERROR: red + CRITICAL: red + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: + - pkg://d123.script.config + - pkg://d123.script.config.common + callbacks: {} + output_subdir: ${output_dir}/code/hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - experiment_name=kitti_test2 + job: + name: run_dataset_conversion + chdir: false + override_dirname: experiment_name=kitti_test2 + id: ??? + num: ??? + config_name: default_dataset_conversion + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/jbwang/d123 + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/jbwang/d123/d123/script/config/dataset_conversion + schema: file + provider: main + - path: hydra_plugins.hydra_colorlog.conf + schema: pkg + provider: hydra-colorlog + - path: d123.script.config + schema: pkg + provider: hydra.searchpath in main + - path: d123.script.config.common + schema: pkg + provider: hydra.searchpath in main + - path: '' + schema: structured + provider: schema + output_dir: /home/jbwang/d123/exp/kitti_test2/2025.08.15.14.43.13 + choices: + scene_builder: default_scene_builder + scene_filter: all_scenes + worker: ray_distributed + hydra/env: default + hydra/callbacks: null + hydra/job_logging: colorlog + hydra/hydra_logging: colorlog + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/overrides.yaml b/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/overrides.yaml new file mode 100644 index 00000000..676c1042 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/overrides.yaml @@ -0,0 +1 @@ +- experiment_name=kitti_test2 diff --git a/exp/kitti_test2/2025.08.15.14.43.13/log.txt b/exp/kitti_test2/2025.08.15.14.43.13/log.txt new file mode 100644 index 00000000..fec50568 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.43.13/log.txt @@ -0,0 +1,12 @@ +2025-08-15 14:43:13,965 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... 
+2025-08-15 14:43:24,401 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! +2025-08-15 14:43:39,643 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed +2025-08-15 14:43:39,644 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 +Number of CPUs per node: 64 +Number of GPUs per node: 8 +Number of threads across all nodes: 64 +2025-08-15 14:43:39,644 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! +2025-08-15 14:43:39,644 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... +2025-08-15 14:43:39,645 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... +2025-08-15 14:43:44,316 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:21} Building RawDataProcessor...DONE! +2025-08-15 14:43:44,316 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:34} Processing dataset: NuplanDataConverter diff --git a/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/config.yaml b/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/config.yaml new file mode 100644 index 00000000..5ce47ba9 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/config.yaml @@ -0,0 +1,60 @@ +worker: + _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed + _convert_: all + master_node_ip: null + threads_per_node: null + debug_mode: false + log_to_driver: true + logs_subdir: logs + use_distributed: false +scene_filter: + _target_: d123.dataset.scene.scene_filter.SceneFilter + _convert_: all + split_types: null + split_names: null + log_names: null + map_names: null + scene_tokens: null + timestamp_threshold_s: null + ego_displacement_minimum_m: null + duration_s: 9.2 + history_s: 3.0 +scene_builder: + _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder + _convert_: all + dataset_path: ${d123_data_root} +distributed_timeout_seconds: 7200 +selected_simulation_metrics: null +verbose: false +logger_level: info +logger_format_string: null +max_number_of_workers: null +gpu: true +seed: 42 +d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} +d123_maps_root: ${oc.env:D123_MAPS_ROOT} +d123_data_root: ${oc.env:D123_DATA_ROOT} +nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} +nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} +nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} +experiment_name: kitti_test2 +date_format: '%Y.%m.%d.%H.%M.%S' +experiment_uid: ${now:${date_format}} +output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} +force_log_conversion: true +force_map_conversion: false +datasets: + kitti360_dataset: + _target_: d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter + _convert_: all + splits: + - kitti360 + log_path: ${oc.env:KITTI360_DATA_ROOT} + data_converter_config: + _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig + _convert_: all + output_path: ${d123_data_root} + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + camera_store_option: path + lidar_store_option: path diff --git a/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/hydra.yaml b/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/hydra.yaml new file mode 100644 index 00000000..bd9698a2 --- /dev/null +++ 
b/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/hydra.yaml @@ -0,0 +1,177 @@ +hydra: + run: + dir: ${output_dir} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] + - %(message)s' + log_colors: + DEBUG: purple + INFO: green + WARNING: yellow + ERROR: red + CRITICAL: red + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: + - pkg://d123.script.config + - pkg://d123.script.config.common + callbacks: {} + output_subdir: ${output_dir}/code/hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - experiment_name=kitti_test2 + job: + name: run_dataset_conversion + chdir: false + override_dirname: experiment_name=kitti_test2 + id: ??? + num: ??? 
+ config_name: default_dataset_conversion + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/jbwang/d123 + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/jbwang/d123/d123/script/config/dataset_conversion + schema: file + provider: main + - path: hydra_plugins.hydra_colorlog.conf + schema: pkg + provider: hydra-colorlog + - path: d123.script.config + schema: pkg + provider: hydra.searchpath in main + - path: d123.script.config.common + schema: pkg + provider: hydra.searchpath in main + - path: '' + schema: structured + provider: schema + output_dir: /home/jbwang/d123/exp/kitti_test2/2025.08.15.14.46.49 + choices: + scene_builder: default_scene_builder + scene_filter: all_scenes + worker: ray_distributed + hydra/env: default + hydra/callbacks: null + hydra/job_logging: colorlog + hydra/hydra_logging: colorlog + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/overrides.yaml b/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/overrides.yaml new file mode 100644 index 00000000..676c1042 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/overrides.yaml @@ -0,0 +1 @@ +- experiment_name=kitti_test2 diff --git a/exp/kitti_test2/2025.08.15.14.46.49/log.txt b/exp/kitti_test2/2025.08.15.14.46.49/log.txt new file mode 100644 index 00000000..00286f48 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.46.49/log.txt @@ -0,0 +1,10 @@ +2025-08-15 14:46:49,566 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... +2025-08-15 14:46:59,509 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! +2025-08-15 14:47:14,118 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed +2025-08-15 14:47:14,118 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 +Number of CPUs per node: 64 +Number of GPUs per node: 8 +Number of threads across all nodes: 64 +2025-08-15 14:47:14,119 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! +2025-08-15 14:47:14,119 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... +2025-08-15 14:47:14,122 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... 
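A note on the exp/ files being committed here: these are Hydra's per-run artifacts — code/hydra/config.yaml is the composed job config, code/hydra/hydra.yaml the full Hydra config, and code/hydra/overrides.yaml the CLI overrides — written under ${output_dir}/code/hydra as set by output_subdir. A minimal sketch of an entrypoint that would produce this layout, assuming standard Hydra 1.3 conventions and the config_name shown in these dumps:

import hydra
from omegaconf import DictConfig, OmegaConf

@hydra.main(config_path="d123/script/config/dataset_conversion",
            config_name="default_dataset_conversion", version_base="1.3")
def main(cfg: DictConfig) -> None:
    # cfg is the composed config captured in code/hydra/config.yaml above.
    print(OmegaConf.to_yaml(cfg.scene_filter))

if __name__ == "__main__":
    main()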
diff --git a/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/config.yaml b/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/config.yaml new file mode 100644 index 00000000..5ce47ba9 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/config.yaml @@ -0,0 +1,60 @@ +worker: + _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed + _convert_: all + master_node_ip: null + threads_per_node: null + debug_mode: false + log_to_driver: true + logs_subdir: logs + use_distributed: false +scene_filter: + _target_: d123.dataset.scene.scene_filter.SceneFilter + _convert_: all + split_types: null + split_names: null + log_names: null + map_names: null + scene_tokens: null + timestamp_threshold_s: null + ego_displacement_minimum_m: null + duration_s: 9.2 + history_s: 3.0 +scene_builder: + _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder + _convert_: all + dataset_path: ${d123_data_root} +distributed_timeout_seconds: 7200 +selected_simulation_metrics: null +verbose: false +logger_level: info +logger_format_string: null +max_number_of_workers: null +gpu: true +seed: 42 +d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} +d123_maps_root: ${oc.env:D123_MAPS_ROOT} +d123_data_root: ${oc.env:D123_DATA_ROOT} +nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} +nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} +nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} +experiment_name: kitti_test2 +date_format: '%Y.%m.%d.%H.%M.%S' +experiment_uid: ${now:${date_format}} +output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} +force_log_conversion: true +force_map_conversion: false +datasets: + kitti360_dataset: + _target_: d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter + _convert_: all + splits: + - kitti360 + log_path: ${oc.env:KITTI360_DATA_ROOT} + data_converter_config: + _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig + _convert_: all + output_path: ${d123_data_root} + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + camera_store_option: path + lidar_store_option: path diff --git a/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/hydra.yaml b/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/hydra.yaml new file mode 100644 index 00000000..acff45d7 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/hydra.yaml @@ -0,0 +1,177 @@ +hydra: + run: + dir: ${output_dir} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. 
+ + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] + - %(message)s' + log_colors: + DEBUG: purple + INFO: green + WARNING: yellow + ERROR: red + CRITICAL: red + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: + - pkg://d123.script.config + - pkg://d123.script.config.common + callbacks: {} + output_subdir: ${output_dir}/code/hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - experiment_name=kitti_test2 + job: + name: run_dataset_conversion + chdir: false + override_dirname: experiment_name=kitti_test2 + id: ??? + num: ??? + config_name: default_dataset_conversion + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/jbwang/d123 + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/jbwang/d123/d123/script/config/dataset_conversion + schema: file + provider: main + - path: hydra_plugins.hydra_colorlog.conf + schema: pkg + provider: hydra-colorlog + - path: d123.script.config + schema: pkg + provider: hydra.searchpath in main + - path: d123.script.config.common + schema: pkg + provider: hydra.searchpath in main + - path: '' + schema: structured + provider: schema + output_dir: /home/jbwang/d123/exp/kitti_test2/2025.08.15.14.50.55 + choices: + scene_builder: default_scene_builder + scene_filter: all_scenes + worker: ray_distributed + hydra/env: default + hydra/callbacks: null + hydra/job_logging: colorlog + hydra/hydra_logging: colorlog + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/overrides.yaml b/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/overrides.yaml new file mode 100644 index 00000000..676c1042 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/overrides.yaml @@ -0,0 +1 @@ +- experiment_name=kitti_test2 diff --git a/exp/kitti_test2/2025.08.15.14.50.55/log.txt b/exp/kitti_test2/2025.08.15.14.50.55/log.txt new file mode 100644 index 00000000..9902e0ce --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.50.55/log.txt @@ -0,0 +1,11 @@ +2025-08-15 14:50:55,950 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... 
+2025-08-15 14:51:19,466 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! +2025-08-15 14:51:52,653 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed +2025-08-15 14:51:52,653 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 +Number of CPUs per node: 64 +Number of GPUs per node: 8 +Number of threads across all nodes: 64 +2025-08-15 14:51:52,654 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! +2025-08-15 14:51:52,654 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... +2025-08-15 14:51:52,654 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... +2025-08-15 14:51:52,655 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:17} Instantiating dataset type: {'_target_': 'd123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter', '_convert_': 'all', 'splits': ['kitti360'], 'log_path': '${oc.env:KITTI360_DATA_ROOT}', 'data_converter_config': {'_target_': 'd123.dataset.dataset_specific.raw_data_converter.DataConverterConfig', '_convert_': 'all', 'output_path': '${d123_data_root}', 'force_log_conversion': '${force_log_conversion}', 'force_map_conversion': '${force_map_conversion}', 'camera_store_option': 'path', 'lidar_store_option': 'path'}} diff --git a/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/config.yaml b/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/config.yaml new file mode 100644 index 00000000..de70bfa3 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/config.yaml @@ -0,0 +1,60 @@ +worker: + _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed + _convert_: all + master_node_ip: null + threads_per_node: null + debug_mode: false + log_to_driver: true + logs_subdir: logs + use_distributed: false +scene_filter: + _target_: d123.dataset.scene.scene_filter.SceneFilter + _convert_: all + split_types: null + split_names: null + log_names: null + map_names: null + scene_tokens: null + timestamp_threshold_s: null + ego_displacement_minimum_m: null + duration_s: 9.2 + history_s: 3.0 +scene_builder: + _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder + _convert_: all + dataset_path: ${d123_data_root} +distributed_timeout_seconds: 7200 +selected_simulation_metrics: null +verbose: false +logger_level: info +logger_format_string: null +max_number_of_workers: null +gpu: true +seed: 42 +d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} +d123_maps_root: ${oc.env:D123_MAPS_ROOT} +d123_data_root: ${oc.env:D123_DATA_ROOT} +nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} +nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} +nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} +experiment_name: kitti_test2 +date_format: '%Y.%m.%d.%H.%M.%S' +experiment_uid: ${now:${date_format}} +output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} +force_log_conversion: true +force_map_conversion: false +datasets: + nuplan_private_dataset: + _target_: d123.dataset.dataset_specific.nuplan.nuplan_data_converter.NuplanDataConverter + _convert_: all + splits: + - nuplan_private_test + log_path: ${oc.env:NUPLAN_DATA_ROOT}/nuplan-v1.1/splits + data_converter_config: + _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig + _convert_: all + 
output_path: ${d123_data_root} + force_log_conversion: ${force_log_conversion} + force_map_conversion: ${force_map_conversion} + camera_store_option: path + lidar_store_option: path diff --git a/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/hydra.yaml b/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/hydra.yaml new file mode 100644 index 00000000..d053f8e7 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/hydra.yaml @@ -0,0 +1,177 @@ +hydra: + run: + dir: ${output_dir} + sweep: + dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} + subdir: ${hydra.job.num} + launcher: + _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher + sweeper: + _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper + max_batch_size: null + params: null + help: + app_name: ${hydra.job.name} + header: '${hydra.help.app_name} is powered by Hydra. + + ' + footer: 'Powered by Hydra (https://hydra.cc) + + Use --hydra-help to view Hydra specific help + + ' + template: '${hydra.help.header} + + == Configuration groups == + + Compose your configuration from those groups (group=option) + + + $APP_CONFIG_GROUPS + + + == Config == + + Override anything in the config (foo.bar=value) + + + $CONFIG + + + ${hydra.help.footer} + + ' + hydra_help: + template: 'Hydra (${hydra.runtime.version}) + + See https://hydra.cc for more info. + + + == Flags == + + $FLAGS_HELP + + + == Configuration groups == + + Compose your configuration from those groups (For example, append hydra/job_logging=disabled + to command line) + + + $HYDRA_CONFIG_GROUPS + + + Use ''--cfg hydra'' to Show the Hydra config. + + ' + hydra_help: ??? + hydra_logging: + version: 1 + formatters: + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + root: + level: INFO + handlers: + - console + disable_existing_loggers: false + job_logging: + version: 1 + formatters: + simple: + format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' + colorlog: + (): colorlog.ColoredFormatter + format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] + - %(message)s' + log_colors: + DEBUG: purple + INFO: green + WARNING: yellow + ERROR: red + CRITICAL: red + handlers: + console: + class: logging.StreamHandler + formatter: colorlog + stream: ext://sys.stdout + file: + class: logging.FileHandler + formatter: simple + filename: ${hydra.job.name}.log + root: + level: INFO + handlers: + - console + - file + disable_existing_loggers: false + env: {} + mode: RUN + searchpath: + - pkg://d123.script.config + - pkg://d123.script.config.common + callbacks: {} + output_subdir: ${output_dir}/code/hydra + overrides: + hydra: + - hydra.mode=RUN + task: + - experiment_name=kitti_test2 + job: + name: run_dataset_conversion + chdir: false + override_dirname: experiment_name=kitti_test2 + id: ??? + num: ??? 
+ config_name: default_dataset_conversion + env_set: {} + env_copy: [] + config: + override_dirname: + kv_sep: '=' + item_sep: ',' + exclude_keys: [] + runtime: + version: 1.3.2 + version_base: '1.3' + cwd: /home/jbwang/d123 + config_sources: + - path: hydra.conf + schema: pkg + provider: hydra + - path: /home/jbwang/d123/d123/script/config/dataset_conversion + schema: file + provider: main + - path: hydra_plugins.hydra_colorlog.conf + schema: pkg + provider: hydra-colorlog + - path: d123.script.config + schema: pkg + provider: hydra.searchpath in main + - path: d123.script.config.common + schema: pkg + provider: hydra.searchpath in main + - path: '' + schema: structured + provider: schema + output_dir: /home/jbwang/d123/exp/kitti_test2/2025.08.15.14.52.39 + choices: + scene_builder: default_scene_builder + scene_filter: all_scenes + worker: ray_distributed + hydra/env: default + hydra/callbacks: null + hydra/job_logging: colorlog + hydra/hydra_logging: colorlog + hydra/hydra_help: default + hydra/help: default + hydra/sweeper: basic + hydra/launcher: basic + hydra/output: default + verbose: false diff --git a/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/overrides.yaml b/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/overrides.yaml new file mode 100644 index 00000000..676c1042 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/overrides.yaml @@ -0,0 +1 @@ +- experiment_name=kitti_test2 diff --git a/exp/kitti_test2/2025.08.15.14.52.39/log.txt b/exp/kitti_test2/2025.08.15.14.52.39/log.txt new file mode 100644 index 00000000..e2585299 --- /dev/null +++ b/exp/kitti_test2/2025.08.15.14.52.39/log.txt @@ -0,0 +1,11 @@ +2025-08-15 14:52:39,717 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... +2025-08-15 14:53:02,994 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! +2025-08-15 14:53:36,548 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed +2025-08-15 14:53:36,549 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 +Number of CPUs per node: 64 +Number of GPUs per node: 8 +Number of threads across all nodes: 64 +2025-08-15 14:53:36,549 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! +2025-08-15 14:53:36,549 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... +2025-08-15 14:53:36,550 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... 
+2025-08-15 14:53:36,550 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:17} Instantiating dataset type: {'_target_': 'd123.dataset.dataset_specific.nuplan.nuplan_data_converter.NuplanDataConverter', '_convert_': 'all', 'splits': ['nuplan_private_test'], 'log_path': '${oc.env:NUPLAN_DATA_ROOT}/nuplan-v1.1/splits', 'data_converter_config': {'_target_': 'd123.dataset.dataset_specific.raw_data_converter.DataConverterConfig', '_convert_': 'all', 'output_path': '${d123_data_root}', 'force_log_conversion': '${force_log_conversion}', 'force_map_conversion': '${force_map_conversion}', 'camera_store_option': 'path', 'lidar_store_option': 'path'}}
diff --git a/jbwang_test.py b/jbwang_test.py
index ac3afac5..ff320df9 100644
--- a/jbwang_test.py
+++ b/jbwang_test.py
@@ -65,4 +65,21 @@
 # log_name = "1230_asd_"
 # for i in range(20):
 #     a = create_token(f"{log_name}_{i}")
-#     print(a)
+#     print(a)ee
+
+
+import numpy as np
+from pathlib import Path
+a = np.loadtxt("/data/jbwang/d123/data_poses/2013_05_28_drive_0000_sync/oxts/data/0000000000.txt")
+b = np.loadtxt("/nas/datasets/KITTI-360/data_poses/2013_05_28_drive_0018_sync/poses.txt")
+data = b
+ts = data[:, 0].astype(np.int32)
+poses = np.reshape(data[:, 1:], (-1, 3, 4))
+poses = np.concatenate((poses, np.tile(np.array([0, 0, 0, 1]).reshape(1,1,4),(poses.shape[0],1,1))), 1)
+print(a)
+print(b.shape)
+print(ts.shape)
+print(poses.shape)
+
+ccc = Path("/data/jbwang/d123/data_poses/2013_05_28_drive_0000_sync/oxts/data/")
+print(len(list(ccc.glob("*.txt"))))
\ No newline at end of file
diff --git a/jbwang_test2.py b/jbwang_test2.py
new file mode 100644
index 00000000..b1229356
--- /dev/null
+++ b/jbwang_test2.py
@@ -0,0 +1,70 @@
+# import numpy as np
+# import pickle
+
+# # path = "/nas/datasets/KITTI-360/data_3d_raw/2013_05_28_drive_0000_sync/velodyne_points/data/0000000000.bin"
+# # a = np.fromfile(path, dtype=np.float32)
+
+# # print(a.shape)
+# # print(a[:10])
+
+# # path2 = "/nas/datasets/KITTI-360/calibration/calib_cam_to_pose.txt"
+# # c = np.loadtxt(path2)
+# # print(c)
+
+# import open3d as o3d
+# import numpy as np
+
+# def read_ply_file(file_path):
+#     # Read the PLY file
+#     pcd = o3d.io.read_point_cloud(file_path)
+#     print(len(pcd.points), len(pcd.colors))
+#     # Extract vertex info
+#     points = np.asarray(pcd.points)  # x, y, z
+#     colors = np.asarray(pcd.colors)  # red, green, blue
+#     # semantics = np.asarray(pcd.semantic)  # semanticID, instanceID, isVisible, confidence
+
+#     # Merge everything into one array
+#     vertices = np.hstack((points, colors))
+
+#     return vertices
+
+# # Example usage
+# file_path = '/nas/datasets/KITTI-360/data_3d_semantics/train/2013_05_28_drive_0000_sync/static/0000000002_0000000385.ply'  # replace with your PLY file path
+# vertices = read_ply_file(file_path)
+
+# # Print the first few vertices
+# print("Vertex info (first 5 vertices):")
+# print(vertices[:5])
+
+import numpy as np
+from scipy.linalg import polar
+from scipy.spatial.transform import Rotation as R
+
+def polar_decompose_rotation_scale(A: np.ndarray):
+    """
+    A: 3x3 (rotation + scale + shear)
+    Returns:
+        Rm: pure rotation
+        Sm: symmetric positive definite (scale + shear)
+        scale: approximate per-axis scale (from the diagonal of Sm or the square roots of its eigenvalues; take care if shear is present)
+        yaw,pitch,roll: ZYX sequence (usually yaw(Z), pitch(Y), roll(X))
+    """
+    Rm, Sm = polar(A)  # A = Rm @ Sm
+    # Approximate per-axis scale (if there is no shear):
+    scale = np.diag(Sm)
+    # Euler angles
+    yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', degrees=False)
+    return {
+        "R": Rm,
+        "S": Sm,
+        "scale_diag": scale,
+        "yaw_pitch_roll": (yaw, pitch, roll),
+    }
+
+M = np.array([
+    [-3.97771668e+00, -1.05715942e+00, -2.18206085e-02],
+    [2.43555284e+00, -1.72707462e+00, -1.03932284e-02],
+    [-4.41359095e-02, -2.94448305e-02, 1.39303744e+00],
+])
+out = polar_decompose_rotation_scale(M)
+print(out)
\ No newline at end of file
diff --git a/notebooks/dataset/jbwang_test.py b/notebooks/dataset/jbwang_test.py
index caaa3201..0996734b 100644
--- a/notebooks/dataset/jbwang_test.py
+++ b/notebooks/dataset/jbwang_test.py
@@ -1,5 +1,5 @@
-s3_uri = "/data/jbwang/d123/data/nuplan_mini_train/2021.10.11.07.12.18_veh-50_00211_00304.arrow"
-# s3_uri = "/data/jbwang/d123/data/nuplan_private_test/2021.09.22.13.20.34_veh-28_01446_01583.arrow"
+# s3_uri = "/data/jbwang/d123/data/nuplan_mini_train/2021.10.11.07.12.18_veh-50_00211_00304.arrow"
+s3_uri = "/data/jbwang/d123/data/nuplan_private_test/2021.09.22.13.20.34_veh-28_01446_01583.arrow"
 # s3_uri = "/data/jbwang/d123/data/carla/_Rep0_routes_validation1_route0_07_23_14_33_15.arrow"
 # s3_uri = "/data/jbwang/d123/data/nuplan_mini_val/2021.06.07.12.54.00_veh-35_01843_02314.arrow"
@@ -33,10 +33,11 @@
 for col in table.column_names:
     if col == "lidar":
         continue
-    print(f"Column: {col}, Type: {table.schema.field(col).type}")
-    tokens = table[col]  # or table.column("token")
+    print(f"Column : {col}, Type: {table.schema.field(col).type}")
+    # tokens = table[col]  # or table.column("token")
+    # print(tokens)
     # print(len(tokens))
-    print(tokens.slice(0, 4).to_pylist())
+    # print(tokens.slice(0, 100).to_pylist())
     # print(table["traffic_light_ids"])
 timer.log("3. Table created")
 # Save locally
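A note on the polar-decomposition scratch test in jbwang_test2.py above: scipy.linalg.polar factors A = Rm @ Sm with Rm orthogonal and Sm symmetric positive semi-definite, so when a box transform is a rotation times a positive diagonal scale (no shear), both factors are recovered exactly and the per-axis scale can be read off Sm's diagonal. A minimal self-contained check with an illustrative matrix (not KITTI-360 data):

import numpy as np
from scipy.linalg import polar
from scipy.spatial.transform import Rotation as R

rot = R.from_euler('zyx', [np.pi / 4, 0.0, 0.0]).as_matrix()  # 45 deg about z
A = rot @ np.diag([4.0, 2.0, 1.5])                            # rotation times scale

Rm, Sm = polar(A)                                  # A = Rm @ Sm
assert np.allclose(Rm, rot)                        # pure rotation recovered
assert np.allclose(np.diag(Sm), [4.0, 2.0, 1.5])   # scale sits on the diagonal
yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx')
print(yaw)                                         # ~0.785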
" # f"Root: {KITTI360_DATA_ROOT}" # ) + print("valid",valid_seqs) return {"kitti360": valid_seqs} def get_available_splits(self) -> List[str]: @@ -244,7 +243,7 @@ def convert_kitti360_log_to_arrow( return [] -def get_kitti360_camera_metadata() -> Dict[str, CameraMetadata]: +def get_kitti360_camera_metadata() -> Dict[CameraType, CameraMetadata]: persp = PATH_CALIB_ROOT / "perspective.txt" @@ -265,7 +264,7 @@ def get_kitti360_camera_metadata() -> Dict[str, CameraMetadata]: log_cam_infos: Dict[str, CameraMetadata] = {} for cam_type, cam_name in KITTI360_CAMERA_TYPES.items(): - log_cam_infos[cam_type.serialize()] = CameraMetadata( + log_cam_infos[cam_type] = CameraMetadata( camera_type=cam_type, width=result[cam_name]["wh"][0], height=result[cam_name]["wh"][1], @@ -283,7 +282,7 @@ def _read_projection_matrix(p_line: str) -> np.ndarray: K = P[:, :3] return K -def get_kitti360_lidar_metadata(log_name: str) -> Dict[LiDARType, LiDARMetadata]: +def get_kitti360_lidar_metadata() -> Dict[LiDARType, LiDARMetadata]: metadata: Dict[LiDARType, LiDARMetadata] = {} cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt" @@ -343,7 +342,12 @@ def _write_recording_table( } if data_converter_config.lidar_store_option is not None: - row_data["lidar"] = [_extract_lidar(log_name, idx, data_converter_config)] + lidar_data_dict = _extract_lidar(log_name, idx, data_converter_config) + for lidar_type, lidar_data in lidar_data_dict.items(): + if lidar_data is not None: + row_data[lidar_type.serialize()] = [lidar_data] + else: + row_data[lidar_type.serialize()] = [None] if data_converter_config.camera_store_option is not None: camera_data_dict = _extract_cameras(log_name, idx, data_converter_config) @@ -363,7 +367,7 @@ def _write_recording_table( recording_table = recording_table.sort_by([("timestamp", "ascending")]) write_arrow_table(recording_table, log_file_path) -#TODO default timestamps +#TODO default timestamps and Synchronization all other parts def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]: # unix ts_file = PATH_2D_RAW_ROOT / log_name / "image_01" / "timestamps.txt" @@ -501,9 +505,9 @@ def _extract_detections( return detections_states, detections_velocity, detections_tokens, detections_types #TODO lidar extraction -def _extract_lidar(log_name: str, idx: int, data_converter_config: DataConverterConfig) -> Optional[str]: +def _extract_lidar(log_name: str, idx: int, data_converter_config: DataConverterConfig) -> Dict[LiDARType, Optional[str]]: lidar: Optional[str] = None - lidar_full_path = DIR_3D_RAW / log_name / "velodyne_points" / "data" / f"{idx:010d}.bin" + lidar_full_path = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{idx:010d}.bin" if lidar_full_path.exists(): if data_converter_config.lidar_store_option == "path": lidar = f"/data_3d_raw/{log_name}/velodyne_points/data/{idx:010d}.bin" @@ -511,7 +515,7 @@ def _extract_lidar(log_name: str, idx: int, data_converter_config: DataConverter raise NotImplementedError("Binary lidar storage is not implemented.") else: raise FileNotFoundError(f"LiDAR file not found: {lidar_full_path}") - return {LiDARType.LIDAR_TOP: lidar} if lidar else None + return {LiDARType.LIDAR_TOP: lidar} #TODO check camera extrinsic now is from camera to pose def _extract_cameras( diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py index da79cf3e..c86d9604 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py +++ 
b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py @@ -1,13 +1,13 @@ import numpy as np from collections import defaultdict -from labels import kittiId2label from scipy.linalg import polar from scipy.spatial.transform import Rotation as R from d123.common.geometry.base import StateSE3 from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3 +from d123.dataset.dataset_specific.kitti_360.labels import kittiId2label DEFAULT_ROLL = 0.0 DEFAULT_PITCH = 0.0 @@ -93,8 +93,8 @@ def get_state_array(self): x=self.T[0], y=self.T[1], z=self.T[2], - roll=DEFAULT_ROLL, - pitch=DEFAULT_PITCH, + roll=roll, + pitch=pitch, yaw=yaw, ) bounding_box_se3 = BoundingBoxSE3(center, scale[0], scale[1], scale[2]) From 2d129ee0df6980d219e529a57a60ba6e487a47d6 Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Sun, 17 Aug 2025 12:27:05 +0800 Subject: [PATCH 05/32] add pose calibration to align with nuplan --- d123/script/run_viser.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/d123/script/run_viser.py b/d123/script/run_viser.py index e682a96e..e977c669 100644 --- a/d123/script/run_viser.py +++ b/d123/script/run_viser.py @@ -19,10 +19,13 @@ def main(cfg: DictConfig) -> None: worker = build_worker(cfg) scene_filter = build_scene_filter(cfg.scene_filter) + logger.info(f"Scene filter: {scene_filter}") + logger.info(f"Using {cfg.scene_builder}") + scene_filter.duration_s = 50 scene_builder = build_scene_builder(cfg.scene_builder) scenes = scene_builder.get_scenes(scene_filter, worker=worker) - - ViserVisualizationServer(scenes=scenes) + logger.info(f"Found {len(scenes)} scenes.") + ViserVisualizationServer(scenes=scenes,scene_index=0) if __name__ == "__main__": From 7dd70e6e0713628cb8cf19c9c282814ca04d2e5b Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Sun, 17 Aug 2025 12:30:07 +0800 Subject: [PATCH 06/32] add pose calibration to align with nuplan --- d123/dataset/arrow/conversion.py | 1 + .../kitti_360/kitti_360_data_converter.py | 55 ++++-- jbwang_test.py | 43 +++-- jbwang_test2.py | 162 +++++++++++------- notebooks/dataset/jbwang_test.py | 7 +- 5 files changed, 172 insertions(+), 96 deletions(-) diff --git a/d123/dataset/arrow/conversion.py b/d123/dataset/arrow/conversion.py index d9afba6f..69488545 100644 --- a/d123/dataset/arrow/conversion.py +++ b/d123/dataset/arrow/conversion.py @@ -33,6 +33,7 @@ DATASET_SENSOR_ROOT: Dict[str, Path] = { "nuplan": Path(os.environ["NUPLAN_DATA_ROOT"]) / "nuplan-v1.1" / "sensor_blobs", "carla": Path(os.environ["CARLA_DATA_ROOT"]) / "sensor_blobs", + "kitti360": Path(os.environ["KITTI360_DATA_ROOT"]), } diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py index 02dc0add..7e13b905 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py @@ -12,6 +12,7 @@ import xml.etree.ElementTree as ET import pyarrow as pa from PIL import Image +import logging from nuplan.planning.utils.multithreading.worker_utils import WorkerPool, worker_map @@ -61,12 +62,11 @@ PATH_POSES_ROOT: Path = KITTI360_DATA_ROOT / DIR_POSES PATH_CALIB_ROOT: Path = KITTI360_DATA_ROOT / DIR_CALIB -#TODO check all paths KITTI360_REQUIRED_MODALITY_ROOTS: Dict[str, Path] = { DIR_2D_RAW: PATH_2D_RAW_ROOT, DIR_3D_RAW: PATH_3D_RAW_ROOT, - # DIR_3D_BBOX: PATH_3D_BBOX_ROOT, - # DIR_POSES: PATH_POSES_ROOT, + DIR_POSES: PATH_POSES_ROOT, + DIR_3D_BBOX: 
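The kitti_360_helper change in PATCH 04 above stops hard-coding roll and pitch and instead uses all three angles recovered from the decomposed rotation. One subtlety worth flagging: in scipy, a lowercase sequence like 'zyx' means extrinsic rotations about fixed axes, and the first returned angle belongs to the z axis, which is what the yaw, pitch, roll unpacking relies on. A hedged round-trip sketch:

import numpy as np
from scipy.spatial.transform import Rotation as R

yaw, pitch, roll = 0.3, -0.1, 0.05
Rm = R.from_euler('zyx', [yaw, pitch, roll]).as_matrix()
# Recover the angles; the ordering matches the unpacking used in the helper.
y2, p2, r2 = R.from_matrix(Rm).as_euler('zyx')
assert np.allclose([yaw, pitch, roll], [y2, p2, r2])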
From 7dd70e6e0713628cb8cf19c9c282814ca04d2e5b Mon Sep 17 00:00:00 2001
From: jbwang <1159270049@qq.com>
Date: Sun, 17 Aug 2025 12:30:07 +0800
Subject: [PATCH 06/32] add pose calibration to align with nuplan

---
 d123/dataset/arrow/conversion.py | 1 +
 .../kitti_360/kitti_360_data_converter.py | 55 ++++--
 jbwang_test.py | 43 +++--
 jbwang_test2.py | 162 +++++++++++-------
 notebooks/dataset/jbwang_test.py | 7 +-
 5 files changed, 172 insertions(+), 96 deletions(-)

diff --git a/d123/dataset/arrow/conversion.py b/d123/dataset/arrow/conversion.py
index d9afba6f..69488545 100644
--- a/d123/dataset/arrow/conversion.py
+++ b/d123/dataset/arrow/conversion.py
@@ -33,6 +33,7 @@
 DATASET_SENSOR_ROOT: Dict[str, Path] = {
     "nuplan": Path(os.environ["NUPLAN_DATA_ROOT"]) / "nuplan-v1.1" / "sensor_blobs",
     "carla": Path(os.environ["CARLA_DATA_ROOT"]) / "sensor_blobs",
+    "kitti360": Path(os.environ["KITTI360_DATA_ROOT"]),
 }

diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
index 02dc0add..7e13b905 100644
--- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
+++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
@@ -12,6 +12,7 @@
 import xml.etree.ElementTree as ET
 import pyarrow as pa
 from PIL import Image
+import logging

 from nuplan.planning.utils.multithreading.worker_utils import WorkerPool, worker_map
@@ -61,12 +62,11 @@
 PATH_POSES_ROOT: Path = KITTI360_DATA_ROOT / DIR_POSES
 PATH_CALIB_ROOT: Path = KITTI360_DATA_ROOT / DIR_CALIB

-#TODO check all paths
 KITTI360_REQUIRED_MODALITY_ROOTS: Dict[str, Path] = {
     DIR_2D_RAW: PATH_2D_RAW_ROOT,
     DIR_3D_RAW: PATH_3D_RAW_ROOT,
-    # DIR_3D_BBOX: PATH_3D_BBOX_ROOT,
-    # DIR_POSES: PATH_POSES_ROOT,
+    DIR_POSES: PATH_POSES_ROOT,
+    DIR_3D_BBOX: PATH_3D_BBOX_ROOT / "train",
 }

 #TODO
@@ -79,6 +79,20 @@
     "pedestrian": DetectionType.PEDESTRIAN,
 }

+KITTI3602NUPLAN_IMU_CALIBRATION = np.array([
+    [1, 0, 0, 0],
+    [0, -1, 0, 0],
+    [0, 0, -1, 0],
+    [0, 0, 0, 1],
+    ], dtype=np.float64)
+
+KITTI3602NUPLAN_LIDAR_CALIBRATION = np.array([
+    [0, -1, 0, 0],
+    [1, 0, 0, 0],
+    [0, 0, 1, 0],
+    [0, 0, 0, 1],
+    ], dtype=np.float64)
+

 def create_token(input_data: str) -> str:
     # TODO: Refactor this function.
@@ -120,23 +134,30 @@ def _collect_log_paths(self) -> Dict[str, List[Path]]:
         # Enumerate candidate sequences from data_2d_raw
         candidates = sorted(p for p in PATH_2D_RAW_ROOT.iterdir() if p.is_dir() and p.name.endswith("_sync"))

+        def _has_modality(seq_name: str, modality_name: str, root: Path) -> bool:
+            if modality_name == DIR_3D_BBOX:
+                # expected: data_3d_bboxes/train/.xml
+                xml_path = root / f"{seq_name}.xml"
+                return xml_path.exists()
+            else:
+                return (root / seq_name).exists()
+
         valid_seqs: List[Path] = []
         for seq_dir in candidates:
             seq_name = seq_dir.name
             missing_modalities = [
                 modality_name
                 for modality_name, root in KITTI360_REQUIRED_MODALITY_ROOTS.items()
-                if not (root / seq_name).exists()
+                if not _has_modality(seq_name, modality_name, root)
             ]
             if not missing_modalities:
                 valid_seqs.append(seq_dir)  #KITTI360_DATA_ROOT / DIR_2D_RAW /seq_name
-        #TODO warnings
-        # else:
-        #     warnings.warn(
-        #         f"Sequence '{seq_name}' skipped: missing modalities {missing_modalities}. "
-        #         f"Root: {KITTI360_DATA_ROOT}"
-        #     )
-        print("valid",valid_seqs)
+            else:
+                logging.info(
+                    f"Sequence '{seq_name}' skipped: missing modalities {missing_modalities}. "
+                    f"Root: {KITTI360_DATA_ROOT}"
+                )
+        logging.info(f"valid sequences found: {valid_seqs}")
         return {"kitti360": valid_seqs}

     def get_available_splits(self) -> List[str]:
         return ["kitti360"]

     def convert_maps(self, worker: WorkerPool) -> None:
-        print("KITTI-360 does not provide standard maps. Skipping map conversion.")
+        logging.info("KITTI-360 does not provide standard maps. Skipping map conversion.")
         return None

     def convert_logs(self, worker: WorkerPool) -> None:
@@ -184,6 +205,7 @@
     if not log_file_path.parent.exists():
         log_file_path.parent.mkdir(parents=True, exist_ok=True)

+    #TODO location
     metadata = LogMetadata(
         dataset="kitti360",
         log_name=log_name,
@@ -300,13 +322,17 @@
     values = list(map(float, image_00.strip().split()[1:]))
     matrix = np.array(values).reshape(3, 4)
     cam2pose = np.concatenate((matrix, lastrow))
+    cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose

     cam2velo = np.concatenate((np.loadtxt(cam2velo_txt).reshape(3,4), lastrow))
+    cam2velo = KITTI3602NUPLAN_LIDAR_CALIBRATION @ cam2velo
+
     extrinsic = cam2velo @ np.linalg.inv(cam2pose)

     metadata[LiDARType.LIDAR_TOP] = LiDARMetadata(
         lidar_type=LiDARType.LIDAR_TOP,
         lidar_index=Kitti360LidarIndex,
+        #TODO extrinsic needed to be same with nuplan
         extrinsic=extrinsic,
     )
     return metadata
@@ -367,7 +393,7 @@
     recording_table = recording_table.sort_by([("timestamp", "ascending")])
     write_arrow_table(recording_table, log_file_path)

-#TODO default timestamps and Synchronization all other parts
+#TODO default timestamps and Synchronization all other sequences
 def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]:
     # unix
     ts_file = PATH_2D_RAW_ROOT / log_name / "image_01" / "timestamps.txt"
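The two KITTI3602NUPLAN_* matrices added to the converter above are pure axis permutations with sign flips, i.e. proper rotations (determinant +1) in homogeneous form, so left-multiplying a transform by them re-expresses it in the nuPlan axis convention without introducing any scale. A quick self-check under that reading:

import numpy as np

lidar_calib = np.array([
    [0, -1, 0, 0],
    [1, 0, 0, 0],
    [0, 0, 1, 0],
    [0, 0, 0, 1],
], dtype=np.float64)  # same values as KITTI3602NUPLAN_LIDAR_CALIBRATION

rot = lidar_calib[:3, :3]
assert np.allclose(rot @ rot.T, np.eye(3))   # orthogonal
assert np.isclose(np.linalg.det(rot), 1.0)   # proper rotation, no reflection
# Maps (x, y, z) -> (-y, x, z):
print(lidar_calib @ np.array([1.0, 2.0, 3.0, 1.0]))  # [-2.  1.  3.  1.]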
Path("/data/jbwang/d123/data_poses/2013_05_28_drive_0000_sync/oxts/data/") +# print(len(list(ccc.glob("*.txt")))) + + + + +from d123.dataset.dataset_specific.nuplan.nuplan_data_converter import convert_nuplan_map_to_gpkg + +from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig + +MAP_LOCATIONS = {"sg-one-north", "us-ma-boston", "us-nv-las-vegas-strip", "us-pa-pittsburgh-hazelwood"} +maps = list(MAP_LOCATIONS) + +data_conveter_config = DataConverterConfig(output_path = "/nas/datasets/nuplan/maps") +convert_nuplan_map_to_gpkg(maps,data_conveter_config) \ No newline at end of file diff --git a/jbwang_test2.py b/jbwang_test2.py index b1229356..b406c52c 100644 --- a/jbwang_test2.py +++ b/jbwang_test2.py @@ -1,70 +1,104 @@ -# import numpy as np -# import pickle +# # import numpy as np +# # import pickle + +# # # path = "/nas/datasets/KITTI-360/data_3d_raw/2013_05_28_drive_0000_sync/velodyne_points/data/0000000000.bin" +# # # a = np.fromfile(path, dtype=np.float32) + +# # # print(a.shape) +# # # print(a[:10]) + +# # # path2 = "/nas/datasets/KITTI-360/calibration/calib_cam_to_pose.txt" +# # # c = np.loadtxt(path2) +# # # print(c) + +# # import open3d as o3d +# # import numpy as np + +# # def read_ply_file(file_path): +# # # 读取 PLY 文件 +# # pcd = o3d.io.read_point_cloud(file_path) +# # print(len(pcd.points), len(pcd.colors)) +# # # 提取顶点信息 +# # points = np.asarray(pcd.points) # x, y, z +# # colors = np.asarray(pcd.colors) # red, green, blue +# # # semantics = np.asarray(pcd.semantic) # semanticID, instanceID, isVisible, confidence + +# # # 将所有信息合并到一个数组中 +# # vertices = np.hstack((points, colors)) -# # path = "/nas/datasets/KITTI-360/data_3d_raw/2013_05_28_drive_0000_sync/velodyne_points/data/0000000000.bin" -# # a = np.fromfile(path, dtype=np.float32) +# # return vertices -# # print(a.shape) -# # print(a[:10]) +# # # 示例用法 +# # file_path = '/nas/datasets/KITTI-360/data_3d_semantics/train/2013_05_28_drive_0000_sync/static/0000000002_0000000385.ply' # 替换为你的 PLY 文件路径 +# # vertices = read_ply_file(file_path) -# # path2 = "/nas/datasets/KITTI-360/calibration/calib_cam_to_pose.txt" -# # c = np.loadtxt(path2) -# # print(c) +# # # 打印前几个顶点信息 +# # print("顶点信息 (前5个顶点):") +# # print(vertices[:5]) -# import open3d as o3d # import numpy as np +# from scipy.linalg import polar +# from scipy.spatial.transform import Rotation as R + +# def polar_decompose_rotation_scale(A: np.ndarray): +# """ +# A: 3x3 (含旋转+缩放+剪切) +# 返回: +# Rm: 纯旋转 +# Sm: 对称正定 (缩放+剪切) +# scale: 近似轴缩放(从 Sm 特征值开方或对角提取;若存在剪切需谨慎) +# yaw,pitch,roll: 使用 ZYX 序列 (常对应 yaw(Z), pitch(Y), roll(X)) +# """ +# Rm, Sm = polar(A) # A = Rm @ Sm +# # 近似各向缩放(若无剪切): +# scale = np.diag(Sm) +# # 欧拉角 +# yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', degrees=False) +# return { +# "R": Rm, +# "S": Sm, +# "scale_diag": scale, +# "yaw_pitch_roll": (yaw, pitch, roll), +# } + +# M = np.array([ +# [-3.97771668e+00, -1.05715942e+00,-2.18206085e-02], +# [2.43555284e+00, -1.72707462e+00, -1.03932284e-02], +# [-4.41359095e-02, -2.94448305e-02, 1.39303744e+00], +# ]) +# out = polar_decompose_rotation_scale(M) +# print(out) + + +import glob +import os +import cv2 + +def to_video(folder_path, fps=15, downsample=2): + imgs_path = glob.glob(os.path.join(folder_path, '*png*')) + # imgs_path = sorted(imgs_path)[:19] + imgs_path = sorted(imgs_path)[:700:1] + img_array = [] + for img_path in imgs_path: + img = cv2.imread(img_path) + height, width, channel = img.shape + img = cv2.resize(img, (width // downsample, height // + downsample), 
interpolation=cv2.INTER_AREA) + height, width, channel = img.shape + size = (width, height) + img_array.append(img) + + # media.write_video(os.path.join(folder_path, 'video.mp4'), img_array, fps=10) + mp4_path = os.path.join("/data/jbwang/d123/video/", 'video_one_episode.mp4') + if os.path.exists(mp4_path): + os.remove(mp4_path) + out = cv2.VideoWriter( + mp4_path, + cv2.VideoWriter_fourcc(*'DIVX'), fps, size + ) + for i in range(len(img_array)): + out.write(img_array[i]) + out.release() + +to_video("/nas/datasets/KITTI-360/2013_05_28_drive_0000_sync/image_00/data_rect/") -# def read_ply_file(file_path): -# # 读取 PLY 文件 -# pcd = o3d.io.read_point_cloud(file_path) -# print(len(pcd.points), len(pcd.colors)) -# # 提取顶点信息 -# points = np.asarray(pcd.points) # x, y, z -# colors = np.asarray(pcd.colors) # red, green, blue -# # semantics = np.asarray(pcd.semantic) # semanticID, instanceID, isVisible, confidence - -# # 将所有信息合并到一个数组中 -# vertices = np.hstack((points, colors)) - -# return vertices - -# # 示例用法 -# file_path = '/nas/datasets/KITTI-360/data_3d_semantics/train/2013_05_28_drive_0000_sync/static/0000000002_0000000385.ply' # 替换为你的 PLY 文件路径 -# vertices = read_ply_file(file_path) - -# # 打印前几个顶点信息 -# print("顶点信息 (前5个顶点):") -# print(vertices[:5]) - -import numpy as np -from scipy.linalg import polar -from scipy.spatial.transform import Rotation as R - -def polar_decompose_rotation_scale(A: np.ndarray): - """ - A: 3x3 (含旋转+缩放+剪切) - 返回: - Rm: 纯旋转 - Sm: 对称正定 (缩放+剪切) - scale: 近似轴缩放(从 Sm 特征值开方或对角提取;若存在剪切需谨慎) - yaw,pitch,roll: 使用 ZYX 序列 (常对应 yaw(Z), pitch(Y), roll(X)) - """ - Rm, Sm = polar(A) # A = Rm @ Sm - # 近似各向缩放(若无剪切): - scale = np.diag(Sm) - # 欧拉角 - yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', degrees=False) - return { - "R": Rm, - "S": Sm, - "scale_diag": scale, - "yaw_pitch_roll": (yaw, pitch, roll), - } - -M = np.array([ - [-3.97771668e+00, -1.05715942e+00,-2.18206085e-02], - [2.43555284e+00, -1.72707462e+00, -1.03932284e-02], - [-4.41359095e-02, -2.94448305e-02, 1.39303744e+00], -]) -out = polar_decompose_rotation_scale(M) -print(out) \ No newline at end of file diff --git a/notebooks/dataset/jbwang_test.py b/notebooks/dataset/jbwang_test.py index 0996734b..c2cabfbe 100644 --- a/notebooks/dataset/jbwang_test.py +++ b/notebooks/dataset/jbwang_test.py @@ -1,7 +1,8 @@ # s3_uri = "/data/jbwang/d123/data/nuplan_mini_train/2021.10.11.07.12.18_veh-50_00211_00304.arrow" -s3_uri = "/data/jbwang/d123/data/nuplan_private_test/2021.09.22.13.20.34_veh-28_01446_01583.arrow" +# s3_uri = "/data/jbwang/d123/data/nuplan_private_test/2021.09.22.13.20.34_veh-28_01446_01583.arrow" # s3_uri = "/data/jbwang/d123/data/carla/_Rep0_routes_validation1_route0_07_23_14_33_15.arrow" # s3_uri = "/data/jbwang/d123/data/nuplan_mini_val/2021.06.07.12.54.00_veh-35_01843_02314.arrow" +s3_uri = "/data/jbwang/d123/data2/kitti360_c2e_train/2013_05_28_drive_0000_sync_c2e.arrow" import pyarrow as pa import pyarrow.fs as fs @@ -34,9 +35,9 @@ if col == "lidar": continue print(f"Column : {col}, Type: {table.schema.field(col).type}") - # tokens = table[col] # 或 table.column("token") + tokens = table[col] # 或 table.column("token") # print(tokens) - # print(len(tokens)) + print(len(tokens)) # print(tokens.slice(0, 100).to_pylist()) # print(table["traffic_light_ids"]) timer.log("3. 
Table created") From 7110af5ef464912c1fe51a673c503f8a293a9d79 Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Mon, 18 Aug 2025 12:37:43 +0800 Subject: [PATCH 07/32] finish dynamic car and static car remains some bug(start and end frame) --- .../kitti_360/kitti_360_data_converter.py | 75 ++++++++++++++---- .../kitti_360/kitti_360_helper.py | 58 +++++++++++--- .../default_dataset_conversion.yaml | 4 - d123/script/run_viser.py | 9 +-- jbwang_test2.py | 79 +++++++++++-------- 5 files changed, 159 insertions(+), 66 deletions(-) diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py index 7e13b905..efc0bdf2 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py @@ -7,6 +7,7 @@ from typing import Any, Dict, Final, List, Optional, Tuple, Union import numpy as np +from collections import defaultdict import datetime import hashlib import xml.etree.ElementTree as ET @@ -69,7 +70,7 @@ DIR_3D_BBOX: PATH_3D_BBOX_ROOT / "train", } -#TODO +#TODO now only parts of labels are used KIITI360_DETECTION_NAME_DICT = { "truck": DetectionType.VEHICLE, "bus": DetectionType.VEHICLE, @@ -332,7 +333,6 @@ def get_kitti360_lidar_metadata() -> Dict[LiDARType, LiDARMetadata]: metadata[LiDARType.LIDAR_TOP] = LiDARMetadata( lidar_type=LiDARType.LIDAR_TOP, lidar_index=Kitti360LidarIndex, - #TODO extrinsic needed to be same with nuplan extrinsic=extrinsic, ) return metadata @@ -345,8 +345,11 @@ def _write_recording_table( ) -> None: ts_list = _read_timestamps(log_name) - ego_state_all = _extract_ego_state_all(log_name) + #TODO + print("extracting detections...") detections_states,detections_velocity,detections_tokens,detections_types = _extract_detections(log_name,len(ts_list)) + print("extracting states...") + ego_state_all = _extract_ego_state_all(log_name) with pa.OSFile(str(log_file_path), "wb") as sink: with pa.ipc.new_file(sink, recording_schema) as writer: @@ -437,6 +440,7 @@ def _extract_ego_state_all(log_name: str) -> List[List[float]]: oxts_path_file = oxts_path / f"{int(idx):010d}.txt" oxts_data = np.loadtxt(oxts_path_file) + #TODO check roll, pitch, yaw roll, pitch, yaw = oxts_data[3:6] vehicle_parameters = get_kitti360_station_wagon_parameters() @@ -479,7 +483,7 @@ def _extract_ego_state_all(log_name: str) -> List[List[float]]: ) return ego_state_all -#TODO now only divided by data_3d_semantics +#TODO # We may distinguish between image and lidar detections # besides, now it is based only on start and end frame def _extract_detections( @@ -499,6 +503,18 @@ def _extract_detections( tree = ET.parse(bbox_3d_path) root = tree.getroot() + dynamic_groups: Dict[int, List[KITTI360Bbox3D]] = defaultdict(list) + + lidra_data_all = [] + for index in range(ts_len): + lidar_full_path = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{index:010d}.bin" + if not lidar_full_path.exists(): + logging.warning(f"LiDAR file not found for frame {index}: {lidar_full_path}") + continue + lidar_data = np.fromfile(lidar_full_path, dtype=np.float32) + lidar_data = lidar_data.reshape(-1, 4)[:, :3] # Keep only x, y, z coordinates + lidra_data_all.append(lidar_data) + for child in root: label = child.find('label').text if child.find('transform') is None or label not in KIITI360_DETECTION_NAME_DICT.keys(): @@ -506,27 +522,57 @@ def _extract_detections( obj = KITTI360Bbox3D() obj.parseBbox(child) - # static + #static object 
if obj.timestamp == -1: start_frame = obj.start_frame end_frame = obj.end_frame for frame in range(start_frame, end_frame + 1): - #TODO check if valid in each frame - if frame < 0 or frame >= ts_len: - continue - #TODO check yaw + lidar_data = lidra_data_all[frame] + #TODO check yaw and box visible + # if obj.box_visible_in_point_cloud(lidar_data): detections_states[frame].append(obj.get_state_array()) detections_velocity[frame].append([0.0, 0.0, 0.0]) detections_tokens[frame].append(str(obj.globalID)) - detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[label])) - # dynamic + detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[obj.label])) else: + ann_id = obj.annotationId + dynamic_groups[ann_id].append(obj) + + # dynamic object + for ann_id, obj_list in dynamic_groups.items(): + obj_list.sort(key=lambda obj: obj.timestamp) + num_frames = len(obj_list) + + positions = [obj.get_state_array()[:3] for obj in obj_list] + timestamps = [int(obj.timestamp) for obj in obj_list] + + velocities = [] + + for i in range(1, num_frames - 1): + dt_frames = timestamps[i+1] - timestamps[i-1] + if dt_frames > 0: + dt = dt_frames * KITTI360_DT + vel = (positions[i+1] - positions[i-1]) / dt + # Transform velocity to the ego frame + vel = obj_list[i].Rm.T @ vel + else: + vel = np.zeros(3) + velocities.append(vel) + + if num_frames > 1: + # first and last frame + velocities.insert(0, velocities[0]) + velocities.append(velocities[-1]) + elif num_frames == 1: + velocities.append(np.zeros(3)) + + for obj, vel in zip(obj_list, velocities): frame = obj.timestamp detections_states[frame].append(obj.get_state_array()) - #TODO velocity not provided - detections_velocity[frame].append([0.0, 0.0, 0.0]) + detections_velocity[frame].append(vel) detections_tokens[frame].append(str(obj.globalID)) - detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[label])) + detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[obj.label])) + return detections_states, detections_velocity, detections_tokens, detections_types @@ -543,7 +589,6 @@ def _extract_lidar(log_name: str, idx: int, data_converter_config: DataConverter raise FileNotFoundError(f"LiDAR file not found: {lidar_full_path}") return {LiDARType.LIDAR_TOP: lidar} -#TODO check camera extrinsic now is from camera to pose def _extract_cameras( log_name: str, idx: int, data_converter_config: DataConverterConfig ) -> Dict[CameraType, Optional[str]]: diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py index c86d9604..dc1d10cf 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py @@ -47,6 +47,9 @@ def __init__(self): # name self.name = '' + + #label + self.label = '' def parseOpencvMatrix(self, node): rows = int(node.find('rows').text) @@ -75,28 +78,63 @@ def parseBbox(self, child): self.annotationId = int(child.find('index').text) + 1 + self.label = child.find('label').text + self.globalID = local2global(self.semanticId, self.instanceId) + self.parseVertices(child) + self.parse_scale_rotation() + + def parseVertices(self, child): transform = self.parseOpencvMatrix(child.find('transform')) - self.R = transform[:3,:3] - self.T = transform[:3,3] + R = transform[:3,:3] + T = transform[:3,3] + vertices = self.parseOpencvMatrix(child.find('vertices')) + + vertices = np.matmul(R, vertices.transpose()).transpose() + T + self.vertices = vertices + + self.R = R + self.T = T - def 
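The dynamic-object velocities above are central differences over neighboring annotated frames, scaled by the frame gap times KITTI360_DT and then rotated into the object frame via Rm.T. A standalone sketch of the same estimator (KITTI360_DT value taken from the converter; uniform frame spacing assumed):

import numpy as np

KITTI360_DT = 0.1  # seconds per frame, as in the converter

def central_difference_velocities(positions: np.ndarray, frames: np.ndarray) -> np.ndarray:
    """positions: (N, 3) world positions; frames: (N,) integer frame ids."""
    n = len(positions)
    vels = np.zeros_like(positions)
    for i in range(1, n - 1):
        dt = (frames[i + 1] - frames[i - 1]) * KITTI360_DT
        if dt > 0:
            vels[i] = (positions[i + 1] - positions[i - 1]) / dt
    if n > 1:  # pad endpoints with the nearest interior estimate, mirroring the converter
        vels[0] = vels[1]
        vels[-1] = vels[-2]
    return vels

pos = np.array([[0.0, 0, 0], [1.0, 0, 0], [2.0, 0, 0], [3.0, 0, 0]])
print(central_difference_velocities(pos, np.arange(4)))  # ~10 m/s along x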
diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
index c86d9604..dc1d10cf 100644
--- a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
+++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
@@ -47,6 +47,9 @@ def __init__(self):
         # name
         self.name = ''

+        # label
+        self.label = ''
+
     def parseOpencvMatrix(self, node):
         rows = int(node.find('rows').text)
@@ -75,28 +78,63 @@ def parseBbox(self, child):
         self.annotationId = int(child.find('index').text) + 1

+        self.label = child.find('label').text
+
         self.globalID = local2global(self.semanticId, self.instanceId)
+        self.parseVertices(child)
+        self.parse_scale_rotation()
+
+    def parseVertices(self, child):
         transform = self.parseOpencvMatrix(child.find('transform'))
-        self.R = transform[:3,:3]
-        self.T = transform[:3,3]
+        R = transform[:3,:3]
+        T = transform[:3,3]
+
+        vertices = self.parseOpencvMatrix(child.find('vertices'))
+
+        vertices = np.matmul(R, vertices.transpose()).transpose() + T
+        self.vertices = vertices
+
+        self.R = R
+        self.T = T

-    def polar_decompose_rotation_scale(self):
+    def parse_scale_rotation(self):
         Rm, Sm = polar(self.R)
         scale = np.diag(Sm)
         yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', degrees=False)
-        return scale, (yaw, pitch, roll)
-
+        self.Rm = np.array(Rm)
+        self.scale = scale
+        self.yaw = yaw
+        self.pitch = pitch
+        self.roll = roll
+
+        # self.pose = np.eye(4, dtype=np.float64)
+        # self.pose[:3, :3] = self.Rm
+        # self.pose[:3, 3] = self.T
+        # self.w2e = np.linalg.inv(self.pose)
+
     def get_state_array(self):
-        scale, (yaw, pitch, roll) = self.polar_decompose_rotation_scale()
         center = StateSE3(
             x=self.T[0],
             y=self.T[1],
             z=self.T[2],
-            roll=roll,
-            pitch=pitch,
-            yaw=yaw,
+            roll=self.roll,
+            pitch=self.pitch,
+            yaw=self.yaw,
         )
+        scale = self.scale
         bounding_box_se3 = BoundingBoxSE3(center, scale[0], scale[1], scale[2])
-        return bounding_box_se3.array
\ No newline at end of file
+        return bounding_box_se3.array
+
+    def box_visible_in_point_cloud(self, points):
+        # points: (N,3), box: (8,3)
+        box = self.vertices
+        O, A, B, C = box[0], box[1], box[2], box[5]
+        OA = A - O
+        OB = B - O
+        OC = C - O
+        POA, POB, POC = (points @ OA[..., None])[:, 0], (points @ OB[..., None])[:, 0], (points @ OC[..., None])[:, 0]
+        mask = (np.dot(O, OA) < POA) & (POA < np.dot(A, OA)) & \
+               (np.dot(O, OB) < POB) & (POB < np.dot(B, OB)) & \
+               (np.dot(O, OC) < POC) & (POC < np.dot(C, OC))
+        return True if np.sum(mask) > 100 else False
\ No newline at end of file
diff --git a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml
index 97ca3a7a..e1c76c60 100644
--- a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml
+++ b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml
@@ -15,12 +15,8 @@ defaults:
   - default_dataset_paths
   - _self_
   - datasets:
-<<<<<<< HEAD
     - kitti360_dataset
     # - nuplan_private_dataset
-=======
-    - nuplan_private_dataset
->>>>>>> dev_v0.0.6
     # - carla_dataset
     # - wopd_dataset
diff --git a/d123/script/run_viser.py b/d123/script/run_viser.py
index e977c669..faaf08ca 100644
--- a/d123/script/run_viser.py
+++ b/d123/script/run_viser.py
@@ -19,14 +19,11 @@ def main(cfg: DictConfig) -> None:
     worker = build_worker(cfg)

     scene_filter = build_scene_filter(cfg.scene_filter)
-    logger.info(f"Scene filter: {scene_filter}")
-    logger.info(f"Using {cfg.scene_builder}")
-    scene_filter.duration_s = 50
     scene_builder = build_scene_builder(cfg.scene_builder)
     scenes = scene_builder.get_scenes(scene_filter, worker=worker)
-    logger.info(f"Found {len(scenes)} scenes.")
-    ViserVisualizationServer(scenes=scenes,scene_index=0)
+
+    ViserVisualizationServer(scenes=scenes)

 if __name__ == "__main__":
-    main()
+    main()
\ No newline at end of file
diff --git a/jbwang_test2.py b/jbwang_test2.py
index b406c52c..aa685428 100644
--- a/jbwang_test2.py
+++ b/jbwang_test2.py
@@ -1,70 +1,104 @@
// downsample, height // - downsample), interpolation=cv2.INTER_AREA) - height, width, channel = img.shape - size = (width, height) - img_array.append(img) - - # media.write_video(os.path.join(folder_path, 'video.mp4'), img_array, fps=10) - mp4_path = os.path.join("/data/jbwang/d123/video/", 'video_one_episode.mp4') - if os.path.exists(mp4_path): - os.remove(mp4_path) - out = cv2.VideoWriter( - mp4_path, - cv2.VideoWriter_fourcc(*'DIVX'), fps, size - ) - for i in range(len(img_array)): - out.write(img_array[i]) - out.release() - -to_video("/nas/datasets/KITTI-360/2013_05_28_drive_0000_sync/image_00/data_rect/") +import gc +import json +import os +from dataclasses import asdict +from functools import partial +from pathlib import Path +from typing import Any, Dict, Final, List, Optional, Tuple, Union + +import numpy as np +from collections import defaultdict +import datetime +import hashlib +import xml.etree.ElementTree as ET +import pyarrow as pa +from PIL import Image +import logging + +from d123.common.datatypes.detection.detection_types import DetectionType +from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D + + +bbox_3d_path = Path("/nas/datasets/KITTI-360/data_3d_bboxes/train/2013_05_28_drive_0000_sync.xml") + +tree = ET.parse(bbox_3d_path) +root = tree.getroot() + +KIITI360_DETECTION_NAME_DICT = { + "truck": DetectionType.VEHICLE, + "bus": DetectionType.VEHICLE, + "car": DetectionType.VEHICLE, + "motorcycle": DetectionType.BICYCLE, + "bicycle": DetectionType.BICYCLE, + "pedestrian": DetectionType.PEDESTRIAN, +} + +for child in root: + label = child.find('label').text + if child.find('transform') is None or label not in KIITI360_DETECTION_NAME_DICT.keys(): + continue + obj = KITTI360Bbox3D() + obj.parseBbox(child) + # print(obj.Rm) + # print(Sigma) \ No newline at end of file From 778604d4ace9cd9bbc3e27445d3a5f0449786426 Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Tue, 19 Aug 2025 15:23:08 +0800 Subject: [PATCH 08/32] nearly done kitti_360 but remains some questions --- .../config/dataset_conversion/default_dataset_conversion.yaml | 2 +- d123/script/run_viser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml index d8fa5988..52915f13 100644 --- a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml +++ b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml @@ -15,11 +15,11 @@ defaults: - default_dataset_paths - _self_ - datasets: - - kitti360_dataset # - nuplan_private_dataset # - carla_dataset # - wopd_dataset # - av2_sensor_dataset + - kitti360_dataset force_log_conversion: True force_map_conversion: False diff --git a/d123/script/run_viser.py b/d123/script/run_viser.py index faaf08ca..8973acea 100644 --- a/d123/script/run_viser.py +++ b/d123/script/run_viser.py @@ -21,7 +21,7 @@ def main(cfg: DictConfig) -> None: scene_filter = build_scene_filter(cfg.scene_filter) scene_builder = build_scene_builder(cfg.scene_builder) scenes = scene_builder.get_scenes(scene_filter, worker=worker) - + ViserVisualizationServer(scenes=scenes) From 94bc3f420ab5ba7486556e8aa72e30786e25f0db Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Tue, 19 Aug 2025 15:24:35 +0800 Subject: [PATCH 09/32] nearly done kitti_360 but remains some questions --- .gitignore | 3 +- .../dataset_specific/kitti_360/jbwang_test.py | 5 +- 
.../kitti_360/kitti_360_data_converter.py | 121 ++++++------ .../kitti_360/kitti_360_helper.py | 14 +- docs/datasets/kitti-360.rst | 10 +- jbwang_test2.py | 27 ++- notebooks/dataset/jbwang_test.py | 14 +- notebooks/gym/jbwang_test.py | 180 ++++++++++++++++++ 8 files changed, 302 insertions(+), 72 deletions(-) create mode 100644 notebooks/gym/jbwang_test.py diff --git a/.gitignore b/.gitignore index 3a820809..426cc468 100644 --- a/.gitignore +++ b/.gitignore @@ -23,8 +23,7 @@ *.csv *.log *.mp4 -exp/* - +exp/ # Sphinx documentation docs/_build/ diff --git a/d123/dataset/dataset_specific/kitti_360/jbwang_test.py b/d123/dataset/dataset_specific/kitti_360/jbwang_test.py index 6f0bdbd9..e480783e 100644 --- a/d123/dataset/dataset_specific/kitti_360/jbwang_test.py +++ b/d123/dataset/dataset_specific/kitti_360/jbwang_test.py @@ -21,7 +21,7 @@ from sqlalchemy import func -from kitti_360_data_converter import _extract_ego_state_all,get_kitti360_lidar_metadata,_extract_cameras,_extract_detections +from kitti_360_data_converter import _extract_ego_state_all,get_kitti360_lidar_metadata,_extract_cameras,_extract_detections,_read_timestamps # a = _extract_ego_state_all("2013_05_28_drive_0000_sync") # print(a[0]) @@ -151,4 +151,5 @@ def get_cam_info_from_lidar_pc(log,log_file, lidar_pc, rolling_shutter_s=1/60): # # camera_data = _extract_camera(log_db, lidar_pc, log_path) # camera_data = get_cam_info_from_lidar_pc(log_db,log_path, lidar_pc, rolling_shutter_s=1/60) # print(_extract_cameras("2013_05_28_drive_0000_sync",0)) - _extract_detections("2013_05_28_drive_0000_sync", 0) \ No newline at end of file + # _extract_detections("2013_05_28_drive_0000_sync", 0) + print(_read_timestamps("2013_05_28_drive_0000_sync")) \ No newline at end of file diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py index efc0bdf2..81057042 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py @@ -37,7 +37,7 @@ KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"]) -#TODO carera mismatch +#TODO cameraType KITTI360_CAMERA_TYPES = { CameraType.CAM_L0: "image_00", CameraType.CAM_R0: "image_01", @@ -55,6 +55,7 @@ DIR_CALIB = "calibration" #TODO PATH_2D_RAW_ROOT +# PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_RAW PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT PATH_2D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_SMT PATH_3D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_RAW @@ -206,13 +207,12 @@ def convert_kitti360_log_to_arrow( if not log_file_path.parent.exists(): log_file_path.parent.mkdir(parents=True, exist_ok=True) - #TODO location metadata = LogMetadata( dataset="kitti360", log_name=log_name, - location="None", + location=None, timestep_seconds=KITTI360_DT, - map_has_z=False, + map_has_z=True, ) vehicle_parameters = get_kitti360_station_wagon_parameters() @@ -345,11 +345,9 @@ def _write_recording_table( ) -> None: ts_list = _read_timestamps(log_name) - #TODO - print("extracting detections...") - detections_states,detections_velocity,detections_tokens,detections_types = _extract_detections(log_name,len(ts_list)) - print("extracting states...") ego_state_all = _extract_ego_state_all(log_name) + ego_states_xyz = np.array([ego_state[:3] for ego_state in ego_state_all],dtype=np.float64) + detections_states,detections_velocity,detections_tokens,detections_types = _extract_detections(log_name,len(ts_list),ego_states_xyz) with 
pa.OSFile(str(log_file_path), "wb") as sink: with pa.ipc.new_file(sink, recording_schema) as writer: @@ -364,7 +362,6 @@ def _write_recording_table( "detections_type": [detections_types[idx]], "ego_states": [ego_state_all[idx]], "traffic_light_ids": [[]], - #may TODO traffic light types "traffic_light_types": [[]], "scenario_tag": [['unknown']], "route_lane_group_ids": [[]], @@ -391,36 +388,44 @@ def _write_recording_table( batch = pa.record_batch(row_data, schema=recording_schema) writer.write_batch(batch) + del batch + if SORT_BY_TIMESTAMP: recording_table = open_arrow_table(log_file_path) recording_table = recording_table.sort_by([("timestamp", "ascending")]) write_arrow_table(recording_table, log_file_path) -#TODO default timestamps and Synchronization all other sequences +#TODO Synchronization all other sequences) def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]: # unix - ts_file = PATH_2D_RAW_ROOT / log_name / "image_01" / "timestamps.txt" - if ts_file.exists(): - tps: List[TimePoint] = [] - with open(ts_file, "r") as f: - for line in f: - s = line.strip() - if not s: - continue - dt_str, ns_str = s.split('.') - dt_obj = datetime.datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S") - dt_obj = dt_obj.replace(tzinfo=datetime.timezone.utc) - unix_epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc) - - total_seconds = (dt_obj - unix_epoch).total_seconds() - - ns_value = int(ns_str) - us_from_ns = ns_value // 1000 - - total_us = int(total_seconds * 1_000_000) + us_from_ns - - tps.append(TimePoint.from_us(total_us)) - return tps + # default using velodyne timestamps,if not available, use camera timestamps + ts_files = [ + PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "timestamps.txt", + PATH_2D_RAW_ROOT / log_name / "image_00" / "timestamps.txt", + PATH_2D_RAW_ROOT / log_name / "image_01" / "timestamps.txt", + ] + for ts_file in ts_files: + if ts_file.exists(): + tps: List[TimePoint] = [] + with open(ts_file, "r") as f: + for line in f: + s = line.strip() + if not s: + continue + dt_str, ns_str = s.split('.') + dt_obj = datetime.datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S") + dt_obj = dt_obj.replace(tzinfo=datetime.timezone.utc) + unix_epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc) + + total_seconds = (dt_obj - unix_epoch).total_seconds() + + ns_value = int(ns_str) + us_from_ns = ns_value // 1000 + + total_us = int(total_seconds * 1_000_000) + us_from_ns + + tps.append(TimePoint.from_us(total_us)) + return tps return None def _extract_ego_state_all(log_name: str) -> List[List[float]]: @@ -434,8 +439,12 @@ def _extract_ego_state_all(log_name: str) -> List[List[float]]: poses_time = poses[:, 0] - 1 # Adjusting time to start from 0 #TODO + #oxts_path = PATH_POSES_ROOT / log_name / "oxts" / "data" oxts_path = Path("/data/jbwang/d123/data_poses/") / log_name / "oxts" / "data" + pose_idx = 0 + poses_time_len = len(poses_time) + for idx in range(len(list(oxts_path.glob("*.txt")))): oxts_path_file = oxts_path / f"{int(idx):010d}.txt" oxts_data = np.loadtxt(oxts_path_file) @@ -444,7 +453,10 @@ def _extract_ego_state_all(log_name: str) -> List[List[float]]: roll, pitch, yaw = oxts_data[3:6] vehicle_parameters = get_kitti360_station_wagon_parameters() - pos = np.searchsorted(poses_time, idx, side='right') - 1 + while pose_idx + 1 < poses_time_len and poses_time[pose_idx + 1] <= idx: + pose_idx += 1 + pos = pose_idx + # pos = np.searchsorted(poses_time, idx, side='right') - 1 rear_axle_pose = StateSE3( x=poses[pos, 4], @@ -454,7 +466,7 @@ def 
_extract_ego_state_all(log_name: str) -> List[List[float]]: pitch=pitch, yaw=yaw, ) - # NOTE: The height to rear axle is not provided the dataset and is merely approximated. + center = rear_axle_se3_to_center_se3(rear_axle_se3=rear_axle_pose, vehicle_parameters=vehicle_parameters) dynamic_state = DynamicStateSE3( velocity=Vector3D( @@ -483,12 +495,10 @@ def _extract_ego_state_all(log_name: str) -> List[List[float]]: ) return ego_state_all -#TODO -# We may distinguish between image and lidar detections -# besides, now it is based only on start and end frame def _extract_detections( log_name: str, - ts_len: int + ts_len: int, + ego_states_xyz: np.ndarray ) -> Tuple[List[List[float]], List[List[float]], List[str], List[int]]: detections_states: List[List[List[float]]] = [[] for _ in range(ts_len)] @@ -505,15 +515,16 @@ def _extract_detections( dynamic_groups: Dict[int, List[KITTI360Bbox3D]] = defaultdict(list) - lidra_data_all = [] - for index in range(ts_len): - lidar_full_path = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{index:010d}.bin" - if not lidar_full_path.exists(): - logging.warning(f"LiDAR file not found for frame {index}: {lidar_full_path}") - continue - lidar_data = np.fromfile(lidar_full_path, dtype=np.float32) - lidar_data = lidar_data.reshape(-1, 4)[:, :3] # Keep only x, y, z coordinates - lidra_data_all.append(lidar_data) + + # lidra_data_all = [] + # for index in range(ts_len): + # lidar_full_path = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{index:010d}.bin" + # if not lidar_full_path.exists(): + # logging.warning(f"LiDAR file not found for frame {index}: {lidar_full_path}") + # continue + # lidar_data = np.fromfile(lidar_full_path, dtype=np.float32) + # lidar_data = lidar_data.reshape(-1, 4)[:, :3] # Keep only x, y, z coordinates + # lidra_data_all.append(lidar_data) for child in root: label = child.find('label').text @@ -524,11 +535,13 @@ def _extract_detections( #static object if obj.timestamp == -1: - start_frame = obj.start_frame - end_frame = obj.end_frame - for frame in range(start_frame, end_frame + 1): - lidar_data = lidra_data_all[frame] - #TODO check yaw and box visible + # first filter by radius + obj.filter_by_radius(ego_states_xyz,radius=50.0) + # then filter by pointcloud + for frame in obj.valid_radius_frames: + # TODO in the future, now is too slow because cpu in the server is not free + # or using config? 
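# [Editor's note] The visibility check disabled below relies on KITTI360Bbox3D.box_visible_in_point_cloud,
# which tests point-in-oriented-box membership by projecting LiDAR points onto the three edge
# directions of the box. A minimal standalone sketch of that test, assuming the (8,3) vertex layout
# of kitti_360_helper.py (vertices 1, 2 and 5 adjacent to vertex 0) and the converter's 100-point
# visibility threshold; this is an illustration, not the repository's API:
#
#     import numpy as np
#
#     def points_in_obb(points: np.ndarray, vertices: np.ndarray) -> np.ndarray:
#         O, A, B, C = vertices[0], vertices[1], vertices[2], vertices[5]
#         axes = np.stack([A - O, B - O, C - O], axis=1)  # box edge directions as columns, (3, 3)
#         proj = points @ axes                            # scalar projection of every point, (N, 3)
#         lo = np.array([O @ axes[:, k] for k in range(3)])                   # lower bound per axis
#         hi = np.array([v @ axes[:, k] for k, v in enumerate((A, B, C))])    # upper bound per axis
#         return np.all((proj > lo) & (proj < hi), axis=1)  # (N,) boolean mask of inside points
#
#     # a box then counts as visible when enough points fall inside it:
#     # visible = points_in_obb(lidar_xyz, obj.vertices).sum() > 100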
+                # lidar_data = lidra_data_all[frame]
+                # if obj.box_visible_in_point_cloud(lidar_data):
                 detections_states[frame].append(obj.get_state_array())
                 detections_velocity[frame].append([0.0, 0.0, 0.0])
                 detections_tokens[frame].append(str(obj.globalID))
                 detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[obj.label]))
         else:
             ann_id = obj.annotationId
             dynamic_groups[ann_id].append(obj)
@@ -553,8 +566,7 @@ def _extract_detections(
             if dt_frames > 0:
                 dt = dt_frames * KITTI360_DT
                 vel = (positions[i+1] - positions[i-1]) / dt
-                # Transform velocity to the ego frame
-                vel = obj_list[i].Rm.T @ vel
+                vel = KITTI3602NUPLAN_IMU_CALIBRATION[:3,:3] @ obj_list[i].Rm.T @ vel
             else:
                 vel = np.zeros(3)
             velocities.append(vel)
@@ -573,7 +585,6 @@ def _extract_detections(
             detections_tokens[frame].append(str(obj.globalID))
             detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[obj.label]))
 
-
     return detections_states, detections_velocity, detections_tokens, detections_types
 
 #TODO lidar extraction now only velo
diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
index dc1d10cf..d4622867 100644
--- a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
+++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
@@ -41,6 +41,7 @@ def __init__(self):
         # the window that contains the bbox
         self.start_frame = -1
         self.end_frame = -1
+        self.valid_radius_frames = []
 
         # timestamp of the bbox (-1 if statis)
         self.timestamp = -1
@@ -70,8 +71,8 @@ def parseBbox(self, child):
         self.semanticId = kittiId2label[semanticIdKITTI].id
         self.instanceId = int(child.find('instanceId').text)
         self.name = kittiId2label[semanticIdKITTI].name
-        
-        self.start_frame = int(child.find('start_frame').text) 
+
+        self.start_frame = int(child.find('start_frame').text)
         self.end_frame = int(child.find('end_frame').text)
 
         self.timestamp = int(child.find('timestamp').text)
@@ -126,6 +127,15 @@ def get_state_array(self):
 
         return bounding_box_se3.array
 
+    def filter_by_radius(self,ego_state_xyz,radius=50.0):
+        # first stage of detection, used to filter out detections by radius
+
+        for index in range(len(ego_state_xyz)):
+            ego_state = ego_state_xyz[index]
+            distance = np.linalg.norm(ego_state[:3] - self.T)
+            if distance <= radius:
+                self.valid_radius_frames.append(index)
+
     def box_visible_in_point_cloud(self, points):
         # points: (N,3) , box: (8,3)
         box = self.vertices
diff --git a/docs/datasets/kitti-360.rst b/docs/datasets/kitti-360.rst
index 76100d27..5846e53b 100644
--- a/docs/datasets/kitti-360.rst
+++ b/docs/datasets/kitti-360.rst
@@ -7,12 +7,12 @@ KiTTI-360
       :alt: Dataset sample image
       :width: 290px
 
-   | **Paper:** `Name of Paper `_
-   | **Download:** `Documentation `_
-   | **Code:** [Code]
-   | **Documentation:** [License type]
+   | **Paper:** `KITTI-360: A Novel Dataset and Benchmarks for Urban Scene Understanding in 2D and 3D `_
+   | **Download:** `www.cvlibs.net/datasets/kitti-360 `_
+   | **Code:** `www.github.com/autonomousvision/kitti360Scripts `_
+   | **Documentation:** `kitti-360 Document`_
    | **License:** [License type]
-   | **Duration:** [Duration here]
+   | **Duration:** 320k images
    | **Supported Versions:** [Yes/No/Conditions]
    | **Redistribution:** [Yes/No/Conditions]
 
diff --git a/jbwang_test2.py b/jbwang_test2.py
index aa685428..93d86a11 100644
--- a/jbwang_test2.py
+++ b/jbwang_test2.py
@@ -97,6 +97,7 @@
 from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D
 
 
+#TODO train and train_full
 bbox_3d_path = Path("/nas/datasets/KITTI-360/data_3d_bboxes/train/2013_05_28_drive_0000_sync.xml")
 
 tree = ET.parse(bbox_3d_path)
 root = tree.getroot()
@@ -110,12 +111,34 @@
     "bicycle": DetectionType.BICYCLE,
     "pedestrian": DetectionType.PEDESTRIAN,
 }
-
+# x,y,z = 881.2268115,3247.493293,115.239219
+# x,y,z = 867.715474,3229.630439,115.189221  # ego vehicle
+# x,y,z = 873.533508, 3227.16235, 115.185341  # the pedestrian we are looking for
+x,y,z = 874.233508, 3231.56235, 115.185341  # the car we are looking for
+CENTER_REF = np.array([x, y, z], dtype=np.float64)
+objs_name = []
 for child in root:
     label = child.find('label').text
     if child.find('transform') is None or label not in KIITI360_DETECTION_NAME_DICT.keys():
         continue
     obj = KITTI360Bbox3D()
     obj.parseBbox(child)
+    # obj.parseVertices(child)
+    name = child.find('label').text
+    # if obj.start_frame < 10030 and obj.end_frame > 10030:
+    center = np.array(obj.T, dtype=np.float64)
+    dist = np.linalg.norm(center - CENTER_REF)
+    if dist < 7:
+        print(f"Object ID: {obj.name}, Start Frame: {obj.start_frame}, End Frame: {obj.end_frame},self.annotationId: {obj.annotationId},{obj.timestamp},{obj.T}")
+        objs_name.append(obj.name)
+print(len(objs_name))
+print(set(objs_name))
     # print(obj.Rm)
-    # print(Sigma)
\ No newline at end of file
+    # print(Sigma)
+names = []
+for child in root:
+    label = child.find('label').text
+    if child.find('transform') is None:
+        continue
+    names.append(label)
+print(set(names))
\ No newline at end of file
diff --git a/notebooks/dataset/jbwang_test.py b/notebooks/dataset/jbwang_test.py
index c2cabfbe..c37d8d40 100644
--- a/notebooks/dataset/jbwang_test.py
+++ b/notebooks/dataset/jbwang_test.py
@@ -2,7 +2,9 @@
 # s3_uri = "/data/jbwang/d123/data/nuplan_private_test/2021.09.22.13.20.34_veh-28_01446_01583.arrow"
 # s3_uri = "/data/jbwang/d123/data/carla/_Rep0_routes_validation1_route0_07_23_14_33_15.arrow"
 # s3_uri = "/data/jbwang/d123/data/nuplan_mini_val/2021.06.07.12.54.00_veh-35_01843_02314.arrow"
-s3_uri = "/data/jbwang/d123/data2/kitti360_c2e_train/2013_05_28_drive_0000_sync_c2e.arrow"
+# s3_uri = "/data/jbwang/d123/data2/kitti360_c2e_train/2013_05_28_drive_0000_sync_c2e.arrow"
+s3_uri = "/data/jbwang/d123/data2/kitti360_detection_all_test/2013_05_28_drive_0000_sync.arrow"
+
 
 import pyarrow as pa
 import pyarrow.fs as fs
@@ -35,10 +37,14 @@
     if col == "lidar":
         continue
     print(f"Column : {col}, Type: {table.schema.field(col).type}")
-    tokens = table[col]  # or table.column("token")
+    tokens = table["detections_velocity"]  # or table.column("token")
+    # tokens = table["detections_type"]
     # print(tokens)
-    print(len(tokens))
-    # print(tokens.slice(0, 100).to_pylist())
+    # print(len(tokens))
+    result = tokens.slice(1470, 40).to_pylist()
+    # for item in result:
+    #     print(len(item))
+print(result)
 # print(table["traffic_light_ids"])
 timer.log("3. Table created")
 # Save locally
diff --git a/notebooks/gym/jbwang_test.py b/notebooks/gym/jbwang_test.py
new file mode 100644
index 00000000..663e2899
--- /dev/null
+++ b/notebooks/gym/jbwang_test.py
@@ -0,0 +1,180 @@
+from d123.dataset.scene.scene_builder import ArrowSceneBuilder
+from d123.dataset.scene.scene_filter import SceneFilter
+
+from d123.common.multithreading.worker_sequential import Sequential
+# from d123.common.multithreading.worker_ray import RayDistributed
+
+import os, psutil
+
+from pathlib import Path
+from typing import Optional, Tuple
+
+import matplotlib.animation as animation
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+
+from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2
+from d123.common.geometry.base import Point2D, StateSE2
+from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2
+from d123.common.visualization.color.default import EGO_VEHICLE_CONFIG
+from d123.common.visualization.matplotlib.observation import (
+    add_bounding_box_to_ax,
+    add_box_detections_to_ax,
+    add_default_map_on_ax,
+    add_traffic_lights_to_ax,
+    add_ego_vehicle_to_ax,
+)
+from d123.dataset.arrow.conversion import TrafficLightDetectionWrapper
+from d123.dataset.maps.abstract_map import AbstractMap
+from d123.common.datatypes.detection.detection import BoxDetectionWrapper
+from d123.dataset.scene.abstract_scene import AbstractScene
+import io
+from PIL import Image
+
+
+
+def _plot_scene_on_ax(
+    ax: plt.Axes,
+    map_api: AbstractMap,
+    ego_state: EgoStateSE2,
+    initial_ego_state: Optional[EgoStateSE2],
+    box_detections: BoxDetectionWrapper,
+    traffic_light_detections: TrafficLightDetectionWrapper,
+    radius: float = 120,
+) -> plt.Axes:
+
+    if initial_ego_state is not None:
+        point_2d = initial_ego_state.center.point_2d
+    else:
+        point_2d = ego_state.center.point_2d
+    add_default_map_on_ax(ax, map_api, point_2d, radius=radius)
+    add_traffic_lights_to_ax(ax, traffic_light_detections, map_api)
+
+    add_box_detections_to_ax(ax, box_detections)
+    add_ego_vehicle_to_ax(ax, ego_state)
+
+    ax.set_xlim(point_2d.x - radius, point_2d.x + radius)
+    ax.set_ylim(point_2d.y - radius, point_2d.y + radius)
+
+    ax.set_aspect("equal", adjustable="box")
+    return ax
+
+
+def plot_scene_to_image(
+    map_api: AbstractMap,
+    ego_state: EgoStateSE2,
+    initial_ego_state: Optional[EgoStateSE2],
+    box_detections: BoxDetectionWrapper,
+    traffic_light_detections: TrafficLightDetectionWrapper,
+    radius: float = 120,
+    figsize: Tuple[int, int] = (8, 8),
+) -> Image:
+
+    fig, ax = plt.subplots(figsize=figsize)
+    _plot_scene_on_ax(ax, map_api, ego_state, initial_ego_state, box_detections, traffic_light_detections, radius)
+    ax.set_aspect("equal", adjustable="box")
+    plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05)
+    # plt.tight_layout()
+
+    buf = io.BytesIO()
+    fig.savefig(buf, format="png", bbox_inches="tight")
+    plt.close(fig)
+    buf.seek(0)
+    img = Image.open(buf)
+    return img
+
+
+def print_memory_usage():
+    process = psutil.Process(os.getpid())
+    memory_info = process.memory_info()
+    print(f"Memory usage: {memory_info.rss / 1024 ** 2:.2f} MB")
+
+
+split = "kitti360_detection_all_and_vel"
+scene_tokens = None
+log_names = None
+
+scene_filter = SceneFilter(
+    split_names=[split], log_names=log_names, scene_tokens=scene_tokens, duration_s=15.1, history_s=1.0
+)
+scene_builder = ArrowSceneBuilder("/data/jbwang/d123/data2/")
+worker = Sequential()
+# worker = RayDistributed()
+scenes = scene_builder.get_scenes(scene_filter, worker)
+
+print(len(scenes))
+
+for scene in scenes[:10]:
+    print(scene.log_name, scene.token)
+
+from d123.dataset.arrow.conversion import DetectionType
+from d123.simulation.gym.gym_env import GymEnvironment
+from d123.simulation.observation.agents_observation import _filter_agents_by_type
+
+import time
+
+images = []
+agent_rollouts = []
+plot: bool = True
+action = [1.0, -0.0]  # Placeholder action, replace with actual action logic
+env = GymEnvironment(scenes)
+
+start = time.time()
+
+map_api, ego_state, detection_observation, current_scene = env.reset(scenes[1460])
+initial_ego_state = ego_state
+cars, _, _ = _filter_agents_by_type(detection_observation.box_detections, detection_types=[DetectionType.VEHICLE])
+agent_rollouts.append(BoxDetectionWrapper(cars))
+if plot:
+    images.append(
+        plot_scene_to_image(
+            map_api,
+            ego_state,
+            initial_ego_state,
+            detection_observation.box_detections,
+            detection_observation.traffic_light_detections,
+        )
+    )
+
+
+for i in range(160):
+    ego_state, detection_observation, end = env.step(action)
+    cars, _, _ = _filter_agents_by_type(detection_observation.box_detections, detection_types=[DetectionType.VEHICLE])
+    agent_rollouts.append(BoxDetectionWrapper(cars))
+    if plot:
+        images.append(
+            plot_scene_to_image(
+                map_api,
+                ego_state,
+                initial_ego_state,
+                detection_observation.box_detections,
+                detection_observation.traffic_light_detections,
+            )
+        )
+    if end:
+        print("End of scene reached.")
+        break
+
+time_s = time.time() - start
+print(time_s)
+print(151/ time_s)
+
+import numpy as np
+
+
+def create_gif(images, output_path, duration=100):
+    """
+    Create a GIF from a list of PIL images.
+
+    Args:
+        images (list): List of PIL.Image objects.
+        output_path (str): Path to save the GIF.
+        duration (int): Duration between frames in milliseconds.
+    """
+    if images:
+        print(len(images))
+        images_p = [img.convert("P", palette=Image.ADAPTIVE) for img in images]
+        images_p[0].save(output_path, save_all=True, append_images=images_p[1:], duration=duration, loop=0)
+
+
+create_gif(images, f"/data/jbwang/d123/data2/{split}_{current_scene.token}.gif", duration=20)
\ No newline at end of file

From d1945b0c470c653f893b65026be80ef66336767d Mon Sep 17 00:00:00 2001
From: jbwang <1159270049@qq.com>
Date: Thu, 21 Aug 2025 19:01:27 +0800
Subject: [PATCH 10/32] finish lidar vis and fix some bugs

---
 .gitignore                                         |   2 +
 d123/common/datatypes/sensor/camera.py             |  48 +++++++
 .../vehicle_state/vehicle_parameters.py            |   9 +-
 d123/common/visualization/viser/server.py          |  10 +-
 d123/dataset/arrow/conversion.py                   |   6 +-
 .../kitti_360/kitti_360_data_converter.py          | 124 ++++++++++--------
 .../kitti_360/kitti_360_helper.py                  |  24 ++++
 .../dataset_specific/kitti_360/labels.py           |  40 ++++++
 .../dataset_specific/kitti_360/load_sensor.py      |  27 ++++
 jbwang_test2.py                                    |  10 +-
 10 files changed, 232 insertions(+), 68 deletions(-)
 create mode 100644 d123/dataset/dataset_specific/kitti_360/load_sensor.py

diff --git a/.gitignore b/.gitignore
index 426cc468..971a12d1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,5 @@
 docs/_build/
 docs/build/
 _build/
 .doctrees/
+
+jbwang_*
diff --git a/d123/common/datatypes/sensor/camera.py b/d123/common/datatypes/sensor/camera.py
index 56fe6f07..c2a33d9d 100644
--- a/d123/common/datatypes/sensor/camera.py
+++ b/d123/common/datatypes/sensor/camera.py
@@ -104,6 +104,54 @@ def camera_metadata_dict_from_json(json_dict: Dict[str, Dict[str, Any]]) -> Dict
         for camera_type, metadata in camera_metadata_dict.items()
     }
 
+#TODO Code Refactoring
+@dataclass
+class FisheyeMEICameraMetadata:
+    camera_type: CameraType
+    width: int
+    height: int
+    mirror_parameters: int
+    distortion: npt.NDArray[np.float64]  # k1,k2,p1,p2
+    projection_parameters: npt.NDArray[np.float64]  #gamma1,gamma2,u0,v0
+
+    def to_dict(self) -> Dict[str, Any]:
+        # TODO: remove None types. Only a placeholder for now.
+        return {
+            "camera_type": int(self.camera_type),
+            "width": self.width,
+            "height": self.height,
+            "mirror_parameters": self.mirror_parameters,
+            "distortion": self.distortion.tolist() if self.distortion is not None else None,
+            "projection_parameters": self.projection_parameters.tolist() if self.projection_parameters is not None else None,
+        }
+
+    def cam2image(self, points_3d: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
+        ''' camera coordinate to image plane '''
+        norm = np.linalg.norm(points_3d, axis=1)
+
+        x = points_3d[:,0] / norm
+        y = points_3d[:,1] / norm
+        z = points_3d[:,2] / norm
+
+        x /= z+self.mirror_parameters
+        y /= z+self.mirror_parameters
+
+        k1 = self.distortion[0]
+        k2 = self.distortion[1]
+        gamma1 = self.projection_parameters[0]
+        gamma2 = self.projection_parameters[1]
+        u0 = self.projection_parameters[2]
+        v0 = self.projection_parameters[3]
+
+        ro2 = x*x + y*y
+        x *= 1 + k1*ro2 + k2*ro2*ro2
+        y *= 1 + k1*ro2 + k2*ro2*ro2
+
+        x = gamma1*x + u0
+        y = gamma2*y + v0
+
+        return x, y, norm * points_3d[:,2] / np.abs(points_3d[:,2])
+
 
 @dataclass
 class Camera:
diff --git a/d123/common/datatypes/vehicle_state/vehicle_parameters.py b/d123/common/datatypes/vehicle_state/vehicle_parameters.py
index 5adda6b7..21a91668 100644
--- a/d123/common/datatypes/vehicle_state/vehicle_parameters.py
+++ b/d123/common/datatypes/vehicle_state/vehicle_parameters.py
@@ -60,15 +60,16 @@ def get_wopd_chrysler_pacifica_parameters() -> VehicleParameters:
     )
 
 def get_kitti360_station_wagon_parameters() -> VehicleParameters:
-    #TODO except wheel_base, all need to be checked
+    #NOTE: Parameters are estimated from the vehicle model.
+    #https://www.cvlibs.net/datasets/kitti-360/documentation.php
     return VehicleParameters(
         vehicle_name="kitti360_station_wagon",
-        width=2.297,
-        length=5.176,
+        width=1.800,
+        length=3.500,
         height=1.400,
         wheel_base=2.710,
         rear_axle_to_center_vertical=0.45,
-        rear_axle_to_center_longitudinal=1.461,
+        rear_axle_to_center_longitudinal=2.71/2 + 0.05,
     )
 
 def get_av2_ford_fusion_hybrid_parameters() -> VehicleParameters:
diff --git a/d123/common/visualization/viser/server.py b/d123/common/visualization/viser/server.py
index cdca86c4..7511cdbc 100644
--- a/d123/common/visualization/viser/server.py
+++ b/d123/common/visualization/viser/server.py
@@ -33,23 +33,23 @@
 LINE_WIDTH: float = 4.0
 
 # Bounding box config:
-BOUNDING_BOX_TYPE: Literal["mesh", "lines"] = "mesh"
+BOUNDING_BOX_TYPE: Literal["mesh", "lines"] = "lines"
 
 # Map config:
-MAP_AVAILABLE: bool = True
+MAP_AVAILABLE: bool = False
 
 # Cameras config:
-VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [CameraType.CAM_F0, CameraType.CAM_L0, CameraType.CAM_R0]
+# VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [CameraType.CAM_F0, CameraType.CAM_L0, CameraType.CAM_R0]
 # VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = all_camera_types
-# VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [CameraType.CAM_STEREO_L, CameraType.CAM_STEREO_R]
+VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [CameraType.CAM_STEREO_L, CameraType.CAM_STEREO_R]
 # VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = []
 VISUALIZE_CAMERA_GUI: List[CameraType] = [CameraType.CAM_F0]
 
 CAMERA_SCALE: float = 1.0
 
 # Lidar config:
-LIDAR_AVAILABLE: bool = False
+LIDAR_AVAILABLE: bool = True
 
 LIDAR_TYPES: List[LiDARType] = [
     LiDARType.LIDAR_MERGED,
diff --git a/d123/dataset/arrow/conversion.py b/d123/dataset/arrow/conversion.py
index 2429f56f..d9e5e664 100644
--- a/d123/dataset/arrow/conversion.py
+++ b/d123/dataset/arrow/conversion.py
@@ -34,7 +34,7 @@
     "nuplan": Path(os.environ["NUPLAN_DATA_ROOT"]) / "nuplan-v1.1" / "sensor_blobs",
     "carla": Path(os.environ["CARLA_DATA_ROOT"]) / "sensor_blobs",
     # "av2-sensor": Path(os.environ["AV2_SENSOR_DATA_ROOT"]) / "sensor",
-    # "kitti360": Path(os.environ["KITTI360_DATA_ROOT"]),
+    "kitti360": Path(os.environ["KITTI360_DATA_ROOT"]),
 }
 
 
@@ -155,6 +155,10 @@ def get_lidar_from_arrow_table(
         lidar = load_carla_lidar_from_path(full_lidar_path, lidar_metadata)
     elif log_metadata.dataset == "wopd":
         raise NotImplementedError
+    elif log_metadata.dataset == "kitti360":
+        from d123.dataset.dataset_specific.kitti_360.load_sensor import load_kitti360_lidar_from_path
+
+        lidar = load_kitti360_lidar_from_path(full_lidar_path, lidar_metadata)
     else:
         raise NotImplementedError(f"Loading LiDAR data for dataset {log_metadata.dataset} is not implemented.")
 
diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
index 81057042..77f3fff0 100644
--- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
+++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
@@ -1,6 +1,8 @@
 import gc
 import json
 import os
+import re
+import yaml
 from dataclasses import asdict
 from functools import partial
 from pathlib import Path
@@ -18,7 +20,7 @@
 from nuplan.planning.utils.multithreading.worker_utils import WorkerPool, worker_map
 
 from d123.common.datatypes.detection.detection_types import DetectionType
-from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json
+from d123.common.datatypes.sensor.camera import CameraMetadata, FisheyeMEICameraMetadata, CameraType, camera_metadata_dict_to_json
 from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json
 from d123.common.datatypes.sensor.lidar_index import Kitti360LidarIndex
 from d123.common.datatypes.time.time_point import TimePoint
@@ -30,18 +32,18 @@
 from d123.dataset.arrow.helper import open_arrow_table, write_arrow_table
 from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig, RawDataConverter
 from d123.dataset.logs.log_metadata import LogMetadata
-from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D
+from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION
+from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT,kittiId2label
 
 KITTI360_DT: Final[float] = 0.1
 SORT_BY_TIMESTAMP: Final[bool] = True
 
 KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"])
 
-#TODO cameraType
 KITTI360_CAMERA_TYPES = {
-    CameraType.CAM_L0: "image_00",
-    CameraType.CAM_R0: "image_01",
+    CameraType.CAM_STEREO_L: "image_00",
+    CameraType.CAM_STEREO_R: "image_01",
+    # TODO need code refactoring to support fisheye cameras
     # CameraType.CAM_L1: "image_02",
     # CameraType.CAM_R1: "image_03",
 }
@@ -71,31 +73,6 @@
     DIR_3D_BBOX: PATH_3D_BBOX_ROOT / "train",
 }
 
-#TODO now only parts of labels are used
-KIITI360_DETECTION_NAME_DICT = {
-    "truck": DetectionType.VEHICLE,
-    "bus": DetectionType.VEHICLE,
-    "car": DetectionType.VEHICLE,
-    "motorcycle": DetectionType.BICYCLE,
-    "bicycle": DetectionType.BICYCLE,
-    "pedestrian": DetectionType.PEDESTRIAN,
-}
-
-KITTI3602NUPLAN_IMU_CALIBRATION = np.array([
-    [1, 0, 0, 0],
-    [0, -1, 0, 0],
-    [0, 0, -1, 0],
-    [0, 0, 0, 1],
-    ], dtype=np.float64)
-
-KITTI3602NUPLAN_LIDAR_CALIBRATION = np.array([
-    [0, -1, 0, 0],
-    [1, 0, 0, 0],
-    [0, 0, 1, 0],
-    [0, 0, 0, 1],
-    ], dtype=np.float64)
-
-
 def create_token(input_data: str) -> str:
     # TODO: Refactor this function.
     # TODO: Add a general function to create tokens from arbitrary data.
@@ -266,12 +243,12 @@
     return []
 
-def get_kitti360_camera_metadata() -> Dict[CameraType, CameraMetadata]:
+def get_kitti360_camera_metadata() -> Dict[CameraType, Union[CameraMetadata, FisheyeMEICameraMetadata]]:
 
     persp = PATH_CALIB_ROOT / "perspective.txt"
     assert persp.exists()
 
-    result = {"image_00": {}, "image_01": {}}
+    persp_result = {"image_00": {}, "image_01": {}}
 
     with open(persp, "r") as f:
         lines = [ln.strip() for ln in f if ln.strip()]
@@ -279,21 +256,39 @@
         key, value = ln.split(" ", 1)
         cam_id = key.split("_")[-1][:2]
         if key.startswith("P_rect_"):
-            result[f"image_{cam_id}"]["intrinsic"] = _read_projection_matrix(ln)
+            persp_result[f"image_{cam_id}"]["intrinsic"] = _read_projection_matrix(ln)
         elif key.startswith("S_rect_"):
-            result[f"image_{cam_id}"]["wh"] = [int(round(float(x))) for x in value.split()]
+            persp_result[f"image_{cam_id}"]["wh"] = [int(round(float(x))) for x in value.split()]
         elif key.startswith("D_"):
-            result[f"image_{cam_id}"]["distortion"] = [float(x) for x in value.split()]
+            persp_result[f"image_{cam_id}"]["distortion"] = [float(x) for x in value.split()]
+
+    fisheye_camera02_path = PATH_CALIB_ROOT / "image_02.yaml"
+    fisheye_camera03_path = PATH_CALIB_ROOT / "image_03.yaml"
+    assert fisheye_camera02_path.exists() and fisheye_camera03_path.exists()
+    fisheye02 = _readYAMLFile(fisheye_camera02_path)
+    fisheye03 = _readYAMLFile(fisheye_camera03_path)
+    fisheye_result = {"image_02": fisheye02, "image_03": fisheye03}
 
-    log_cam_infos: Dict[str, CameraMetadata] = {}
+    log_cam_infos: Dict[str, Union[CameraMetadata, FisheyeMEICameraMetadata]] = {}
     for cam_type, cam_name in KITTI360_CAMERA_TYPES.items():
-        log_cam_infos[cam_type] = CameraMetadata(
-            camera_type=cam_type,
-            width=result[cam_name]["wh"][0],
-            height=result[cam_name]["wh"][1],
-            intrinsic=np.array(result[cam_name]["intrinsic"]),
-            distortion=np.array(result[cam_name]["distortion"]),
-        )
+        if cam_name in ["image_00", "image_01"]:
+            log_cam_infos[cam_type] = CameraMetadata(
+                camera_type=cam_type,
+                width=persp_result[cam_name]["wh"][0],
+                height=persp_result[cam_name]["wh"][1],
+                intrinsic=np.array(persp_result[cam_name]["intrinsic"]),
+                distortion=np.array(persp_result[cam_name]["distortion"]),
+            )
+        elif cam_name in ["image_02","image_03"]:
+            log_cam_infos[cam_type] = FisheyeMEICameraMetadata(
+                camera_type=cam_type,
+                width=fisheye_result[cam_name]["image_width"],
+                height=fisheye_result[cam_name]["image_height"],
+                mirror_parameters=fisheye_result[cam_name]["mirror_parameters"],
+                distortion=np.array(fisheye_result[cam_name]["distortion_parameters"]),
+                projection_parameters= np.array(fisheye_result[cam_name]["projection_parameters"]),
+            )
+
     return log_cam_infos
 
 def _read_projection_matrix(p_line: str) -> np.ndarray:
@@ -305,6 +300,19 @@
     K = P[:, :3]
     return K
 
+def _readYAMLFile(fileName):
+    '''make OpenCV YAML file compatible with python'''
+    ret = {}
+    skip_lines=1    # Skip the first line which says "%YAML:1.0". Or replace it with "%YAML 1.0"
+    with open(fileName) as fin:
+        for i in range(skip_lines):
+            fin.readline()
+        yamlFileOut = fin.read()
+        myRe = re.compile(r":([^ ])")   # Add space after ":", if it doesn't exist. Python yaml requirement
+        yamlFileOut = myRe.sub(r': \1', yamlFileOut)
+        ret = yaml.safe_load(yamlFileOut)
+    return ret
+
 def get_kitti360_lidar_metadata() -> Dict[LiDARType, LiDARMetadata]:
     metadata: Dict[LiDARType, LiDARMetadata] = {}
 
@@ -326,9 +334,7 @@ def get_kitti360_lidar_metadata() -> Dict[LiDARType, LiDARMetadata]:
     cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose
 
     cam2velo = np.concatenate((np.loadtxt(cam2velo_txt).reshape(3,4), lastrow))
-    cam2velo = KITTI3602NUPLAN_LIDAR_CALIBRATION @ cam2velo
-
-    extrinsic = cam2velo @ np.linalg.inv(cam2pose)
+    extrinsic = cam2pose @ np.linalg.inv(cam2velo)
 
     metadata[LiDARType.LIDAR_TOP] = LiDARMetadata(
         lidar_type=LiDARType.LIDAR_TOP,
@@ -449,14 +455,14 @@ def _extract_ego_state_all(log_name: str) -> List[List[float]]:
         oxts_path_file = oxts_path / f"{int(idx):010d}.txt"
         oxts_data = np.loadtxt(oxts_path_file)
 
-        #TODO check roll, pitch, yaw
+        #TODO check roll, pitch, yaw again
         roll, pitch, yaw = oxts_data[3:6]
         vehicle_parameters = get_kitti360_station_wagon_parameters()
 
-        while pose_idx + 1 < poses_time_len and poses_time[pose_idx + 1] <= idx:
+        while pose_idx + 1 < poses_time_len and poses_time[pose_idx + 1] < idx:
             pose_idx += 1
         pos = pose_idx
-        # pos = np.searchsorted(poses_time, idx, side='right') - 1
+        # pos = np.searchsorted(pwwwoses_time, idx, side='right') - 1
 
         rear_axle_pose = StateSE3(
             x=poses[pos, 4],
@@ -527,8 +533,9 @@ def _extract_detections(
 
     for child in root:
-        label = child.find('label').text
-        if child.find('transform') is None or label not in KIITI360_DETECTION_NAME_DICT.keys():
+        semanticIdKITTI = int(child.find('semanticId').text)
+        name = kittiId2label[semanticIdKITTI].name
+        if child.find('transform') is None or name not in KIITI360_DETECTION_NAME_DICT.keys():
             continue
         obj = KITTI360Bbox3D()
         obj.parseBbox(child)
@@ -546,7 +553,7 @@ def _extract_detections(
                 detections_states[frame].append(obj.get_state_array())
                 detections_velocity[frame].append([0.0, 0.0, 0.0])
                 detections_tokens[frame].append(str(obj.globalID))
-                detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[obj.label]))
+                detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[obj.name]))
         else:
             ann_id = obj.annotationId
             dynamic_groups[ann_id].append(obj)
@@ -583,7 +590,7 @@ def _extract_detections(
             detections_states[frame].append(obj.get_state_array())
             detections_velocity[frame].append(vel)
             detections_tokens[frame].append(str(obj.globalID))
-            detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[obj.label]))
+            detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[obj.name]))
 
     return detections_states, detections_velocity, detections_tokens, detections_types
 
@@ -593,7 +600,7 @@
     lidar_full_path = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{idx:010d}.bin"
     if lidar_full_path.exists():
         if data_converter_config.lidar_store_option == "path":
-            lidar = f"/data_3d_raw/{log_name}/velodyne_points/data/{idx:010d}.bin"
+            lidar = f"data_3d_raw/{log_name}/velodyne_points/data/{idx:010d}.bin"
         elif data_converter_config.lidar_store_option == "binary":
             raise NotImplementedError("Binary lidar storage is not implemented.")
         else:
@@ -606,9 +613,12 @@ def _extract_cameras(
     camera_dict: Dict[str, Union[str, bytes]] = {}
 
     for camera_type, cam_dir_name in KITTI360_CAMERA_TYPES.items():
-        img_path_png = PATH_2D_RAW_ROOT / log_name / cam_dir_name / "data_rect" / f"{idx:010d}.png"
+        if cam_dir_name in ["image_00", "image_01"]:
+            img_path_png = PATH_2D_RAW_ROOT / log_name / cam_dir_name / "data_rect" / f"{idx:010d}.png"
+        elif cam_dir_name in ["image_02", "image_03"]:
+            img_path_png = PATH_2D_RAW_ROOT / log_name / cam_dir_name / "data_rgb" / f"{idx:010d}.png"
+
         if img_path_png.exists():
-
             cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt"
             if not cam2pose_txt.exists():
                 raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}")
diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
index d4622867..5c69264f 100644
--- a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
+++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
@@ -7,11 +7,33 @@
 from d123.common.geometry.base import StateSE3
 from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3
+from d123.common.geometry.transform.se3 import get_rotation_matrix
 from d123.dataset.dataset_specific.kitti_360.labels import kittiId2label
 
 DEFAULT_ROLL = 0.0
 DEFAULT_PITCH = 0.0
 
+addtional_calibration = get_rotation_matrix(
+    StateSE3(
+        x=0.0,
+        y=0.0,
+        z=0.0,
+        roll=np.deg2rad(1.0),
+        pitch=np.deg2rad(1.0),
+        yaw=np.deg2rad(0.0),
+    )
+    )
+
+kitti3602nuplan_imu_calibration_ideal = np.array([
+    [1, 0, 0, 0],
+    [0, -1, 0, 0],
+    [0, 0, -1, 0],
+    [0, 0, 0, 1],
+    ], dtype=np.float64)
+
+KITTI3602NUPLAN_IMU_CALIBRATION = np.eye(4, dtype=np.float64)
+KITTI3602NUPLAN_IMU_CALIBRATION[:3, :3] = addtional_calibration @ kitti3602nuplan_imu_calibration_ideal[:3, :3]
+
 MAX_N = 1000
 def local2global(semanticId, instanceId):
     globalId = semanticId*MAX_N + instanceId
@@ -99,6 +121,8 @@ def parseVertices(self, child):
 
     def parse_scale_rotation(self):
         Rm, Sm = polar(self.R)
+        if np.linalg.det(Rm) < 0:
+            Rm[0] = -Rm[0]
         scale = np.diag(Sm)
         yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', degrees=False)
 
diff --git a/d123/dataset/dataset_specific/kitti_360/labels.py b/d123/dataset/dataset_specific/kitti_360/labels.py
index 38f8a91c..de24f152 100644
--- a/d123/dataset/dataset_specific/kitti_360/labels.py
+++ b/d123/dataset/dataset_specific/kitti_360/labels.py
@@ -166,3 +166,43 @@ def assureSingleInstanceName( name ):
         return None
     # all good then
     return name
+
+from d123.common.datatypes.detection.detection_types import DetectionType
+
+KIITI360_DETECTION_NAME_DICT = {
+    "traffic light": DetectionType.SIGN,
+    "traffic sign": DetectionType.SIGN,
+    "person": DetectionType.PEDESTRIAN,
+    "rider": DetectionType.BICYCLE,
+    "car": DetectionType.VEHICLE,
+    "truck": DetectionType.VEHICLE,
+    "bus": DetectionType.VEHICLE,
+    "caravan": DetectionType.VEHICLE,
+    "trailer": DetectionType.VEHICLE,
+    "train": DetectionType.VEHICLE,
+    "motorcycle": DetectionType.BICYCLE,
+    "bicycle": DetectionType.BICYCLE,
+    "stop": DetectionType.SIGN,
+}
+
+# KIITI360_DETECTION_NAME_DICT = {
+#     "pole": DetectionType.GENERIC_OBJECT,
+#     "traffic light": DetectionType.SIGN,
+#     "traffic sign": DetectionType.SIGN,
+#     "person": DetectionType.PEDESTRIAN,
+#     "rider": DetectionType.BICYCLE,
+#     "car": DetectionType.VEHICLE,
+#     "truck": DetectionType.VEHICLE,
+#     "bus": DetectionType.VEHICLE,
+#     "caravan": DetectionType.VEHICLE,
+#     "trailer": DetectionType.VEHICLE,
+#     "train": DetectionType.VEHICLE,
+#     "motorcycle": DetectionType.BICYCLE,
+#     "bicycle": DetectionType.BICYCLE,
+#     "stop": DetectionType.SIGN,
+#     "smallpole": DetectionType.GENERIC_OBJECT,
+#     "lamp": DetectionType.GENERIC_OBJECT,
+#     "trash bin": DetectionType.GENERIC_OBJECT,
+#     "vending machine": DetectionType.GENERIC_OBJECT,
+#     "box": DetectionType.GENERIC_OBJECT,
+# }
diff --git a/d123/dataset/dataset_specific/kitti_360/load_sensor.py b/d123/dataset/dataset_specific/kitti_360/load_sensor.py
new file mode 100644
index 00000000..2a23401f
--- /dev/null
+++ b/d123/dataset/dataset_specific/kitti_360/load_sensor.py
@@ -0,0 +1,27 @@
+from pathlib import Path
+
+import numpy as np
+
+from d123.common.datatypes.sensor.lidar import LiDAR, LiDARMetadata
+
+
+def load_kitti360_lidar_from_path(filepath: Path, lidar_metadata: LiDARMetadata) -> LiDAR:
+    assert filepath.exists(), f"LiDAR file not found: {filepath}"
+    pcd = np.fromfile(filepath, dtype=np.float32)
+    pcd = np.reshape(pcd,[-1,4])  # [N,4]
+
+    xyz = pcd[:, :3]
+    intensity = pcd[:, 3]
+
+    ones = np.ones((xyz.shape[0], 1), dtype=pcd.dtype)
+    points_h = np.concatenate([xyz, ones], axis=1)  #[N,4]
+
+    transformed_h = lidar_metadata.extrinsic @ points_h.T  #[4,N]
+
+    transformed_xyz = transformed_h[:3, :]  # (3,N)
+
+    intensity_row = intensity[np.newaxis, :]  # (1,N)
+
+    point_cloud_4xN = np.vstack([transformed_xyz, intensity_row]).astype(np.float32)  # (4,N)
+
+    return LiDAR(metadata=lidar_metadata, point_cloud=point_cloud_4xN)
diff --git a/jbwang_test2.py b/jbwang_test2.py
index 93d86a11..7128a636 100644
--- a/jbwang_test2.py
+++ b/jbwang_test2.py
@@ -117,14 +117,21 @@ x,y,z = 874.233508, 3231.56235, 115.185341  # the car we are looking for
 CENTER_REF = np.array([x, y, z], dtype=np.float64)
 objs_name = []
+lable_name = []
 for child in root:
     label = child.find('label').text
-    if child.find('transform') is None or label not in KIITI360_DETECTION_NAME_DICT.keys():
+    # if child.find('transform') is None or label not in KIITI360_DETECTION_NAME_DICT.keys():
+    #     continue
+
+    if child.find('transform') is None:
         continue
+    print("this label is ",label)
+    print("!!!!!!!!!!!!!!!!!!!")
     obj = KITTI360Bbox3D()
     obj.parseBbox(child)
     # obj.parseVertices(child)
     name = child.find('label').text
+    lable_name.append(name)
     # if obj.start_frame < 10030 and obj.end_frame > 10030:
     center = np.array(obj.T, dtype=np.float64)
     dist = np.linalg.norm(center - CENTER_REF)
@@ -133,6 +140,7 @@
         objs_name.append(obj.name)
 print(len(objs_name))
 print(set(objs_name))
+print(set(lable_name))
     # print(obj.Rm)
     # print(Sigma)
 names = []

From 62654f3f13146a5f299aedf25e623c97caab9907 Mon Sep 17 00:00:00 2001
From: jbwang <1159270049@qq.com>
Date: Mon, 25 Aug 2025 14:31:07 +0800
Subject: [PATCH 11/32] fix ego_yaw_pitch_roll and get good results in lidar
 viser

---
 .../kitti_360/kitti_360_data_converter.py          |  16 +-
 .../kitti_360/kitti_360_helper.py                  |  14 +-
 jbwang_test2.py                                    | 216 ++++++++++++------
 3 files changed, 158 insertions(+), 88 deletions(-)

diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
index 77f3fff0..6433ca89 100644
--- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
+++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
@@ -16,6 +16,7 @@
 import pyarrow as pa
 from PIL import Image
 import logging
+from pyquaternion import Quaternion
 
 from nuplan.planning.utils.multithreading.worker_utils import WorkerPool, worker_map
 
@@ -455,15 +456,24 @@ def _extract_ego_state_all(log_name: str) -> List[List[float]]:
         oxts_path_file = oxts_path / f"{int(idx):010d}.txt"
         oxts_data = np.loadtxt(oxts_path_file)
 
-        #TODO check roll, pitch, yaw again
-        roll, pitch, yaw = oxts_data[3:6]
         vehicle_parameters = get_kitti360_station_wagon_parameters()
 
         while pose_idx + 1 < poses_time_len and poses_time[pose_idx + 1] < idx:
             pose_idx += 1
         pos = pose_idx
-        # pos = np.searchsorted(pwwwoses_time, idx, side='right') - 1
+        # pos = np.searchsorted(poses_time, idx, side='right') - 1
+
+        # NOTE you can use oxts_data[3:6] as roll, pitch, yaw for simplicity
+        #roll, pitch, yaw = oxts_data[3:6]
+        r00, r01, r02 = poses[pos, 1:4]
+        r10, r11, r12 = poses[pos, 5:8]
+        r20, r21, r22 = poses[pos, 9:12]
+        R_mat = np.array([[r00, r01, r02],
+                          [r10, r11, r12],
+                          [r20, r21, r22]], dtype=np.float64)
+        R_mat_cali = R_mat @ KITTI3602NUPLAN_IMU_CALIBRATION[:3,:3]
+        yaw, pitch, roll = Quaternion(matrix=R_mat_cali[:3, :3]).yaw_pitch_roll
+
         rear_axle_pose = StateSE3(
             x=poses[pos, 4],
             y=poses[pos, 8],
diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
index 5c69264f..7edcd6af 100644
--- a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
+++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
@@ -13,17 +13,6 @@
 DEFAULT_ROLL = 0.0
 DEFAULT_PITCH = 0.0
 
-addtional_calibration = get_rotation_matrix(
-    StateSE3(
-        x=0.0,
-        y=0.0,
-        z=0.0,
-        roll=np.deg2rad(1.0),
-        pitch=np.deg2rad(1.0),
-        yaw=np.deg2rad(0.0),
-    )
-    )
-
 kitti3602nuplan_imu_calibration_ideal = np.array([
     [1, 0, 0, 0],
     [0, -1, 0, 0],
     [0, 0, -1, 0],
     [0, 0, 0, 1],
     ], dtype=np.float64)
 
-KITTI3602NUPLAN_IMU_CALIBRATION = np.eye(4, dtype=np.float64)
-KITTI3602NUPLAN_IMU_CALIBRATION[:3, :3] = addtional_calibration @ kitti3602nuplan_imu_calibration_ideal[:3, :3]
+KITTI3602NUPLAN_IMU_CALIBRATION = kitti3602nuplan_imu_calibration_ideal
 
 MAX_N = 1000
 def local2global(semanticId, instanceId):
diff --git a/jbwang_test2.py b/jbwang_test2.py
index 7128a636..f9748db5 100644
--- a/jbwang_test2.py
+++ b/jbwang_test2.py
@@ -76,77 +76,149 @@
 # print(a[10000:10010,:3])
+
+
+
+# import gc
+# import json
+# import os
+# from dataclasses import asdict
+# from functools import partial
+# from pathlib import Path
+# from typing import Any, Dict, Final, List, Optional, Tuple, Union
+
+# import numpy as np
+# from collections import defaultdict
+# import datetime
+# import hashlib
+# import xml.etree.ElementTree as ET
+# import pyarrow as pa
+# from PIL import Image
+# import logging
+
+# from d123.common.datatypes.detection.detection_types import DetectionType
+# from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D
+
+
+# #TODO train and train_full
+# bbox_3d_path = Path("/nas/datasets/KITTI-360/data_3d_bboxes/train/2013_05_28_drive_0000_sync.xml")
+
+# tree = ET.parse(bbox_3d_path)
+# root = tree.getroot()
+
+# KIITI360_DETECTION_NAME_DICT = {
+#     "truck": DetectionType.VEHICLE,
+#     "bus": DetectionType.VEHICLE,
+#     "car": DetectionType.VEHICLE,
+#     "motorcycle": DetectionType.BICYCLE,
+#     "bicycle": DetectionType.BICYCLE,
+#     "pedestrian": DetectionType.PEDESTRIAN,
+# }
+# # x,y,z = 881.2268115,3247.493293,115.239219
+# # x,y,z = 867.715474,3229.630439,115.189221  # ego vehicle
+# # x,y,z = 873.533508, 3227.16235, 115.185341  # the pedestrian we are looking for
+# x,y,z = 874.233508, 3231.56235, 115.185341  # the car we are looking for
+# CENTER_REF = np.array([x, y, z], dtype=np.float64)
+# objs_name = []
+# lable_name = []
+# for child in root:
+#     label = child.find('label').text
+#     # if child.find('transform') is None or label not in KIITI360_DETECTION_NAME_DICT.keys():
+#     #     continue
+
+#     if child.find('transform') is None:
+#         continue
+#     print("this label is ",label)
+#     print("!!!!!!!!!!!!!!!!!!!")
+#     obj = KITTI360Bbox3D()
+#     obj.parseBbox(child)
+#     # obj.parseVertices(child)
+#     name = child.find('label').text
+#     lable_name.append(name)
+#     # if obj.start_frame < 10030 and obj.end_frame > 10030:
+#     center = np.array(obj.T, dtype=np.float64)
+#     dist = np.linalg.norm(center - CENTER_REF)
+#     if dist < 7:
+#         print(f"Object ID: {obj.name}, Start Frame: {obj.start_frame}, End Frame: {obj.end_frame},self.annotationId: {obj.annotationId},{obj.timestamp},{obj.T}")
+#         objs_name.append(obj.name)
+# print(len(objs_name))
+# print(set(objs_name))
+# print(set(lable_name))
+#     # print(obj.Rm)
+#     # print(Sigma)
+# names = []
+# for child in root:
+#     label = child.find('label').text
+#     if child.find('transform') is None:
+#         continue
+#     names.append(label)
+# print(set(names))
+
+from scipy.spatial.transform import Rotation as R
+import numpy as np
+from pathlib import Path as PATH
+
+def get_rotation_matrix(roll,pitch,yaw):
+    # Intrinsic Z-Y'-X'' rotation: R = R_x(roll) @ R_y(pitch) @ R_z(yaw)
+    R_x = np.array(
+        [
+            [1, 0, 0],
+            [0, np.cos(roll), -np.sin(roll)],
+            [0, np.sin(roll), np.cos(roll)],
+        ],
+        dtype=np.float64,
+    )
+    R_y = np.array(
+        [
+            [np.cos(pitch), 0, np.sin(pitch)],
+            [0, 1, 0],
+            [-np.sin(pitch), 0, np.cos(pitch)],
+        ],
+        dtype=np.float64,
+    )
+    R_z = np.array(
+        [
+            [np.cos(yaw), -np.sin(yaw), 0],
+            [np.sin(yaw), np.cos(yaw), 0],
+            [0, 0, 1],
+        ],
+        dtype=np.float64,
+    )
+    return R_x @ R_y @ R_z
+
+oxts_path = PATH("/data/jbwang/d123/data_poses/2013_05_28_drive_0000_sync/oxts/data/" )
+pose_file = PATH("/nas/datasets/KITTI-360/data_poses/2013_05_28_drive_0000_sync/poses.txt")
+poses = np.loadtxt(pose_file)
+poses_time = poses[:, 0] - 1 # Adjusting time to start from 0
+
+pose_idx = 0
+poses_time_len = len(poses_time)
+
+from pyquaternion import Quaternion
+
+for idx in range(len(list(oxts_path.glob("*.txt")))):
+    oxts_path_file = oxts_path / f"{int(idx):010d}.txt"
+    oxts_data = np.loadtxt(oxts_path_file)
+    while pose_idx + 1 < poses_time_len and poses_time[pose_idx + 1] < idx:
+        pose_idx += 1
+    pos = pose_idx
+
+    r00, r01, r02 = poses[pos, 1:4]
+    r10, r11, r12 = poses[pos, 5:8]
+    r20, r21, r22 = poses[pos, 9:12]
+    R_mat = np.array([[r00, r01, r02],
+                      [r10, r11, r12],
+                      [r20, r21, r22]], dtype=np.float64)
+    calib = np.array([[1.0, 0.0, 0.0],
+                      [0.0, -1.0, 0.0],
+                      [0.0, 0.0, -1.0]], dtype=np.float64)
+    R_mat = R_mat @ calib
+    if idx <= 300:
+        # print("R_mat",R_mat)
+        new_yaw, new_pitch, new_roll = Quaternion(matrix=R_mat[:3, :3]).yaw_pitch_roll
+        # new_yaw,new_pitch,new_roll = R.from_matrix(R_mat).as_euler('yxz', degrees=False)
+        print("new",new_roll,new_pitch,new_yaw)
+        print("roll,pitch,yaw",oxts_data[3:6])  # first 6 oxts entries: position and attitude
+        roll, pitch, yaw = oxts_data[3:6]
+        # print("true",get_rotation_matrix(roll,pitch,yaw))
+        # print("new",roll,pitch,yaw)
\ No newline at end of file

From 48b11a96113a73b703d1a2fa52200864f374bdff Mon Sep 17 00:00:00 2001
From: jbwang <1159270049@qq.com>
Date: Wed, 27 Aug 2025 16:31:55 +0800
Subject: [PATCH 12/32] finish preprocess detection script

---
 .../kitti_360/kitti_360_data_converter.py          |  63 +++---
 .../kitti_360/kitti_360_helper.py                  |  41 ++--
 .../kitti_360/preprocess_detection.py              | 189 ++++++++++++++++++
 .../default_dataset_conversion.yaml                |   2 +-
 jbwang_test2.py                                    |  11 +-
 5 files changed, 253 insertions(+), 53 deletions(-)
 create mode 100644 d123/dataset/dataset_specific/kitti_360/preprocess_detection.py

diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
index 6433ca89..03e5bd37 100644
--- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
+++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
@@ -9,6 +9,7 @@
 from typing import Any, Dict, Final, List, Optional, Tuple, Union
 
 import numpy as np
+import pickle
 from collections import defaultdict
 import datetime
 import hashlib
@@ -27,14 +28,12 @@
 from d123.common.datatypes.time.time_point import TimePoint
 from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index
 from d123.common.datatypes.vehicle_state.vehicle_parameters import get_kitti360_station_wagon_parameters,rear_axle_se3_to_center_se3
-from d123.common.geometry.base import StateSE3
-from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3Index
-from d123.common.geometry.vector import Vector3D, Vector3DIndex
 from d123.dataset.arrow.helper import open_arrow_table, write_arrow_table
 from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig, RawDataConverter
 from d123.dataset.logs.log_metadata import LogMetadata
 from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION
 from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT,kittiId2label
+from d123.geometry import BoundingBoxSE3, BoundingBoxSE3Index, StateSE3, Vector3D, Vector3DIndex
 
 KITTI360_DT: Final[float] = 0.1
 SORT_BY_TIMESTAMP: Final[bool] = True
@@ -74,6 +73,9 @@
     DIR_3D_BBOX: PATH_3D_BBOX_ROOT / "train",
 }
 
+D123_DEVKIT_ROOT = Path(os.environ["D123_DEVKIT_ROOT"])
+PREPOCESS_DETECTION_DIR = D123_DEVKIT_ROOT / "d123" / "dataset" / "dataset_specific" / "kitti_360" / "detection_preprocess"
+
 def create_token(input_data: str) -> str:
     # TODO: Refactor this function.
     # TODO: Add a general function to create tokens from arbitrary data.
@@ -316,7 +318,15 @@ def _readYAMLFile(fileName):
 
 def get_kitti360_lidar_metadata() -> Dict[LiDARType, LiDARMetadata]:
     metadata: Dict[LiDARType, LiDARMetadata] = {}
+    extrinsic = get_lidar_extrinsic()
+    metadata[LiDARType.LIDAR_TOP] = LiDARMetadata(
+        lidar_type=LiDARType.LIDAR_TOP,
+        lidar_index=Kitti360LidarIndex,
+        extrinsic=extrinsic,
+    )
+    return metadata
 
+def get_lidar_extrinsic() -> np.ndarray:
     cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt"
     if not cam2pose_txt.exists():
         raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}")
@@ -336,13 +346,7 @@
     cam2velo = np.concatenate((np.loadtxt(cam2velo_txt).reshape(3,4), lastrow))
 
     extrinsic = cam2pose @ np.linalg.inv(cam2velo)
-
-    metadata[LiDARType.LIDAR_TOP] = LiDARMetadata(
-        lidar_type=LiDARType.LIDAR_TOP,
-        lidar_index=Kitti360LidarIndex,
-        extrinsic=extrinsic,
-    )
-    return metadata
+    return extrinsic
 
 def _write_recording_table(
     log_name: str,
@@ -405,11 +409,10 @@ def _write_recording_table(
 #TODO Synchronization all other sequences
 def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]:
     # unix
-    # default using velodyne timestamps, if not available, use camera timestamps
     ts_files = [
-        PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "timestamps.txt",
         PATH_2D_RAW_ROOT / log_name / "image_00" / "timestamps.txt",
         PATH_2D_RAW_ROOT / log_name / "image_01" / "timestamps.txt",
+        PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "timestamps.txt",
     ]
     for ts_file in ts_files:
         if ts_file.exists():
@@ -531,16 +534,13 @@ def _extract_detections(
 
     dynamic_groups: Dict[int, List[KITTI360Bbox3D]] = defaultdict(list)
 
-
-    # lidra_data_all = []
-    # for index in range(ts_len):
-    #     lidar_full_path = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{index:010d}.bin"
-    #     if not lidar_full_path.exists():
-    #         logging.warning(f"LiDAR file not found for frame {index}: {lidar_full_path}")
-    #         continue
-    #     lidar_data = np.fromfile(lidar_full_path, dtype=np.float32)
-    #     lidar_data = lidar_data.reshape(-1, 4)[:, :3] # Keep only x, y, z coordinates
-    #     lidra_data_all.append(lidar_data)
+    detection_preprocess_path = PREPOCESS_DETECTION_DIR / f"{log_name}_detection_preprocessed.pkl"
+    if detection_preprocess_path.exists():
+        with open(detection_preprocess_path, "rb") as f:
+            detection_preprocess_result = pickle.load(f)
+        records_dict = {record_item["global_id"]: record_item for record_item in detection_preprocess_result["records"]}
+    else:
+        detection_preprocess_result = None
 
     for child in root:
         semanticIdKITTI = int(child.find('semanticId').text)
         name = kittiId2label[semanticIdKITTI].name
@@ -552,14 +552,12 @@ def _extract_detections(
 
         #static object
         if obj.timestamp == -1:
-            # first filter by radius
-            obj.filter_by_radius(ego_states_xyz,radius=50.0)
-            # then filter by pointcloud
-            for frame in obj.valid_radius_frames:
-                # TODO in the future, now is too slow because cpu in the server is not free
-                # or using config?
-                #     lidar_data = lidra_data_all[frame]
-                #     if obj.box_visible_in_point_cloud(lidar_data):
+            if detection_preprocess_result is None:
+                obj.filter_by_radius(ego_states_xyz,radius=50.0)
+            else:
+                obj.load_detection_preprocess(records_dict)
+            for record in obj.valid_frames["records"]:
+                frame = record["timestamp"]
                 detections_states[frame].append(obj.get_state_array())
                 detections_velocity[frame].append([0.0, 0.0, 0.0])
                 detections_tokens[frame].append(str(obj.globalID))
@@ -606,6 +604,11 @@
 
 #TODO lidar extraction now only velo
 def _extract_lidar(log_name: str, idx: int, data_converter_config: DataConverterConfig) -> Dict[LiDARType, Optional[str]]:
+
+    #NOTE special case for sequence 2013_05_28_drive_0002_sync, which has no LiDAR data before frame 4391
+    if log_name == "2013_05_28_drive_0002_sync" and idx <= 4390:
+        return {LiDARType.LIDAR_TOP: None}
+
     lidar: Optional[str] = None
     lidar_full_path = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{idx:010d}.bin"
     if lidar_full_path.exists():
diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
index 7edcd6af..76e3c9e0 100644
--- a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
+++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py
@@ -1,13 +1,11 @@
 import numpy as np
 from collections import defaultdict
-
+from typing import Dict, Optional, Any, List
 from scipy.linalg import polar
 from scipy.spatial.transform import Rotation as R
 
-from d123.common.geometry.base import StateSE3
-from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE3
-from d123.common.geometry.transform.se3 import get_rotation_matrix
+from d123.geometry import BoundingBoxSE3, StateSE3
 from d123.dataset.dataset_specific.kitti_360.labels import kittiId2label
 
 DEFAULT_ROLL = 0.0
@@ -51,7 +49,6 @@ def __init__(self):
         # the window that contains the bbox
         self.start_frame = -1
         self.end_frame = -1
-        self.valid_radius_frames = []
 
         # timestamp of the bbox (-1 if static)
         self.timestamp = -1
@@ -92,6 +89,9 @@ def parseBbox(self, child):
         self.label = child.find('label').text
 
         self.globalID = local2global(self.semanticId, self.instanceId)
+
+        self.valid_frames = {"global_id": self.globalID, "records": []}
+
         self.parseVertices(child)
         self.parse_scale_rotation()
@@ -119,11 +119,6 @@ def parse_scale_rotation(self):
         self.yaw = yaw
         self.pitch = pitch
         self.roll = roll
-
-        # self.pose = np.eye(4, dtype=np.float64)
-        # self.pose[:3, :3] = self.Rm
-        # self.pose[:3, 3] = self.T
-        # self.w2e = np.linalg.inv(self.pose)
 
     def get_state_array(self):
         center = StateSE3(
@@ -140,16 +135,17 @@ def get_state_array(self):
         return bounding_box_se3.array
 
     def filter_by_radius(self,ego_state_xyz,radius=50.0):
-        # first stage of detection, used to filter out detections by radius
-
-        for index in range(len(ego_state_xyz)):
-            ego_state = ego_state_xyz[index]
-            distance = np.linalg.norm(ego_state[:3] - self.T)
-            if distance <= radius:
-                self.valid_radius_frames.append(index)
+        ''' first stage of detection, used to filter out detections by radius '''
+        d = np.linalg.norm(ego_state_xyz - self.T[None, :], axis=1)
+        idxs = np.where(d <= radius)[0]
+        for idx in idxs:
+            self.valid_frames["records"].append({
+                "timestamp": idx,
+                "points_in_box": None,
+            })
 
     def box_visible_in_point_cloud(self, points):
-        # points: (N,3) , box: (8,3)
+        ''' points: (N,3), box: (8,3) '''
         box = self.vertices
         O, A, B, C = box[0], box[1], box[2], box[5]
         OA = A - O
@@ -159,4 +155,11 @@ def box_visible_in_point_cloud(self, points):
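+        # Oriented-box containment test (comment added for clarity): a point P is
+        # inside the box iff its projection onto each edge direction e in
+        # {OA, OB, OC} lies between the two corresponding faces, i.e.
+        # dot(O, e) < dot(P, e) < dot(corner, e); a box with more than 50 LiDAR
+        # points inside counts as visible.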
 mask = (np.dot(O, OA) < POA) & (POA < np.dot(A, OA)) & \
        (np.dot(O, OB) < POB) & (POB < np.dot(B, OB)) & \
        (np.dot(O, OC) < POC) & (POC < np.dot(C, OC))
-        return True if np.sum(mask) > 100 else False
\ No newline at end of file
+
+        points_in_box = np.sum(mask)
+        visible = True if points_in_box > 50 else False
+        return visible, points_in_box
+
+    def load_detection_preprocess(self, records_dict: Dict[int, Any]):
+        if self.globalID in records_dict:
+            self.valid_frames["records"] = records_dict[self.globalID]["records"]
\ No newline at end of file
diff --git a/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py b/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py
new file mode 100644
index 00000000..e45e76d9
--- /dev/null
+++ b/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py
@@ -0,0 +1,189 @@
+"""
+This script precomputes static detection records for KITTI-360:
+  - Stage 1: radius filtering using ego positions (from poses.txt).
+  - Stage 2: LiDAR visibility check to fill per-frame point counts.
+It writes a pickle containing, for each static object, all feasible frames and
+their point counts to avoid recomputation in later pipelines.
+We have precomputed and saved the pickle for all training logs; you can either
+download them or run this script to generate them.
+"""
+
+from __future__ import annotations
+import os
+import pickle
+import logging
+from pathlib import Path
+from typing import Dict, List, Tuple, Optional, Any
+from collections import defaultdict
+
+import numpy as np
+import numpy.typing as npt
+import xml.etree.ElementTree as ET
+
+KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"])
+DIR_3D_RAW = "data_3d_raw"
+DIR_3D_BBOX = "data_3d_bboxes"
+DIR_POSES = "data_poses"
+
+PATH_3D_RAW_ROOT = KITTI360_DATA_ROOT / DIR_3D_RAW
+PATH_3D_BBOX_ROOT = KITTI360_DATA_ROOT / DIR_3D_BBOX
+PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES
+
+from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION
+from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT, kittiId2label
+from d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter import get_lidar_extrinsic
+
+def _bbox_xml_path(log_name: str) -> Path:
+    return PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml"
+
+def _lidar_frame_path(log_name: str, frame_idx: int) -> Path:
+    return PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{frame_idx:010d}.bin"
+
+def _load_lidar_xyz(filepath: Path) -> np.ndarray:
+    """Load one LiDAR frame and return Nx3 xyz."""
+    arr = np.fromfile(filepath, dtype=np.float32)
+    return arr.reshape(-1, 4)[:, :3]
+
+def _collect_static_objects(log_name: str) -> List[KITTI360Bbox3D]:
+    """Parse XML and collect static objects with valid class names."""
+    xml_path = _bbox_xml_path(log_name)
+    if not xml_path.exists():
+        raise FileNotFoundError(f"BBox 3D file not found: {xml_path}")
+    tree = ET.parse(xml_path)
+    root = tree.getroot()
+    objs: List[KITTI360Bbox3D] = []
+    for child in root:
+        sem_id = int(child.find("semanticId").text)
+        name = kittiId2label[sem_id].name
+        timestamp = int(child.find('timestamp').text)  # -1 for static objects
+        if child.find("transform") is None or name not in KIITI360_DETECTION_NAME_DICT or timestamp != -1:
+            continue
+        obj = KITTI360Bbox3D()
+        obj.parseBbox(child)
+        objs.append(obj)
+    return objs
+
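+# NOTE (added for clarity): each row of poses.txt is a frame index followed by a
+# flattened 3x4 rigid transform [R | t] in row-major order; hence the rotation is
+# read from columns 1:4, 5:8, 9:12 and the translation from columns 4, 8, 12 below.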
poses.txt.""" + + pose_file = PATH_POSES_ROOT / log_name / "poses.txt" + if not pose_file.exists(): + raise FileNotFoundError(f"Pose file not found: {pose_file}") + + poses = np.loadtxt(pose_file) + poses_time = poses[:, 0] - 1 # Adjusting time to start from 0 + + pose_idx = 0 + poses_time_len = len(poses_time) + + ego_states = [] + + for time_idx in range(length): + while pose_idx + 1 < poses_time_len and poses_time[pose_idx + 1] < time_idx: + pose_idx += 1 + pos = pose_idx + state_item = np.eye(4) + r00, r01, r02 = poses[pos, 1:4] + r10, r11, r12 = poses[pos, 5:8] + r20, r21, r22 = poses[pos, 9:12] + R_mat = np.array([[r00, r01, r02], + [r10, r11, r12], + [r20, r21, r22]], dtype=np.float64) + R_mat_cali = R_mat @ KITTI3602NUPLAN_IMU_CALIBRATION[:3,:3] + ego_state_xyz = np.array([ + poses[pos, 4], + poses[pos, 8], + poses[pos, 12], + ]) + + state_item[:3, :3] = R_mat_cali + state_item[:3, 3] = ego_state_xyz + ego_states.append(state_item) + + return np.array(ego_states) # [N,4,4] + + +def process_detection( + log_name: str, + radius_m: float = 50.0, + output_dir: Optional[Path] = None, +) -> None: + """ + Precompute static detections filtering: + 1) filter by ego-centered radius over all frames + 2) filter by LiDAR point cloud visibility + Save per-frame static detections to a pickle to avoid recomputation. + """ + + lidar_dir = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" + if not lidar_dir.exists(): + raise FileNotFoundError(f"LiDAR data folder not found: {lidar_dir}") + ts_len = len(list(lidar_dir.glob("*.bin"))) + logging.info(f"[preprocess] {log_name}: found {ts_len} lidar frames") + + # 1) Parse static objects from XML + static_objs = _collect_static_objects(log_name) + logging.info(f"[preprocess] {log_name}: static objects = {len(static_objs)}") + + # 2) Filter by ego-centered radius + ego_states = _collect_ego_states(log_name,ts_len) + logging.info(f"[preprocess] {log_name}: ego states = {len(ego_states)}") + for obj in static_objs: + obj.filter_by_radius(ego_states[:, :3, 3], radius_m) + + # 3) Filter by LiDAR point cloud visibility + lidar_extrinsic = get_lidar_extrinsic() + for time_idx in range(ts_len): + logging.info(f"[preprocess] {log_name}: t={time_idx}") + lidar_path = _lidar_frame_path(log_name, time_idx) + lidar_xyz = _load_lidar_xyz(lidar_path) + + # lidar to pose + lidar_h = np.concatenate((lidar_xyz, np.ones((lidar_xyz.shape[0], 1), dtype=lidar_xyz.dtype)), axis=1) + lidar_in_imu = lidar_h @ lidar_extrinsic.T + lidar_in_imu = lidar_in_imu[:,:3] + + # pose to world + lidar_in_world = lidar_in_imu @ ego_states[time_idx][:3,:3].T + ego_states[time_idx][:3,3] + + for obj in static_objs: + if not any(record["timestamp"] == time_idx for record in obj.valid_frames["records"]): + continue + visible, points_in_box = obj.box_visible_in_point_cloud(lidar_in_world) + if not visible: + obj.valid_frames["records"] = [record for record in obj.valid_frames["records"] if record["timestamp"] != time_idx] + else: + for record in obj.valid_frames["records"]: + if record["timestamp"] == time_idx: + record["points_in_box"] = points_in_box + break + + # 4) Save pickle + records: List[Dict[str, Any]] = [] + for obj in static_objs: + records.append(obj.valid_frames) + if output_dir is None: + output_dir = PATH_3D_BBOX_ROOT / "preprocessed" + output_dir.mkdir(parents=True, exist_ok=True) + out_path = output_dir / f"{log_name}_detection_preprocessed.pkl" + payload = { + "log_name": log_name, + "records": records + } + with open(out_path, "wb") as f: + pickle.dump(payload, f) + 
logging.info(f"[preprocess] saved: {out_path}") + +if __name__ == "__main__": + import argparse + logging.basicConfig(level=logging.INFO) + parser = argparse.ArgumentParser(description="Precompute KITTI-360 static detections filters") + parser.add_argument("--log_name", default="2013_05_28_drive_0000_sync") + parser.add_argument("--radius", type=float, default=60.0) + parser.add_argument("--out", type=Path, default=None, help="output directory for pkl") + args = parser.parse_args() + process_detection( + log_name=args.log_name, + radius_m=args.radius, + output_dir=args.out, + ) diff --git a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml index 2c474fe8..52915f13 100644 --- a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml +++ b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml @@ -22,4 +22,4 @@ defaults: - kitti360_dataset force_log_conversion: True -force_map_conversion: True +force_map_conversion: False diff --git a/jbwang_test2.py b/jbwang_test2.py index f9748db5..183df813 100644 --- a/jbwang_test2.py +++ b/jbwang_test2.py @@ -213,12 +213,17 @@ def get_rotation_matrix(roll,pitch,yaw): [0.0, -1.0, 0.0], [0.0, 0.0, -1.0]], dtype=np.float64) R_mat = R_mat @ calib + from d123.geometry.rotation import EulerAngles if idx <= 300: # print("R_mat",R_mat) + new_yaw, new_pitch, new_roll = Quaternion(matrix=R_mat[:3, :3]).yaw_pitch_roll + R = EulerAngles.from_array(np.array([new_roll, new_pitch, new_yaw])).rotation_matrix + # print("R from yaw_pitch_roll",R) + print(R_mat - R) # new_yaw,new_pitch,new_roll = R.from_matrix(R_mat).as_euler('yxz', degrees=False) - print("new",new_roll,new_pitch,new_yaw) - print("roll,pitch,yaw",oxts_data[3:6]) # 前6个元素是位置和速度 - roll, pitch, yaw = oxts_data[3:6] + # print("new",new_roll,new_pitch,new_yaw) + # print("roll,pitch,yaw",oxts_data[3:6]) # 前6个元素是位置和速度 + # roll, pitch, yaw = oxts_data[3:6] # print("true",get_rotation_matrix(roll,pitch,yaw)) # print("new",roll,pitch,yaw) \ No newline at end of file From ac7fc40c90b2661bd2c72b14fa9cc4c5e6d588cb Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Thu, 28 Aug 2025 11:02:30 +0800 Subject: [PATCH 13/32] stop tracking jbwang_test script --- .../dataset_specific/kitti_360/jbwang_test.py | 155 ----------- .../kitti_360/kitti_360_data_converter.py | 9 +- .../code/hydra/config.yaml | 60 ----- .../2025.08.15.14.31.57/code/hydra/hydra.yaml | 177 ------------ .../code/hydra/overrides.yaml | 1 - exp/kitti360_test/2025.08.15.14.31.57/log.txt | 10 - .../code/hydra/config.yaml | 60 ----- .../2025.08.15.14.36.40/code/hydra/hydra.yaml | 177 ------------ .../code/hydra/overrides.yaml | 1 - exp/kitti360_test/2025.08.15.14.36.40/log.txt | 10 - .../code/hydra/config.yaml | 60 ----- .../2025.08.15.14.40.29/code/hydra/hydra.yaml | 177 ------------ .../code/hydra/overrides.yaml | 1 - exp/kitti_test2/2025.08.15.14.40.29/log.txt | 10 - .../code/hydra/config.yaml | 60 ----- .../2025.08.15.14.43.13/code/hydra/hydra.yaml | 177 ------------ .../code/hydra/overrides.yaml | 1 - exp/kitti_test2/2025.08.15.14.43.13/log.txt | 12 - .../code/hydra/config.yaml | 60 ----- .../2025.08.15.14.46.49/code/hydra/hydra.yaml | 177 ------------ .../code/hydra/overrides.yaml | 1 - exp/kitti_test2/2025.08.15.14.46.49/log.txt | 10 - .../code/hydra/config.yaml | 60 ----- .../2025.08.15.14.50.55/code/hydra/hydra.yaml | 177 ------------ .../code/hydra/overrides.yaml | 1 - exp/kitti_test2/2025.08.15.14.50.55/log.txt | 11 - 
.../code/hydra/config.yaml | 60 ----- .../2025.08.15.14.52.39/code/hydra/hydra.yaml | 177 ------------ .../code/hydra/overrides.yaml | 1 - exp/kitti_test2/2025.08.15.14.52.39/log.txt | 11 - .../code/hydra/config.yaml | 60 ----- .../2025.08.11.15.45.36/code/hydra/hydra.yaml | 177 ------------ .../code/hydra/overrides.yaml | 1 - exp/my_run/2025.08.11.15.45.36/log.txt | 10 - jbwang_test.py | 98 ------- jbwang_test2.py | 229 ---------------- notebooks/dataset/jbwang_test.py | 94 ------- notebooks/gym/jbwang_test.py | 180 ------------- notebooks/jbwang_viz_test.py | 252 ------------------ 39 files changed, 5 insertions(+), 3000 deletions(-) delete mode 100644 d123/dataset/dataset_specific/kitti_360/jbwang_test.py delete mode 100644 exp/kitti360_test/2025.08.15.14.31.57/code/hydra/config.yaml delete mode 100644 exp/kitti360_test/2025.08.15.14.31.57/code/hydra/hydra.yaml delete mode 100644 exp/kitti360_test/2025.08.15.14.31.57/code/hydra/overrides.yaml delete mode 100644 exp/kitti360_test/2025.08.15.14.31.57/log.txt delete mode 100644 exp/kitti360_test/2025.08.15.14.36.40/code/hydra/config.yaml delete mode 100644 exp/kitti360_test/2025.08.15.14.36.40/code/hydra/hydra.yaml delete mode 100644 exp/kitti360_test/2025.08.15.14.36.40/code/hydra/overrides.yaml delete mode 100644 exp/kitti360_test/2025.08.15.14.36.40/log.txt delete mode 100644 exp/kitti_test2/2025.08.15.14.40.29/code/hydra/config.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.40.29/code/hydra/hydra.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.40.29/code/hydra/overrides.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.40.29/log.txt delete mode 100644 exp/kitti_test2/2025.08.15.14.43.13/code/hydra/config.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.43.13/code/hydra/hydra.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.43.13/code/hydra/overrides.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.43.13/log.txt delete mode 100644 exp/kitti_test2/2025.08.15.14.46.49/code/hydra/config.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.46.49/code/hydra/hydra.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.46.49/code/hydra/overrides.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.46.49/log.txt delete mode 100644 exp/kitti_test2/2025.08.15.14.50.55/code/hydra/config.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.50.55/code/hydra/hydra.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.50.55/code/hydra/overrides.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.50.55/log.txt delete mode 100644 exp/kitti_test2/2025.08.15.14.52.39/code/hydra/config.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.52.39/code/hydra/hydra.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.52.39/code/hydra/overrides.yaml delete mode 100644 exp/kitti_test2/2025.08.15.14.52.39/log.txt delete mode 100644 exp/my_run/2025.08.11.15.45.36/code/hydra/config.yaml delete mode 100644 exp/my_run/2025.08.11.15.45.36/code/hydra/hydra.yaml delete mode 100644 exp/my_run/2025.08.11.15.45.36/code/hydra/overrides.yaml delete mode 100644 exp/my_run/2025.08.11.15.45.36/log.txt delete mode 100644 jbwang_test.py delete mode 100644 jbwang_test2.py delete mode 100644 notebooks/dataset/jbwang_test.py delete mode 100644 notebooks/gym/jbwang_test.py delete mode 100644 notebooks/jbwang_viz_test.py diff --git a/d123/dataset/dataset_specific/kitti_360/jbwang_test.py b/d123/dataset/dataset_specific/kitti_360/jbwang_test.py deleted file mode 100644 index e480783e..00000000 --- 
a/d123/dataset/dataset_specific/kitti_360/jbwang_test.py +++ /dev/null @@ -1,155 +0,0 @@ -import gc -import json -import os -import pickle -from dataclasses import asdict -from functools import partial -from pathlib import Path -from typing import Any, Dict, Final, List, Optional, Tuple, Union - -import numpy as np -import pyarrow as pa -import yaml -from nuplan.database.nuplan_db.nuplan_scenario_queries import get_cameras, get_images_from_lidar_tokens -from nuplan.database.nuplan_db_orm.ego_pose import EgoPose -from nuplan.database.nuplan_db_orm.lidar_box import LidarBox -from nuplan.database.nuplan_db_orm.lidar_pc import LidarPc -from nuplan.database.nuplan_db_orm.nuplandb import NuPlanDB -from nuplan.planning.simulation.observation.observation_type import CameraChannel -from nuplan.planning.utils.multithreading.worker_utils import WorkerPool, worker_map -from pyquaternion import Quaternion -from sqlalchemy import func - - -from kitti_360_data_converter import _extract_ego_state_all,get_kitti360_lidar_metadata,_extract_cameras,_extract_detections,_read_timestamps - -# a = _extract_ego_state_all("2013_05_28_drive_0000_sync") -# print(a[0]) -# print(a[1]) -# print(a[10]) -from d123.common.datatypes.time.time_point import TimePoint -from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json - -NUPLAN_CAMERA_TYPES = { - CameraType.CAM_F0: CameraChannel.CAM_F0, - CameraType.CAM_B0: CameraChannel.CAM_B0, - CameraType.CAM_L0: CameraChannel.CAM_L0, - CameraType.CAM_L1: CameraChannel.CAM_L1, - CameraType.CAM_L2: CameraChannel.CAM_L2, - CameraType.CAM_R0: CameraChannel.CAM_R0, - CameraType.CAM_R1: CameraChannel.CAM_R1, - CameraType.CAM_R2: CameraChannel.CAM_R2, -} - -NUPLAN_DATA_ROOT = Path(os.environ["NUPLAN_DATA_ROOT"]) -NUPLAN_ROLLING_SHUTTER_S: Final[TimePoint] = TimePoint.from_s(1 / 60) - -def _extract_camera( - log_db: NuPlanDB, - lidar_pc: LidarPc, - source_log_path: Path, -) -> Dict[CameraType, Union[str, bytes]]: - - camera_dict: Dict[str, Union[str, bytes]] = {} - sensor_root = NUPLAN_DATA_ROOT / "nuplan-v1.1" / "sensor_blobs" - - log_cam_infos = {camera.token: camera for camera in log_db.log.cameras} - for camera_type, camera_channel in NUPLAN_CAMERA_TYPES.items(): - camera_data: Optional[Union[str, bytes]] = None - c2e: Optional[List[float]] = None - image_class = list(get_images_from_lidar_tokens(source_log_path, [lidar_pc.token], [str(camera_channel.value)])) - # print("image_class",image_class) - if len(image_class) != 0: - image = image_class[0] - filename_jpg = sensor_root / image.filename_jpg - - timestamp = image.timestamp + NUPLAN_ROLLING_SHUTTER_S.time_us - img_ego_pose: EgoPose = ( - log_db.log._session.query(EgoPose).order_by(func.abs(EgoPose.timestamp - timestamp)).first() - ) - img_e2g = img_ego_pose.trans_matrix - g2e = lidar_pc.ego_pose.trans_matrix_inv - img_e2e = g2e @ img_e2g - cam_info = log_cam_infos[image.camera_token] - c2img_e = cam_info.trans_matrix - c2e = img_e2e @ c2img_e - # print(f"Camera {camera_type} found for lidar {lidar_pc.token} at timestamp {timestamp}") - print(camera_type,"c2e:", c2e) - camera_dict[camera_type] = camera_data - - return camera_dict - - -def get_cam_info_from_lidar_pc(log,log_file, lidar_pc, rolling_shutter_s=1/60): - - retrieved_images = get_images_from_lidar_tokens( - log_file, [lidar_pc.token], [str(channel.value) for channel in CameraChannel] - ) - - # if interp_trans: - # neighbours = [] - # ego_poses_dict = {} - # for ego_pose in log.ego_poses: - # ego_poses_dict[ego_pose.token] = 
ego_pose - # if abs(ego_pose.timestamp - lidar_pc.ego_pose.timestamp) / 1e6 < 0.5: - # neighbours.append(ego_pose) - # timestamps = [pose.timestamp for pose in neighbours] - # translations = [pose.translation_np for pose in neighbours] - # splines = [CubicSpline(timestamps, [translation[i] for translation in translations]) for i in range(2)] - - log_cam_infos = {camera.token : camera for camera in log.camera} - cams = {} - for img in retrieved_images: - channel = img.channel - filename = img.filename_jpg - - # if interp_trans: - # img_ego_pose = ego_poses_dict[img.ego_pose_token] - # interpolated_translation = np.array([splines[0](timestamp), splines[1](timestamp), img_ego_pose.z]) - # delta = interpolated_translation - lidar_pc.ego_pose.translation_np - # delta = np.dot(lidar_pc.ego_pose.quaternion.rotation_matrix.T, delta) - if channel == "CAM_F0": - timestamp = img.timestamp + (rolling_shutter_s * 1e6) - img_ego_pose = log.session.query(EgoPose).order_by(func.abs(EgoPose.timestamp - timestamp)).first() - img_e2g = img_ego_pose.trans_matrix - # print("img_e2g:", img_e2g) - - g2e = lidar_pc.ego_pose.trans_matrix_inv - # print("g2e:", g2e) #change obviously - img_e2e = g2e @ img_e2g - # print("img_e2e:", img_e2e) - cam_info = log_cam_infos[img.camera_token] - c2img_e = cam_info.trans_matrix - # print("c2img_e:", c2img_e) - c2e = img_e2e @ c2img_e - # print("channel:", channel, "c2e:", c2e) - - cams[channel] = dict( - data_path = filename, - timestamp = img.timestamp, - token=img.token, - sensor2ego_rotation = Quaternion(matrix=c2e[:3, :3]), - sensor2ego_translation = c2e[:3, 3], - cam_intrinsic = cam_info.intrinsic_np, - distortion = cam_info.distortion_np, - ) - - - if len(cams) != 8: - return None - # print(cams) - return cams - -if __name__ == "__main__": - # Example usage - # data_converter_config: DataConverterConfig - # log_path = Path("/nas/datasets/nuplan/nuplan-v1.1/splits/mini/2021.10.11.07.12.18_veh-50_00211_00304.db") - # log_path = Path("/nas/datasets/nuplan/nuplan-v1.1/splits/mini/2021.09.16.15.12.03_veh-42_01037_01434.db") - # log_db = NuPlanDB(NUPLAN_DATA_ROOT, str(log_path), None) - - # for lidar_pc in log_db.lidar_pc: # Replace with actual token - # # camera_data = _extract_camera(log_db, lidar_pc, log_path) - # camera_data = get_cam_info_from_lidar_pc(log_db,log_path, lidar_pc, rolling_shutter_s=1/60) - # print(_extract_cameras("2013_05_28_drive_0000_sync",0)) - # _extract_detections("2013_05_28_drive_0000_sync", 0) - print(_read_timestamps("2013_05_28_drive_0000_sync")) \ No newline at end of file diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py index 03e5bd37..2cc40675 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py @@ -197,7 +197,6 @@ def convert_kitti360_log_to_arrow( vehicle_parameters = get_kitti360_station_wagon_parameters() camera_metadata = get_kitti360_camera_metadata() - #TODO now only velodyne lidar lidar_metadata = get_kitti360_lidar_metadata() schema_column_list = [ @@ -406,14 +405,17 @@ def _write_recording_table( recording_table = recording_table.sort_by([("timestamp", "ascending")]) write_arrow_table(recording_table, log_file_path) -#TODO Synchronization all other sequences) def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]: # unix ts_files = [ + PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "timestamps.txt", PATH_2D_RAW_ROOT / 
log_name / "image_00" / "timestamps.txt", PATH_2D_RAW_ROOT / log_name / "image_01" / "timestamps.txt", - PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "timestamps.txt", ] + + if log_name == "2013_05_28_drive_0002_sync": + ts_files = ts_files[1:] + for ts_file in ts_files: if ts_file.exists(): tps: List[TimePoint] = [] @@ -602,7 +604,6 @@ def _extract_detections( return detections_states, detections_velocity, detections_tokens, detections_types -#TODO lidar extraction now only velo def _extract_lidar(log_name: str, idx: int, data_converter_config: DataConverterConfig) -> Dict[LiDARType, Optional[str]]: #NOTE special case for sequence 2013_05_28_drive_0002_sync which has no lidar data before frame 4391 diff --git a/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/config.yaml b/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/config.yaml deleted file mode 100644 index a505c4d2..00000000 --- a/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/config.yaml +++ /dev/null @@ -1,60 +0,0 @@ -worker: - _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed - _convert_: all - master_node_ip: null - threads_per_node: null - debug_mode: false - log_to_driver: true - logs_subdir: logs - use_distributed: false -scene_filter: - _target_: d123.dataset.scene.scene_filter.SceneFilter - _convert_: all - split_types: null - split_names: null - log_names: null - map_names: null - scene_tokens: null - timestamp_threshold_s: null - ego_displacement_minimum_m: null - duration_s: 9.2 - history_s: 3.0 -scene_builder: - _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder - _convert_: all - dataset_path: ${d123_data_root} -distributed_timeout_seconds: 7200 -selected_simulation_metrics: null -verbose: false -logger_level: info -logger_format_string: null -max_number_of_workers: null -gpu: true -seed: 42 -d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} -d123_maps_root: ${oc.env:D123_MAPS_ROOT} -d123_data_root: ${oc.env:D123_DATA_ROOT} -nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} -nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} -nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} -experiment_name: kitti360_test -date_format: '%Y.%m.%d.%H.%M.%S' -experiment_uid: ${now:${date_format}} -output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} -force_log_conversion: true -force_map_conversion: false -datasets: - nuplan_dataset: - _target_: d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter - _convert_: all - splits: - - kitti360 - log_path: ${oc.env:KITTI360_DATA_ROOT} - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: all - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: path - lidar_store_option: path diff --git a/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/hydra.yaml b/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/hydra.yaml deleted file mode 100644 index 406ccbe7..00000000 --- a/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: ${output_dir} - sweep: - dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: null - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. 
- - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: RUN - searchpath: - - pkg://d123.script.config - - pkg://d123.script.config.common - callbacks: {} - output_subdir: ${output_dir}/code/hydra - overrides: - hydra: - - hydra.mode=RUN - task: - - experiment_name=kitti360_test - job: - name: run_dataset_conversion - chdir: false - override_dirname: experiment_name=kitti360_test - id: ??? - num: ??? 
- config_name: default_dataset_conversion - env_set: {} - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /home/jbwang/d123/d123/script - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: /home/jbwang/d123/d123/script/config/dataset_conversion - schema: file - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: d123.script.config - schema: pkg - provider: hydra.searchpath in main - - path: d123.script.config.common - schema: pkg - provider: hydra.searchpath in main - - path: '' - schema: structured - provider: schema - output_dir: /home/jbwang/d123/exp/kitti360_test/2025.08.15.14.31.57 - choices: - scene_builder: default_scene_builder - scene_filter: all_scenes - worker: ray_distributed - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/overrides.yaml b/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/overrides.yaml deleted file mode 100644 index 6c8e6217..00000000 --- a/exp/kitti360_test/2025.08.15.14.31.57/code/hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- experiment_name=kitti360_test diff --git a/exp/kitti360_test/2025.08.15.14.31.57/log.txt b/exp/kitti360_test/2025.08.15.14.31.57/log.txt deleted file mode 100644 index 984f705a..00000000 --- a/exp/kitti360_test/2025.08.15.14.31.57/log.txt +++ /dev/null @@ -1,10 +0,0 @@ -2025-08-15 14:31:57,385 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... -2025-08-15 14:32:14,105 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! -2025-08-15 14:32:35,603 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed -2025-08-15 14:32:35,604 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 -Number of CPUs per node: 64 -Number of GPUs per node: 8 -Number of threads across all nodes: 64 -2025-08-15 14:32:35,604 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! -2025-08-15 14:32:35,604 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... -2025-08-15 14:32:35,605 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... 
diff --git a/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/config.yaml b/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/config.yaml deleted file mode 100644 index 0fd6120d..00000000 --- a/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/config.yaml +++ /dev/null @@ -1,60 +0,0 @@ -worker: - _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed - _convert_: all - master_node_ip: null - threads_per_node: null - debug_mode: false - log_to_driver: true - logs_subdir: logs - use_distributed: false -scene_filter: - _target_: d123.dataset.scene.scene_filter.SceneFilter - _convert_: all - split_types: null - split_names: null - log_names: null - map_names: null - scene_tokens: null - timestamp_threshold_s: null - ego_displacement_minimum_m: null - duration_s: 9.2 - history_s: 3.0 -scene_builder: - _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder - _convert_: all - dataset_path: ${d123_data_root} -distributed_timeout_seconds: 7200 -selected_simulation_metrics: null -verbose: false -logger_level: info -logger_format_string: null -max_number_of_workers: null -gpu: true -seed: 42 -d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} -d123_maps_root: ${oc.env:D123_MAPS_ROOT} -d123_data_root: ${oc.env:D123_DATA_ROOT} -nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} -nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} -nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} -experiment_name: kitti360_test -date_format: '%Y.%m.%d.%H.%M.%S' -experiment_uid: ${now:${date_format}} -output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} -force_log_conversion: true -force_map_conversion: false -datasets: - kitti360_dataset: - _target_: d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter - _convert_: all - splits: - - kitti360 - log_path: ${oc.env:KITTI360_DATA_ROOT} - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: all - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: path - lidar_store_option: path diff --git a/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/hydra.yaml b/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/hydra.yaml deleted file mode 100644 index 4eee2c65..00000000 --- a/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: ${output_dir} - sweep: - dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: null - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. 
- - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: RUN - searchpath: - - pkg://d123.script.config - - pkg://d123.script.config.common - callbacks: {} - output_subdir: ${output_dir}/code/hydra - overrides: - hydra: - - hydra.mode=RUN - task: - - experiment_name=kitti360_test - job: - name: run_dataset_conversion - chdir: false - override_dirname: experiment_name=kitti360_test - id: ??? - num: ??? - config_name: default_dataset_conversion - env_set: {} - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /home/jbwang/d123/d123/script - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: /home/jbwang/d123/d123/script/config/dataset_conversion - schema: file - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: d123.script.config - schema: pkg - provider: hydra.searchpath in main - - path: d123.script.config.common - schema: pkg - provider: hydra.searchpath in main - - path: '' - schema: structured - provider: schema - output_dir: /home/jbwang/d123/exp/kitti360_test/2025.08.15.14.36.40 - choices: - scene_builder: default_scene_builder - scene_filter: all_scenes - worker: ray_distributed - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/overrides.yaml b/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/overrides.yaml deleted file mode 100644 index 6c8e6217..00000000 --- a/exp/kitti360_test/2025.08.15.14.36.40/code/hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- experiment_name=kitti360_test diff --git a/exp/kitti360_test/2025.08.15.14.36.40/log.txt b/exp/kitti360_test/2025.08.15.14.36.40/log.txt deleted file mode 100644 index 5f939dac..00000000 --- a/exp/kitti360_test/2025.08.15.14.36.40/log.txt +++ /dev/null @@ -1,10 +0,0 @@ -2025-08-15 14:36:40,989 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... 
-2025-08-15 14:36:56,167 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! -2025-08-15 14:37:18,685 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed -2025-08-15 14:37:18,686 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 -Number of CPUs per node: 64 -Number of GPUs per node: 8 -Number of threads across all nodes: 64 -2025-08-15 14:37:18,686 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! -2025-08-15 14:37:18,686 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... -2025-08-15 14:37:18,687 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... diff --git a/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/config.yaml b/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/config.yaml deleted file mode 100644 index 5ce47ba9..00000000 --- a/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/config.yaml +++ /dev/null @@ -1,60 +0,0 @@ -worker: - _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed - _convert_: all - master_node_ip: null - threads_per_node: null - debug_mode: false - log_to_driver: true - logs_subdir: logs - use_distributed: false -scene_filter: - _target_: d123.dataset.scene.scene_filter.SceneFilter - _convert_: all - split_types: null - split_names: null - log_names: null - map_names: null - scene_tokens: null - timestamp_threshold_s: null - ego_displacement_minimum_m: null - duration_s: 9.2 - history_s: 3.0 -scene_builder: - _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder - _convert_: all - dataset_path: ${d123_data_root} -distributed_timeout_seconds: 7200 -selected_simulation_metrics: null -verbose: false -logger_level: info -logger_format_string: null -max_number_of_workers: null -gpu: true -seed: 42 -d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} -d123_maps_root: ${oc.env:D123_MAPS_ROOT} -d123_data_root: ${oc.env:D123_DATA_ROOT} -nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} -nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} -nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} -experiment_name: kitti_test2 -date_format: '%Y.%m.%d.%H.%M.%S' -experiment_uid: ${now:${date_format}} -output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} -force_log_conversion: true -force_map_conversion: false -datasets: - kitti360_dataset: - _target_: d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter - _convert_: all - splits: - - kitti360 - log_path: ${oc.env:KITTI360_DATA_ROOT} - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: all - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: path - lidar_store_option: path diff --git a/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/hydra.yaml b/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/hydra.yaml deleted file mode 100644 index 2d1c615a..00000000 --- a/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: ${output_dir} - sweep: - dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: 
- _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: null - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: RUN - searchpath: - - pkg://d123.script.config - - pkg://d123.script.config.common - callbacks: {} - output_subdir: ${output_dir}/code/hydra - overrides: - hydra: - - hydra.mode=RUN - task: - - experiment_name=kitti_test2 - job: - name: run_dataset_conversion - chdir: false - override_dirname: experiment_name=kitti_test2 - id: ??? - num: ??? 
- config_name: default_dataset_conversion - env_set: {} - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /home/jbwang/d123 - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: /home/jbwang/d123/d123/script/config/dataset_conversion - schema: file - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: d123.script.config - schema: pkg - provider: hydra.searchpath in main - - path: d123.script.config.common - schema: pkg - provider: hydra.searchpath in main - - path: '' - schema: structured - provider: schema - output_dir: /home/jbwang/d123/exp/kitti_test2/2025.08.15.14.40.29 - choices: - scene_builder: default_scene_builder - scene_filter: all_scenes - worker: ray_distributed - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/overrides.yaml b/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/overrides.yaml deleted file mode 100644 index 676c1042..00000000 --- a/exp/kitti_test2/2025.08.15.14.40.29/code/hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- experiment_name=kitti_test2 diff --git a/exp/kitti_test2/2025.08.15.14.40.29/log.txt b/exp/kitti_test2/2025.08.15.14.40.29/log.txt deleted file mode 100644 index 8437d38e..00000000 --- a/exp/kitti_test2/2025.08.15.14.40.29/log.txt +++ /dev/null @@ -1,10 +0,0 @@ -2025-08-15 14:40:29,427 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... -2025-08-15 14:40:42,538 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! -2025-08-15 14:41:00,324 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed -2025-08-15 14:41:00,325 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 -Number of CPUs per node: 64 -Number of GPUs per node: 8 -Number of threads across all nodes: 64 -2025-08-15 14:41:00,325 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! -2025-08-15 14:41:00,325 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... -2025-08-15 14:41:00,326 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... 
diff --git a/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/config.yaml b/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/config.yaml deleted file mode 100644 index de70bfa3..00000000 --- a/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/config.yaml +++ /dev/null @@ -1,60 +0,0 @@ -worker: - _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed - _convert_: all - master_node_ip: null - threads_per_node: null - debug_mode: false - log_to_driver: true - logs_subdir: logs - use_distributed: false -scene_filter: - _target_: d123.dataset.scene.scene_filter.SceneFilter - _convert_: all - split_types: null - split_names: null - log_names: null - map_names: null - scene_tokens: null - timestamp_threshold_s: null - ego_displacement_minimum_m: null - duration_s: 9.2 - history_s: 3.0 -scene_builder: - _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder - _convert_: all - dataset_path: ${d123_data_root} -distributed_timeout_seconds: 7200 -selected_simulation_metrics: null -verbose: false -logger_level: info -logger_format_string: null -max_number_of_workers: null -gpu: true -seed: 42 -d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} -d123_maps_root: ${oc.env:D123_MAPS_ROOT} -d123_data_root: ${oc.env:D123_DATA_ROOT} -nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} -nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} -nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} -experiment_name: kitti_test2 -date_format: '%Y.%m.%d.%H.%M.%S' -experiment_uid: ${now:${date_format}} -output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} -force_log_conversion: true -force_map_conversion: false -datasets: - nuplan_private_dataset: - _target_: d123.dataset.dataset_specific.nuplan.nuplan_data_converter.NuplanDataConverter - _convert_: all - splits: - - nuplan_private_test - log_path: ${oc.env:NUPLAN_DATA_ROOT}/nuplan-v1.1/splits - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: all - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: path - lidar_store_option: path diff --git a/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/hydra.yaml b/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/hydra.yaml deleted file mode 100644 index cca44d29..00000000 --- a/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: ${output_dir} - sweep: - dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: null - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. 
- - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: RUN - searchpath: - - pkg://d123.script.config - - pkg://d123.script.config.common - callbacks: {} - output_subdir: ${output_dir}/code/hydra - overrides: - hydra: - - hydra.mode=RUN - task: - - experiment_name=kitti_test2 - job: - name: run_dataset_conversion - chdir: false - override_dirname: experiment_name=kitti_test2 - id: ??? - num: ??? - config_name: default_dataset_conversion - env_set: {} - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /home/jbwang/d123 - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: /home/jbwang/d123/d123/script/config/dataset_conversion - schema: file - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: d123.script.config - schema: pkg - provider: hydra.searchpath in main - - path: d123.script.config.common - schema: pkg - provider: hydra.searchpath in main - - path: '' - schema: structured - provider: schema - output_dir: /home/jbwang/d123/exp/kitti_test2/2025.08.15.14.43.13 - choices: - scene_builder: default_scene_builder - scene_filter: all_scenes - worker: ray_distributed - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/overrides.yaml b/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/overrides.yaml deleted file mode 100644 index 676c1042..00000000 --- a/exp/kitti_test2/2025.08.15.14.43.13/code/hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- experiment_name=kitti_test2 diff --git a/exp/kitti_test2/2025.08.15.14.43.13/log.txt b/exp/kitti_test2/2025.08.15.14.43.13/log.txt deleted file mode 100644 index fec50568..00000000 --- a/exp/kitti_test2/2025.08.15.14.43.13/log.txt +++ /dev/null @@ -1,12 +0,0 @@ -2025-08-15 14:43:13,965 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... 
-2025-08-15 14:43:24,401 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! -2025-08-15 14:43:39,643 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed -2025-08-15 14:43:39,644 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 -Number of CPUs per node: 64 -Number of GPUs per node: 8 -Number of threads across all nodes: 64 -2025-08-15 14:43:39,644 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! -2025-08-15 14:43:39,644 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... -2025-08-15 14:43:39,645 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... -2025-08-15 14:43:44,316 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:21} Building RawDataProcessor...DONE! -2025-08-15 14:43:44,316 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:34} Processing dataset: NuplanDataConverter diff --git a/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/config.yaml b/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/config.yaml deleted file mode 100644 index 5ce47ba9..00000000 --- a/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/config.yaml +++ /dev/null @@ -1,60 +0,0 @@ -worker: - _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed - _convert_: all - master_node_ip: null - threads_per_node: null - debug_mode: false - log_to_driver: true - logs_subdir: logs - use_distributed: false -scene_filter: - _target_: d123.dataset.scene.scene_filter.SceneFilter - _convert_: all - split_types: null - split_names: null - log_names: null - map_names: null - scene_tokens: null - timestamp_threshold_s: null - ego_displacement_minimum_m: null - duration_s: 9.2 - history_s: 3.0 -scene_builder: - _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder - _convert_: all - dataset_path: ${d123_data_root} -distributed_timeout_seconds: 7200 -selected_simulation_metrics: null -verbose: false -logger_level: info -logger_format_string: null -max_number_of_workers: null -gpu: true -seed: 42 -d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} -d123_maps_root: ${oc.env:D123_MAPS_ROOT} -d123_data_root: ${oc.env:D123_DATA_ROOT} -nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} -nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} -nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} -experiment_name: kitti_test2 -date_format: '%Y.%m.%d.%H.%M.%S' -experiment_uid: ${now:${date_format}} -output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} -force_log_conversion: true -force_map_conversion: false -datasets: - kitti360_dataset: - _target_: d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter - _convert_: all - splits: - - kitti360 - log_path: ${oc.env:KITTI360_DATA_ROOT} - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: all - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: path - lidar_store_option: path diff --git a/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/hydra.yaml b/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/hydra.yaml deleted file mode 100644 index bd9698a2..00000000 --- 
a/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: ${output_dir} - sweep: - dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: null - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: RUN - searchpath: - - pkg://d123.script.config - - pkg://d123.script.config.common - callbacks: {} - output_subdir: ${output_dir}/code/hydra - overrides: - hydra: - - hydra.mode=RUN - task: - - experiment_name=kitti_test2 - job: - name: run_dataset_conversion - chdir: false - override_dirname: experiment_name=kitti_test2 - id: ??? - num: ??? 
- config_name: default_dataset_conversion - env_set: {} - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /home/jbwang/d123 - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: /home/jbwang/d123/d123/script/config/dataset_conversion - schema: file - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: d123.script.config - schema: pkg - provider: hydra.searchpath in main - - path: d123.script.config.common - schema: pkg - provider: hydra.searchpath in main - - path: '' - schema: structured - provider: schema - output_dir: /home/jbwang/d123/exp/kitti_test2/2025.08.15.14.46.49 - choices: - scene_builder: default_scene_builder - scene_filter: all_scenes - worker: ray_distributed - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/overrides.yaml b/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/overrides.yaml deleted file mode 100644 index 676c1042..00000000 --- a/exp/kitti_test2/2025.08.15.14.46.49/code/hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- experiment_name=kitti_test2 diff --git a/exp/kitti_test2/2025.08.15.14.46.49/log.txt b/exp/kitti_test2/2025.08.15.14.46.49/log.txt deleted file mode 100644 index 00286f48..00000000 --- a/exp/kitti_test2/2025.08.15.14.46.49/log.txt +++ /dev/null @@ -1,10 +0,0 @@ -2025-08-15 14:46:49,566 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... -2025-08-15 14:46:59,509 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! -2025-08-15 14:47:14,118 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed -2025-08-15 14:47:14,118 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 -Number of CPUs per node: 64 -Number of GPUs per node: 8 -Number of threads across all nodes: 64 -2025-08-15 14:47:14,119 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! -2025-08-15 14:47:14,119 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... -2025-08-15 14:47:14,122 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... 
diff --git a/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/config.yaml b/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/config.yaml deleted file mode 100644 index 5ce47ba9..00000000 --- a/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/config.yaml +++ /dev/null @@ -1,60 +0,0 @@ -worker: - _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed - _convert_: all - master_node_ip: null - threads_per_node: null - debug_mode: false - log_to_driver: true - logs_subdir: logs - use_distributed: false -scene_filter: - _target_: d123.dataset.scene.scene_filter.SceneFilter - _convert_: all - split_types: null - split_names: null - log_names: null - map_names: null - scene_tokens: null - timestamp_threshold_s: null - ego_displacement_minimum_m: null - duration_s: 9.2 - history_s: 3.0 -scene_builder: - _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder - _convert_: all - dataset_path: ${d123_data_root} -distributed_timeout_seconds: 7200 -selected_simulation_metrics: null -verbose: false -logger_level: info -logger_format_string: null -max_number_of_workers: null -gpu: true -seed: 42 -d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} -d123_maps_root: ${oc.env:D123_MAPS_ROOT} -d123_data_root: ${oc.env:D123_DATA_ROOT} -nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} -nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} -nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} -experiment_name: kitti_test2 -date_format: '%Y.%m.%d.%H.%M.%S' -experiment_uid: ${now:${date_format}} -output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} -force_log_conversion: true -force_map_conversion: false -datasets: - kitti360_dataset: - _target_: d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter - _convert_: all - splits: - - kitti360 - log_path: ${oc.env:KITTI360_DATA_ROOT} - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: all - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: path - lidar_store_option: path diff --git a/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/hydra.yaml b/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/hydra.yaml deleted file mode 100644 index acff45d7..00000000 --- a/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: ${output_dir} - sweep: - dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: null - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. 
- - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: RUN - searchpath: - - pkg://d123.script.config - - pkg://d123.script.config.common - callbacks: {} - output_subdir: ${output_dir}/code/hydra - overrides: - hydra: - - hydra.mode=RUN - task: - - experiment_name=kitti_test2 - job: - name: run_dataset_conversion - chdir: false - override_dirname: experiment_name=kitti_test2 - id: ??? - num: ??? - config_name: default_dataset_conversion - env_set: {} - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /home/jbwang/d123 - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: /home/jbwang/d123/d123/script/config/dataset_conversion - schema: file - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: d123.script.config - schema: pkg - provider: hydra.searchpath in main - - path: d123.script.config.common - schema: pkg - provider: hydra.searchpath in main - - path: '' - schema: structured - provider: schema - output_dir: /home/jbwang/d123/exp/kitti_test2/2025.08.15.14.50.55 - choices: - scene_builder: default_scene_builder - scene_filter: all_scenes - worker: ray_distributed - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/overrides.yaml b/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/overrides.yaml deleted file mode 100644 index 676c1042..00000000 --- a/exp/kitti_test2/2025.08.15.14.50.55/code/hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- experiment_name=kitti_test2 diff --git a/exp/kitti_test2/2025.08.15.14.50.55/log.txt b/exp/kitti_test2/2025.08.15.14.50.55/log.txt deleted file mode 100644 index 9902e0ce..00000000 --- a/exp/kitti_test2/2025.08.15.14.50.55/log.txt +++ /dev/null @@ -1,11 +0,0 @@ -2025-08-15 14:50:55,950 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... 
-2025-08-15 14:51:19,466 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! -2025-08-15 14:51:52,653 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed -2025-08-15 14:51:52,653 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 -Number of CPUs per node: 64 -Number of GPUs per node: 8 -Number of threads across all nodes: 64 -2025-08-15 14:51:52,654 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! -2025-08-15 14:51:52,654 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... -2025-08-15 14:51:52,654 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... -2025-08-15 14:51:52,655 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:17} Instantiating dataset type: {'_target_': 'd123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter', '_convert_': 'all', 'splits': ['kitti360'], 'log_path': '${oc.env:KITTI360_DATA_ROOT}', 'data_converter_config': {'_target_': 'd123.dataset.dataset_specific.raw_data_converter.DataConverterConfig', '_convert_': 'all', 'output_path': '${d123_data_root}', 'force_log_conversion': '${force_log_conversion}', 'force_map_conversion': '${force_map_conversion}', 'camera_store_option': 'path', 'lidar_store_option': 'path'}} diff --git a/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/config.yaml b/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/config.yaml deleted file mode 100644 index de70bfa3..00000000 --- a/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/config.yaml +++ /dev/null @@ -1,60 +0,0 @@ -worker: - _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed - _convert_: all - master_node_ip: null - threads_per_node: null - debug_mode: false - log_to_driver: true - logs_subdir: logs - use_distributed: false -scene_filter: - _target_: d123.dataset.scene.scene_filter.SceneFilter - _convert_: all - split_types: null - split_names: null - log_names: null - map_names: null - scene_tokens: null - timestamp_threshold_s: null - ego_displacement_minimum_m: null - duration_s: 9.2 - history_s: 3.0 -scene_builder: - _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder - _convert_: all - dataset_path: ${d123_data_root} -distributed_timeout_seconds: 7200 -selected_simulation_metrics: null -verbose: false -logger_level: info -logger_format_string: null -max_number_of_workers: null -gpu: true -seed: 42 -d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} -d123_maps_root: ${oc.env:D123_MAPS_ROOT} -d123_data_root: ${oc.env:D123_DATA_ROOT} -nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} -nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} -nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} -experiment_name: kitti_test2 -date_format: '%Y.%m.%d.%H.%M.%S' -experiment_uid: ${now:${date_format}} -output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} -force_log_conversion: true -force_map_conversion: false -datasets: - nuplan_private_dataset: - _target_: d123.dataset.dataset_specific.nuplan.nuplan_data_converter.NuplanDataConverter - _convert_: all - splits: - - nuplan_private_test - log_path: ${oc.env:NUPLAN_DATA_ROOT}/nuplan-v1.1/splits - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: all 
- output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: path - lidar_store_option: path diff --git a/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/hydra.yaml b/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/hydra.yaml deleted file mode 100644 index d053f8e7..00000000 --- a/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: ${output_dir} - sweep: - dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: null - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. - - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: RUN - searchpath: - - pkg://d123.script.config - - pkg://d123.script.config.common - callbacks: {} - output_subdir: ${output_dir}/code/hydra - overrides: - hydra: - - hydra.mode=RUN - task: - - experiment_name=kitti_test2 - job: - name: run_dataset_conversion - chdir: false - override_dirname: experiment_name=kitti_test2 - id: ??? - num: ??? 
- config_name: default_dataset_conversion - env_set: {} - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /home/jbwang/d123 - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: /home/jbwang/d123/d123/script/config/dataset_conversion - schema: file - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: d123.script.config - schema: pkg - provider: hydra.searchpath in main - - path: d123.script.config.common - schema: pkg - provider: hydra.searchpath in main - - path: '' - schema: structured - provider: schema - output_dir: /home/jbwang/d123/exp/kitti_test2/2025.08.15.14.52.39 - choices: - scene_builder: default_scene_builder - scene_filter: all_scenes - worker: ray_distributed - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/overrides.yaml b/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/overrides.yaml deleted file mode 100644 index 676c1042..00000000 --- a/exp/kitti_test2/2025.08.15.14.52.39/code/hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- experiment_name=kitti_test2 diff --git a/exp/kitti_test2/2025.08.15.14.52.39/log.txt b/exp/kitti_test2/2025.08.15.14.52.39/log.txt deleted file mode 100644 index e2585299..00000000 --- a/exp/kitti_test2/2025.08.15.14.52.39/log.txt +++ /dev/null @@ -1,11 +0,0 @@ -2025-08-15 14:52:39,717 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... -2025-08-15 14:53:02,994 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! -2025-08-15 14:53:36,548 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed -2025-08-15 14:53:36,549 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 -Number of CPUs per node: 64 -Number of GPUs per node: 8 -Number of threads across all nodes: 64 -2025-08-15 14:53:36,549 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! -2025-08-15 14:53:36,549 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... -2025-08-15 14:53:36,550 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... 
-2025-08-15 14:53:36,550 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:17} Instantiating dataset type: {'_target_': 'd123.dataset.dataset_specific.nuplan.nuplan_data_converter.NuplanDataConverter', '_convert_': 'all', 'splits': ['nuplan_private_test'], 'log_path': '${oc.env:NUPLAN_DATA_ROOT}/nuplan-v1.1/splits', 'data_converter_config': {'_target_': 'd123.dataset.dataset_specific.raw_data_converter.DataConverterConfig', '_convert_': 'all', 'output_path': '${d123_data_root}', 'force_log_conversion': '${force_log_conversion}', 'force_map_conversion': '${force_map_conversion}', 'camera_store_option': 'path', 'lidar_store_option': 'path'}} diff --git a/exp/my_run/2025.08.11.15.45.36/code/hydra/config.yaml b/exp/my_run/2025.08.11.15.45.36/code/hydra/config.yaml deleted file mode 100644 index 86d05e7b..00000000 --- a/exp/my_run/2025.08.11.15.45.36/code/hydra/config.yaml +++ /dev/null @@ -1,60 +0,0 @@ -worker: - _target_: nuplan.planning.utils.multithreading.worker_ray.RayDistributed - _convert_: all - master_node_ip: null - threads_per_node: null - debug_mode: false - log_to_driver: true - logs_subdir: logs - use_distributed: false -scene_filter: - _target_: d123.dataset.scene.scene_filter.SceneFilter - _convert_: all - split_types: null - split_names: null - log_names: null - map_names: null - scene_tokens: null - timestamp_threshold_s: null - ego_displacement_minimum_m: null - duration_s: 9.2 - history_s: 3.0 -scene_builder: - _target_: d123.dataset.scene.scene_builder.ArrowSceneBuilder - _convert_: all - dataset_path: ${d123_data_root} -distributed_timeout_seconds: 7200 -selected_simulation_metrics: null -verbose: false -logger_level: info -logger_format_string: null -max_number_of_workers: null -gpu: true -seed: 42 -d123_devkit_root: ${oc.env:D123_DEVKIT_ROOT} -d123_maps_root: ${oc.env:D123_MAPS_ROOT} -d123_data_root: ${oc.env:D123_DATA_ROOT} -nuplan_devkit_root: ${oc.env:NUPLAN_DEVKIT_ROOT} -nuplan_maps_root: ${oc.env:NUPLAN_MAPS_ROOT} -nuplan_data_root: ${oc.env:NUPLAN_DATA_ROOT} -experiment_name: my_run -date_format: '%Y.%m.%d.%H.%M.%S' -experiment_uid: ${now:${date_format}} -output_dir: ${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid} -force_log_conversion: false -force_map_conversion: true -datasets: - nuplan_private_dataset: - _target_: d123.dataset.dataset_specific.nuplan.nuplan_data_converter.NuplanDataConverter - _convert_: all - splits: - - nuplan_private_test - log_path: ${oc.env:NUPLAN_DATA_ROOT}/nuplan-v1.1/splits - data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig - _convert_: all - output_path: ${d123_data_root} - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - camera_store_option: path - lidar_store_option: path diff --git a/exp/my_run/2025.08.11.15.45.36/code/hydra/hydra.yaml b/exp/my_run/2025.08.11.15.45.36/code/hydra/hydra.yaml deleted file mode 100644 index bf09b447..00000000 --- a/exp/my_run/2025.08.11.15.45.36/code/hydra/hydra.yaml +++ /dev/null @@ -1,177 +0,0 @@ -hydra: - run: - dir: ${output_dir} - sweep: - dir: multirun/${now:%Y-%m-%d}/${now:%H-%M-%S} - subdir: ${hydra.job.num} - launcher: - _target_: hydra._internal.core_plugins.basic_launcher.BasicLauncher - sweeper: - _target_: hydra._internal.core_plugins.basic_sweeper.BasicSweeper - max_batch_size: null - params: null - help: - app_name: ${hydra.job.name} - header: '${hydra.help.app_name} is powered by Hydra. 
- - ' - footer: 'Powered by Hydra (https://hydra.cc) - - Use --hydra-help to view Hydra specific help - - ' - template: '${hydra.help.header} - - == Configuration groups == - - Compose your configuration from those groups (group=option) - - - $APP_CONFIG_GROUPS - - - == Config == - - Override anything in the config (foo.bar=value) - - - $CONFIG - - - ${hydra.help.footer} - - ' - hydra_help: - template: 'Hydra (${hydra.runtime.version}) - - See https://hydra.cc for more info. - - - == Flags == - - $FLAGS_HELP - - - == Configuration groups == - - Compose your configuration from those groups (For example, append hydra/job_logging=disabled - to command line) - - - $HYDRA_CONFIG_GROUPS - - - Use ''--cfg hydra'' to Show the Hydra config. - - ' - hydra_help: ??? - hydra_logging: - version: 1 - formatters: - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(purple)sHYDRA%(reset)s] %(message)s' - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - root: - level: INFO - handlers: - - console - disable_existing_loggers: false - job_logging: - version: 1 - formatters: - simple: - format: '[%(asctime)s][%(name)s][%(levelname)s] - %(message)s' - colorlog: - (): colorlog.ColoredFormatter - format: '[%(cyan)s%(asctime)s%(reset)s][%(blue)s%(name)s%(reset)s][%(log_color)s%(levelname)s%(reset)s] - - %(message)s' - log_colors: - DEBUG: purple - INFO: green - WARNING: yellow - ERROR: red - CRITICAL: red - handlers: - console: - class: logging.StreamHandler - formatter: colorlog - stream: ext://sys.stdout - file: - class: logging.FileHandler - formatter: simple - filename: ${hydra.job.name}.log - root: - level: INFO - handlers: - - console - - file - disable_existing_loggers: false - env: {} - mode: RUN - searchpath: - - pkg://d123.script.config - - pkg://d123.script.config.common - callbacks: {} - output_subdir: ${output_dir}/code/hydra - overrides: - hydra: - - hydra.mode=RUN - task: - - experiment_name=my_run - job: - name: run_dataset_conversion - chdir: false - override_dirname: experiment_name=my_run - id: ??? - num: ??? 
- config_name: default_dataset_conversion - env_set: {} - env_copy: [] - config: - override_dirname: - kv_sep: '=' - item_sep: ',' - exclude_keys: [] - runtime: - version: 1.3.2 - version_base: '1.3' - cwd: /home/jbwang/d123/d123/script - config_sources: - - path: hydra.conf - schema: pkg - provider: hydra - - path: /home/jbwang/d123/d123/script/config/dataset_conversion - schema: file - provider: main - - path: hydra_plugins.hydra_colorlog.conf - schema: pkg - provider: hydra-colorlog - - path: d123.script.config - schema: pkg - provider: hydra.searchpath in main - - path: d123.script.config.common - schema: pkg - provider: hydra.searchpath in main - - path: '' - schema: structured - provider: schema - output_dir: /home/jbwang/d123/exp/my_run/2025.08.11.15.45.36 - choices: - scene_builder: default_scene_builder - scene_filter: all_scenes - worker: ray_distributed - hydra/env: default - hydra/callbacks: null - hydra/job_logging: colorlog - hydra/hydra_logging: colorlog - hydra/hydra_help: default - hydra/help: default - hydra/sweeper: basic - hydra/launcher: basic - hydra/output: default - verbose: false diff --git a/exp/my_run/2025.08.11.15.45.36/code/hydra/overrides.yaml b/exp/my_run/2025.08.11.15.45.36/code/hydra/overrides.yaml deleted file mode 100644 index 373bde0c..00000000 --- a/exp/my_run/2025.08.11.15.45.36/code/hydra/overrides.yaml +++ /dev/null @@ -1 +0,0 @@ -- experiment_name=my_run diff --git a/exp/my_run/2025.08.11.15.45.36/log.txt b/exp/my_run/2025.08.11.15.45.36/log.txt deleted file mode 100644 index 2bdc0b60..00000000 --- a/exp/my_run/2025.08.11.15.45.36/log.txt +++ /dev/null @@ -1,10 +0,0 @@ -2025-08-11 15:45:36,813 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:19} Building WorkerPool... -2025-08-11 15:46:10,300 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_ray.py:78} Starting ray local! -2025-08-11 15:46:34,960 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:101} Worker: RayDistributed -2025-08-11 15:46:34,962 INFO {/data/jbwang/submodule/nuplan-devkit-master/nuplan/planning/utils/multithreading/worker_pool.py:102} Number of nodes: 1 -Number of CPUs per node: 64 -Number of GPUs per node: 8 -Number of threads across all nodes: 64 -2025-08-11 15:46:34,962 INFO {/home/jbwang/d123/d123/script/builders/worker_pool_builder.py:27} Building WorkerPool...DONE! -2025-08-11 15:46:34,963 INFO {/home/jbwang/d123/d123/script/run_dataset_conversion.py:30} Starting Dataset Caching... -2025-08-11 15:46:34,964 INFO {/home/jbwang/d123/d123/script/builders/data_converter_builder.py:14} Building RawDataProcessor... 
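Every run directory deleted in this patch is named by the same rule from the configs above: `output_dir` is `${oc.env:D123_EXP_ROOT}/${experiment_name}/${experiment_uid}`, where `experiment_uid` applies Hydra's `${now:...}` resolver to `date_format`. A plain-Python stand-in for that resolution (a sketch for illustration, not the project's code):

    import datetime
    import os

    date_format = "%Y.%m.%d.%H.%M.%S"  # matches the config snapshots above
    experiment_uid = datetime.datetime.now().strftime(date_format)
    # e.g. /home/jbwang/d123/exp/my_run/2025.08.11.15.45.36
    output_dir = os.path.join(os.environ["D123_EXP_ROOT"], "my_run", experiment_uid)

Since each invocation lands in a fresh timestamped directory, these snapshots and logs accumulate on every run, which is why the patch drops them from version control before removing the scratch scripts below as well.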
diff --git a/jbwang_test.py b/jbwang_test.py
deleted file mode 100644
index e42f512a..00000000
--- a/jbwang_test.py
+++ /dev/null
@@ -1,98 +0,0 @@
-# from nuplan.database.nuplan_db_orm.nuplandb import NuPlanDB

-# # # Open the database file
-# # db = NuPlanDB(db_path="/nas/datasets/nuplan/nuplan-v1.1/splits/mini/2021.05.12.22.00.38_veh-35_01008_01518.db")
-# NUPLAN_DATA_ROOT = "/nas/datasets/nuplan/nuplan-v1.1/splits/mini"
-# log_path
-# log_db = NuPlanDB(NUPLAN_DATA_ROOT, str(log_path), None)

-# # Get the data of frame 1050
-# frame = db.get_frame(1050)
-# img_front = frame.camera_front  # front-view image
-# point_cloud = frame.lidar  # point cloud

-# # Get all vehicle states for this log segment
-# status_data = db.get_vehicle_status()  # returns a DataFrame
-# print(status_data)



-# from d123.dataset.dataset_specific.nuplan.nuplan_data_converter import NuplanDataConverter, DataConverterConfig
-# spits = ["nuplan_mini_train"]
-# log_path = "/nas/datasets/nuplan/nuplan-v1.1/splits/mini/"
-# converter = NuplanDataConverter(
-#     log_path=log_path,
-#     splits=spits,
-#     data_converter_config=DataConverterConfig(output_path="data/jbwang/d123"),
-# )
-# # converter.convert_logs()
-from pathlib import Path
-log_paths_per_split = {
-    "nuplan_mini_train": [
-        "2021","2022"]
-    }
-log_args = [
-    {
-        "log_path": log_path,
-        "split": split,
-    }
-    for split, log_paths in log_paths_per_split.items()
-    for log_path in log_paths
-    ]
-PATH_2D_RAW_ROOT = Path("/nas/datasets/KITTI-360/data_3d_raw/")
-candidates = sorted(p for p in PATH_2D_RAW_ROOT.iterdir() if p.is_dir() and p.name.endswith("_sync"))
-# print(log_args)
-# print(candidates)
-# print(candidates[0].name)
-# print(candidates[0].stem)
-# print(type(candidates[0].name))
-# print(type(candidates[0].stem))
-# PATH_2D_RAW_ROOT_new = PATH_2D_RAW_ROOT/"123"/candidates[0].name
-# print(PATH_2D_RAW_ROOT_new)



-# import hashlib
-# def create_token(input_data: str) -> str:
-#     # TODO: Refactor this function.
-#     # TODO: Add a general function to create tokens from arbitrary data.
-#     if isinstance(input_data, str):
-#         input_data = input_data.encode("utf-8")

-#     hash_obj = hashlib.sha256(input_data)
-#     return hash_obj.hexdigest()[:16]

-# log_name = "1230_asd_"
-# for i in range(20):
-#     a = create_token(f"{log_name}_{i}")
-#     print(a)ee


-# import numpy as np
-# from pathlib import Path
-# a = np.loadtxt("/data/jbwang/d123/data_poses/2013_05_28_drive_0000_sync/oxts/data/0000000000.txt")
-# b = np.loadtxt("/nas/datasets/KITTI-360/data_poses/2013_05_28_drive_0018_sync/poses.txt")
-# data = b
-# ts = data[:, 0].astype(np.int32)
-# poses = np.reshape(data[:, 1:], (-1, 3, 4))
-# poses = np.concatenate((poses, np.tile(np.array([0, 0, 0, 1]).reshape(1,1,4),(poses.shape[0],1,1))), 1)
-# print(a)
-# print(b.shape)
-# print(ts.shape)
-# print(poses.shape)

-# ccc = Path("/data/jbwang/d123/data_poses/2013_05_28_drive_0000_sync/oxts/data/")
-# print(len(list(ccc.glob("*.txt"))))




-from d123.dataset.dataset_specific.nuplan.nuplan_data_converter import convert_nuplan_map_to_gpkg

-from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig

-MAP_LOCATIONS = {"sg-one-north", "us-ma-boston", "us-nv-las-vegas-strip", "us-pa-pittsburgh-hazelwood"}
-maps = list(MAP_LOCATIONS)

-data_conveter_config = DataConverterConfig(output_path = "/nas/datasets/nuplan/maps")
-convert_nuplan_map_to_gpkg(maps,data_conveter_config)
\ No newline at end of file
diff --git a/jbwang_test2.py b/jbwang_test2.py
deleted file mode 100644
index 183df813..00000000
--- a/jbwang_test2.py
+++ /dev/null
@@ -1,229 +0,0 @@
-# # import numpy as np
-# # import pickle

-# # # path = "/nas/datasets/KITTI-360/data_3d_raw/2013_05_28_drive_0000_sync/velodyne_points/data/0000000000.bin"
-# # # a = np.fromfile(path, dtype=np.float32)

-# # # print(a.shape)
-# # # print(a[:10])

-# # # path2 = "/nas/datasets/KITTI-360/calibration/calib_cam_to_pose.txt"
-# # # c = np.loadtxt(path2)
-# # # print(c)

-# # import open3d as o3d
-# # import numpy as np

-# # def read_ply_file(file_path):
-# #     # Read the PLY file
-# #     pcd = o3d.io.read_point_cloud(file_path)
-# #     print(len(pcd.points), len(pcd.colors))
-# #     # Extract vertex information
-# #     points = np.asarray(pcd.points)  # x, y, z
-# #     colors = np.asarray(pcd.colors)  # red, green, blue
-# #     # semantics = np.asarray(pcd.semantic)  # semanticID, instanceID, isVisible, confidence

-# #     # Merge all the information into one array
-# #     vertices = np.hstack((points, colors))

-# #     return vertices

-# # # Example usage
-# # file_path = '/nas/datasets/KITTI-360/data_3d_semantics/train/2013_05_28_drive_0000_sync/static/0000000002_0000000385.ply'  # replace with the path to your PLY file
-# # vertices = read_ply_file(file_path)

-# # # Print the first few vertices
-# # print("Vertex info (first 5 vertices):")
-# # print(vertices[:5])

-# import numpy as np
-# from scipy.linalg import polar
-# from scipy.spatial.transform import Rotation as R

-# def polar_decompose_rotation_scale(A: np.ndarray):
-#     """
-#     A: 3x3 (rotation + scale + shear)
-#     Returns:
-#         Rm: pure rotation
-#         Sm: symmetric positive definite (scale + shear)
-#         scale: approximate per-axis scale (from the square roots of Sm's eigenvalues or its diagonal; use with caution if shear is present)
-#         yaw,pitch,roll: using the ZYX sequence (usually corresponding to yaw(Z), pitch(Y), roll(X))
-#     """
-#     Rm, Sm = polar(A)  # A = Rm @ Sm
-#     # Approximate per-axis scale (if there is no shear):
-#     scale = np.diag(Sm)
-#     # Euler angles
-#     yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', degrees=False)
-#     return {
-#         "R": Rm,
-#         "S": Sm,
-#         "scale_diag": scale,
-#         "yaw_pitch_roll": (yaw, pitch, roll),
-#     }

-# M = np.array([
-#     [-3.97771668e+00, -1.05715942e+00,-2.18206085e-02],
-#     [2.43555284e+00, -1.72707462e+00, -1.03932284e-02],
-#     [-4.41359095e-02, -2.94448305e-02, 1.39303744e+00],
-# ])
-# out = polar_decompose_rotation_scale(M)
-# print(out)
-
-# import numpy as np
-# path = "/nas/datasets/KITTI-360/data_3d_raw/2013_05_28_drive_0000_sync/velodyne_points/data/0000000000.bin"
-# a = np.fromfile(path, dtype=np.float32)
-# a = a.reshape((-1,4))
-# print(a[10000:10010,:3])





-# import gc
-# import json
-# import os
-# from dataclasses import asdict
-# from functools import partial
-# from pathlib import Path
-# from typing import Any, Dict, Final, List, Optional, Tuple, Union

-# import numpy as np
-# from collections import defaultdict
-# import datetime
-# import hashlib
-# import xml.etree.ElementTree as ET
-# import pyarrow as pa
-# from PIL import Image
-# import logging

-# from d123.common.datatypes.detection.detection_types import DetectionType
-# from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D


-# #TODO train and train_full
-# bbox_3d_path = Path("/nas/datasets/KITTI-360/data_3d_bboxes/train/2013_05_28_drive_0000_sync.xml")

-# tree = ET.parse(bbox_3d_path)
-# root = tree.getroot()

-# KIITI360_DETECTION_NAME_DICT = {
-#     "truck": DetectionType.VEHICLE,
-#     "bus": DetectionType.VEHICLE,
-#     "car": DetectionType.VEHICLE,
-#     "motorcycle": DetectionType.BICYCLE,
-#     "bicycle": DetectionType.BICYCLE,
-#     "pedestrian": DetectionType.PEDESTRIAN,
-# }
-# # x,y,z = 881.2268115,3247.493293,115.239219
-# # x,y,z = 867.715474,3229.630439,115.189221  # ego vehicle
-# # x,y,z = 873.533508, 3227.16235, 115.185341  # the pedestrian we are looking for
-# x,y,z = 874.233508, 3231.56235, 115.185341  # the vehicle we are looking for
-# CENTER_REF = np.array([x, y, z], dtype=np.float64)
-# objs_name = []
-# lable_name = []
-# for child in root:
-#     label = child.find('label').text
-#     # if child.find('transform') is None or label not in KIITI360_DETECTION_NAME_DICT.keys():
-#     #     continue

-#     if child.find('transform') is None:
-#         continue
-#     print("this label is ",label)
-#     print("!!!!!!!!!!!!!!!!!!!")
-#     obj = KITTI360Bbox3D()
-#     obj.parseBbox(child)
-#     # obj.parseVertices(child)
-#     name = child.find('label').text
-#     lable_name.append(name)
-#     # if obj.start_frame < 10030 and obj.end_frame > 10030:
-#     center = np.array(obj.T, dtype=np.float64)
-#     dist = np.linalg.norm(center - CENTER_REF)
-#     if dist < 7:
-#         print(f"Object ID: {obj.name}, Start Frame: {obj.start_frame}, End Frame: {obj.end_frame},self.annotationId: {obj.annotationId},{obj.timestamp},{obj.T}")
-#         objs_name.append(obj.name)
-# print(len(objs_name))
-# print(set(objs_name))
-# print(set(lable_name))
-# # print(obj.Rm)
-# # print(Sigma)
-# names = []
-# for child in root:
-#     label = child.find('label').text
-#     if child.find('transform') is None:
-#         continue
-#     names.append(label)
-# print(set(names))

-from scipy.spatial.transform import Rotation as R
-import numpy as np
-from pathlib import Path as PATH

-def get_rotation_matrix(roll,pitch,yaw):
-    # Intrinsic Z-Y'-X'' rotation: R = R_x(roll) @ R_y(pitch) @ R_z(yaw)
-    R_x = np.array(
-        [
-            [1, 0, 0],
-            [0, np.cos(roll), -np.sin(roll)],
-            [0, np.sin(roll), np.cos(roll)],
-        ],
-        dtype=np.float64,
-    )
-    R_y = np.array(
-        [
-            [np.cos(pitch), 0, np.sin(pitch)],
-            [0, 1, 0],
-            [-np.sin(pitch), 0, np.cos(pitch)],
-        ],
-        dtype=np.float64,
-    )
-    R_z = np.array(
-        [
-            [np.cos(yaw), -np.sin(yaw), 0],
-            [np.sin(yaw), np.cos(yaw), 0],
-            [0, 0, 1],
-        ],
-        dtype=np.float64,
-    )
-    return R_x @ R_y @ R_z

-oxts_path = PATH("/data/jbwang/d123/data_poses/2013_05_28_drive_0000_sync/oxts/data/" )
-pose_file = PATH("/nas/datasets/KITTI-360/data_poses/2013_05_28_drive_0000_sync/poses.txt")
-poses = np.loadtxt(pose_file)
-poses_time = poses[:, 0] - 1 # Adjusting time to start from 0
-
-pose_idx = 0
-poses_time_len = len(poses_time)

-from pyquaternion import Quaternion

-for idx in range(len(list(oxts_path.glob("*.txt")))):
-    oxts_path_file = oxts_path / f"{int(idx):010d}.txt"
-    oxts_data = np.loadtxt(oxts_path_file)
-    while pose_idx + 1 < poses_time_len and poses_time[pose_idx + 1] < idx:
-        pose_idx += 1
-    pos = pose_idx

-    r00, r01, r02 = poses[pos, 1:4]
-    r10, r11, r12 = poses[pos, 5:8]
-    r20, r21, r22 = poses[pos, 9:12]
-    R_mat = np.array([[r00, r01, r02],
-                      [r10, r11, r12],
-                      [r20, r21, r22]], dtype=np.float64)
-    calib = np.array([[1.0, 0.0, 0.0],
-                      [0.0, -1.0, 0.0],
-                      [0.0, 0.0, -1.0]], dtype=np.float64)
-    R_mat = R_mat @ calib
-    from d123.geometry.rotation import EulerAngles
-    if idx <= 300:
-        # print("R_mat",R_mat)

-        new_yaw, new_pitch, new_roll = Quaternion(matrix=R_mat[:3, :3]).yaw_pitch_roll
-        R = EulerAngles.from_array(np.array([new_roll, new_pitch, new_yaw])).rotation_matrix
-        # print("R from yaw_pitch_roll",R)
-        print(R_mat - R)
-        # new_yaw,new_pitch,new_roll = R.from_matrix(R_mat).as_euler('yxz', degrees=False)
-        # print("new",new_roll,new_pitch,new_yaw)
-        # print("roll,pitch,yaw",oxts_data[3:6])  # the first 6 elements are position and velocity
-        # roll, pitch, yaw = oxts_data[3:6]
-        # print("true",get_rotation_matrix(roll,pitch,yaw))
-        # print("new",roll,pitch,yaw)
\ No newline at end of file
diff --git a/notebooks/dataset/jbwang_test.py b/notebooks/dataset/jbwang_test.py
deleted file mode 100644
index c37d8d40..00000000
--- a/notebooks/dataset/jbwang_test.py
+++ /dev/null
@@ -1,94 +0,0 @@
-# s3_uri = "/data/jbwang/d123/data/nuplan_mini_train/2021.10.11.07.12.18_veh-50_00211_00304.arrow"
-# s3_uri = "/data/jbwang/d123/data/nuplan_private_test/2021.09.22.13.20.34_veh-28_01446_01583.arrow"
-# s3_uri = "/data/jbwang/d123/data/carla/_Rep0_routes_validation1_route0_07_23_14_33_15.arrow"
-# s3_uri = "/data/jbwang/d123/data/nuplan_mini_val/2021.06.07.12.54.00_veh-35_01843_02314.arrow"
-# s3_uri = "/data/jbwang/d123/data2/kitti360_c2e_train/2013_05_28_drive_0000_sync_c2e.arrow"
-s3_uri = "/data/jbwang/d123/data2/kitti360_detection_all_test/2013_05_28_drive_0000_sync.arrow"


-import pyarrow as pa
-import pyarrow.fs as fs
-import pyarrow.dataset as ds

-import os

-s3_fs = fs.S3FileSystem()
-from d123.common.utils.timer import Timer


-timer = Timer()
-timer.start()

-dataset = ds.dataset(f"{s3_uri}", format="ipc")
-timer.log("1. Dataset loaded")

-# Get all column names and remove the ones you want to drop
-all_columns = dataset.schema.names
-# print("all_columns", all_columns)
-# print("Schema:")
-# print(dataset.schema)
-# columns_to_keep = [col for col in all_columns if col not in ["front_cam_demo", "front_cam_transform"]]
-timer.log("2. Columns filtered")

-table = dataset.to_table(columns=all_columns)
-# print("table",table)
-# print(table["token"])
-for col in table.column_names:
-    if col == "lidar":
-        continue
-    print(f"Column : {col}, Type: {table.schema.field(col).type}")
-    tokens = table["detections_velocity"]  # or table.column("token")
-    # tokens = table["detections_type"]
-    # print(tokens)
-    # print(len(tokens))
-    result = tokens.slice(1470, 40).to_pylist()
-    # for item in result:
-    #     print(len(item))
-print(result)
-# print(table["traffic_light_ids"])
-timer.log("3. Table created")
-# Save locally
-# with pa.ipc.new_file("filtered_file.arrow", table.schema) as writer:
-#     writer.write_table(table)
-timer.log("4. Table saved locally")
-
-timer.end()
-timer.stats(verbose=False)

-# Inspect the table structure and contents of the nuplan database

-# from pathlib import Path
-# from nuplan.database.nuplan_db_orm.nuplandb import NuPlanDB
-# from nuplan.database.nuplan_db_orm.lidar_pc import LidarPc
-# from sqlalchemy import inspect, select
-# from sqlalchemy.orm import Session
-# from sqlalchemy import func
-# from nuplan.database.nuplan_db_orm.ego_pose import EgoPose

-# NUPLAN_DATA_ROOT = Path("/nas/datasets/nuplan/")  # set to your actual path
-# log_path = "/nas/datasets/nuplan/nuplan-v1.1/splits/mini/2021.05.12.22.00.38_veh-35_01008_01518.db"

-# db = NuPlanDB(NUPLAN_DATA_ROOT, log_path, None)
-# # print(db.log)
-# print(db.log.map_version)
-# # print("log.cameras",db.log.cameras)
-# # print("Log name:", db.log_name)
-# # print("lidar",db.lidar_pc)
-# # print("scenario_tags", db.scenario_tag)
-# # print(db.log._session.query(EgoPose).order_by(func.abs(EgoPose.timestamp)).first())

-# # persp = Path("/nas/datasets/KITTI-360/calibration/perspective.txt")
-# # with open(persp, "r") as f:
-# #     lines = [ln.strip() for ln in f if ln.strip()]
-# # print(lines)

-# from d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter import get_kitti360_camera_metadata

-# print(get_kitti360_camera_metadata())



-# from d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter import _read_timestamps
-# result = _read_timestamps("2013_05_28_drive_0000_sync")
-# print(len(result))
-# print([result[0].time_us])
\ No newline at end of file
diff --git a/notebooks/gym/jbwang_test.py b/notebooks/gym/jbwang_test.py
deleted file mode 100644
index 663e2899..00000000
--- a/notebooks/gym/jbwang_test.py
+++ /dev/null
@@ -1,180 +0,0 @@
-from d123.dataset.scene.scene_builder import ArrowSceneBuilder
-from d123.dataset.scene.scene_filter import SceneFilter

-from d123.common.multithreading.worker_sequential import Sequential
-# from d123.common.multithreading.worker_ray import RayDistributed

-import os, psutil

-from pathlib import Path
-from typing import Optional, Tuple

-import matplotlib.animation as animation
-import matplotlib.pyplot as plt
-from tqdm import tqdm

-from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE2
-from d123.common.geometry.base import Point2D, StateSE2
-from d123.common.geometry.bounding_box.bounding_box import BoundingBoxSE2
-from d123.common.visualization.color.default import EGO_VEHICLE_CONFIG
-from d123.common.visualization.matplotlib.observation import (
-    add_bounding_box_to_ax,
-    add_box_detections_to_ax,
-    add_default_map_on_ax,
-    add_traffic_lights_to_ax,
-    add_ego_vehicle_to_ax,
-)
-from d123.dataset.arrow.conversion import TrafficLightDetectionWrapper
-from d123.dataset.maps.abstract_map import AbstractMap
-from d123.common.datatypes.detection.detection import BoxDetectionWrapper
-from d123.dataset.scene.abstract_scene import AbstractScene
-import io
-from PIL import Image



-def _plot_scene_on_ax(
-    ax: plt.Axes,
-    map_api: AbstractMap,
-    ego_state: EgoStateSE2,
-    initial_ego_state: Optional[EgoStateSE2],
-    box_detections: BoxDetectionWrapper,
-    traffic_light_detections: TrafficLightDetectionWrapper,
-    radius: float = 120,
-) -> plt.Axes:

-    if initial_ego_state is not None:
-        point_2d = initial_ego_state.center.point_2d
-    else:
-        point_2d = ego_state.center.point_2d
-    add_default_map_on_ax(ax, map_api, point_2d, radius=radius)
-    add_traffic_lights_to_ax(ax, traffic_light_detections, map_api)

-    add_box_detections_to_ax(ax, box_detections)
-    add_ego_vehicle_to_ax(ax, ego_state)

ax.set_xlim(point_2d.x - radius, point_2d.x + radius) - ax.set_ylim(point_2d.y - radius, point_2d.y + radius) - - ax.set_aspect("equal", adjustable="box") - return ax - - -def plot_scene_to_image( - map_api: AbstractMap, - ego_state: EgoStateSE2, - initial_ego_state: Optional[EgoStateSE2], - box_detections: BoxDetectionWrapper, - traffic_light_detections: TrafficLightDetectionWrapper, - radius: float = 120, - figsize: Tuple[int, int] = (8, 8), -) -> Image: - - fig, ax = plt.subplots(figsize=figsize) - _plot_scene_on_ax(ax, map_api, ego_state, initial_ego_state, box_detections, traffic_light_detections, radius) - ax.set_aspect("equal", adjustable="box") - plt.subplots_adjust(left=0.05, right=0.95, top=0.95, bottom=0.05) - # plt.tight_layout() - - buf = io.BytesIO() - fig.savefig(buf, format="png", bbox_inches="tight") - plt.close(fig) - buf.seek(0) - img = Image.open(buf) - return img - - -def print_memory_usage(): - process = psutil.Process(os.getpid()) - memory_info = process.memory_info() - print(f"Memory usage: {memory_info.rss / 1024 ** 2:.2f} MB") - - -split = "kitti360_detection_all_and_vel" -scene_tokens = None -log_names = None - -scene_filter = SceneFilter( - split_names=[split], log_names=log_names, scene_tokens=scene_tokens, duration_s=15.1, history_s=1.0 -) -scene_builder = ArrowSceneBuilder("/data/jbwang/d123/data2/") -worker = Sequential() -# worker = RayDistributed() -scenes = scene_builder.get_scenes(scene_filter, worker) - -print(len(scenes)) - -for scene in scenes[:10]: - print(scene.log_name, scene.token) - -from d123.dataset.arrow.conversion import DetectionType -from d123.simulation.gym.gym_env import GymEnvironment -from d123.simulation.observation.agents_observation import _filter_agents_by_type - -import time - -images = [] -agent_rollouts = [] -plot: bool = True -action = [1.0, -0.0] # Placeholder action, replace with actual action logic -env = GymEnvironment(scenes) - -start = time.time() - -map_api, ego_state, detection_observation, current_scene = env.reset(scenes[1460]) -initial_ego_state = ego_state -cars, _, _ = _filter_agents_by_type(detection_observation.box_detections, detection_types=[DetectionType.VEHICLE]) -agent_rollouts.append(BoxDetectionWrapper(cars)) -if plot: - images.append( - plot_scene_to_image( - map_api, - ego_state, - initial_ego_state, - detection_observation.box_detections, - detection_observation.traffic_light_detections, - ) - ) - - -for i in range(160): - ego_state, detection_observation, end = env.step(action) - cars, _, _ = _filter_agents_by_type(detection_observation.box_detections, detection_types=[DetectionType.VEHICLE]) - agent_rollouts.append(BoxDetectionWrapper(cars)) - if plot: - images.append( - plot_scene_to_image( - map_api, - ego_state, - initial_ego_state, - detection_observation.box_detections, - detection_observation.traffic_light_detections, - ) - ) - if end: - print("End of scene reached.") - break - -time_s = time.time() - start -print(time_s) -print(151/ time_s) - -import numpy as np - - -def create_gif(images, output_path, duration=100): - """ - Create a GIF from a list of PIL images. - - Args: - images (list): List of PIL.Image objects. - output_path (str): Path to save the GIF. - duration (int): Duration between frames in milliseconds. 
- """ - if images: - print(len(images)) - images_p = [img.convert("P", palette=Image.ADAPTIVE) for img in images] - images_p[0].save(output_path, save_all=True, append_images=images_p[1:], duration=duration, loop=0) - - -create_gif(images, f"/data/jbwang/d123/data2/{split}_{current_scene.token}.gif", duration=20) \ No newline at end of file diff --git a/notebooks/jbwang_viz_test.py b/notebooks/jbwang_viz_test.py deleted file mode 100644 index 73f05dbf..00000000 --- a/notebooks/jbwang_viz_test.py +++ /dev/null @@ -1,252 +0,0 @@ -# from typing import Tuple - -# import matplotlib.pyplot as plt - -# from nuplan.planning.utils.multithreading.worker_sequential import Sequential - -# from d123.dataset.scene.scene_builder import ArrowSceneBuilder -# from d123.dataset.scene.scene_filter import SceneFilter -# from d123.dataset.scene.abstract_scene import AbstractScene - -# from typing import Dict -# from d123.common.datatypes.sensor.camera import CameraType -# from d123.common.visualization.matplotlib.camera import add_camera_ax -# from d123.common.visualization.matplotlib.camera import add_box_detections_to_camera_ax - -# # split = "nuplan_private_test" -# # log_names = ["2021.09.29.17.35.58_veh-44_00066_00432"] - - - - -# # splits = ["carla"] -# splits = ["nuplan_private_test"] -# # splits = ["wopd_train"] -# # log_names = None - - - -# # splits = ["nuplan_private_test"] -# log_names = None - -# scene_tokens = None - -# scene_filter = SceneFilter( -# split_names=splits, -# log_names=log_names, -# scene_tokens=scene_tokens, -# duration_s=19, -# history_s=0.0, -# timestamp_threshold_s=20, -# shuffle=False, -# camera_types=[CameraType.CAM_F0], -# ) -# scene_builder = ArrowSceneBuilder("/data/jbwang/d123/data/") -# worker = Sequential() -# # worker = RayDistributed() -# scenes = scene_builder.get_scenes(scene_filter, worker) - -# print(f"Found {len(scenes)} scenes") - - -# from typing import List, Optional, Tuple -# import matplotlib.pyplot as plt -# import numpy as np -# from d123.common.geometry.base import Point2D -# from d123.common.visualization.color.color import BLACK, DARK_GREY, DARKER_GREY, LIGHT_GREY, NEW_TAB_10, TAB_10 -# from d123.common.visualization.color.config import PlotConfig -# from d123.common.visualization.color.default import CENTERLINE_CONFIG, MAP_SURFACE_CONFIG, ROUTE_CONFIG -# from d123.common.visualization.matplotlib.observation import ( -# add_box_detections_to_ax, -# add_default_map_on_ax, -# add_ego_vehicle_to_ax, -# add_traffic_lights_to_ax, -# ) -# from d123.common.visualization.matplotlib.utils import add_shapely_linestring_to_ax, add_shapely_polygon_to_ax -# from d123.dataset.maps.abstract_map import AbstractMap -# from d123.dataset.maps.abstract_map_objects import AbstractLane -# from d123.dataset.maps.map_datatypes import MapLayer -# from d123.dataset.scene.abstract_scene import AbstractScene - - -# import shapely.geometry as geom - -# LEFT_CONFIG: PlotConfig = PlotConfig( -# fill_color=TAB_10[2], -# fill_color_alpha=1.0, -# line_color=TAB_10[2], -# line_color_alpha=0.5, -# line_width=1.0, -# line_style="-", -# zorder=3, -# ) - -# RIGHT_CONFIG: PlotConfig = PlotConfig( -# fill_color=TAB_10[3], -# fill_color_alpha=1.0, -# line_color=TAB_10[3], -# line_color_alpha=0.5, -# line_width=1.0, -# line_style="-", -# zorder=3, -# ) - - -# LANE_CONFIG: PlotConfig = PlotConfig( -# fill_color=BLACK, -# fill_color_alpha=1.0, -# line_color=BLACK, -# line_color_alpha=0.0, -# line_width=0.0, -# line_style="-", -# zorder=5, -# ) - -# ROAD_EDGE_CONFIG: PlotConfig = PlotConfig( -# 
fill_color=DARKER_GREY.set_brightness(0.0), -# fill_color_alpha=1.0, -# line_color=DARKER_GREY.set_brightness(0.0), -# line_color_alpha=1.0, -# line_width=1.0, -# line_style="-", -# zorder=3, -# ) - -# ROAD_LINE_CONFIG: PlotConfig = PlotConfig( -# fill_color=DARKER_GREY, -# fill_color_alpha=1.0, -# line_color=NEW_TAB_10[5], -# line_color_alpha=1.0, -# line_width=1.5, -# line_style="-", -# zorder=3, -# ) - - -# def add_debug_map_on_ax( -# ax: plt.Axes, -# map_api: AbstractMap, -# point_2d: Point2D, -# radius: float, -# route_lane_group_ids: Optional[List[int]] = None, -# ) -> None: -# layers: List[MapLayer] = [ -# MapLayer.LANE, -# MapLayer.LANE_GROUP, -# MapLayer.GENERIC_DRIVABLE, -# MapLayer.CARPARK, -# MapLayer.CROSSWALK, -# MapLayer.INTERSECTION, -# MapLayer.WALKWAY, -# MapLayer.ROAD_EDGE, -# MapLayer.ROAD_LINE, -# ] -# x_min, x_max = point_2d.x - radius, point_2d.x + radius -# y_min, y_max = point_2d.y - radius, point_2d.y + radius -# patch = geom.box(x_min, y_min, x_max, y_max) -# map_objects_dict = map_api.query(geometry=patch, layers=layers, predicate="intersects") - -# done = False -# for layer, map_objects in map_objects_dict.items(): -# for map_object in map_objects: -# try: -# if layer in [ -# # MapLayer.GENERIC_DRIVABLE, -# # MapLayer.CARPARK, -# # MapLayer.CROSSWALK, -# # MapLayer.INTERSECTION, -# # MapLayer.WALKWAY, -# ]: -# add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, MAP_SURFACE_CONFIG[layer]) - -# # if layer in [MapLayer.LANE_GROUP]: -# # add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, MAP_SURFACE_CONFIG[layer]) - -# if layer in [MapLayer.LANE]: -# map_object: AbstractLane -# if map_object.right_lane is not None and map_object.left_lane is not None and not done: -# add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, LANE_CONFIG) -# add_shapely_polygon_to_ax(ax, map_object.right_lane.shapely_polygon, RIGHT_CONFIG) -# add_shapely_polygon_to_ax(ax, map_object.left_lane.shapely_polygon, LEFT_CONFIG) -# done = True -# else: -# add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, MAP_SURFACE_CONFIG[layer]) - - -# # add_shapely_linestring_to_ax(ax, map_object.right_boundary.linestring, RIGHT_CONFIG) -# # add_shapely_linestring_to_ax(ax, map_object.left_boundary.linestring, LEFT_CONFIG) -# # add_shapely_polygon_to_ax(ax, map_object.shapely_polygon, LANE_CONFIG) - -# # centroid = map_object.shapely_polygon.centroid -# # ax.text( -# # centroid.x, -# # centroid.y, -# # str(map_object.id), -# # horizontalalignment="center", -# # verticalalignment="center", -# # fontsize=8, -# # bbox=dict(facecolor="white", alpha=0.7, boxstyle="round,pad=0.2"), -# # ) -# # if layer in [MapLayer.ROAD_EDGE]: -# # add_shapely_linestring_to_ax(ax, map_object.polyline_3d.linestring, ROAD_EDGE_CONFIG) -# # edge_lengths.append(map_object.polyline_3d.linestring.length) - -# if layer in [MapLayer.ROAD_LINE]: -# line_type = int(map_object.road_line_type) -# plt_config = PlotConfig( -# fill_color=NEW_TAB_10[line_type % len(NEW_TAB_10)], -# fill_color_alpha=1.0, -# line_color=NEW_TAB_10[line_type % len(NEW_TAB_10)], -# line_color_alpha=1.0, -# line_width=1.5, -# line_style="-", -# zorder=3, -# ) -# add_shapely_linestring_to_ax(ax, map_object.polyline_3d.linestring, plt_config) - -# except Exception: -# import traceback - -# print(f"Error adding map object of type {layer.name} and id {map_object.id}") -# traceback.print_exc() - -# ax.set_title(f"Map: {map_api.map_name}") - - -# def _plot_scene_on_ax(ax: plt.Axes, scene: AbstractScene, iteration: int = 0, radius: float = 80) -> 
plt.Axes: - -# ego_vehicle_state = scene.get_ego_state_at_iteration(iteration) -# box_detections = scene.get_box_detections_at_iteration(iteration) - -# point_2d = ego_vehicle_state.bounding_box.center.state_se2.point_2d -# add_debug_map_on_ax(ax, scene.map_api, point_2d, radius=radius, route_lane_group_ids=None) -# # add_default_map_on_ax(ax, scene.map_api, point_2d, radius=radius, route_lane_group_ids=None) -# # add_traffic_lights_to_ax(ax, traffic_light_detections, scene.map_api) - -# add_box_detections_to_ax(ax, box_detections) -# add_ego_vehicle_to_ax(ax, ego_vehicle_state) - -# zoom = 1.0 -# ax.set_xlim(point_2d.x - radius * zoom, point_2d.x + radius * zoom) -# ax.set_ylim(point_2d.y - radius * zoom, point_2d.y + radius * zoom) - -# ax.set_aspect("equal", adjustable="box") -# return ax - - -# def plot_scene_at_iteration( -# scene: AbstractScene, iteration: int = 0, radius: float = 80 -# ) -> Tuple[plt.Figure, plt.Axes]: - -# size = 15 - -# fig, ax = plt.subplots(figsize=(size, size)) -# _plot_scene_on_ax(ax, scene, iteration, radius) -# return fig, ax - - -# scene_index = 1 -# fig, ax = plot_scene_at_iteration(scenes[scene_index], iteration=100, radius=100) - -# # fig.savefig(f"/home/daniel/scene_{scene_index}_iteration_1.pdf", dpi=300, bbox_inches="tight") - From b4d06bdcac7e58c891728d5289e13df1b345f825 Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Thu, 28 Aug 2025 15:30:26 +0800 Subject: [PATCH 14/32] ready to push --- d123/common/visualization/viser/server.py | 4 +- .../kitti_360/kitti_360_data_converter.py | 51 ++++++++++--------- .../kitti_360/preprocess_detection.py | 2 +- 3 files changed, 29 insertions(+), 28 deletions(-) diff --git a/d123/common/visualization/viser/server.py b/d123/common/visualization/viser/server.py index 990a90dd..6cba5dd5 100644 --- a/d123/common/visualization/viser/server.py +++ b/d123/common/visualization/viser/server.py @@ -43,9 +43,9 @@ # VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [CameraType.CAM_F0, CameraType.CAM_L0, CameraType.CAM_R0] # VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = all_camera_types -VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [CameraType.CAM_STEREO_L, CameraType.CAM_STEREO_R] +VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [CameraType.CAM_STEREO_L] # VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [] -VISUALIZE_CAMERA_GUI: List[CameraType] = [CameraType.CAM_F0] +VISUALIZE_CAMERA_GUI: List[CameraType] = [CameraType.CAM_STEREO_L] CAMERA_SCALE: float = 1.0 # Lidar config: diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py index 2cc40675..1b967fca 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py @@ -56,11 +56,11 @@ DIR_POSES = "data_poses" DIR_CALIB = "calibration" -#TODO PATH_2D_RAW_ROOT -# PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_RAW -PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT +PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_RAW +# PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT PATH_2D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_SMT PATH_3D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_RAW +# PATH_3D_RAW_ROOT: Path = Path("/data/jbwang/d123/data_3d_raw") PATH_3D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_SMT PATH_3D_BBOX_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_BBOX PATH_POSES_ROOT: Path = KITTI360_DATA_ROOT / DIR_POSES @@ -406,7 +406,9 @@ def _write_recording_table( 
write_arrow_table(recording_table, log_file_path) def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]: - # unix + """ + Read KITTI-360 timestamps for the given sequence and return Unix epoch timestamps. + """ ts_files = [ PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "timestamps.txt", PATH_2D_RAW_ROOT / log_name / "image_00" / "timestamps.txt", @@ -449,10 +451,9 @@ def _extract_ego_state_all(log_name: str) -> List[List[float]]: raise FileNotFoundError(f"Pose file not found: {pose_file}") poses = np.loadtxt(pose_file) poses_time = poses[:, 0] - 1 # Adjusting time to start from 0 - - #TODO - #oxts_path = PATH_POSES_ROOT / log_name / "oxts" / "data" - oxts_path = Path("/data/jbwang/d123/data_poses/") / log_name / "oxts" / "data" + + oxts_path = PATH_POSES_ROOT / log_name / "oxts" / "data" + # oxts_path = Path("/data/jbwang/d123/data_poses/") / log_name / "oxts" / "data" pose_idx = 0 poses_time_len = len(poses_time) @@ -632,29 +633,29 @@ def _extract_cameras( elif cam_dir_name in ["image_02", "image_03"]: img_path_png = PATH_2D_RAW_ROOT / log_name / cam_dir_name / "data_rgb" / f"{idx:010d}.png" - if img_path_png.exists(): - cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt" - if not cam2pose_txt.exists(): - raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}") - - lastrow = np.array([0,0,0,1]).reshape(1,4) - - with open(cam2pose_txt, 'r') as f: - for line in f: - parts = line.strip().split() - key = parts[0][:-1] - if key == cam_dir_name: - values = list(map(float, parts[1:])) - matrix = np.array(values).reshape(3, 4) - cam2pose = np.concatenate((matrix, lastrow)) - cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose + cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt" + if not cam2pose_txt.exists(): + raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}") + + lastrow = np.array([0,0,0,1]).reshape(1,4) + with open(cam2pose_txt, 'r') as f: + for line in f: + parts = line.strip().split() + key = parts[0][:-1] + if key == cam_dir_name: + values = list(map(float, parts[1:])) + matrix = np.array(values).reshape(3, 4) + cam2pose = np.concatenate((matrix, lastrow)) + cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose + if img_path_png.exists(): if data_converter_config.camera_store_option == "path": camera_data = str(img_path_png), cam2pose.flatten().tolist() elif data_converter_config.camera_store_option == "binary": with open(img_path_png, "rb") as f: camera_data = f.read(), cam2pose else: - raise FileNotFoundError(f"Camera image not found: {img_path_png}") + #TODO + camera_data = None, cam2pose.flatten().tolist() camera_dict[camera_type] = camera_data return camera_dict diff --git a/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py b/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py index e45e76d9..8b7c284f 100644 --- a/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py +++ b/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py @@ -163,7 +163,7 @@ def process_detection( for obj in static_objs: records.append(obj.valid_frames) if output_dir is None: - output_dir = PATH_3D_BBOX_ROOT / "preprocessed" + output_dir = PATH_3D_BBOX_ROOT / "preprocess" output_dir.mkdir(parents=True, exist_ok=True) out_path = output_dir / f"{log_name}_detection_preprocessed.pkl" payload = { From 4c12d3d23012d44f7110036461e3939e70845e8f Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Fri, 29 Aug 2025 11:06:06 +0800 Subject: [PATCH 15/32] add fisheyecamera --- 
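Background for this patch: KITTI-360's side fisheye cameras (image_02/image_03) are calibrated with the MEI unified camera model, and FisheyeMEICameraMetadata.cam2image follows that model: projection onto the unit sphere, a radial-tangential distortion step, then a generalized pinhole step. Below is a minimal self-contained sketch of that projection, assuming xi (mirror_parameters), k1, k2, p1, p2 (distortion) and gamma1, gamma2, u0, v0 (projection_parameters) as read from the fisheye calibration YAMLs; it is an illustration, not the exact code in this diff:

    import numpy as np

    def mei_project(points_3d, xi, k1, k2, p1, p2, gamma1, gamma2, u0, v0):
        # points_3d: (N, 3) array of points in the camera frame.
        points_3d = np.asarray(points_3d, dtype=np.float64)
        norm = np.linalg.norm(points_3d, axis=1)
        # 1) Project onto the unit sphere, then onto the normalized plane.
        x = points_3d[:, 0] / (points_3d[:, 2] + xi * norm)
        y = points_3d[:, 1] / (points_3d[:, 2] + xi * norm)
        # 2) Apply radial (k1, k2) and tangential (p1, p2) distortion.
        r2 = x * x + y * y
        radial = 1.0 + k1 * r2 + k2 * r2 * r2
        x_d = x * radial + 2.0 * p1 * x * y + p2 * (r2 + 2.0 * x * x)
        y_d = y * radial + p1 * (r2 + 2.0 * y * y) + 2.0 * p2 * x * y
        # 3) Generalized projection: focal terms gamma1/gamma2, principal point u0/v0.
        u = gamma1 * x_d + u0
        v = gamma2 * y_d + v0
        return u, v

The cam2image in the diff additionally returns the point range scaled by the sign of z alongside the pixel coordinates, which lets callers discard points behind the camera.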
d123/common/datatypes/sensor/camera.py | 68 +++++++++++-------- .../kitti_360/kitti_360_data_converter.py | 6 +- d123/dataset/scene/arrow_scene.py | 4 +- 3 files changed, 45 insertions(+), 33 deletions(-) diff --git a/d123/common/datatypes/sensor/camera.py b/d123/common/datatypes/sensor/camera.py index c2a33d9d..e6dc60d6 100644 --- a/d123/common/datatypes/sensor/camera.py +++ b/d123/common/datatypes/sensor/camera.py @@ -2,7 +2,7 @@ import json from dataclasses import dataclass -from typing import Any, Dict +from typing import Any, Dict, Union import numpy as np import numpy.typing as npt @@ -80,31 +80,6 @@ def fov_y(self) -> float: return fov_y_rad -def camera_metadata_dict_to_json(camera_metadata: Dict[CameraType, CameraMetadata]) -> Dict[str, Dict[str, Any]]: - """ - Converts a dictionary of CameraMetadata to a JSON-serializable format. - :param camera_metadata: Dictionary of CameraMetadata. - :return: JSON-serializable dictionary. - """ - camera_metadata_dict = { - camera_type.serialize(): metadata.to_dict() for camera_type, metadata in camera_metadata.items() - } - return json.dumps(camera_metadata_dict) - - -def camera_metadata_dict_from_json(json_dict: Dict[str, Dict[str, Any]]) -> Dict[CameraType, CameraMetadata]: - """ - Converts a JSON-serializable dictionary back to a dictionary of CameraMetadata. - :param json_dict: JSON-serializable dictionary. - :return: Dictionary of CameraMetadata. - """ - camera_metadata_dict = json.loads(json_dict) - return { - CameraType.deserialize(camera_type): CameraMetadata.from_dict(metadata) - for camera_type, metadata in camera_metadata_dict.items() - } - -#TODO Code Refactoring @dataclass class FisheyeMEICameraMetadata: camera_type: CameraType @@ -124,6 +99,18 @@ def to_dict(self) -> Dict[str, Any]: "distortion": self.distortion.tolist() if self.distortion is not None else None, "projection_parameters": self.projection_parameters.tolist() if self.projection_parameters is not None else None, } + + @classmethod + def from_dict(cls, json_dict: Dict[str, Any]) -> CameraMetadata: + # TODO: remove None types. Only a placeholder for now. + return cls( + camera_type=CameraType(json_dict["camera_type"]), + width=json_dict["width"], + height=json_dict["height"], + mirror_parameters=json_dict["mirror_parameters"], + distortion=np.array(json_dict["distortion"]) if json_dict["distortion"] is not None else None, + projection_parameters=np.array(json_dict["projection_parameters"]) if json_dict["projection_parameters"] is not None else None, + ) def cam2image(self, points_3d: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: ''' camera coordinate to image plane ''' @@ -151,7 +138,34 @@ def cam2image(self, points_3d: npt.NDArray[np.float64]) -> npt.NDArray[np.float6 y = gamma2*y + v0 return x, y, norm * points_3d[:,2] / np.abs(points_3d[:,2]) - + +def camera_metadata_dict_to_json(camera_metadata: Dict[CameraType, CameraMetadata]) -> Dict[str, Dict[str, Any]]: + """ + Converts a dictionary of CameraMetadata to a JSON-serializable format. + :param camera_metadata: Dictionary of CameraMetadata. + :return: JSON-serializable dictionary. + """ + camera_metadata_dict = { + camera_type.serialize(): metadata.to_dict() for camera_type, metadata in camera_metadata.items() + } + return json.dumps(camera_metadata_dict) + + +def camera_metadata_dict_from_json(json_dict: Dict[str, Dict[str, Any]]) -> Dict[CameraType, Union[CameraMetadata, FisheyeMEICameraMetadata]]: + """ + Converts a JSON-serializable dictionary back to a dictionary of CameraMetadata. 
+ :param json_dict: JSON-serializable dictionary. + :return: Dictionary of CameraMetadata. + """ + camera_metadata_dict = json.loads(json_dict) + out: Dict[CameraType, Union[CameraMetadata, FisheyeMEICameraMetadata]] = {} + for camera_type, metadata in camera_metadata_dict.items(): + cam_type = CameraType.deserialize(camera_type) + if isinstance(metadata, dict) and "mirror_parameters" in metadata: + out[cam_type] = FisheyeMEICameraMetadata.from_dict(metadata) + else: + out[cam_type] = CameraMetadata.from_dict(metadata) + return out @dataclass class Camera: diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py index 1b967fca..93a84c9e 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py @@ -43,9 +43,8 @@ KITTI360_CAMERA_TYPES = { CameraType.CAM_STEREO_L: "image_00", CameraType.CAM_STEREO_R: "image_01", - # TODO need code refactoring to support fisheye cameras - # CameraType.CAM_L1: "image_02", - # CameraType.CAM_R1: "image_03", + CameraType.CAM_L1: "image_02", + CameraType.CAM_R1: "image_03", } DIR_2D_RAW = "data_2d_raw" @@ -655,7 +654,6 @@ def _extract_cameras( with open(img_path_png, "rb") as f: camera_data = f.read(), cam2pose else: - #TODO camera_data = None, cam2pose.flatten().tolist() camera_dict[camera_type] = camera_data return camera_dict diff --git a/d123/dataset/scene/arrow_scene.py b/d123/dataset/scene/arrow_scene.py index ecd68111..39d90c9c 100644 --- a/d123/dataset/scene/arrow_scene.py +++ b/d123/dataset/scene/arrow_scene.py @@ -6,7 +6,7 @@ from d123.common.datatypes.detection.detection import BoxDetectionWrapper, TrafficLightDetectionWrapper from d123.common.datatypes.recording.detection_recording import DetectionRecording -from d123.common.datatypes.sensor.camera import Camera, CameraMetadata, CameraType, camera_metadata_dict_from_json +from d123.common.datatypes.sensor.camera import Camera, CameraMetadata, FisheyeMEICameraMetadata, CameraType, camera_metadata_dict_from_json from d123.common.datatypes.sensor.lidar import LiDAR, LiDARMetadata, LiDARType, lidar_metadata_dict_from_json from d123.common.datatypes.time.time_point import TimePoint from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3 @@ -70,7 +70,7 @@ def __init__( ) = _get_scene_data(arrow_file_path) self._metadata: LogMetadata = _metadata self._vehicle_parameters: VehicleParameters = _vehicle_parameters - self._camera_metadata: Dict[CameraType, CameraMetadata] = _camera_metadata + self._camera_metadata: Dict[CameraType, Union[CameraMetadata, FisheyeMEICameraMetadata]] = _camera_metadata self._lidar_metadata: Dict[LiDARType, LiDARMetadata] = _lidar_metadata self._map_api: Optional[AbstractMap] = None From 4241723b677a3c1908b3d0548411b32b5b18d0fd Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Sun, 14 Sep 2025 16:04:11 +0800 Subject: [PATCH 16/32] refactor camera.py, create base CameraMetadata and rename origin into PinholeCameraMetadata --- d123/common/datatypes/sensor/camera.py | 38 +++++++++++------- .../av2/av2_data_converter.py | 8 ++-- .../carla/carla_data_converter.py | 6 +-- .../kitti_360/kitti_360_data_converter.py | 40 +++++-------------- .../kitti_360/kitti_360_helper.py | 32 ++++++++++++++- .../kitti_360/preprocess_detection.py | 10 ++--- .../nuplan/nuplan_data_converter.py | 10 ++--- .../wopd/wopd_data_converter.py | 8 ++-- d123/dataset/scene/arrow_scene.py | 4 +- 9 files 
changed, 87 insertions(+), 69 deletions(-) diff --git a/d123/common/datatypes/sensor/camera.py b/d123/common/datatypes/sensor/camera.py index e6dc60d6..a9cc209e 100644 --- a/d123/common/datatypes/sensor/camera.py +++ b/d123/common/datatypes/sensor/camera.py @@ -3,6 +3,7 @@ import json from dataclasses import dataclass from typing import Any, Dict, Union +from abc import ABC, abstractmethod import numpy as np import numpy.typing as npt @@ -26,13 +27,24 @@ class CameraType(SerialIntEnum): CAM_STEREO_L = 8 CAM_STEREO_R = 9 - @dataclass -class CameraMetadata: - +class CameraMetadata(ABC): camera_type: CameraType width: int height: int + + @abstractmethod + def to_dict(self) -> Dict[str, Any]: + ... + + @classmethod + @abstractmethod + def from_dict(cls, json_dict: Dict[str, Any]) -> CameraMetadata: + ... + +@dataclass +class PinholeCameraMetadata(CameraMetadata): + intrinsic: npt.NDArray[np.float64] # 3x3 matrix # TODO: don't store matrix but values. distortion: npt.NDArray[np.float64] # 5x1 vector # TODO: don't store matrix but values. @@ -47,7 +59,7 @@ def to_dict(self) -> Dict[str, Any]: } @classmethod - def from_dict(cls, json_dict: Dict[str, Any]) -> CameraMetadata: + def from_dict(cls, json_dict: Dict[str, Any]) -> PinholeCameraMetadata: # TODO: remove None types. Only a placeholder for now. return cls( camera_type=CameraType(json_dict["camera_type"]), @@ -81,11 +93,9 @@ def fov_y(self) -> float: @dataclass -class FisheyeMEICameraMetadata: - camera_type: CameraType - width: int - height: int - mirror_parameters: int +class FisheyeMEICameraMetadata(CameraMetadata): + + mirror_parameters: float distortion: npt.NDArray[np.float64] # k1,k2,p1,p2 projection_parameters: npt.NDArray[np.float64] #gamma1,gamma2,u0,v0 @@ -101,7 +111,7 @@ def to_dict(self) -> Dict[str, Any]: } @classmethod - def from_dict(cls, json_dict: Dict[str, Any]) -> CameraMetadata: + def from_dict(cls, json_dict: Dict[str, Any]) -> FisheyeMEICameraMetadata: # TODO: remove None types. Only a placeholder for now. return cls( camera_type=CameraType(json_dict["camera_type"]), @@ -151,26 +161,26 @@ def camera_metadata_dict_to_json(camera_metadata: Dict[CameraType, CameraMetadat return json.dumps(camera_metadata_dict) -def camera_metadata_dict_from_json(json_dict: Dict[str, Dict[str, Any]]) -> Dict[CameraType, Union[CameraMetadata, FisheyeMEICameraMetadata]]: +def camera_metadata_dict_from_json(json_dict: Dict[str, Dict[str, Any]]) -> Dict[CameraType, Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]]: """ Converts a JSON-serializable dictionary back to a dictionary of CameraMetadata. :param json_dict: JSON-serializable dictionary. :return: Dictionary of CameraMetadata. 
""" camera_metadata_dict = json.loads(json_dict) - out: Dict[CameraType, Union[CameraMetadata, FisheyeMEICameraMetadata]] = {} + out: Dict[CameraType, Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]] = {} for camera_type, metadata in camera_metadata_dict.items(): cam_type = CameraType.deserialize(camera_type) if isinstance(metadata, dict) and "mirror_parameters" in metadata: out[cam_type] = FisheyeMEICameraMetadata.from_dict(metadata) else: - out[cam_type] = CameraMetadata.from_dict(metadata) + out[cam_type] = PinholeCameraMetadata.from_dict(metadata) return out @dataclass class Camera: - metadata: CameraMetadata + metadata: PinholeCameraMetadata image: npt.NDArray[np.uint8] extrinsic: npt.NDArray[np.float64] # 4x4 matrix diff --git a/d123/dataset/dataset_specific/av2/av2_data_converter.py b/d123/dataset/dataset_specific/av2/av2_data_converter.py index f5e5e44a..d1dace89 100644 --- a/d123/dataset/dataset_specific/av2/av2_data_converter.py +++ b/d123/dataset/dataset_specific/av2/av2_data_converter.py @@ -11,7 +11,7 @@ import pyarrow as pa from pyquaternion import Quaternion -from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json +from d123.common.datatypes.sensor.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json from d123.common.datatypes.time.time_point import TimePoint from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index @@ -234,17 +234,17 @@ def convert_av2_log_to_arrow( return [] -def get_av2_camera_metadata(log_path: Path) -> Dict[CameraType, CameraMetadata]: +def get_av2_camera_metadata(log_path: Path) -> Dict[CameraType, PinholeCameraMetadata]: intrinsics_file = log_path / "calibration" / "intrinsics.feather" intrinsics_df = pd.read_feather(intrinsics_file) - camera_metadata: Dict[CameraType, CameraMetadata] = {} + camera_metadata: Dict[CameraType, PinholeCameraMetadata] = {} for _, row in intrinsics_df.iterrows(): row = row.to_dict() camera_type = AV2_CAMERA_TYPE_MAPPING[row["sensor_name"]] - camera_metadata[camera_type] = CameraMetadata( + camera_metadata[camera_type] = PinholeCameraMetadata( camera_type=camera_type, width=row["width_px"], height=row["height_px"], diff --git a/d123/dataset/dataset_specific/carla/carla_data_converter.py b/d123/dataset/dataset_specific/carla/carla_data_converter.py index c6ce3622..5dede534 100644 --- a/d123/dataset/dataset_specific/carla/carla_data_converter.py +++ b/d123/dataset/dataset_specific/carla/carla_data_converter.py @@ -11,7 +11,7 @@ import numpy as np import pyarrow as pa -from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json +from d123.common.datatypes.sensor.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json from d123.common.datatypes.sensor.lidar_index import CarlaLidarIndex from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3Index @@ -247,7 +247,7 @@ def _get_metadata(location: str, log_name: str) -> LogMetadata: ) -def get_carla_camera_metadata(first_log_dict: Dict[str, Any]) -> Dict[CameraType, CameraMetadata]: +def get_carla_camera_metadata(first_log_dict: Dict[str, Any]) -> Dict[CameraType, PinholeCameraMetadata]: # FIXME: This is a placeholder function to return camera metadata. 
@@ -256,7 +256,7 @@ def get_carla_camera_metadata(first_log_dict: Dict[str, Any]) -> Dict[CameraType dtype=np.float64, ) camera_metadata = { - CameraType.CAM_F0: CameraMetadata( + CameraType.CAM_F0: PinholeCameraMetadata( camera_type=CameraType.CAM_F0, width=1024, height=512, diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py index 93a84c9e..aee14883 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py @@ -22,7 +22,7 @@ from nuplan.planning.utils.multithreading.worker_utils import WorkerPool, worker_map from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.sensor.camera import CameraMetadata, FisheyeMEICameraMetadata, CameraType, camera_metadata_dict_to_json +from d123.common.datatypes.sensor.camera import PinholeCameraMetadata, FisheyeMEICameraMetadata, CameraType, camera_metadata_dict_to_json from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json from d123.common.datatypes.sensor.lidar_index import Kitti360LidarIndex from d123.common.datatypes.time.time_point import TimePoint @@ -31,7 +31,7 @@ from d123.dataset.arrow.helper import open_arrow_table, write_arrow_table from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig, RawDataConverter from d123.dataset.logs.log_metadata import LogMetadata -from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION +from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT,kittiId2label from d123.geometry import BoundingBoxSE3, BoundingBoxSE3Index, StateSE3, Vector3D, Vector3DIndex @@ -55,8 +55,8 @@ DIR_POSES = "data_poses" DIR_CALIB = "calibration" -PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_RAW -# PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT +# PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_RAW +PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT PATH_2D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_SMT PATH_3D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_RAW # PATH_3D_RAW_ROOT: Path = Path("/data/jbwang/d123/data_3d_raw") @@ -244,7 +244,7 @@ def convert_kitti360_log_to_arrow( return [] -def get_kitti360_camera_metadata() -> Dict[CameraType, Union[CameraMetadata, FisheyeMEICameraMetadata]]: +def get_kitti360_camera_metadata() -> Dict[CameraType, Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]]: persp = PATH_CALIB_ROOT / "perspective.txt" @@ -270,10 +270,10 @@ def get_kitti360_camera_metadata() -> Dict[CameraType, Union[CameraMetadata, Fis fisheye03 = _readYAMLFile(fisheye_camera03_path) fisheye_result = {"image_02": fisheye02, "image_03": fisheye03} - log_cam_infos: Dict[str, Union[CameraMetadata, FisheyeMEICameraMetadata]] = {} + log_cam_infos: Dict[str, Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]] = {} for cam_type, cam_name in KITTI360_CAMERA_TYPES.items(): if cam_name in ["image_00", "image_01"]: - log_cam_infos[cam_type] = CameraMetadata( + log_cam_infos[cam_type] = PinholeCameraMetadata( camera_type=cam_type, width=persp_result[cam_name]["wh"][0], height=persp_result[cam_name]["wh"][1], @@ -324,28 +324,6 @@ def get_kitti360_lidar_metadata() -> Dict[LiDARType, LiDARMetadata]: ) return 
metadata -def get_lidar_extrinsic() -> np.ndarray: - cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt" - if not cam2pose_txt.exists(): - raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}") - - cam2velo_txt = PATH_CALIB_ROOT / "calib_cam_to_velo.txt" - if not cam2velo_txt.exists(): - raise FileNotFoundError(f"calib_cam_to_velo.txt file not found: {cam2velo_txt}") - - lastrow = np.array([0,0,0,1]).reshape(1,4) - - with open(cam2pose_txt, 'r') as f: - image_00 = next(f) - values = list(map(float, image_00.strip().split()[1:])) - matrix = np.array(values).reshape(3, 4) - cam2pose = np.concatenate((matrix, lastrow)) - cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose - - cam2velo = np.concatenate((np.loadtxt(cam2velo_txt).reshape(3,4), lastrow)) - extrinsic = cam2pose @ np.linalg.inv(cam2velo) - return extrinsic - def _write_recording_table( log_name: str, recording_schema: pa.Schema, @@ -451,8 +429,8 @@ def _extract_ego_state_all(log_name: str) -> List[List[float]]: poses = np.loadtxt(pose_file) poses_time = poses[:, 0] - 1 # Adjusting time to start from 0 - oxts_path = PATH_POSES_ROOT / log_name / "oxts" / "data" - # oxts_path = Path("/data/jbwang/d123/data_poses/") / log_name / "oxts" / "data" + # oxts_path = PATH_POSES_ROOT / log_name / "oxts" / "data" + oxts_path = Path("/data/jbwang/d123/data_poses/") / log_name / "oxts" / "data" pose_idx = 0 poses_time_len = len(poses_time) diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py index 76e3c9e0..77217b5d 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py @@ -8,6 +8,13 @@ from d123.geometry import BoundingBoxSE3, StateSE3 from d123.dataset.dataset_specific.kitti_360.labels import kittiId2label +import os +from pathlib import Path + +KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"]) +DIR_CALIB = "calibration" +PATH_CALIB_ROOT: Path = KITTI360_DATA_ROOT / DIR_CALIB + DEFAULT_ROLL = 0.0 DEFAULT_PITCH = 0.0 @@ -162,4 +169,27 @@ def box_visible_in_point_cloud(self, points): def load_detection_preprocess(self, records_dict: Dict[int, Any]): if self.globalID in records_dict: - self.valid_frames["records"] = records_dict[self.globalID]["records"] \ No newline at end of file + self.valid_frames["records"] = records_dict[self.globalID]["records"] + + +def get_lidar_extrinsic() -> np.ndarray: + cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt" + if not cam2pose_txt.exists(): + raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}") + + cam2velo_txt = PATH_CALIB_ROOT / "calib_cam_to_velo.txt" + if not cam2velo_txt.exists(): + raise FileNotFoundError(f"calib_cam_to_velo.txt file not found: {cam2velo_txt}") + + lastrow = np.array([0,0,0,1]).reshape(1,4) + + with open(cam2pose_txt, 'r') as f: + image_00 = next(f) + values = list(map(float, image_00.strip().split()[1:])) + matrix = np.array(values).reshape(3, 4) + cam2pose = np.concatenate((matrix, lastrow)) + cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose + + cam2velo = np.concatenate((np.loadtxt(cam2velo_txt).reshape(3,4), lastrow)) + extrinsic = cam2pose @ np.linalg.inv(cam2velo) + return extrinsic \ No newline at end of file diff --git a/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py b/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py index 8b7c284f..5827e779 100644 --- 
a/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py +++ b/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py @@ -25,13 +25,13 @@ DIR_3D_BBOX = "data_3d_bboxes" DIR_POSES = "data_poses" -PATH_3D_RAW_ROOT = KITTI360_DATA_ROOT / DIR_3D_RAW +# PATH_3D_RAW_ROOT = KITTI360_DATA_ROOT / DIR_3D_RAW +PATH_3D_RAW_ROOT = Path("/data/jbwang/d123/data_3d_raw/") PATH_3D_BBOX_ROOT = KITTI360_DATA_ROOT / DIR_3D_BBOX PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES -from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION +from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT, kittiId2label -from d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter import get_lidar_extrinsic def _bbox_xml_path(log_name: str) -> Path: return PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml" @@ -178,9 +178,9 @@ def process_detection( import argparse logging.basicConfig(level=logging.INFO) parser = argparse.ArgumentParser(description="Precompute KITTI-360 static detections filters") - parser.add_argument("--log_name", default="2013_05_28_drive_0000_sync") + parser.add_argument("--log_name", default="2013_05_28_drive_0007_sync") parser.add_argument("--radius", type=float, default=60.0) - parser.add_argument("--out", type=Path, default=None, help="output directory for pkl") + parser.add_argument("--out", type=Path, default="detection_preprocess", help="output directory for pkl") args = parser.parse_args() process_detection( log_name=args.log_name, diff --git a/d123/dataset/dataset_specific/nuplan/nuplan_data_converter.py b/d123/dataset/dataset_specific/nuplan/nuplan_data_converter.py index b7b52e0b..47e4bb02 100644 --- a/d123/dataset/dataset_specific/nuplan/nuplan_data_converter.py +++ b/d123/dataset/dataset_specific/nuplan/nuplan_data_converter.py @@ -16,7 +16,7 @@ import d123.dataset.dataset_specific.nuplan.utils as nuplan_utils from d123.common.datatypes.detection.detection import TrafficLightStatus from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json +from d123.common.datatypes.sensor.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json from d123.common.datatypes.sensor.lidar_index import NuplanLidarIndex from d123.common.datatypes.time.time_point import TimePoint @@ -256,15 +256,15 @@ def convert_nuplan_log_to_arrow( return [] -def get_nuplan_camera_metadata(log_path: Path) -> Dict[CameraType, CameraMetadata]: +def get_nuplan_camera_metadata(log_path: Path) -> Dict[CameraType, PinholeCameraMetadata]: - def _get_camera_metadata(camera_type: CameraType) -> CameraMetadata: + def _get_camera_metadata(camera_type: CameraType) -> PinholeCameraMetadata: cam = list(get_cameras(log_path, [str(NUPLAN_CAMERA_TYPES[camera_type].value)]))[0] intrinsic = np.array(pickle.loads(cam.intrinsic)) rotation = np.array(pickle.loads(cam.rotation)) rotation = Quaternion(rotation).rotation_matrix distortion = np.array(pickle.loads(cam.distortion)) - return CameraMetadata( + return PinholeCameraMetadata( camera_type=camera_type, width=cam.width, height=cam.height, @@ -272,7 +272,7 @@ def _get_camera_metadata(camera_type: CameraType) -> 
CameraMetadata: distortion=distortion, ) - log_cam_infos: Dict[str, CameraMetadata] = {} + log_cam_infos: Dict[str, PinholeCameraMetadata] = {} for camera_type in NUPLAN_CAMERA_TYPES.keys(): log_cam_infos[camera_type] = _get_camera_metadata(camera_type) diff --git a/d123/dataset/dataset_specific/wopd/wopd_data_converter.py b/d123/dataset/dataset_specific/wopd/wopd_data_converter.py index 3e577a04..2a0c6425 100644 --- a/d123/dataset/dataset_specific/wopd/wopd_data_converter.py +++ b/d123/dataset/dataset_specific/wopd/wopd_data_converter.py @@ -14,7 +14,7 @@ from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.sensor.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json +from d123.common.datatypes.sensor.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json from d123.common.datatypes.sensor.lidar_index import WopdLidarIndex from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index @@ -275,9 +275,9 @@ def convert_wopd_tfrecord_log_to_arrow( def get_wopd_camera_metadata( initial_frame: dataset_pb2.Frame, data_converter_config: DataConverterConfig -) -> Dict[CameraType, CameraMetadata]: +) -> Dict[CameraType, PinholeCameraMetadata]: - cam_metadatas: Dict[CameraType, CameraMetadata] = {} + cam_metadatas: Dict[CameraType, PinholeCameraMetadata] = {} if data_converter_config.camera_store_option is not None: for calibration in initial_frame.context.camera_calibrations: camera_type = WOPD_CAMERA_TYPES[calibration.name] @@ -289,7 +289,7 @@ def get_wopd_camera_metadata( _distortions = np.array([k1, k2, p1, p2, k3]) if camera_type in WOPD_CAMERA_TYPES.values(): - cam_metadatas[camera_type] = CameraMetadata( + cam_metadatas[camera_type] = PinholeCameraMetadata( camera_type=camera_type, width=calibration.width, height=calibration.height, diff --git a/d123/dataset/scene/arrow_scene.py b/d123/dataset/scene/arrow_scene.py index 39d90c9c..6670f138 100644 --- a/d123/dataset/scene/arrow_scene.py +++ b/d123/dataset/scene/arrow_scene.py @@ -6,7 +6,7 @@ from d123.common.datatypes.detection.detection import BoxDetectionWrapper, TrafficLightDetectionWrapper from d123.common.datatypes.recording.detection_recording import DetectionRecording -from d123.common.datatypes.sensor.camera import Camera, CameraMetadata, FisheyeMEICameraMetadata, CameraType, camera_metadata_dict_from_json +from d123.common.datatypes.sensor.camera import Camera, CameraMetadata, PinholeCameraMetadata, FisheyeMEICameraMetadata, CameraType, camera_metadata_dict_from_json from d123.common.datatypes.sensor.lidar import LiDAR, LiDARMetadata, LiDARType, lidar_metadata_dict_from_json from d123.common.datatypes.time.time_point import TimePoint from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3 @@ -70,7 +70,7 @@ def __init__( ) = _get_scene_data(arrow_file_path) self._metadata: LogMetadata = _metadata self._vehicle_parameters: VehicleParameters = _vehicle_parameters - self._camera_metadata: Dict[CameraType, Union[CameraMetadata, FisheyeMEICameraMetadata]] = _camera_metadata + self._camera_metadata: Dict[CameraType, Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]] = _camera_metadata self._lidar_metadata: Dict[LiDARType, LiDARMetadata] = _lidar_metadata self._map_api: Optional[AbstractMap] = None From 21f1dcbba6df94e7293b69776bdaed89ca364ff1 Mon Sep 17 00:00:00 2001 From: jbwang 
<1159270049@qq.com> Date: Wed, 24 Sep 2025 10:07:21 +0800 Subject: [PATCH 17/32] add map convert, fix 0004 detection, interpolate dynamic --- d123/common/visualization/viser/server.py | 2 +- .../kitti_360/kitti_360_data_converter.py | 76 ++++++-- .../kitti_360/kitti_360_helper.py | 174 +++++++++++++++--- .../kitti_360/kitti_360_map_conversion.py | 125 +++++++++++++ .../dataset_specific/kitti_360/labels.py | 15 ++ .../dataset_specific/kitti_360/load_sensor.py | 6 +- .../kitti_360/preprocess_detection.py | 127 ++++++++++--- .../default_dataset_conversion.yaml | 2 +- 8 files changed, 450 insertions(+), 77 deletions(-) create mode 100644 d123/dataset/dataset_specific/kitti_360/kitti_360_map_conversion.py diff --git a/d123/common/visualization/viser/server.py b/d123/common/visualization/viser/server.py index 16e38a66..f70aba28 100644 --- a/d123/common/visualization/viser/server.py +++ b/d123/common/visualization/viser/server.py @@ -38,7 +38,7 @@ BOUNDING_BOX_TYPE: Literal["mesh", "lines"] = "lines" # Map config: -MAP_AVAILABLE: bool = False +MAP_AVAILABLE: bool = True # Cameras config: diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py index aee14883..4e221617 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py @@ -10,6 +10,7 @@ import numpy as np import pickle +import copy from collections import defaultdict import datetime import hashlib @@ -31,8 +32,9 @@ from d123.dataset.arrow.helper import open_arrow_table, write_arrow_table from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig, RawDataConverter from d123.dataset.logs.log_metadata import LogMetadata -from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic -from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT,kittiId2label +from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic,interpolate_obj_list +from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT,kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT +from d123.dataset.dataset_specific.kitti_360.kitti_360_map_conversion import convert_kitti360_map from d123.geometry import BoundingBoxSE3, BoundingBoxSE3Index, StateSE3, Vector3D, Vector3DIndex KITTI360_DT: Final[float] = 0.1 @@ -55,11 +57,10 @@ DIR_POSES = "data_poses" DIR_CALIB = "calibration" -# PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_RAW -PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT +PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_RAW +# PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT PATH_2D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_SMT PATH_3D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_RAW -# PATH_3D_RAW_ROOT: Path = Path("/data/jbwang/d123/data_3d_raw") PATH_3D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_SMT PATH_3D_BBOX_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_BBOX PATH_POSES_ROOT: Path = KITTI360_DATA_ROOT / DIR_POSES @@ -146,8 +147,22 @@ def get_available_splits(self) -> List[str]: return ["kitti360"] def convert_maps(self, worker: WorkerPool) -> None: - logging.info("KITTI-360 does not provide standard maps. 
Skipping map conversion.") - return None + log_args = [ + { + "log_path": log_path, + "split": split, + } + for split, log_paths in self._log_paths_per_split.items() + for log_path in log_paths + ] + worker_map( + worker, + partial( + convert_kitti360_map_to_gpkg, + data_converter_config=self.data_converter_config + ), + log_args, + ) def convert_logs(self, worker: WorkerPool) -> None: log_args = [ @@ -168,6 +183,20 @@ def convert_logs(self, worker: WorkerPool) -> None: log_args, ) +def convert_kitti360_map_to_gpkg( + args: List[Dict[str, Union[List[str], List[Path]]]], data_converter_config: DataConverterConfig +) -> List[Any]: + for log_info in args: + log_path: Path = log_info["log_path"] + split: str = log_info["split"] + log_name = log_path.stem + + map_path = data_converter_config.output_path / "maps" / split / f"kitti360_{log_name}.gpkg" + if data_converter_config.force_map_conversion or not map_path.exists(): + map_path.unlink(missing_ok=True) + convert_kitti360_map(log_name, map_path) + return [] + def convert_kitti360_log_to_arrow( args: List[Dict[str, Union[List[str], List[Path]]]], data_converter_config: DataConverterConfig ) -> List[Any]: @@ -189,7 +218,7 @@ def convert_kitti360_log_to_arrow( metadata = LogMetadata( dataset="kitti360", log_name=log_name, - location=None, + location=log_name, timestep_seconds=KITTI360_DT, map_has_z=True, ) @@ -505,26 +534,34 @@ def _extract_detections( detections_tokens: List[List[str]] = [[] for _ in range(ts_len)] detections_types: List[List[int]] = [[] for _ in range(ts_len)] - bbox_3d_path = PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml" + if log_name == "2013_05_28_drive_0004_sync": + bbox_3d_path = PATH_3D_BBOX_ROOT / "train_full" / f"{log_name}.xml" + else: + bbox_3d_path = PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml" if not bbox_3d_path.exists(): raise FileNotFoundError(f"BBox 3D file not found: {bbox_3d_path}") tree = ET.parse(bbox_3d_path) root = tree.getroot() - dynamic_groups: Dict[int, List[KITTI360Bbox3D]] = defaultdict(list) + dynamic_objs: Dict[int, List[KITTI360Bbox3D]] = defaultdict(list) detection_preprocess_path = PREPOCESS_DETECTION_DIR / f"{log_name}_detection_preprocessed.pkl" if detection_preprocess_path.exists(): with open(detection_preprocess_path, "rb") as f: detection_preprocess_result = pickle.load(f) - records_dict = {record_item["global_id"]: record_item for record_item in detection_preprocess_result["records"]} + static_records_dict = {record_item["global_id"]: record_item for record_item in detection_preprocess_result["static"]} + dynamic_records_dict = detection_preprocess_result["dynamic"] else: detection_preprocess_result = None for child in root: - semanticIdKITTI = int(child.find('semanticId').text) - name = kittiId2label[semanticIdKITTI].name + if child.find('semanticId') is not None: + semanticIdKITTI = int(child.find('semanticId').text) + name = kittiId2label[semanticIdKITTI].name + else: + lable = child.find('label').text + name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(lable, 'unknown') if child.find('transform') is None or name not in KIITI360_DETECTION_NAME_DICT.keys(): continue obj = KITTI360Bbox3D() @@ -535,7 +572,7 @@ def _extract_detections( if detection_preprocess_result is None: obj.filter_by_radius(ego_states_xyz,radius=50.0) else: - obj.load_detection_preprocess(records_dict) + obj.load_detection_preprocess(static_records_dict) for record in obj.valid_frames["records"]: frame = record["timestamp"] detections_states[frame].append(obj.get_state_array()) @@ -543,12 +580,15 @@ def 
_extract_detections( detections_tokens[frame].append(str(obj.globalID)) detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[obj.name])) else: - ann_id = obj.annotationId - dynamic_groups[ann_id].append(obj) + global_ID = obj.globalID + dynamic_objs[global_ID].append(obj) # dynamic object - for ann_id, obj_list in dynamic_groups.items(): - obj_list.sort(key=lambda obj: obj.timestamp) + if detection_preprocess_result is not None: + dynamic_objs = copy.deepcopy(dynamic_records_dict) + + for global_id, obj_list in dynamic_objs.items(): + obj_list = interpolate_obj_list(obj_list) num_frames = len(obj_list) positions = [obj.get_state_array()[:3] for obj in obj_list] diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py index 77217b5d..a756a343 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py @@ -2,11 +2,13 @@ from collections import defaultdict from typing import Dict, Optional, Any, List +import copy from scipy.linalg import polar from scipy.spatial.transform import Rotation as R from d123.geometry import BoundingBoxSE3, StateSE3 -from d123.dataset.dataset_specific.kitti_360.labels import kittiId2label +from d123.geometry.polyline import Polyline3D +from d123.dataset.dataset_specific.kitti_360.labels import kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT import os from pathlib import Path @@ -44,6 +46,11 @@ def global2local(globalId): return int(semanticId), int(instanceId) class KITTI360Bbox3D(): + + # global id(only used for sequence 0004) + dynamic_global_id = 2000000 + static_global_id = 1000000 + # Constructor def __init__(self): @@ -65,37 +72,39 @@ def __init__(self): #label self.label = '' - - def parseOpencvMatrix(self, node): - rows = int(node.find('rows').text) - cols = int(node.find('cols').text) - data = node.find('data').text.split(' ') - - mat = [] - for d in data: - d = d.replace('\n', '') - if len(d)<1: - continue - mat.append(float(d)) - mat = np.reshape(mat, [rows, cols]) - return mat + # used to mark if the bbox is interpolated + self.is_interpolated = False + # GT annotation idx + self.idx_next = -1 + self.idx_prev = -1 + def parseBbox(self, child): - semanticIdKITTI = int(child.find('semanticId').text) - self.semanticId = kittiId2label[semanticIdKITTI].id - self.instanceId = int(child.find('instanceId').text) - self.name = kittiId2label[semanticIdKITTI].name - - self.start_frame = int(child.find('start_frame').text) - self.end_frame = int(child.find('end_frame').text) - self.timestamp = int(child.find('timestamp').text) self.annotationId = int(child.find('index').text) + 1 self.label = child.find('label').text - self.globalID = local2global(self.semanticId, self.instanceId) + if child.find('semanticId') is None: + self.name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(self.label, 'unknown') + self.is_dynamic = int(child.find('dynamic').text) + if self.is_dynamic != 0: + dynamicSeq = int(child.find('dynamicSeq').text) + self.globalID = KITTI360Bbox3D.dynamic_global_id + dynamicSeq + else: + self.globalID = KITTI360Bbox3D.static_global_id + KITTI360Bbox3D.static_global_id += 1 + else: + self.start_frame = int(child.find('start_frame').text) + self.end_frame = int(child.find('end_frame').text) + + semanticIdKITTI = int(child.find('semanticId').text) + self.semanticId = kittiId2label[semanticIdKITTI].id + self.instanceId = int(child.find('instanceId').text) + self.name = 
kittiId2label[semanticIdKITTI].name + + self.globalID = local2global(self.semanticId, self.instanceId) self.valid_frames = {"global_id": self.globalID, "records": []} @@ -103,10 +112,11 @@ def parseBbox(self, child): self.parse_scale_rotation() def parseVertices(self, child): - transform = self.parseOpencvMatrix(child.find('transform')) + transform = parseOpencvMatrix(child.find('transform')) R = transform[:3,:3] T = transform[:3,3] - vertices = self.parseOpencvMatrix(child.find('vertices')) + vertices = parseOpencvMatrix(child.find('vertices')) + self.vertices_template = copy.deepcopy(vertices) vertices = np.matmul(R, vertices.transpose()).transpose() + T self.vertices = vertices @@ -122,6 +132,7 @@ def parse_scale_rotation(self): yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', degrees=False) self.Rm = np.array(Rm) + self.Sm = np.array(Sm) self.scale = scale self.yaw = yaw self.pitch = pitch @@ -153,7 +164,10 @@ def filter_by_radius(self,ego_state_xyz,radius=50.0): def box_visible_in_point_cloud(self, points): ''' points: (N,3) , box: (8,3) ''' - box = self.vertices + box = self.vertices.copy() + # avoid calculating ground point cloud + z_offset = 0.1 + box[:,2] += z_offset O, A, B, C = box[0], box[1], box[2], box[5] OA = A - O OB = B - O @@ -164,13 +178,117 @@ def box_visible_in_point_cloud(self, points): (np.dot(O, OC) < POC) & (POC < np.dot(C, OC)) points_in_box = np.sum(mask) - visible = True if points_in_box > 50 else False + visible = True if points_in_box > 40 else False return visible, points_in_box def load_detection_preprocess(self, records_dict: Dict[int, Any]): if self.globalID in records_dict: self.valid_frames["records"] = records_dict[self.globalID]["records"] +def interpolate_obj_list(obj_list: List[KITTI360Bbox3D]) -> List[KITTI360Bbox3D]: + """ + Fill missing timestamps in obj_list by linear interpolation. + For each missing timestamp between two objects, create a new KITTI360Bbox3D object + with only interpolated position (T), yaw, pitch, roll, and copy other attributes. + Returns a new list with all timestamps filled and sorted. 
+ """ + if not obj_list: + return obj_list + + # Sort by timestamp ascending + obj_list.sort(key=lambda obj: obj.timestamp) + timestamps = [obj.timestamp for obj in obj_list] + min_ts, max_ts = min(timestamps), max(timestamps) + full_ts = list(range(min_ts, max_ts + 1)) + missing_ts = sorted(set(full_ts) - set(timestamps)) + + # Prepare arrays for interpolation + T_arr = np.array([obj.T for obj in obj_list]) + yaw_arr = np.array([obj.yaw for obj in obj_list]) + pitch_arr = np.array([obj.pitch for obj in obj_list]) + roll_arr = np.array([obj.roll for obj in obj_list]) + ts_arr = np.array(timestamps) + + for ts in missing_ts: + idx_next = np.searchsorted(ts_arr, ts) + idx_prev = idx_next - 1 + if idx_prev < 0 or idx_next >= len(obj_list): + continue + + frac = (ts - ts_arr[idx_prev]) / (ts_arr[idx_next] - ts_arr[idx_prev]) + T_interp = T_arr[idx_prev] * (1 - frac) + T_arr[idx_next] * frac + + yaw_delat = normalize_angle(yaw_arr[idx_next] - yaw_arr[idx_prev]) + yaw_interp = yaw_arr[idx_prev] + yaw_delat * frac + yaw_interp = normalize_angle(yaw_interp) + + pitch_interp = pitch_arr[idx_prev] * (1 - frac) + pitch_arr[idx_next] * frac + roll_interp = roll_arr[idx_prev] * (1 - frac) + roll_arr[idx_next] * frac + + obj_new = copy.deepcopy(obj_list[idx_prev]) + obj_new.timestamp = ts + obj_new.T = T_interp + obj_new.yaw = yaw_interp + obj_new.pitch = pitch_interp + obj_new.roll = roll_interp + obj_new.Rm = R.from_euler('zyx', [obj_new.yaw, obj_new.pitch, obj_new.roll], degrees=False).as_matrix() + obj_new.R = obj_new.Rm @ obj_new.Sm + obj_new.vertices = (obj_new.R @ obj_new.vertices_template.T).T + obj_new.T + obj_new.is_interpolated = True + obj_new.idx_prev = ts_arr[idx_prev] + obj_new.idx_next = ts_arr[idx_next] + + obj_list.append(obj_new) + + obj_list.sort(key=lambda obj: obj.timestamp) + return obj_list + +def normalize_angle(a): + return np.arctan2(np.sin(a), np.cos(a)) + +class KITTI360_MAP_Bbox3D(): + def __init__(self): + self.id = -1 + self.label = ' ' + + self.vertices: Polyline3D = None + self.R = None + self.T = None + + def parseVertices_plane(self, child): + transform = parseOpencvMatrix(child.find('transform')) + R = transform[:3,:3] + T = transform[:3,3] + if child.find("transform_plane").find('rows').text == '0': + vertices = parseOpencvMatrix(child.find('vertices')) + else: + vertices = parseOpencvMatrix(child.find('vertices_plane')) + + vertices = np.matmul(R, vertices.transpose()).transpose() + T + self.vertices = Polyline3D.from_array(vertices) + + self.R = R + self.T = T + + def parseBbox(self, child): + self.id = int(child.find('index').text) + self.label = child.find('label').text + self.parseVertices_plane(child) + + +def parseOpencvMatrix(node): + rows = int(node.find('rows').text) + cols = int(node.find('cols').text) + data = node.find('data').text.split(' ') + + mat = [] + for d in data: + d = d.replace('\n', '') + if len(d)<1: + continue + mat.append(float(d)) + mat = np.reshape(mat, [rows, cols]) + return mat def get_lidar_extrinsic() -> np.ndarray: cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt" diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_map_conversion.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_map_conversion.py new file mode 100644 index 00000000..bf13eda6 --- /dev/null +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_map_conversion.py @@ -0,0 +1,125 @@ +import os +import warnings +from pathlib import Path +from typing import Dict, List, Optional + +import geopandas as gpd +import numpy as np +import pandas as pd 
+import xml.etree.ElementTree as ET +import pyogrio +from shapely.geometry import LineString +import shapely.geometry as geom + +from d123.dataset.conversion.map.road_edge.road_edge_2d_utils import ( + get_road_edge_linear_rings, + split_line_geometry_by_max_length, +) +from d123.dataset.maps.gpkg.utils import get_all_rows_with_value, get_row_with_value +from d123.dataset.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType +from d123.geometry.polyline import Polyline3D +from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360_MAP_Bbox3D + +MAX_ROAD_EDGE_LENGTH = 100.0 # meters, used to filter out very long road edges + +KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"]) + +DIR_3D_BBOX = "data_3d_bboxes" + +PATH_3D_BBOX_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_BBOX + +KIITI360_MAP_BBOX = [ + "road", + "sidewalk", + # "railtrack", + # "ground", + # "driveway", +] + +def convert_kitti360_map(log_name, map_path): + + xml_path = PATH_3D_BBOX_ROOT / "train_full" / f"{log_name}.xml" + + if not xml_path.exists(): + raise FileNotFoundError(f"BBox 3D file not found: {xml_path}") + + tree = ET.parse(xml_path) + root = tree.getroot() + objs: List[KITTI360_MAP_Bbox3D] = [] + for child in root: + label = child.find('label').text + if child.find("transform") is None or label not in KIITI360_MAP_BBOX: + continue + obj = KITTI360_MAP_Bbox3D() + obj.parseBbox(child) + objs.append(obj) + + dataframes: Dict[MapLayer, gpd.GeoDataFrame] = {} + dataframes[MapLayer.LANE] = _get_none_data() + dataframes[MapLayer.LANE_GROUP] = _get_none_data() + dataframes[MapLayer.INTERSECTION] = _get_none_data() + dataframes[MapLayer.CROSSWALK] = _get_none_data() + dataframes[MapLayer.WALKWAY] = _extract_walkway_df(objs) + dataframes[MapLayer.CARPARK] = _get_none_data() + dataframes[MapLayer.GENERIC_DRIVABLE] = _extract_generic_drivable_df(objs) + dataframes[MapLayer.ROAD_EDGE] = _extract_road_edge_df(objs) + dataframes[MapLayer.ROAD_LINE] = _get_none_data() + + map_file_name = map_path + for layer, gdf in dataframes.items(): + gdf.to_file(map_file_name, layer=layer.serialize(), driver="GPKG", mode="a") + +def _get_none_data() -> gpd.GeoDataFrame: + ids = [] + geometries = [] + data = pd.DataFrame({"id": ids}) + gdf = gpd.GeoDataFrame(data, geometry=geometries) + return gdf + +def _extract_generic_drivable_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: + ids: List[int] = [] + outlines: List[geom.LineString] = [] + geometries: List[geom.Polygon] = [] + for obj in objs: + if obj.label != "road": + continue + ids.append(obj.id) + outlines.append(obj.vertices.linestring) + geometries.append(geom.Polygon(obj.vertices.array[:, :2])) + data = pd.DataFrame({"id": ids, "outline": outlines}) + gdf = gpd.GeoDataFrame(data, geometry=geometries) + return gdf + +def _extract_walkway_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: + ids: List[int] = [] + outlines: List[geom.LineString] = [] + geometries: List[geom.Polygon] = [] + for obj in objs: + if obj.label != "sidewalk": + continue + ids.append(obj.id) + outlines.append(obj.vertices.linestring) + geometries.append(geom.Polygon(obj.vertices.array[:, :2])) + + data = pd.DataFrame({"id": ids, "outline": outlines}) + gdf = gpd.GeoDataFrame(data, geometry=geometries) + return gdf + +def _extract_road_edge_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: + geometries: List[geom.Polygon] = [] + for obj in objs: + if obj.label != "road": + continue + geometries.append(geom.Polygon(obj.vertices.array[:, :2])) + 
road_edge_linear_rings = get_road_edge_linear_rings(geometries) + road_edges = split_line_geometry_by_max_length(road_edge_linear_rings, MAX_ROAD_EDGE_LENGTH) + + ids = [] + road_edge_types = [] + for idx in range(len(road_edges)): + ids.append(idx) + # TODO @DanielDauner: Figure out if other types should/could be assigned here. + road_edge_types.append(int(RoadEdgeType.ROAD_EDGE_BOUNDARY)) + + data = pd.DataFrame({"id": ids, "road_edge_type": road_edge_types}) + return gpd.GeoDataFrame(data, geometry=road_edges) \ No newline at end of file diff --git a/d123/dataset/dataset_specific/kitti_360/labels.py b/d123/dataset/dataset_specific/kitti_360/labels.py index de24f152..6903be9f 100644 --- a/d123/dataset/dataset_specific/kitti_360/labels.py +++ b/d123/dataset/dataset_specific/kitti_360/labels.py @@ -169,6 +169,21 @@ def assureSingleInstanceName( name ): from d123.common.datatypes.detection.detection_types import DetectionType +BBOX_LABLES_TO_DETECTION_NAME_DICT = { + 'car': 'car', + 'truck': 'truck', + "bicycle": "bicycle", + "trafficLight": "traffic light", + "trailer": "trailer", + "bus": "bus", + "pedestrian": "person", + "motorcycle": "motorcycle", + "stop": "stop", + "trafficSign": "traffic sign", + "rider": "rider", + "caravan": "caravan", +} + KIITI360_DETECTION_NAME_DICT = { "traffic light": DetectionType.SIGN, "traffic sign": DetectionType.SIGN, diff --git a/d123/dataset/dataset_specific/kitti_360/load_sensor.py b/d123/dataset/dataset_specific/kitti_360/load_sensor.py index 2a23401f..c4df6d36 100644 --- a/d123/dataset/dataset_specific/kitti_360/load_sensor.py +++ b/d123/dataset/dataset_specific/kitti_360/load_sensor.py @@ -1,12 +1,16 @@ from pathlib import Path import numpy as np +import logging from d123.common.datatypes.sensor.lidar import LiDAR, LiDARMetadata def load_kitti360_lidar_from_path(filepath: Path, lidar_metadata: LiDARMetadata) -> LiDAR: - assert filepath.exists(), f"LiDAR file not found: {filepath}" + if not filepath.exists(): + logging.warning(f"LiDAR file does not exist: {filepath}. Returning empty point cloud.") + return LiDAR(metadata=lidar_metadata, point_cloud=np.zeros((4, 0), dtype=np.float32)) + pcd = np.fromfile(filepath, dtype=np.float32) pcd = np.reshape(pcd,[-1,4]) # [N,4] diff --git a/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py b/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py index 5827e779..f2d14ce1 100644 --- a/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py +++ b/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py @@ -1,8 +1,8 @@ """ -This script precomputes static detection records for KITTI-360: +This script precomputes detection records for KITTI-360: - Stage 1: radius filtering using ego positions (from poses.txt). - Stage 2: LiDAR visibility check to fill per-frame point counts. -It writes a pickle containing, for each static object, all feasible frames and +It writes a pickle containing, for each object, all feasible frames and their point counts to avoid recomputation in later pipelines. 
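For orientation, the payload these scripts write can be inspected directly. The key layout below is inferred from how _extract_detections consumes the pickle ("static" holding per-object frame records, "dynamic" holding per-frame boxes keyed by global id); the path is only an example:

    import pickle
    from pathlib import Path

    pkl = Path("detection_preprocess/2013_05_28_drive_0000_sync_detection_preprocessed.pkl")
    with open(pkl, "rb") as f:
        payload = pickle.load(f)

    for item in payload["static"]:
        # one entry per static object; each record carries at least the
        # frame index under "timestamp" and the LiDAR point count
        print(item["global_id"], len(item["records"]))

    print(len(payload["dynamic"]))  # global_id -> list of annotated boxes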
We have precomputed and saved the pickle for all training logs, you can either download them or run this script to generate @@ -12,9 +12,11 @@ import os import pickle import logging +import copy from pathlib import Path from typing import Dict, List, Tuple, Optional, Any from collections import defaultdict +import concurrent.futures import numpy as np import numpy.typing as npt @@ -25,15 +27,16 @@ DIR_3D_BBOX = "data_3d_bboxes" DIR_POSES = "data_poses" -# PATH_3D_RAW_ROOT = KITTI360_DATA_ROOT / DIR_3D_RAW -PATH_3D_RAW_ROOT = Path("/data/jbwang/d123/data_3d_raw/") +PATH_3D_RAW_ROOT = KITTI360_DATA_ROOT / DIR_3D_RAW PATH_3D_BBOX_ROOT = KITTI360_DATA_ROOT / DIR_3D_BBOX PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES -from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic -from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT, kittiId2label +from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic,interpolate_obj_list +from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT, kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT def _bbox_xml_path(log_name: str) -> Path: + if log_name == "2013_05_28_drive_0004_sync": + return PATH_3D_BBOX_ROOT / "train_full" / f"{log_name}.xml" return PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml" def _lidar_frame_path(log_name: str, frame_idx: int) -> Path: @@ -44,24 +47,36 @@ def _load_lidar_xyz(filepath: Path) -> np.ndarray: arr = np.fromfile(filepath, dtype=np.float32) return arr.reshape(-1, 4)[:, :3] -def _collect_static_objects(log_name: str) -> List[KITTI360Bbox3D]: - """Parse XML and collect static objects with valid class names.""" +def _collect_objects(log_name: str) -> Tuple[List[KITTI360Bbox3D], Dict[int, List[KITTI360Bbox3D]]]: + """Parse XML and collect objects with valid class names.""" xml_path = _bbox_xml_path(log_name) if not xml_path.exists(): raise FileNotFoundError(f"BBox 3D file not found: {xml_path}") tree = ET.parse(xml_path) root = tree.getroot() - objs: List[KITTI360Bbox3D] = [] + + static_objs: List[KITTI360Bbox3D] = [] + dynamic_objs: Dict[int, List[KITTI360Bbox3D]] = defaultdict(list) + for child in root: - sem_id = int(child.find("semanticId").text) - name = kittiId2label[sem_id].name - timestamp = int(child.find('timestamp').text) # -1 for static objects - if child.find("transform") is None or name not in KIITI360_DETECTION_NAME_DICT or timestamp != -1: + if child.find('semanticId') is not None: + semanticIdKITTI = int(child.find('semanticId').text) + name = kittiId2label[semanticIdKITTI].name + else: + lable = child.find('label').text + name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(lable, 'unknown') + if child.find("transform") is None or name not in KIITI360_DETECTION_NAME_DICT: continue obj = KITTI360Bbox3D() obj.parseBbox(child) - objs.append(obj) - return objs + timestamp = int(child.find('timestamp').text) + if timestamp == -1: + static_objs.append(obj) + else: + global_ID = obj.globalID + dynamic_objs[global_ID].append(obj) + + return static_objs, dynamic_objs def _collect_ego_states(log_name: str,length: int) -> npt.NDArray[np.float64]: """Load ego states from poses.txt.""" @@ -105,14 +120,18 @@ def _collect_ego_states(log_name: str,length: int) -> npt.NDArray[np.float64]: def process_detection( log_name: str, - radius_m: float = 50.0, + radius_m: float = 60.0, output_dir: Optional[Path] = None, ) -> 
None: """ - Precompute static detections filtering: + Precompute detections filtering + for static objects: 1) filter by ego-centered radius over all frames 2) filter by LiDAR point cloud visibility - Save per-frame static detections to a pickle to avoid recomputation. + for dynamic objects: + 1) interpolate boxes for missing frames + 2) select box with highest LiDAR point count + Save per-frame detections to a pickle to avoid recomputation. """ lidar_dir = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" @@ -121,21 +140,36 @@ def process_detection( ts_len = len(list(lidar_dir.glob("*.bin"))) logging.info(f"[preprocess] {log_name}: found {ts_len} lidar frames") - # 1) Parse static objects from XML - static_objs = _collect_static_objects(log_name) + # 1) Parse objects from XML + static_objs: List[KITTI360Bbox3D] + dynamic_objs: Dict[int, List[KITTI360Bbox3D]] + static_objs, dynamic_objs = _collect_objects(log_name) + + # only interpolate dynamic objects + for global_ID, obj_list in dynamic_objs.items(): + obj_list_interpolated = interpolate_obj_list(obj_list) + dynamic_objs[global_ID] = obj_list_interpolated + dymanic_objs_updated = copy.deepcopy(dynamic_objs) + logging.info(f"[preprocess] {log_name}: static objects = {len(static_objs)}") + logging.info(f"[preprocess] {log_name}: dynamic objects = {len(dynamic_objs.keys())}") - # 2) Filter by ego-centered radius + # 2) Filter static objs by ego-centered radius ego_states = _collect_ego_states(log_name,ts_len) logging.info(f"[preprocess] {log_name}: ego states = {len(ego_states)}") for obj in static_objs: obj.filter_by_radius(ego_states[:, :3, 3], radius_m) - # 3) Filter by LiDAR point cloud visibility + # 3) Filter static objs by LiDAR point cloud visibility lidar_extrinsic = get_lidar_extrinsic() - for time_idx in range(ts_len): + + def process_one_frame(time_idx: int) -> None: logging.info(f"[preprocess] {log_name}: t={time_idx}") lidar_path = _lidar_frame_path(log_name, time_idx) + if not lidar_path.exists(): + logging.warning(f"[preprocess] {log_name}: LiDAR frame not found: {lidar_path}") + return + lidar_xyz = _load_lidar_xyz(lidar_path) # lidar to pose @@ -158,17 +192,53 @@ def process_detection( record["points_in_box"] = points_in_box break + # for dynamic objects, select the box with the highest LiDAR point count + for global_ID, obj_list in dynamic_objs.items(): + obj_at_time = [obj for obj in obj_list if obj.timestamp == time_idx] + if not obj_at_time: + continue + + obj = obj_at_time[0] + # NOTE only update interpolated boxes + if not obj.is_interpolated: + continue + + max_points = -1 + best_obj = None + ts_prev = obj.idx_prev + ts_next = obj.idx_next + candidates = [candidate for candidate in obj_list if ts_prev <= candidate.timestamp <= ts_next] + + for obj in candidates: + visible, points_in_box = obj.box_visible_in_point_cloud(lidar_in_world) + if points_in_box > max_points: + max_points = points_in_box + best_obj = obj + + if best_obj is not None: + idx = next((i for i, o in enumerate(dynamic_objs[global_ID]) if o.timestamp == time_idx), None) + if idx is not None: + dymanic_objs_updated[global_ID][idx] = copy.deepcopy(best_obj) + dymanic_objs_updated[global_ID][idx].timestamp = time_idx + + max_workers = os.cpu_count() * 2 + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + results = list(executor.map(process_one_frame, range(ts_len))) + # 4) Save pickle - records: List[Dict[str, Any]] = [] + static_records: List[Dict[str, Any]] = [] for obj in static_objs: - 
records.append(obj.valid_frames)
+        static_records.append(obj.valid_frames)
+
     if output_dir is None:
         output_dir = PATH_3D_BBOX_ROOT / "preprocess"
     output_dir.mkdir(parents=True, exist_ok=True)
     out_path = output_dir / f"{log_name}_detection_preprocessed.pkl"
+
     payload = {
         "log_name": log_name,
-        "records": records
+        "static": static_records,
+        "dynamic": dymanic_objs_updated
     }
     with open(out_path, "wb") as f:
         pickle.dump(payload, f)
@@ -177,11 +247,12 @@ def process_detection(
 if __name__ == "__main__":
     import argparse
     logging.basicConfig(level=logging.INFO)
-    parser = argparse.ArgumentParser(description="Precompute KITTI-360 static detections filters")
-    parser.add_argument("--log_name", default="2013_05_28_drive_0007_sync")
+    parser = argparse.ArgumentParser(description="Precompute KITTI-360 detections filters")
+    parser.add_argument("--log_name", default="2013_05_28_drive_0004_sync")
     parser.add_argument("--radius", type=float, default=60.0)
     parser.add_argument("--out", type=Path, default="detection_preprocess", help="output directory for pkl")
     args = parser.parse_args()
+
     process_detection(
         log_name=args.log_name,
         radius_m=args.radius,
diff --git a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml
index 52915f13..2c474fe8 100644
--- a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml
+++ b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml
@@ -22,4 +22,4 @@ defaults:
     - kitti360_dataset
 
 force_log_conversion: True
-force_map_conversion: False
+force_map_conversion: True

From ad9d33fbcbfc0eb4fccf42d6f8b15c2134bd4913 Mon Sep 17 00:00:00 2001
From: jbwang <1159270049@qq.com>
Date: Tue, 30 Sep 2025 13:14:06 +0800
Subject: [PATCH 18/32] only extract timestamps that exist in poses.txt

---
 .../kitti_360/kitti_360_data_converter.py     |  71 ++++++------
 .../kitti_360/kitti_360_helper.py             |  81 ++-----------
 .../kitti_360/kitti_360_map_conversion.py     |   2 +-
 .../kitti_360/preprocess_detection.py         | 108 +++++-------------
 4 files changed, 68 insertions(+), 194 deletions(-)

diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
index 4e221617..76396bbd 100644
--- a/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
+++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_data_converter.py
@@ -32,7 +32,7 @@
 from d123.dataset.arrow.helper import open_arrow_table, write_arrow_table
 from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig, RawDataConverter
 from d123.dataset.logs.log_metadata import LogMetadata
-from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic,interpolate_obj_list
+from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic
 from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT,kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT
 from d123.dataset.dataset_specific.kitti_360.kitti_360_map_conversion import convert_kitti360_map
 from d123.geometry import BoundingBoxSE3, BoundingBoxSE3Index, StateSE3, Vector3D, Vector3DIndex
@@ -57,8 +57,8 @@
 DIR_POSES = "data_poses"
 DIR_CALIB = "calibration"
 
-PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_RAW
-# PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT
+# PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT / 
DIR_2D_RAW +PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT PATH_2D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_SMT PATH_3D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_RAW PATH_3D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_SMT @@ -330,7 +330,7 @@ def _read_projection_matrix(p_line: str) -> np.ndarray: K = P[:, :3] return K -def _readYAMLFile(fileName): +def _readYAMLFile(fileName:Path) -> Dict[str, Any]: '''make OpenCV YAML file compatible with python''' ret = {} skip_lines=1 # Skip the first line which says "%YAML:1.0". Or replace it with "%YAML 1.0" @@ -360,22 +360,22 @@ def _write_recording_table( data_converter_config: DataConverterConfig ) -> None: - ts_list = _read_timestamps(log_name) - ego_state_all = _extract_ego_state_all(log_name) + ts_list: List[TimePoint] = _read_timestamps(log_name) + ego_state_all, valid_timestamp = _extract_ego_state_all(log_name) ego_states_xyz = np.array([ego_state[:3] for ego_state in ego_state_all],dtype=np.float64) - detections_states,detections_velocity,detections_tokens,detections_types = _extract_detections(log_name,len(ts_list),ego_states_xyz) + detections_states,detections_velocity,detections_tokens,detections_types = _extract_detections(log_name,len(ts_list),ego_states_xyz,valid_timestamp) with pa.OSFile(str(log_file_path), "wb") as sink: with pa.ipc.new_file(sink, recording_schema) as writer: - for idx, tp in enumerate(ts_list): - + for idx in range(len(valid_timestamp)): + valid_idx = valid_timestamp[idx] row_data = { "token": [create_token(f"{log_name}_{idx}")], - "timestamp": [tp.time_us], - "detections_state": [detections_states[idx]], - "detections_velocity": [detections_velocity[idx]], - "detections_token": [detections_tokens[idx]], - "detections_type": [detections_types[idx]], + "timestamp": [ts_list[valid_idx].time_us], + "detections_state": [detections_states[valid_idx]], + "detections_velocity": [detections_velocity[valid_idx]], + "detections_token": [detections_tokens[valid_idx]], + "detections_type": [detections_types[valid_idx]], "ego_states": [ego_state_all[idx]], "traffic_light_ids": [[]], "traffic_light_types": [[]], @@ -384,7 +384,7 @@ def _write_recording_table( } if data_converter_config.lidar_store_option is not None: - lidar_data_dict = _extract_lidar(log_name, idx, data_converter_config) + lidar_data_dict = _extract_lidar(log_name, valid_idx, data_converter_config) for lidar_type, lidar_data in lidar_data_dict.items(): if lidar_data is not None: row_data[lidar_type.serialize()] = [lidar_data] @@ -392,7 +392,7 @@ def _write_recording_table( row_data[lidar_type.serialize()] = [None] if data_converter_config.camera_store_option is not None: - camera_data_dict = _extract_cameras(log_name, idx, data_converter_config) + camera_data_dict = _extract_cameras(log_name, valid_idx, data_converter_config) for camera_type, camera_data in camera_data_dict.items(): if camera_data is not None: row_data[camera_type.serialize()] = [camera_data[0]] @@ -448,7 +448,7 @@ def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]: return tps return None -def _extract_ego_state_all(log_name: str) -> List[List[float]]: +def _extract_ego_state_all(log_name: str) -> Tuple[List[List[float]], List[int]]: ego_state_all: List[List[float]] = [] @@ -456,24 +456,20 @@ def _extract_ego_state_all(log_name: str) -> List[List[float]]: if not pose_file.exists(): raise FileNotFoundError(f"Pose file not found: {pose_file}") poses = np.loadtxt(pose_file) - poses_time = poses[:, 0] - 1 # Adjusting time to start from 0 + poses_time = poses[:, 0].astype(np.int32) 
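# --- editor's note (illustrative sketch, not part of the patch): the indexing
# used above (columns 1:4, 5:8, 9:12 for rotation; 4, 8, 12 for translation)
# implies each poses.txt row is [frame_idx, r00 r01 r02 tx, r10 r11 r12 ty,
# r20 r21 r22 tz], i.e. a frame index followed by a flattened 3x4 rigid transform:
import numpy as np

def read_pose_row(row: np.ndarray) -> tuple[int, np.ndarray, np.ndarray]:
    frame_idx = int(row[0])
    T = row[1:13].reshape(3, 4)  # row-major [R | t]
    return frame_idx, T[:, :3], T[:, 3]  # (3, 3) rotation, (3,) translation
# --- end editor's note ---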
+ valid_timestamp: List[int] = list(poses_time) - # oxts_path = PATH_POSES_ROOT / log_name / "oxts" / "data" - oxts_path = Path("/data/jbwang/d123/data_poses/") / log_name / "oxts" / "data" + oxts_path = PATH_POSES_ROOT / log_name / "oxts" / "data" - pose_idx = 0 - poses_time_len = len(poses_time) - - for idx in range(len(list(oxts_path.glob("*.txt")))): - oxts_path_file = oxts_path / f"{int(idx):010d}.txt" + for idx in range(len(valid_timestamp)): + oxts_path_file = oxts_path / f"{int(valid_timestamp[idx]):010d}.txt" oxts_data = np.loadtxt(oxts_path_file) vehicle_parameters = get_kitti360_station_wagon_parameters() - while pose_idx + 1 < poses_time_len and poses_time[pose_idx + 1] < idx: - pose_idx += 1 - pos = pose_idx - # pos = np.searchsorted(poses_time, idx, side='right') - 1 + pos = idx + if log_name=="2013_05_28_drive_0004_sync" and pos == 0: + pos = 1 # NOTE you can use oxts_data[3:6] as roll, pitch, yaw for simplicity #roll, pitch, yaw = oxts_data[3:6] @@ -521,12 +517,13 @@ def _extract_ego_state_all(log_name: str) -> List[List[float]]: timepoint=None, ).array.tolist() ) - return ego_state_all + return ego_state_all, valid_timestamp def _extract_detections( log_name: str, ts_len: int, - ego_states_xyz: np.ndarray + ego_states_xyz: np.ndarray, + valid_timestamp: List[int], ) -> Tuple[List[List[float]], List[List[float]], List[str], List[int]]: detections_states: List[List[List[float]]] = [[] for _ in range(ts_len)] @@ -544,17 +541,16 @@ def _extract_detections( tree = ET.parse(bbox_3d_path) root = tree.getroot() - dynamic_objs: Dict[int, List[KITTI360Bbox3D]] = defaultdict(list) - detection_preprocess_path = PREPOCESS_DETECTION_DIR / f"{log_name}_detection_preprocessed.pkl" if detection_preprocess_path.exists(): with open(detection_preprocess_path, "rb") as f: detection_preprocess_result = pickle.load(f) static_records_dict = {record_item["global_id"]: record_item for record_item in detection_preprocess_result["static"]} - dynamic_records_dict = detection_preprocess_result["dynamic"] else: detection_preprocess_result = None + dynamic_objs: Dict[int, List[KITTI360Bbox3D]] = defaultdict(list) + for child in root: if child.find('semanticId') is not None: semanticIdKITTI = int(child.find('semanticId').text) @@ -570,7 +566,7 @@ def _extract_detections( #static object if obj.timestamp == -1: if detection_preprocess_result is None: - obj.filter_by_radius(ego_states_xyz,radius=50.0) + obj.filter_by_radius(ego_states_xyz,valid_timestamp,radius=50.0) else: obj.load_detection_preprocess(static_records_dict) for record in obj.valid_frames["records"]: @@ -584,11 +580,8 @@ def _extract_detections( dynamic_objs[global_ID].append(obj) # dynamic object - if detection_preprocess_result is not None: - dynamic_objs = copy.deepcopy(dynamic_records_dict) - for global_id, obj_list in dynamic_objs.items(): - obj_list = interpolate_obj_list(obj_list) + obj_list.sort(key=lambda obj: obj.timestamp) num_frames = len(obj_list) positions = [obj.get_state_array()[:3] for obj in obj_list] diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py index a756a343..e8520253 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_helper.py @@ -1,7 +1,7 @@ import numpy as np from collections import defaultdict -from typing import Dict, Optional, Any, List +from typing import Dict, Optional, Any, List, Tuple import copy from scipy.linalg import polar from 
scipy.spatial.transform import Rotation as R @@ -30,14 +30,14 @@ KITTI3602NUPLAN_IMU_CALIBRATION = kitti3602nuplan_imu_calibration_ideal MAX_N = 1000 -def local2global(semanticId, instanceId): +def local2global(semanticId: int, instanceId: int) -> int: globalId = semanticId*MAX_N + instanceId if isinstance(globalId, np.ndarray): return globalId.astype(np.int32) else: return int(globalId) -def global2local(globalId): +def global2local(globalId: int) -> Tuple[int, int]: semanticId = globalId // MAX_N instanceId = globalId % MAX_N if isinstance(globalId, np.ndarray): @@ -72,12 +72,6 @@ def __init__(self): #label self.label = '' - - # used to mark if the bbox is interpolated - self.is_interpolated = False - # GT annotation idx - self.idx_next = -1 - self.idx_prev = -1 def parseBbox(self, child): self.timestamp = int(child.find('timestamp').text) @@ -138,7 +132,7 @@ def parse_scale_rotation(self): self.pitch = pitch self.roll = roll - def get_state_array(self): + def get_state_array(self) -> np.ndarray: center = StateSE3( x=self.T[0], y=self.T[1], @@ -152,17 +146,17 @@ def get_state_array(self): return bounding_box_se3.array - def filter_by_radius(self,ego_state_xyz,radius=50.0): + def filter_by_radius(self, ego_state_xyz: np.ndarray, valid_timestamp: List[int], radius: float = 50.0) -> None: ''' first stage of detection, used to filter out detections by radius ''' d = np.linalg.norm(ego_state_xyz - self.T[None, :], axis=1) idxs = np.where(d <= radius)[0] for idx in idxs: self.valid_frames["records"].append({ - "timestamp": idx, + "timestamp": valid_timestamp[idx], "points_in_box": None, }) - def box_visible_in_point_cloud(self, points): + def box_visible_in_point_cloud(self, points: np.ndarray) -> Tuple[bool, int]: ''' points: (N,3) , box: (8,3) ''' box = self.vertices.copy() # avoid calculating ground point cloud @@ -185,67 +179,6 @@ def load_detection_preprocess(self, records_dict: Dict[int, Any]): if self.globalID in records_dict: self.valid_frames["records"] = records_dict[self.globalID]["records"] -def interpolate_obj_list(obj_list: List[KITTI360Bbox3D]) -> List[KITTI360Bbox3D]: - """ - Fill missing timestamps in obj_list by linear interpolation. - For each missing timestamp between two objects, create a new KITTI360Bbox3D object - with only interpolated position (T), yaw, pitch, roll, and copy other attributes. - Returns a new list with all timestamps filled and sorted. 
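# --- editor's note (illustrative sketch, not part of the patch): the yaw handling
# in the interpolation helper deleted here wraps the angle delta via
# normalize_angle before blending, avoiding the 2*pi jump a plain lerp takes
# across the +/-pi boundary. Reduced to a self-contained form:
import numpy as np

def lerp_angle(a0: float, a1: float, frac: float) -> float:
    delta = np.arctan2(np.sin(a1 - a0), np.cos(a1 - a0))  # shortest signed delta
    a = a0 + frac * delta
    return float(np.arctan2(np.sin(a), np.cos(a)))  # renormalize to (-pi, pi]
# --- end editor's note ---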
- """ - if not obj_list: - return obj_list - - # Sort by timestamp ascending - obj_list.sort(key=lambda obj: obj.timestamp) - timestamps = [obj.timestamp for obj in obj_list] - min_ts, max_ts = min(timestamps), max(timestamps) - full_ts = list(range(min_ts, max_ts + 1)) - missing_ts = sorted(set(full_ts) - set(timestamps)) - - # Prepare arrays for interpolation - T_arr = np.array([obj.T for obj in obj_list]) - yaw_arr = np.array([obj.yaw for obj in obj_list]) - pitch_arr = np.array([obj.pitch for obj in obj_list]) - roll_arr = np.array([obj.roll for obj in obj_list]) - ts_arr = np.array(timestamps) - - for ts in missing_ts: - idx_next = np.searchsorted(ts_arr, ts) - idx_prev = idx_next - 1 - if idx_prev < 0 or idx_next >= len(obj_list): - continue - - frac = (ts - ts_arr[idx_prev]) / (ts_arr[idx_next] - ts_arr[idx_prev]) - T_interp = T_arr[idx_prev] * (1 - frac) + T_arr[idx_next] * frac - - yaw_delat = normalize_angle(yaw_arr[idx_next] - yaw_arr[idx_prev]) - yaw_interp = yaw_arr[idx_prev] + yaw_delat * frac - yaw_interp = normalize_angle(yaw_interp) - - pitch_interp = pitch_arr[idx_prev] * (1 - frac) + pitch_arr[idx_next] * frac - roll_interp = roll_arr[idx_prev] * (1 - frac) + roll_arr[idx_next] * frac - - obj_new = copy.deepcopy(obj_list[idx_prev]) - obj_new.timestamp = ts - obj_new.T = T_interp - obj_new.yaw = yaw_interp - obj_new.pitch = pitch_interp - obj_new.roll = roll_interp - obj_new.Rm = R.from_euler('zyx', [obj_new.yaw, obj_new.pitch, obj_new.roll], degrees=False).as_matrix() - obj_new.R = obj_new.Rm @ obj_new.Sm - obj_new.vertices = (obj_new.R @ obj_new.vertices_template.T).T + obj_new.T - obj_new.is_interpolated = True - obj_new.idx_prev = ts_arr[idx_prev] - obj_new.idx_next = ts_arr[idx_next] - - obj_list.append(obj_new) - - obj_list.sort(key=lambda obj: obj.timestamp) - return obj_list - -def normalize_angle(a): - return np.arctan2(np.sin(a), np.cos(a)) - class KITTI360_MAP_Bbox3D(): def __init__(self): self.id = -1 diff --git a/d123/dataset/dataset_specific/kitti_360/kitti_360_map_conversion.py b/d123/dataset/dataset_specific/kitti_360/kitti_360_map_conversion.py index bf13eda6..924a7822 100644 --- a/d123/dataset/dataset_specific/kitti_360/kitti_360_map_conversion.py +++ b/d123/dataset/dataset_specific/kitti_360/kitti_360_map_conversion.py @@ -36,7 +36,7 @@ # "driveway", ] -def convert_kitti360_map(log_name, map_path): +def convert_kitti360_map(log_name: str, map_path: Path) -> None: xml_path = PATH_3D_BBOX_ROOT / "train_full" / f"{log_name}.xml" diff --git a/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py b/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py index f2d14ce1..97ea6eb8 100644 --- a/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py +++ b/d123/dataset/dataset_specific/kitti_360/preprocess_detection.py @@ -1,8 +1,8 @@ """ -This script precomputes detection records for KITTI-360: +This script precomputes static detection records for KITTI-360: - Stage 1: radius filtering using ego positions (from poses.txt). - Stage 2: LiDAR visibility check to fill per-frame point counts. -It writes a pickle containing, for each object, all feasible frames and +It writes a pickle containing, for each static object, all feasible frames and their point counts to avoid recomputation in later pipelines. 
We have precomputed and saved the pickle for all training logs, you can either download them or run this script to generate @@ -31,8 +31,8 @@ PATH_3D_BBOX_ROOT = KITTI360_DATA_ROOT / DIR_3D_BBOX PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES -from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic,interpolate_obj_list -from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT, kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT +from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic +from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT def _bbox_xml_path(log_name: str) -> Path: if log_name == "2013_05_28_drive_0004_sync": @@ -47,8 +47,8 @@ def _load_lidar_xyz(filepath: Path) -> np.ndarray: arr = np.fromfile(filepath, dtype=np.float32) return arr.reshape(-1, 4)[:, :3] -def _collect_objects(log_name: str) -> Tuple[List[KITTI360Bbox3D], Dict[int, List[KITTI360Bbox3D]]]: - """Parse XML and collect objects with valid class names.""" +def _collect_static_objects(log_name: str) -> List[KITTI360Bbox3D]: + """Parse XML and collect static objects with valid class names.""" xml_path = _bbox_xml_path(log_name) if not xml_path.exists(): raise FileNotFoundError(f"BBox 3D file not found: {xml_path}") @@ -56,7 +56,6 @@ def _collect_objects(log_name: str) -> Tuple[List[KITTI360Bbox3D], Dict[int, Lis root = tree.getroot() static_objs: List[KITTI360Bbox3D] = [] - dynamic_objs: Dict[int, List[KITTI360Bbox3D]] = defaultdict(list) for child in root: if child.find('semanticId') is not None: @@ -65,20 +64,15 @@ def _collect_objects(log_name: str) -> Tuple[List[KITTI360Bbox3D], Dict[int, Lis else: lable = child.find('label').text name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(lable, 'unknown') - if child.find("transform") is None or name not in KIITI360_DETECTION_NAME_DICT: + timestamp = int(child.find('timestamp').text) # -1 for static objects + if child.find("transform") is None or name not in KIITI360_DETECTION_NAME_DICT or timestamp != -1: continue obj = KITTI360Bbox3D() obj.parseBbox(child) - timestamp = int(child.find('timestamp').text) - if timestamp == -1: - static_objs.append(obj) - else: - global_ID = obj.globalID - dynamic_objs[global_ID].append(obj) - - return static_objs, dynamic_objs + static_objs.append(obj) + return static_objs -def _collect_ego_states(log_name: str,length: int) -> npt.NDArray[np.float64]: +def _collect_ego_states(log_name: str) -> Tuple[npt.NDArray[np.float64], list[int]]: """Load ego states from poses.txt.""" pose_file = PATH_POSES_ROOT / log_name / "poses.txt" @@ -86,17 +80,12 @@ def _collect_ego_states(log_name: str,length: int) -> npt.NDArray[np.float64]: raise FileNotFoundError(f"Pose file not found: {pose_file}") poses = np.loadtxt(pose_file) - poses_time = poses[:, 0] - 1 # Adjusting time to start from 0 + poses_time = poses[:, 0].astype(np.int32) + valid_timestamp: List[int] = list(poses_time) - pose_idx = 0 - poses_time_len = len(poses_time) - ego_states = [] - - for time_idx in range(length): - while pose_idx + 1 < poses_time_len and poses_time[pose_idx + 1] < time_idx: - pose_idx += 1 - pos = pose_idx + for time_idx in range(len(valid_timestamp)): + pos = time_idx state_item = np.eye(4) r00, r01, r02 = poses[pos, 1:4] r10, r11, r12 = poses[pos, 5:8] @@ -115,7 +104,8 @@ def 
_collect_ego_states(log_name: str,length: int) -> npt.NDArray[np.float64]: state_item[:3, 3] = ego_state_xyz ego_states.append(state_item) - return np.array(ego_states) # [N,4,4] + # [N,4,4] + return np.array(ego_states), valid_timestamp def process_detection( @@ -128,9 +118,6 @@ def process_detection( for static objects: 1) filter by ego-centered radius over all frames 2) filter by LiDAR point cloud visibility - for dynamic objects: - 1) interpolate boxes for missing frames - 2) select box with highest LiDAR point count Save per-frame detections to a pickle to avoid recomputation. """ @@ -141,31 +128,22 @@ def process_detection( logging.info(f"[preprocess] {log_name}: found {ts_len} lidar frames") # 1) Parse objects from XML - static_objs: List[KITTI360Bbox3D] - dynamic_objs: Dict[int, List[KITTI360Bbox3D]] - static_objs, dynamic_objs = _collect_objects(log_name) - - # only interpolate dynamic objects - for global_ID, obj_list in dynamic_objs.items(): - obj_list_interpolated = interpolate_obj_list(obj_list) - dynamic_objs[global_ID] = obj_list_interpolated - dymanic_objs_updated = copy.deepcopy(dynamic_objs) - + static_objs: List[KITTI360Bbox3D] = _collect_static_objects(log_name) logging.info(f"[preprocess] {log_name}: static objects = {len(static_objs)}") - logging.info(f"[preprocess] {log_name}: dynamic objects = {len(dynamic_objs.keys())}") # 2) Filter static objs by ego-centered radius - ego_states = _collect_ego_states(log_name,ts_len) + ego_states, valid_timestamp = _collect_ego_states(log_name) logging.info(f"[preprocess] {log_name}: ego states = {len(ego_states)}") for obj in static_objs: - obj.filter_by_radius(ego_states[:, :3, 3], radius_m) + obj.filter_by_radius(ego_states[:, :3, 3], valid_timestamp, radius_m) # 3) Filter static objs by LiDAR point cloud visibility lidar_extrinsic = get_lidar_extrinsic() def process_one_frame(time_idx: int) -> None: - logging.info(f"[preprocess] {log_name}: t={time_idx}") - lidar_path = _lidar_frame_path(log_name, time_idx) + valid_time_idx = valid_timestamp[time_idx] + logging.info(f"[preprocess] {log_name}: t={valid_time_idx}") + lidar_path = _lidar_frame_path(log_name, valid_time_idx) if not lidar_path.exists(): logging.warning(f"[preprocess] {log_name}: LiDAR frame not found: {lidar_path}") return @@ -181,49 +159,20 @@ def process_one_frame(time_idx: int) -> None: lidar_in_world = lidar_in_imu @ ego_states[time_idx][:3,:3].T + ego_states[time_idx][:3,3] for obj in static_objs: - if not any(record["timestamp"] == time_idx for record in obj.valid_frames["records"]): + if not any(record["timestamp"] == valid_time_idx for record in obj.valid_frames["records"]): continue visible, points_in_box = obj.box_visible_in_point_cloud(lidar_in_world) if not visible: - obj.valid_frames["records"] = [record for record in obj.valid_frames["records"] if record["timestamp"] != time_idx] + obj.valid_frames["records"] = [record for record in obj.valid_frames["records"] if record["timestamp"] != valid_time_idx] else: for record in obj.valid_frames["records"]: - if record["timestamp"] == time_idx: + if record["timestamp"] == valid_time_idx: record["points_in_box"] = points_in_box break - # for dynamic objects, select the box with the highest LiDAR point count - for global_ID, obj_list in dynamic_objs.items(): - obj_at_time = [obj for obj in obj_list if obj.timestamp == time_idx] - if not obj_at_time: - continue - - obj = obj_at_time[0] - # NOTE only update interpolated boxes - if not obj.is_interpolated: - continue - - max_points = -1 - best_obj = None - ts_prev 
= obj.idx_prev - ts_next = obj.idx_next - candidates = [candidate for candidate in obj_list if ts_prev <= candidate.timestamp <= ts_next] - - for obj in candidates: - visible, points_in_box = obj.box_visible_in_point_cloud(lidar_in_world) - if points_in_box > max_points: - max_points = points_in_box - best_obj = obj - - if best_obj is not None: - idx = next((i for i, o in enumerate(dynamic_objs[global_ID]) if o.timestamp == time_idx), None) - if idx is not None: - dymanic_objs_updated[global_ID][idx] = copy.deepcopy(best_obj) - dymanic_objs_updated[global_ID][idx].timestamp = time_idx - max_workers = os.cpu_count() * 2 with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: - results = list(executor.map(process_one_frame, range(ts_len))) + results = list(executor.map(process_one_frame, range(len(valid_timestamp)))) # 4) Save pickle static_records: List[Dict[str, Any]] = [] @@ -238,7 +187,6 @@ def process_one_frame(time_idx: int) -> None: payload = { "log_name": log_name, "static": static_records, - "dynamic": dymanic_objs_updated } with open(out_path, "wb") as f: pickle.dump(payload, f) @@ -248,7 +196,7 @@ def process_one_frame(time_idx: int) -> None: import argparse logging.basicConfig(level=logging.INFO) parser = argparse.ArgumentParser(description="Precompute KITTI-360 detections filters") - parser.add_argument("--log_name", default="2013_05_28_drive_0004_sync") + parser.add_argument("--log_name", default="2013_05_28_drive_0000_sync") parser.add_argument("--radius", type=float, default=60.0) parser.add_argument("--out", type=Path, default="detection_preprocess", help="output directory for pkl") args = parser.parse_args() From 5c95ecbd4ba65b6f2524ccb46355c41c88df8b70 Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Tue, 30 Sep 2025 13:27:04 +0800 Subject: [PATCH 19/32] merge dev_v0.0.7 into kitti360 --- d123/datasets/av2/av2_data_converter.py | 11 ----------- d123/datasets/carla/carla_data_converter.py | 8 -------- d123/datasets/nuplan/nuplan_data_converter.py | 13 ------------- d123/datatypes/scene/arrow/arrow_scene.py | 11 ----------- 4 files changed, 43 deletions(-) diff --git a/d123/datasets/av2/av2_data_converter.py b/d123/datasets/av2/av2_data_converter.py index 59f306e0..f066aa42 100644 --- a/d123/datasets/av2/av2_data_converter.py +++ b/d123/datasets/av2/av2_data_converter.py @@ -10,17 +10,6 @@ import pandas as pd import pyarrow as pa -<<<<<<< HEAD:d123/dataset/dataset_specific/av2/av2_data_converter.py -from d123.common.datatypes.sensor.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json -from d123.common.datatypes.time.time_point import TimePoint -from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index -from d123.common.datatypes.vehicle_state.vehicle_parameters import ( - get_av2_ford_fusion_hybrid_parameters, - rear_axle_se3_to_center_se3, -) -======= ->>>>>>> dev_v0.0.7:d123/datasets/av2/av2_data_converter.py from d123.common.multithreading.worker_utils import WorkerPool, worker_map from d123.datasets.av2.av2_constants import ( AV2_CAMERA_TYPE_MAPPING, diff --git a/d123/datasets/carla/carla_data_converter.py b/d123/datasets/carla/carla_data_converter.py index 80525baf..bcf8342c 100644 --- a/d123/datasets/carla/carla_data_converter.py +++ b/d123/datasets/carla/carla_data_converter.py @@ -11,14 +11,6 @@ import numpy as np import pyarrow as pa -<<<<<<< 
HEAD:d123/dataset/dataset_specific/carla/carla_data_converter.py -from d123.common.datatypes.sensor.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar_index import CarlaLidarIndex -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3Index -from d123.common.datatypes.vehicle_state.vehicle_parameters import get_carla_lincoln_mkz_2020_parameters -======= ->>>>>>> dev_v0.0.7:d123/datasets/carla/carla_data_converter.py from d123.common.multithreading.worker_utils import WorkerPool, worker_map from d123.common.utils.arrow_helper import open_arrow_table, write_arrow_table from d123.datasets.raw_data_converter import DataConverterConfig, RawDataConverter diff --git a/d123/datasets/nuplan/nuplan_data_converter.py b/d123/datasets/nuplan/nuplan_data_converter.py index e2d1cef5..398e536a 100644 --- a/d123/datasets/nuplan/nuplan_data_converter.py +++ b/d123/datasets/nuplan/nuplan_data_converter.py @@ -12,18 +12,6 @@ import yaml from pyquaternion import Quaternion -<<<<<<< HEAD:d123/dataset/dataset_specific/nuplan/nuplan_data_converter.py - -import d123.dataset.dataset_specific.nuplan.utils as nuplan_utils -from d123.common.datatypes.detection.detection import TrafficLightStatus -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.sensor.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar_index import NuplanLidarIndex -from d123.common.datatypes.time.time_point import TimePoint -from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index -from d123.common.datatypes.vehicle_state.vehicle_parameters import ( -======= import d123.datasets.nuplan.utils as nuplan_utils from d123.common.multithreading.worker_utils import WorkerPool, worker_map from d123.common.utils.arrow_helper import open_arrow_table, write_arrow_table @@ -39,7 +27,6 @@ from d123.datatypes.time.time_point import TimePoint from d123.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index from d123.datatypes.vehicle_state.vehicle_parameters import ( ->>>>>>> dev_v0.0.7:d123/datasets/nuplan/nuplan_data_converter.py get_nuplan_chrysler_pacifica_parameters, rear_axle_se3_to_center_se3, ) diff --git a/d123/datatypes/scene/arrow/arrow_scene.py b/d123/datatypes/scene/arrow/arrow_scene.py index 45738893..e05b717c 100644 --- a/d123/datatypes/scene/arrow/arrow_scene.py +++ b/d123/datatypes/scene/arrow/arrow_scene.py @@ -4,23 +4,12 @@ import pyarrow as pa -<<<<<<< HEAD:d123/dataset/scene/arrow_scene.py -from d123.common.datatypes.detection.detection import BoxDetectionWrapper, TrafficLightDetectionWrapper -from d123.common.datatypes.recording.detection_recording import DetectionRecording -from d123.common.datatypes.sensor.camera import Camera, CameraMetadata, PinholeCameraMetadata, FisheyeMEICameraMetadata, CameraType, camera_metadata_dict_from_json -from d123.common.datatypes.sensor.lidar import LiDAR, LiDARMetadata, LiDARType, lidar_metadata_dict_from_json -from d123.common.datatypes.time.time_point import TimePoint -from d123.common.datatypes.vehicle_state.ego_state import EgoStateSE3 -from d123.common.datatypes.vehicle_state.vehicle_parameters import VehicleParameters -from 
d123.dataset.arrow.conversion import (
-=======
 from d123.common.utils.arrow_helper import open_arrow_table
 from d123.datatypes.detections.detection import BoxDetectionWrapper, DetectionRecording, TrafficLightDetectionWrapper
 from d123.datatypes.maps.abstract_map import AbstractMap
 from d123.datatypes.maps.gpkg.gpkg_map import get_local_map_api, get_map_api_from_names
 from d123.datatypes.scene.abstract_scene import AbstractScene
 from d123.datatypes.scene.arrow.utils.conversion import (
->>>>>>> dev_v0.0.7:d123/datatypes/scene/arrow/arrow_scene.py
     get_box_detections_from_arrow_table,
     get_camera_from_arrow_table,
     get_ego_vehicle_state_from_arrow_table,

From 5bf2e5aa9c33bfda2d0f2b4a042164f991746d54 Mon Sep 17 00:00:00 2001
From: jbwang <1159270049@qq.com>
Date: Tue, 30 Sep 2025 14:47:20 +0800
Subject: [PATCH 20/32] merge dev_v0.0.7 into kitti360 and make kitti360
 compatible with existing code

---
 d123/datasets/av2/av2_data_converter.py       |  2 +-
 d123/datasets/carla/carla_data_converter.py   |  2 +-
 .../kitti_360/kitti_360_data_converter.py     | 35 ++++++++++---------
 d123/datasets/kitti_360/kitti_360_helper.py   | 15 +++++---
 .../kitti_360/kitti_360_map_conversion.py     |  8 ++---
 d123/datasets/kitti_360/labels.py             |  2 +-
 d123/datasets/kitti_360/load_sensor.py        |  2 +-
 .../kitti_360/preprocess_detection.py         |  4 +--
 d123/datasets/nuplan/nuplan_data_converter.py |  2 +-
 d123/datasets/wopd/wopd_data_converter.py     |  2 +-
 d123/datatypes/scene/arrow/arrow_scene.py     |  2 +-
 .../datatypes/scene/arrow/utils/conversion.py |  2 +-
 .../default_dataset_conversion.yaml           |  2 +-
 .../config/datasets/kitti360_dataset.yaml     |  4 +--
 14 files changed, 47 insertions(+), 37 deletions(-)

diff --git a/d123/datasets/av2/av2_data_converter.py b/d123/datasets/av2/av2_data_converter.py
index f066aa42..7d2e3525 100644
--- a/d123/datasets/av2/av2_data_converter.py
+++ b/d123/datasets/av2/av2_data_converter.py
@@ -25,7 +25,7 @@
 from d123.datasets.av2.av2_map_conversion import convert_av2_map
 from d123.datasets.raw_data_converter import DataConverterConfig, RawDataConverter
 from d123.datatypes.scene.scene_metadata import LogMetadata
-from d123.datatypes.sensors.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json
+from d123.datatypes.sensors.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json
 from d123.datatypes.sensors.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json
 from d123.datatypes.time.time_point import TimePoint
 from d123.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index
diff --git a/d123/datasets/carla/carla_data_converter.py b/d123/datasets/carla/carla_data_converter.py
index bcf8342c..f5b8fd16 100644
--- a/d123/datasets/carla/carla_data_converter.py
+++ b/d123/datasets/carla/carla_data_converter.py
@@ -19,7 +19,7 @@
 from d123.datatypes.maps.abstract_map_objects import AbstractLane
 from d123.datatypes.maps.gpkg.gpkg_map import get_map_api_from_names
 from d123.datatypes.scene.scene_metadata import LogMetadata
-from d123.datatypes.sensors.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json
+from d123.datatypes.sensors.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json
 from d123.datatypes.sensors.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json
 from d123.datatypes.sensors.lidar_index import CarlaLidarIndex
 from d123.datatypes.vehicle_state.ego_state import EgoStateSE3Index
diff --git a/d123/datasets/kitti_360/kitti_360_data_converter.py 
b/d123/datasets/kitti_360/kitti_360_data_converter.py index 76396bbd..0616dcfa 100644 --- a/d123/datasets/kitti_360/kitti_360_data_converter.py +++ b/d123/datasets/kitti_360/kitti_360_data_converter.py @@ -22,20 +22,21 @@ from nuplan.planning.utils.multithreading.worker_utils import WorkerPool, worker_map -from d123.common.datatypes.detection.detection_types import DetectionType -from d123.common.datatypes.sensor.camera import PinholeCameraMetadata, FisheyeMEICameraMetadata, CameraType, camera_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json -from d123.common.datatypes.sensor.lidar_index import Kitti360LidarIndex -from d123.common.datatypes.time.time_point import TimePoint -from d123.common.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index -from d123.common.datatypes.vehicle_state.vehicle_parameters import get_kitti360_station_wagon_parameters,rear_axle_se3_to_center_se3 -from d123.dataset.arrow.helper import open_arrow_table, write_arrow_table -from d123.dataset.dataset_specific.raw_data_converter import DataConverterConfig, RawDataConverter -from d123.dataset.logs.log_metadata import LogMetadata -from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic -from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT,kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT -from d123.dataset.dataset_specific.kitti_360.kitti_360_map_conversion import convert_kitti360_map +from d123.datatypes.detections.detection_types import DetectionType +from d123.datatypes.sensors.camera import PinholeCameraMetadata, FisheyeMEICameraMetadata, CameraType, camera_metadata_dict_to_json +from d123.datatypes.sensors.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json +from d123.datatypes.sensors.lidar_index import Kitti360LidarIndex +from d123.datatypes.time.time_point import TimePoint +from d123.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index +from d123.datatypes.vehicle_state.vehicle_parameters import get_kitti360_station_wagon_parameters,rear_axle_se3_to_center_se3 +from d123.common.utils.arrow_helper import open_arrow_table, write_arrow_table +from d123.datasets.raw_data_converter import DataConverterConfig, RawDataConverter +from d123.datatypes.scene.scene_metadata import LogMetadata +from d123.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic +from d123.datasets.kitti_360.labels import KIITI360_DETECTION_NAME_DICT,kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT +from d123.datasets.kitti_360.kitti_360_map_conversion import convert_kitti360_map from d123.geometry import BoundingBoxSE3, BoundingBoxSE3Index, StateSE3, Vector3D, Vector3DIndex +from d123.geometry.rotation import EulerAngles KITTI360_DT: Final[float] = 0.1 SORT_BY_TIMESTAMP: Final[bool] = True @@ -482,13 +483,15 @@ def _extract_ego_state_all(log_name: str) -> Tuple[List[List[float]], List[int]] R_mat_cali = R_mat @ KITTI3602NUPLAN_IMU_CALIBRATION[:3,:3] yaw, pitch, roll = Quaternion(matrix=R_mat_cali[:3, :3]).yaw_pitch_roll + ego_quaternion = EulerAngles(roll=roll, pitch=pitch, yaw=yaw).quaternion rear_axle_pose = StateSE3( x=poses[pos, 4], y=poses[pos, 8], z=poses[pos, 12], - roll=roll, - pitch=pitch, - yaw=yaw, + qw=ego_quaternion.qw, + qx=ego_quaternion.qx, + qy=ego_quaternion.qy, + qz=ego_quaternion.qz, ) center = 
rear_axle_se3_to_center_se3(rear_axle_se3=rear_axle_pose, vehicle_parameters=vehicle_parameters) diff --git a/d123/datasets/kitti_360/kitti_360_helper.py b/d123/datasets/kitti_360/kitti_360_helper.py index e8520253..01c3d1fe 100644 --- a/d123/datasets/kitti_360/kitti_360_helper.py +++ b/d123/datasets/kitti_360/kitti_360_helper.py @@ -8,7 +8,8 @@ from d123.geometry import BoundingBoxSE3, StateSE3 from d123.geometry.polyline import Polyline3D -from d123.dataset.dataset_specific.kitti_360.labels import kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT +from d123.geometry.rotation import EulerAngles +from d123.datasets.kitti_360.labels import kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT import os from pathlib import Path @@ -124,6 +125,7 @@ def parse_scale_rotation(self): Rm[0] = -Rm[0] scale = np.diag(Sm) yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', degrees=False) + obj_quaternion = EulerAngles(roll=roll, pitch=pitch, yaw=yaw).quaternion self.Rm = np.array(Rm) self.Sm = np.array(Sm) @@ -131,15 +133,20 @@ def parse_scale_rotation(self): self.yaw = yaw self.pitch = pitch self.roll = roll + self.qw = obj_quaternion.qw + self.qx = obj_quaternion.qx + self.qy = obj_quaternion.qy + self.qz = obj_quaternion.qz def get_state_array(self) -> np.ndarray: center = StateSE3( x=self.T[0], y=self.T[1], z=self.T[2], - roll=self.roll, - pitch=self.pitch, - yaw=self.yaw, + qw=self.qw, + qx=self.qx, + qy=self.qy, + qz=self.qz, ) scale = self.scale bounding_box_se3 = BoundingBoxSE3(center, scale[0], scale[1], scale[2]) diff --git a/d123/datasets/kitti_360/kitti_360_map_conversion.py b/d123/datasets/kitti_360/kitti_360_map_conversion.py index 924a7822..643a13c6 100644 --- a/d123/datasets/kitti_360/kitti_360_map_conversion.py +++ b/d123/datasets/kitti_360/kitti_360_map_conversion.py @@ -11,14 +11,14 @@ from shapely.geometry import LineString import shapely.geometry as geom -from d123.dataset.conversion.map.road_edge.road_edge_2d_utils import ( +from d123.datasets.utils.maps.road_edge.road_edge_2d_utils import ( get_road_edge_linear_rings, split_line_geometry_by_max_length, ) -from d123.dataset.maps.gpkg.utils import get_all_rows_with_value, get_row_with_value -from d123.dataset.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType +from d123.datatypes.maps.gpkg.utils import get_all_rows_with_value, get_row_with_value +from d123.datatypes.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType from d123.geometry.polyline import Polyline3D -from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360_MAP_Bbox3D +from d123.datasets.kitti_360.kitti_360_helper import KITTI360_MAP_Bbox3D MAX_ROAD_EDGE_LENGTH = 100.0 # meters, used to filter out very long road edges diff --git a/d123/datasets/kitti_360/labels.py b/d123/datasets/kitti_360/labels.py index 6903be9f..45e2d315 100644 --- a/d123/datasets/kitti_360/labels.py +++ b/d123/datasets/kitti_360/labels.py @@ -167,7 +167,7 @@ def assureSingleInstanceName( name ): # all good then return name -from d123.common.datatypes.detection.detection_types import DetectionType +from d123.datatypes.detections.detection_types import DetectionType BBOX_LABLES_TO_DETECTION_NAME_DICT = { 'car': 'car', diff --git a/d123/datasets/kitti_360/load_sensor.py b/d123/datasets/kitti_360/load_sensor.py index c4df6d36..7ca4489a 100644 --- a/d123/datasets/kitti_360/load_sensor.py +++ b/d123/datasets/kitti_360/load_sensor.py @@ -3,7 +3,7 @@ import numpy as np import logging -from d123.common.datatypes.sensor.lidar import LiDAR, LiDARMetadata +from 
d123.datatypes.sensors.lidar import LiDAR, LiDARMetadata def load_kitti360_lidar_from_path(filepath: Path, lidar_metadata: LiDARMetadata) -> LiDAR: diff --git a/d123/datasets/kitti_360/preprocess_detection.py b/d123/datasets/kitti_360/preprocess_detection.py index 97ea6eb8..92806736 100644 --- a/d123/datasets/kitti_360/preprocess_detection.py +++ b/d123/datasets/kitti_360/preprocess_detection.py @@ -31,8 +31,8 @@ PATH_3D_BBOX_ROOT = KITTI360_DATA_ROOT / DIR_3D_BBOX PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES -from d123.dataset.dataset_specific.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic -from d123.dataset.dataset_specific.kitti_360.labels import KIITI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT +from d123.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic +from d123.datasets.kitti_360.labels import KIITI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT def _bbox_xml_path(log_name: str) -> Path: if log_name == "2013_05_28_drive_0004_sync": diff --git a/d123/datasets/nuplan/nuplan_data_converter.py b/d123/datasets/nuplan/nuplan_data_converter.py index 398e536a..980c53b1 100644 --- a/d123/datasets/nuplan/nuplan_data_converter.py +++ b/d123/datasets/nuplan/nuplan_data_converter.py @@ -21,7 +21,7 @@ from d123.datatypes.detections.detection import TrafficLightStatus from d123.datatypes.detections.detection_types import DetectionType from d123.datatypes.scene.scene_metadata import LogMetadata -from d123.datatypes.sensors.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json +from d123.datatypes.sensors.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json from d123.datatypes.sensors.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json from d123.datatypes.sensors.lidar_index import NuplanLidarIndex from d123.datatypes.time.time_point import TimePoint diff --git a/d123/datasets/wopd/wopd_data_converter.py b/d123/datasets/wopd/wopd_data_converter.py index ebac241a..7a7aa7b1 100644 --- a/d123/datasets/wopd/wopd_data_converter.py +++ b/d123/datasets/wopd/wopd_data_converter.py @@ -18,7 +18,7 @@ from d123.datasets.wopd.waymo_map_utils.wopd_map_utils import convert_wopd_map from d123.datasets.wopd.wopd_utils import parse_range_image_and_camera_projection from d123.datatypes.scene.scene_metadata import LogMetadata -from d123.datatypes.sensors.camera import CameraMetadata, CameraType, camera_metadata_dict_to_json +from d123.datatypes.sensors.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json from d123.datatypes.sensors.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json from d123.datatypes.sensors.lidar_index import WopdLidarIndex from d123.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index diff --git a/d123/datatypes/scene/arrow/arrow_scene.py b/d123/datatypes/scene/arrow/arrow_scene.py index e05b717c..0fc61ba8 100644 --- a/d123/datatypes/scene/arrow/arrow_scene.py +++ b/d123/datatypes/scene/arrow/arrow_scene.py @@ -69,7 +69,7 @@ def __init__( ) = _get_scene_data(arrow_file_path) self._metadata: LogMetadata = _metadata self._vehicle_parameters: VehicleParameters = _vehicle_parameters - self._camera_metadata: Dict[CameraType, Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]] = _camera_metadata + self._camera_metadata: Dict[CameraType, CameraMetadata] = _camera_metadata self._lidar_metadata: 
Dict[LiDARType, LiDARMetadata] = _lidar_metadata self._map_api: Optional[AbstractMap] = None diff --git a/d123/datatypes/scene/arrow/utils/conversion.py b/d123/datatypes/scene/arrow/utils/conversion.py index 1f6c879c..dfa63d54 100644 --- a/d123/datatypes/scene/arrow/utils/conversion.py +++ b/d123/datatypes/scene/arrow/utils/conversion.py @@ -154,7 +154,7 @@ def get_lidar_from_arrow_table( elif log_metadata.dataset == "wopd": raise NotImplementedError elif log_metadata.dataset == "kitti360": - from d123.dataset.dataset_specific.kitti_360.load_sensor import load_kitti360_lidar_from_path + from d123.datasets.kitti_360.load_sensor import load_kitti360_lidar_from_path lidar = load_kitti360_lidar_from_path(full_lidar_path, lidar_metadata) else: diff --git a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml index 01084657..2c474fe8 100644 --- a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml +++ b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml @@ -15,7 +15,7 @@ defaults: - default_dataset_paths - _self_ - datasets: - - nuplan_private_dataset + # - nuplan_private_dataset # - carla_dataset # - wopd_dataset # - av2_sensor_dataset diff --git a/d123/script/config/datasets/kitti360_dataset.yaml b/d123/script/config/datasets/kitti360_dataset.yaml index 17b9e863..c5816a29 100644 --- a/d123/script/config/datasets/kitti360_dataset.yaml +++ b/d123/script/config/datasets/kitti360_dataset.yaml @@ -1,12 +1,12 @@ kitti360_dataset: - _target_: d123.dataset.dataset_specific.kitti_360.kitti_360_data_converter.Kitti360DataConverter + _target_: d123.datasets.kitti_360.kitti_360_data_converter.Kitti360DataConverter _convert_: 'all' splits: ["kitti360"] log_path: ${oc.env:KITTI360_DATA_ROOT} data_converter_config: - _target_: d123.dataset.dataset_specific.raw_data_converter.DataConverterConfig + _target_: d123.datasets.raw_data_converter.DataConverterConfig _convert_: 'all' output_path: ${d123_data_root} From 79cb5cdb0b1e487f9ec2ff61fb0417f30d7c72cd Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Sat, 11 Oct 2025 13:32:34 +0800 Subject: [PATCH 21/32] merge dev_v0.0.7 into kitti360 --- d123/datasets/nuplan/nuplan_data_converter.py | 26 +++---------------- d123/datasets/wopd/wopd_data_converter.py | 18 ------------- 2 files changed, 3 insertions(+), 41 deletions(-) diff --git a/d123/datasets/nuplan/nuplan_data_converter.py b/d123/datasets/nuplan/nuplan_data_converter.py index fc0375c7..93da2a8e 100644 --- a/d123/datasets/nuplan/nuplan_data_converter.py +++ b/d123/datasets/nuplan/nuplan_data_converter.py @@ -25,11 +25,6 @@ ) from d123.datatypes.detections.detection_types import DetectionType from d123.datatypes.scene.scene_metadata import LogMetadata -<<<<<<< HEAD -from d123.datatypes.sensors.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json -from d123.datatypes.sensors.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json -from d123.datatypes.sensors.lidar_index import NuplanLidarIndex -======= from d123.datatypes.sensors.camera.pinhole_camera import ( PinholeCameraMetadata, PinholeCameraType, @@ -37,7 +32,6 @@ PinholeIntrinsics, ) from d123.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType ->>>>>>> dev_v0.0.7 from d123.datatypes.time.time_point import TimePoint from d123.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 from d123.datatypes.vehicle_state.vehicle_parameters import ( @@ -240,29 
+234,15 @@ def convert_nuplan_log_to_arrow( return [] -<<<<<<< HEAD -def get_nuplan_camera_metadata(log_path: Path) -> Dict[CameraType, PinholeCameraMetadata]: +def get_nuplan_camera_metadata(log_path: Path) -> Dict[CameraType, CameraMetadata]: - def _get_camera_metadata(camera_type: CameraType) -> PinholeCameraMetadata: + def _get_camera_metadata(camera_type: CameraType) -> CameraMetadata: cam = list(get_cameras(log_path, [str(NUPLAN_CAMERA_TYPES[camera_type].value)]))[0] intrinsic = np.array(pickle.loads(cam.intrinsic)) rotation = np.array(pickle.loads(cam.rotation)) rotation = Quaternion(rotation).rotation_matrix distortion = np.array(pickle.loads(cam.distortion)) -======= -def get_nuplan_camera_metadata(log_path: Path) -> Dict[PinholeCameraType, PinholeCameraMetadata]: - - def _get_camera_metadata(camera_type: PinholeCameraType) -> PinholeCameraMetadata: - cam = list(get_cameras(log_path, [str(NUPLAN_CAMERA_TYPES[camera_type].value)]))[0] - - intrinsics_camera_matrix = np.array(pickle.loads(cam.intrinsic), dtype=np.float64) # array of shape (3, 3) - intrinsic = PinholeIntrinsics.from_camera_matrix(intrinsics_camera_matrix) - - distortion_array = np.array(pickle.loads(cam.distortion), dtype=np.float64) # array of shape (5,) - distortion = PinholeDistortion.from_array(distortion_array, copy=False) - ->>>>>>> dev_v0.0.7 - return PinholeCameraMetadata( + return CameraMetadata( camera_type=camera_type, width=cam.width, height=cam.height, diff --git a/d123/datasets/wopd/wopd_data_converter.py b/d123/datasets/wopd/wopd_data_converter.py index 700172bd..c6a9f3a2 100644 --- a/d123/datasets/wopd/wopd_data_converter.py +++ b/d123/datasets/wopd/wopd_data_converter.py @@ -7,11 +7,6 @@ import numpy as np import numpy.typing as npt -<<<<<<< HEAD -import pyarrow as pa -from d123.datatypes.detections.detection_types import DetectionType -======= ->>>>>>> dev_v0.0.7 from d123.common.multithreading.worker_utils import WorkerPool, worker_map from d123.common.utils.dependencies import check_dependencies @@ -24,12 +19,6 @@ from d123.datatypes.detections.detection import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper from d123.datatypes.detections.detection_types import DetectionType from d123.datatypes.scene.scene_metadata import LogMetadata -<<<<<<< HEAD -from d123.datatypes.sensors.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json -from d123.datatypes.sensors.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json -from d123.datatypes.sensors.lidar_index import WopdLidarIndex -from d123.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index -======= from d123.datatypes.sensors.camera.pinhole_camera import ( PinholeCameraMetadata, PinholeCameraType, @@ -39,7 +28,6 @@ from d123.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType from d123.datatypes.time.time_point import TimePoint from d123.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 ->>>>>>> dev_v0.0.7 from d123.datatypes.vehicle_state.vehicle_parameters import get_wopd_chrysler_pacifica_parameters from d123.geometry import BoundingBoxSE3Index, EulerAngles, StateSE3, Vector3D, Vector3DIndex from d123.geometry.bounding_box import BoundingBoxSE3 @@ -264,15 +252,9 @@ def convert_wopd_tfrecord_log_to_arrow( def get_wopd_camera_metadata( initial_frame: dataset_pb2.Frame, data_converter_config: DataConverterConfig -<<<<<<< HEAD -) -> Dict[CameraType, PinholeCameraMetadata]: - - cam_metadatas: Dict[CameraType, PinholeCameraMetadata] = {} 
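# --- editor's note (illustrative sketch, not part of the patch): the
# `PinholeIntrinsics.from_camera_matrix` call removed above presumably unpacks a
# standard 3x3 camera matrix K; under that assumption the essential mapping is:
import numpy as np

def unpack_camera_matrix(K: np.ndarray) -> tuple[float, float, float, float]:
    # K = [[fx, 0, cx], [0, fy, cy], [0, 0, 1]]
    return float(K[0, 0]), float(K[1, 1]), float(K[0, 2]), float(K[1, 2])  # fx, fy, cx, cy
# --- end editor's note ---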
-======= ) -> Dict[PinholeCameraType, PinholeCameraMetadata]: cam_metadatas: Dict[PinholeCameraType, PinholeCameraMetadata] = {} ->>>>>>> dev_v0.0.7 if data_converter_config.camera_store_option is not None: for calibration in initial_frame.context.camera_calibrations: camera_type = WOPD_CAMERA_TYPES[calibration.name] From 35962da9a09aad23f7df21d66c08e2b49971499b Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Tue, 14 Oct 2025 19:34:35 +0800 Subject: [PATCH 22/32] refactor kitti360 log_writer and incorporate fisheye camera --- d123/common/visualization/viser/server.py | 308 ------------------ .../visualization/viser/viser_viewer.py | 2 +- d123/datasets/av2/av2_data_converter.py | 13 - .../kitti_360/kitti_360_data_converter.py | 253 +++++++------- d123/datasets/kitti_360/load_sensor.py | 4 +- d123/datasets/nuplan/nuplan_data_converter.py | 17 +- d123/datasets/utils/arrow_ipc_writer.py | 3 +- d123/datatypes/scene/abstract_scene.py | 7 +- d123/datatypes/scene/arrow/arrow_scene.py | 5 +- .../scene/arrow/utils/arrow_getters.py | 25 +- d123/datatypes/scene/scene_filter.py | 12 +- d123/datatypes/scene/scene_metadata.py | 18 +- d123/datatypes/sensors/camera.py | 189 ----------- .../sensors/camera/fisheye_mei_camera.py | 189 +++++++++++ d123/datatypes/sensors/camera/utils.py | 36 ++ .../default_dataset_conversion.yaml | 5 +- .../config/datasets/kitti360_dataset.yaml | 19 ++ d123/script/run_viser.py | 5 - 18 files changed, 436 insertions(+), 674 deletions(-) delete mode 100644 d123/common/visualization/viser/server.py delete mode 100644 d123/datatypes/sensors/camera.py create mode 100644 d123/datatypes/sensors/camera/fisheye_mei_camera.py create mode 100644 d123/datatypes/sensors/camera/utils.py diff --git a/d123/common/visualization/viser/server.py b/d123/common/visualization/viser/server.py deleted file mode 100644 index afda1375..00000000 --- a/d123/common/visualization/viser/server.py +++ /dev/null @@ -1,308 +0,0 @@ -import time -from typing import Dict, List, Literal - -import numpy as np -import trimesh -import viser - -from d123.common.visualization.viser.utils import ( - get_bounding_box_meshes, - get_bounding_box_outlines, - get_camera_if_available, - get_camera_values, - get_lidar_points, - get_map_meshes, -) -from d123.datatypes.scene.abstract_scene import AbstractScene -from d123.datatypes.sensors.camera import CameraType -from d123.datatypes.sensors.lidar import LiDARType - -# TODO: Try to fix performance issues. -# TODO: Refactor this file. 
- -all_camera_types: List[CameraType] = [ - CameraType.CAM_F0, - CameraType.CAM_B0, - CameraType.CAM_L0, - CameraType.CAM_L1, - CameraType.CAM_L2, - CameraType.CAM_R0, - CameraType.CAM_R1, - CameraType.CAM_R2, -] - -# MISC config: -LINE_WIDTH: float = 4.0 - -# Bounding box config: -BOUNDING_BOX_TYPE: Literal["mesh", "lines"] = "mesh" - -# Map config: -MAP_AVAILABLE: bool = True - - -# Cameras config: - -# VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [CameraType.CAM_F0, CameraType.CAM_L0, CameraType.CAM_R0] -# VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = all_camera_types -VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [CameraType.CAM_STEREO_L] -# VISUALIZE_CAMERA_FRUSTUM: List[CameraType] = [] -VISUALIZE_CAMERA_GUI: List[CameraType] = [CameraType.CAM_STEREO_L] -CAMERA_SCALE: float = 1.0 - -# Lidar config: -LIDAR_AVAILABLE: bool = True - -LIDAR_TYPES: List[LiDARType] = [ - LiDARType.LIDAR_MERGED, - LiDARType.LIDAR_TOP, - LiDARType.LIDAR_FRONT, - LiDARType.LIDAR_SIDE_LEFT, - LiDARType.LIDAR_SIDE_RIGHT, - LiDARType.LIDAR_BACK, -] -# LIDAR_TYPES: List[LiDARType] = [ -# LiDARType.LIDAR_TOP, -# ] -LIDAR_POINT_SIZE: float = 0.05 - - -class ViserVisualizationServer: - def __init__( - self, - scenes: List[AbstractScene], - scene_index: int = 0, - host: str = "localhost", - port: int = 8080, - label: str = "D123 Viser Server", - ): - assert len(scenes) > 0, "At least one scene must be provided." - self.scenes = scenes - self.scene_index = scene_index - - self.host = host - self.port = port - self.label = label - - self.server = viser.ViserServer(host=self.host, port=self.port, label=self.label) - self.set_scene(self.scenes[self.scene_index % len(self.scenes)]) - - def next(self) -> None: - self.server.flush() - self.server.gui.reset() - self.server.scene.reset() - self.scene_index = (self.scene_index + 1) % len(self.scenes) - print(f"Viser server started at {self.host}:{self.port}") - self.set_scene(self.scenes[self.scene_index]) - - def set_scene(self, scene: AbstractScene) -> None: - num_frames = scene.get_number_of_iterations() - # print(scene.available_camera_types) - - self.server.gui.configure_theme(dark_mode=False, control_width="large") - - # TODO: Fix lighting. Environment map can help, but cannot be freely configured. - # self.server.scene.configure_environment_map( - # hdri="warehouse", - # background=False, - # background_intensity=0.25, - # environment_intensity=0.5, - # ) - - with self.server.gui.add_folder("Playback"): - server_playing = True - - gui_timestep = self.server.gui.add_slider( - "Timestep", - min=0, - max=num_frames - 1, - step=1, - initial_value=0, - disabled=True, - ) - gui_next_frame = self.server.gui.add_button("Next Frame", disabled=True) - gui_prev_frame = self.server.gui.add_button("Prev Frame", disabled=True) - gui_next_scene = self.server.gui.add_button("Next Scene", disabled=False) - gui_playing = self.server.gui.add_checkbox("Playing", True) - gui_framerate = self.server.gui.add_slider("FPS", min=1, max=60, step=0.1, initial_value=10) - gui_framerate_options = self.server.gui.add_button_group("FPS options", ("10", "20", "30", "60")) - - # Frame step buttons. - @gui_next_frame.on_click - def _(_) -> None: - gui_timestep.value = (gui_timestep.value + 1) % num_frames - - @gui_prev_frame.on_click - def _(_) -> None: - gui_timestep.value = (gui_timestep.value - 1) % num_frames - - @gui_next_scene.on_click - def _(_) -> None: - nonlocal server_playing - server_playing = False - - # Disable frame controls when we're playing. 
- @gui_playing.on_update - def _(_) -> None: - gui_timestep.disabled = gui_playing.value - gui_next_frame.disabled = gui_playing.value - gui_prev_frame.disabled = gui_playing.value - - # Set the framerate when we click one of the options. - @gui_framerate_options.on_click - def _(_) -> None: - gui_framerate.value = int(gui_framerate_options.value) - - prev_timestep = gui_timestep.value - - # Toggle frame visibility when the timestep slider changes. - @gui_timestep.on_update - def _(_) -> None: - nonlocal current_frame_handle, current_frame_handle, prev_timestep - current_timestep = gui_timestep.value - - start = time.time() - # with self.server.atomic(): - mew_frame_handle = self.server.scene.add_frame(f"/frame{gui_timestep.value}", show_axes=False) - if BOUNDING_BOX_TYPE == "mesh": - meshes = [] - for _, mesh in get_bounding_box_meshes(scene, gui_timestep.value).items(): - meshes.append(mesh) - self.server.scene.add_mesh_trimesh( - f"/frame{gui_timestep.value}/detections", - trimesh.util.concatenate(meshes), - visible=True, - ) - elif BOUNDING_BOX_TYPE == "lines": - lines, colors = get_bounding_box_outlines(scene, gui_timestep.value) - self.server.scene.add_line_segments( - f"/frame{gui_timestep.value}/detections", - points=lines, - colors=colors, - line_width=LINE_WIDTH, - ) - else: - raise ValueError(f"Unknown bounding box type: {BOUNDING_BOX_TYPE}") - - current_frame_handle.remove() - current_frame_handle = mew_frame_handle - - for camera_type in VISUALIZE_CAMERA_GUI: - camera = get_camera_if_available(scene, camera_type, gui_timestep.value) - if camera is not None: - camera_gui_handles[camera_type].image = camera.image - - for camera_type in VISUALIZE_CAMERA_FRUSTUM: - camera = get_camera_if_available(scene, camera_type, gui_timestep.value) - if camera is not None: - camera_position, camera_quaternion = get_camera_values(scene, camera, gui_timestep.value) - camera_frustum_handles[camera_type].position = camera_position.array - camera_frustum_handles[camera_type].wxyz = camera_quaternion.q - camera_frustum_handles[camera_type].image = camera.image - - if LIDAR_AVAILABLE: - try: - points, colors = get_lidar_points(scene, gui_timestep.value, LIDAR_TYPES) - except Exception as e: - print(f"Error getting lidar points: {e}") - points = np.zeros((0, 3)) - colors = np.zeros((0, 3)) - - gui_lidar.points = points - gui_lidar.colors = colors - - prev_timestep = current_timestep - - rendering_time = time.time() - start - sleep_time = 1.0 / gui_framerate.value - rendering_time - time.sleep(max(sleep_time, 0.0)) - self.server.flush() # Optional! - - # Load in frames. 
- current_frame_handle = self.server.scene.add_frame(f"/frame{gui_timestep.value}", show_axes=False) - self.server.scene.add_frame("/map", show_axes=False) - - camera_gui_handles: Dict[CameraType, viser.GuiImageHandle] = {} - camera_frustum_handles: Dict[CameraType, viser.CameraFrustumHandle] = {} - - for camera_type in VISUALIZE_CAMERA_GUI: - camera = get_camera_if_available(scene, camera_type, gui_timestep.value) - if camera is not None: - with self.server.gui.add_folder(f"Camera {camera_type.serialize()}"): - camera_gui_handles[camera_type] = self.server.gui.add_image( - image=camera.image, - label=camera_type.serialize(), - format="jpeg", - ) - - for camera_type in VISUALIZE_CAMERA_FRUSTUM: - camera = get_camera_if_available(scene, camera_type, gui_timestep.value) - if camera is not None: - camera_position, camera_quaternion = get_camera_values(scene, camera, gui_timestep.value) - camera_frustum_handles[camera_type] = self.server.scene.add_camera_frustum( - f"camera_frustum_{camera_type.serialize()}", - fov=camera.metadata.fov_y, - aspect=camera.metadata.aspect_ratio, - scale=CAMERA_SCALE, - image=camera.image, - position=camera_position.array, - wxyz=camera_quaternion.q, - ) - - if LIDAR_AVAILABLE: - try: - points, colors = get_lidar_points(scene, gui_timestep.value, LIDAR_TYPES) - except Exception as e: - print(f"Error getting lidar points: {e}") - points = np.zeros((0, 3)) - colors = np.zeros((0, 3)) - - gui_lidar = self.server.scene.add_point_cloud( - name="LiDAR", - points=points, - colors=colors, - point_size=LIDAR_POINT_SIZE, - point_shape="circle", - ) - - if MAP_AVAILABLE: - for name, mesh in get_map_meshes(scene).items(): - self.server.scene.add_mesh_trimesh(f"/map/{name}", mesh, visible=True) - - # centerlines, __, __, road_edges = get_map_lines(scene) - # for i, centerline in enumerate(centerlines): - # self.server.scene.add_line_segments( - # "/map/centerlines", - # centerlines, - # colors=[[BLACK.rgb]], - # line_width=LINE_WIDTH, - # ) - # self.server.scene.add_line_segments( - # "/map/left_boundary", - # left_boundaries, - # colors=[[TAB_10[2].rgb]], - # line_width=LINE_WIDTH, - # ) - # self.server.scene.add_line_segments( - # "/map/right_boundary",clear - # right_boundaries, - # colors=[[TAB_10[3].rgb]], - # line_width=LINE_WIDTH, - # ) - # print(centerlines.shape, road_edges.shape) - # self.server.scene.add_line_segments( - # "/map/road_edges", - # road_edges, - # colors=[[BLACK.rgb]], - # line_width=LINE_WIDTH, - # ) - - # Playback update loop. - prev_timestep = gui_timestep.value - while server_playing: - # Update the timestep if we're playing. - if gui_playing.value: - gui_timestep.value = (gui_timestep.value + 1) % num_frames - - self.server.flush() - self.next() diff --git a/d123/common/visualization/viser/viser_viewer.py b/d123/common/visualization/viser/viser_viewer.py index 8cf1ec80..c981ea89 100644 --- a/d123/common/visualization/viser/viser_viewer.py +++ b/d123/common/visualization/viser/viser_viewer.py @@ -94,7 +94,7 @@ def __init__( self, scenes: List[AbstractScene], viser_config: ViserConfig = ViserConfig(), - scene_index: int = 0.0, + scene_index: int = 0, ) -> None: assert len(scenes) > 0, "At least one scene must be provided." 
diff --git a/d123/datasets/av2/av2_data_converter.py b/d123/datasets/av2/av2_data_converter.py index 433b731c..48fc8cb9 100644 --- a/d123/datasets/av2/av2_data_converter.py +++ b/d123/datasets/av2/av2_data_converter.py @@ -23,10 +23,6 @@ from d123.datatypes.detections.detection import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper from d123.datatypes.detections.detection_types import DetectionType from d123.datatypes.scene.scene_metadata import LogMetadata -<<<<<<< HEAD -from d123.datatypes.sensors.camera import PinholeCameraMetadata, CameraType, camera_metadata_dict_to_json -from d123.datatypes.sensors.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json -======= from d123.datatypes.sensors.camera.pinhole_camera import ( PinholeCameraMetadata, PinholeCameraType, @@ -34,7 +30,6 @@ PinholeIntrinsics, ) from d123.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType ->>>>>>> dev_v0.0.7 from d123.datatypes.time.time_point import TimePoint from d123.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 from d123.datatypes.vehicle_state.vehicle_parameters import ( @@ -216,20 +211,12 @@ def convert_av2_log_to_arrow( return [] -<<<<<<< HEAD -def get_av2_camera_metadata(log_path: Path) -> Dict[CameraType, PinholeCameraMetadata]: -======= def get_av2_camera_metadata(log_path: Path) -> Dict[PinholeCameraType, PinholeCameraMetadata]: ->>>>>>> dev_v0.0.7 intrinsics_file = log_path / "calibration" / "intrinsics.feather" intrinsics_df = pd.read_feather(intrinsics_file) -<<<<<<< HEAD - camera_metadata: Dict[CameraType, PinholeCameraMetadata] = {} -======= camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {} ->>>>>>> dev_v0.0.7 for _, row in intrinsics_df.iterrows(): row = row.to_dict() camera_type = AV2_CAMERA_TYPE_MAPPING[row["sensor_name"]] diff --git a/d123/datasets/kitti_360/kitti_360_data_converter.py b/d123/datasets/kitti_360/kitti_360_data_converter.py index 0616dcfa..69ed6f8b 100644 --- a/d123/datasets/kitti_360/kitti_360_data_converter.py +++ b/d123/datasets/kitti_360/kitti_360_data_converter.py @@ -20,17 +20,33 @@ import logging from pyquaternion import Quaternion -from nuplan.planning.utils.multithreading.worker_utils import WorkerPool, worker_map - -from d123.datatypes.detections.detection_types import DetectionType -from d123.datatypes.sensors.camera import PinholeCameraMetadata, FisheyeMEICameraMetadata, CameraType, camera_metadata_dict_to_json -from d123.datatypes.sensors.lidar import LiDARMetadata, LiDARType, lidar_metadata_dict_to_json -from d123.datatypes.sensors.lidar_index import Kitti360LidarIndex +from d123.common.multithreading.worker_utils import WorkerPool, worker_map + +from d123.datatypes.detections.detection import ( + BoxDetectionMetadata, + BoxDetectionSE3, + BoxDetectionWrapper, +) +from d123.datatypes.sensors.camera.pinhole_camera import ( + PinholeCameraMetadata, + PinholeCameraType, + PinholeDistortion, + PinholeIntrinsics, +) +from d123.datatypes.sensors.camera.fisheye_mei_camera import ( + FisheyeMEICameraMetadata, + FisheyeMEICameraType, + FisheyeMEIDistortion, + FisheyeMEIProjection, +) +from d123.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType +from d123.datasets.utils.sensor.lidar_index_registry import Kitti360LidarIndex from d123.datatypes.time.time_point import TimePoint from d123.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index from d123.datatypes.vehicle_state.vehicle_parameters import 
get_kitti360_station_wagon_parameters,rear_axle_se3_to_center_se3
 from d123.common.utils.arrow_helper import open_arrow_table, write_arrow_table
 from d123.datasets.raw_data_converter import DataConverterConfig, RawDataConverter
+from d123.datasets.utils.arrow_ipc_writer import ArrowLogWriter
 from d123.datatypes.scene.scene_metadata import LogMetadata
 from d123.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic
 from d123.datasets.kitti_360.labels import KIITI360_DETECTION_NAME_DICT,kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT
@@ -44,10 +60,10 @@
 KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"])
 
 KITTI360_CAMERA_TYPES = {
-    CameraType.CAM_STEREO_L: "image_00",
-    CameraType.CAM_STEREO_R: "image_01",
-    CameraType.CAM_L1: "image_02",
-    CameraType.CAM_R1: "image_03",
+    PinholeCameraType.CAM_STEREO_L: "image_00",
+    PinholeCameraType.CAM_STEREO_R: "image_01",
+    FisheyeMEICameraType.CAM_L: "image_02",
+    FisheyeMEICameraType.CAM_R: "image_03",
 }
 
 DIR_2D_RAW = "data_2d_raw"
@@ -192,7 +208,10 @@ def convert_kitti360_map_to_gpkg(
     split: str = log_info["split"]
     log_name = log_path.stem
-    map_path = data_converter_config.output_path / "maps" / split / f"kitti360_{log_name}.gpkg"
+    D123_MAPS_ROOT = Path(os.environ["D123_MAPS_ROOT"])
+    map_path = D123_MAPS_ROOT / split / f"{log_name}.gpkg"
+    #map_path = data_converter_config.output_path / "maps" / split / f"{log_name}.gpkg"
+    map_path.parent.mkdir(parents=True, exist_ok=True)
     if data_converter_config.force_map_conversion or not map_path.exists():
         map_path.unlink(missing_ok=True)
         convert_kitti360_map(log_name, map_path)
 
@@ -216,65 +235,32 @@ def convert_kitti360_log_to_arrow(
 
     if not log_file_path.parent.exists():
         log_file_path.parent.mkdir(parents=True, exist_ok=True)
 
-    metadata = LogMetadata(
+    log_metadata = LogMetadata(
         dataset="kitti360",
+        split=split,
         log_name=log_name,
         location=log_name,
         timestep_seconds=KITTI360_DT,
+        vehicle_parameters=get_kitti360_station_wagon_parameters(),
+        camera_metadata=get_kitti360_camera_metadata(),
+        lidar_metadata=get_kitti360_lidar_metadata(),
         map_has_z=True,
+        map_is_local=True,
     )
-    vehicle_parameters = get_kitti360_station_wagon_parameters()
-    camera_metadata = get_kitti360_camera_metadata()
-    lidar_metadata = get_kitti360_lidar_metadata()
-
-    schema_column_list = [
-        ("token", pa.string()),
-        ("timestamp", pa.int64()),
-        ("detections_state", pa.list_(pa.list_(pa.float64(), len(BoundingBoxSE3Index)))),
-        ("detections_velocity", pa.list_(pa.list_(pa.float64(), len(Vector3DIndex)))),
-        ("detections_token", pa.list_(pa.string())),
-        ("detections_type", pa.list_(pa.int16())),
-        ("ego_states", pa.list_(pa.float64(), len(EgoStateSE3Index))),
-        ("traffic_light_ids", pa.list_(pa.int64())),
-        ("traffic_light_types", pa.list_(pa.int16())),
-        ("scenario_tag", pa.list_(pa.string())),
-        ("route_lane_group_ids", pa.list_(pa.int64())),
-    ]
-    if data_converter_config.lidar_store_option is not None:
-        for lidar_type in lidar_metadata.keys():
-            if data_converter_config.lidar_store_option == "path":
-                schema_column_list.append((lidar_type.serialize(), pa.string()))
-            elif data_converter_config.lidar_store_option == "binary":
-                raise NotImplementedError("Binary lidar storage is not implemented.")
-
-    if data_converter_config.camera_store_option is not None:
-        for camera_type in camera_metadata.keys():
-            if data_converter_config.camera_store_option == "path":
-                schema_column_list.append((camera_type.serialize(), pa.string()))
-                schema_column_list.append(
(f"{camera_type.serialize()}_extrinsic", pa.list_(pa.float64(), 4 * 4)) - ) - elif data_converter_config.camera_store_option == "binary": - raise NotImplementedError("Binary camera storage is not implemented.") - - recording_schema = pa.schema(schema_column_list) - recording_schema = recording_schema.with_metadata( - { - "log_metadata": json.dumps(asdict(metadata)), - "vehicle_parameters": json.dumps(asdict(vehicle_parameters)), - "camera_metadata": camera_metadata_dict_to_json(camera_metadata), - "lidar_metadata": lidar_metadata_dict_to_json(lidar_metadata), - } + log_writer = ArrowLogWriter( + log_path=log_file_path, + data_converter_config=data_converter_config, + log_metadata=log_metadata, ) - _write_recording_table(log_name, recording_schema, log_file_path, data_converter_config) + _write_recording_table(log_name, log_writer, log_file_path, data_converter_config) gc.collect() return [] -def get_kitti360_camera_metadata() -> Dict[CameraType, Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]]: +def get_kitti360_camera_metadata() -> Dict[Union[PinholeCameraType, FisheyeMEICameraType], Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]]: persp = PATH_CALIB_ROOT / "perspective.txt" @@ -300,24 +286,40 @@ def get_kitti360_camera_metadata() -> Dict[CameraType, Union[PinholeCameraMetada fisheye03 = _readYAMLFile(fisheye_camera03_path) fisheye_result = {"image_02": fisheye02, "image_03": fisheye03} - log_cam_infos: Dict[str, Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]] = {} + log_cam_infos: Dict[Union[PinholeCameraType, FisheyeMEICameraType], Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]] = {} for cam_type, cam_name in KITTI360_CAMERA_TYPES.items(): if cam_name in ["image_00", "image_01"]: log_cam_infos[cam_type] = PinholeCameraMetadata( camera_type=cam_type, width=persp_result[cam_name]["wh"][0], height=persp_result[cam_name]["wh"][1], - intrinsic=np.array(persp_result[cam_name]["intrinsic"]), - distortion=np.array(persp_result[cam_name]["distortion"]), + intrinsics=PinholeIntrinsics.from_camera_matrix(np.array(persp_result[cam_name]["intrinsic"])), + distortion=PinholeDistortion.from_array(np.array(persp_result[cam_name]["distortion"])), ) elif cam_name in ["image_02","image_03"]: + distortion_params = fisheye_result[cam_name]["distortion_parameters"] + distortion = FisheyeMEIDistortion( + k1=distortion_params['k1'], + k2=distortion_params['k2'], + p1=distortion_params['p1'], + p2=distortion_params['p2'], + ) + + projection_params = fisheye_result[cam_name]["projection_parameters"] + projection = FisheyeMEIProjection( + gamma1=projection_params['gamma1'], + gamma2=projection_params['gamma2'], + u0=projection_params['u0'], + v0=projection_params['v0'], + ) + log_cam_infos[cam_type] = FisheyeMEICameraMetadata( camera_type=cam_type, width=fisheye_result[cam_name]["image_width"], height=fisheye_result[cam_name]["image_height"], - mirror_parameters=fisheye_result[cam_name]["mirror_parameters"], - distortion=np.array(fisheye_result[cam_name]["distortion_parameters"]), - projection_parameters= np.array(fisheye_result[cam_name]["projection_parameters"]), + mirror_parameter=fisheye_result[cam_name]["mirror_parameters"], + distortion=distortion, + projection=projection, ) return log_cam_infos @@ -347,65 +349,45 @@ def _readYAMLFile(fileName:Path) -> Dict[str, Any]: def get_kitti360_lidar_metadata() -> Dict[LiDARType, LiDARMetadata]: metadata: Dict[LiDARType, LiDARMetadata] = {} extrinsic = get_lidar_extrinsic() + extrinsic_state_se3 = 
StateSE3.from_transformation_matrix(extrinsic) metadata[LiDARType.LIDAR_TOP] = LiDARMetadata( lidar_type=LiDARType.LIDAR_TOP, lidar_index=Kitti360LidarIndex, - extrinsic=extrinsic, + extrinsic=extrinsic_state_se3, ) return metadata def _write_recording_table( log_name: str, - recording_schema: pa.Schema, + log_writer: ArrowLogWriter, log_file_path: Path, data_converter_config: DataConverterConfig ) -> None: ts_list: List[TimePoint] = _read_timestamps(log_name) ego_state_all, valid_timestamp = _extract_ego_state_all(log_name) - ego_states_xyz = np.array([ego_state[:3] for ego_state in ego_state_all],dtype=np.float64) - detections_states,detections_velocity,detections_tokens,detections_types = _extract_detections(log_name,len(ts_list),ego_states_xyz,valid_timestamp) - - with pa.OSFile(str(log_file_path), "wb") as sink: - with pa.ipc.new_file(sink, recording_schema) as writer: - for idx in range(len(valid_timestamp)): - valid_idx = valid_timestamp[idx] - row_data = { - "token": [create_token(f"{log_name}_{idx}")], - "timestamp": [ts_list[valid_idx].time_us], - "detections_state": [detections_states[valid_idx]], - "detections_velocity": [detections_velocity[valid_idx]], - "detections_token": [detections_tokens[valid_idx]], - "detections_type": [detections_types[valid_idx]], - "ego_states": [ego_state_all[idx]], - "traffic_light_ids": [[]], - "traffic_light_types": [[]], - "scenario_tag": [['unknown']], - "route_lane_group_ids": [[]], - } - - if data_converter_config.lidar_store_option is not None: - lidar_data_dict = _extract_lidar(log_name, valid_idx, data_converter_config) - for lidar_type, lidar_data in lidar_data_dict.items(): - if lidar_data is not None: - row_data[lidar_type.serialize()] = [lidar_data] - else: - row_data[lidar_type.serialize()] = [None] - - if data_converter_config.camera_store_option is not None: - camera_data_dict = _extract_cameras(log_name, valid_idx, data_converter_config) - for camera_type, camera_data in camera_data_dict.items(): - if camera_data is not None: - row_data[camera_type.serialize()] = [camera_data[0]] - row_data[f"{camera_type.serialize()}_extrinsic"] = [camera_data[1]] - else: - row_data[camera_type.serialize()] = [None] - row_data[f"{camera_type.serialize()}_extrinsic"] = [None] - - batch = pa.record_batch(row_data, schema=recording_schema) - writer.write_batch(batch) - - del batch + ego_states_xyz = np.array([ego_state.center.array[:3] for ego_state in ego_state_all],dtype=np.float64) + box_detection_wrapper_all = _extract_detections(log_name,len(ts_list),ego_states_xyz,valid_timestamp) + logging.info(f"Number of valid timestamps with ego states: {len(valid_timestamp)}") + for idx in range(len(valid_timestamp)): + valid_idx = valid_timestamp[idx] + + cameras = _extract_cameras(log_name, valid_idx, data_converter_config) + lidars = _extract_lidar(log_name, valid_idx, data_converter_config) + + log_writer.add_row( + token=create_token(f"{log_name}_{idx}"), + timestamp=ts_list[valid_idx], + ego_state=ego_state_all[idx], + box_detections=box_detection_wrapper_all[valid_idx], + traffic_lights=None, + cameras=cameras, + lidars=lidars, + scenario_tags=None, + route_lane_group_ids=None, + ) + + log_writer.close() if SORT_BY_TIMESTAMP: recording_table = open_arrow_table(log_file_path) @@ -449,7 +431,7 @@ def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]: return tps return None -def _extract_ego_state_all(log_name: str) -> Tuple[List[List[float]], List[int]]: +def _extract_ego_state_all(log_name: str) -> Tuple[List[EgoStateSE3], List[int]]: 
ego_state_all: List[List[float]] = [] @@ -518,7 +500,7 @@ def _extract_ego_state_all(log_name: str) -> Tuple[List[List[float]], List[int]] dynamic_state_se3=dynamic_state, vehicle_parameters=vehicle_parameters, timepoint=None, - ).array.tolist() + ) ) return ego_state_all, valid_timestamp @@ -527,7 +509,7 @@ def _extract_detections( ts_len: int, ego_states_xyz: np.ndarray, valid_timestamp: List[int], -) -> Tuple[List[List[float]], List[List[float]], List[str], List[int]]: +) -> List[BoxDetectionWrapper]: detections_states: List[List[List[float]]] = [[] for _ in range(ts_len)] detections_velocity: List[List[List[float]]] = [[] for _ in range(ts_len)] @@ -549,6 +531,7 @@ def _extract_detections( with open(detection_preprocess_path, "rb") as f: detection_preprocess_result = pickle.load(f) static_records_dict = {record_item["global_id"]: record_item for record_item in detection_preprocess_result["static"]} + logging.info(f"Loaded detection preprocess data from {detection_preprocess_path}") else: detection_preprocess_result = None @@ -575,9 +558,9 @@ def _extract_detections( for record in obj.valid_frames["records"]: frame = record["timestamp"] detections_states[frame].append(obj.get_state_array()) - detections_velocity[frame].append([0.0, 0.0, 0.0]) + detections_velocity[frame].append(np.array([0.0, 0.0, 0.0])) detections_tokens[frame].append(str(obj.globalID)) - detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[obj.name])) + detections_types[frame].append(KIITI360_DETECTION_NAME_DICT[obj.name]) else: global_ID = obj.globalID dynamic_objs[global_ID].append(obj) @@ -614,9 +597,35 @@ def _extract_detections( detections_states[frame].append(obj.get_state_array()) detections_velocity[frame].append(vel) detections_tokens[frame].append(str(obj.globalID)) - detections_types[frame].append(int(KIITI360_DETECTION_NAME_DICT[obj.name])) - - return detections_states, detections_velocity, detections_tokens, detections_types + detections_types[frame].append(KIITI360_DETECTION_NAME_DICT[obj.name]) + + box_detection_wrapper_all: List[BoxDetectionWrapper] = [] + for frame in range(ts_len): + box_detections: List[BoxDetectionSE3] = [] + for state, velocity, token, detection_type in zip( + detections_states[frame], + detections_velocity[frame], + detections_tokens[frame], + detections_types[frame], + ): + if state is None: + break + detection_metadata = BoxDetectionMetadata( + detection_type=detection_type, + timepoint=None, + track_token=token, + confidence=None, + ) + bounding_box_se3 = BoundingBoxSE3.from_array(state) + velocity_vector = Vector3D.from_array(velocity) + box_detection = BoxDetectionSE3( + metadata=detection_metadata, + bounding_box_se3=bounding_box_se3, + velocity=velocity_vector, + ) + box_detections.append(box_detection) + box_detection_wrapper_all.append(BoxDetectionWrapper(box_detections=box_detections)) + return box_detection_wrapper_all def _extract_lidar(log_name: str, idx: int, data_converter_config: DataConverterConfig) -> Dict[LiDARType, Optional[str]]: @@ -637,9 +646,9 @@ def _extract_lidar(log_name: str, idx: int, data_converter_config: DataConverter def _extract_cameras( log_name: str, idx: int, data_converter_config: DataConverterConfig -) -> Dict[CameraType, Optional[str]]: +) -> Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]]: - camera_dict: Dict[str, Union[str, bytes]] = {} + camera_dict: Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]] = {} for camera_type, 
cam_dir_name in KITTI360_CAMERA_TYPES.items(): if cam_dir_name in ["image_00", "image_01"]: img_path_png = PATH_2D_RAW_ROOT / log_name / cam_dir_name / "data_rect" / f"{idx:010d}.png" @@ -663,11 +672,13 @@ def _extract_cameras( if img_path_png.exists(): if data_converter_config.camera_store_option == "path": - camera_data = str(img_path_png), cam2pose.flatten().tolist() + camera_data = str(img_path_png) elif data_converter_config.camera_store_option == "binary": with open(img_path_png, "rb") as f: - camera_data = f.read(), cam2pose + camera_data = f.read() else: - camera_data = None, cam2pose.flatten().tolist() - camera_dict[camera_type] = camera_data + camera_data = None + + camera_extrinsic = StateSE3.from_transformation_matrix(cam2pose) + camera_dict[camera_type] = camera_data, camera_extrinsic return camera_dict diff --git a/d123/datasets/kitti_360/load_sensor.py b/d123/datasets/kitti_360/load_sensor.py index 7ca4489a..fa206526 100644 --- a/d123/datasets/kitti_360/load_sensor.py +++ b/d123/datasets/kitti_360/load_sensor.py @@ -3,7 +3,7 @@ import numpy as np import logging -from d123.datatypes.sensors.lidar import LiDAR, LiDARMetadata +from d123.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata def load_kitti360_lidar_from_path(filepath: Path, lidar_metadata: LiDARMetadata) -> LiDAR: @@ -20,7 +20,7 @@ def load_kitti360_lidar_from_path(filepath: Path, lidar_metadata: LiDARMetadata) ones = np.ones((xyz.shape[0], 1), dtype=pcd.dtype) points_h = np.concatenate([xyz, ones], axis=1) #[N,4] - transformed_h = lidar_metadata.extrinsic @ points_h.T #[4,N] + transformed_h = lidar_metadata.extrinsic.transformation_matrix @ points_h.T #[4,N] transformed_xyz = transformed_h[:3, :] # (3,N) diff --git a/d123/datasets/nuplan/nuplan_data_converter.py b/d123/datasets/nuplan/nuplan_data_converter.py index 93da2a8e..7beb3f73 100644 --- a/d123/datasets/nuplan/nuplan_data_converter.py +++ b/d123/datasets/nuplan/nuplan_data_converter.py @@ -234,15 +234,18 @@ def convert_nuplan_log_to_arrow( return [] -def get_nuplan_camera_metadata(log_path: Path) -> Dict[CameraType, CameraMetadata]: +def get_nuplan_camera_metadata(log_path: Path) -> Dict[PinholeCameraType, PinholeCameraMetadata]: - def _get_camera_metadata(camera_type: CameraType) -> CameraMetadata: + def _get_camera_metadata(camera_type: PinholeCameraType) -> PinholeCameraMetadata: cam = list(get_cameras(log_path, [str(NUPLAN_CAMERA_TYPES[camera_type].value)]))[0] - intrinsic = np.array(pickle.loads(cam.intrinsic)) - rotation = np.array(pickle.loads(cam.rotation)) - rotation = Quaternion(rotation).rotation_matrix - distortion = np.array(pickle.loads(cam.distortion)) - return CameraMetadata( + + intrinsics_camera_matrix = np.array(pickle.loads(cam.intrinsic), dtype=np.float64) # array of shape (3, 3) + intrinsic = PinholeIntrinsics.from_camera_matrix(intrinsics_camera_matrix) + + distortion_array = np.array(pickle.loads(cam.distortion), dtype=np.float64) # array of shape (5,) + distortion = PinholeDistortion.from_array(distortion_array, copy=False) + + return PinholeCameraMetadata( camera_type=camera_type, width=cam.width, height=cam.height, diff --git a/d123/datasets/utils/arrow_ipc_writer.py b/d123/datasets/utils/arrow_ipc_writer.py index fa0ed439..4e2f491f 100644 --- a/d123/datasets/utils/arrow_ipc_writer.py +++ b/d123/datasets/utils/arrow_ipc_writer.py @@ -8,6 +8,7 @@ from d123.datatypes.scene.arrow.utils.arrow_metadata_utils import add_log_metadata_to_arrow_schema from d123.datatypes.scene.scene_metadata import LogMetadata from 
d123.datatypes.sensors.camera.pinhole_camera import PinholeCameraType +from d123.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICameraType from d123.datatypes.sensors.lidar.lidar import LiDARType from d123.datatypes.time.time_point import TimePoint from d123.datatypes.vehicle_state.ego_state import EgoStateSE3, EgoStateSE3Index @@ -119,7 +120,7 @@ def add_row( ego_state: Optional[EgoStateSE3] = None, box_detections: Optional[BoxDetectionWrapper] = None, traffic_lights: Optional[TrafficLightDetectionWrapper] = None, - cameras: Optional[Dict[PinholeCameraType, Tuple[Any, ...]]] = None, + cameras: Optional[Dict[Union[PinholeCameraType, FisheyeMEICameraType], Tuple[Any, ...]]] = None, lidars: Optional[Dict[LiDARType, Any]] = None, scenario_tags: Optional[List[str]] = None, route_lane_group_ids: Optional[List[int]] = None, diff --git a/d123/datatypes/scene/abstract_scene.py b/d123/datatypes/scene/abstract_scene.py index c9fbc1af..8e835afb 100644 --- a/d123/datatypes/scene/abstract_scene.py +++ b/d123/datatypes/scene/abstract_scene.py @@ -1,12 +1,13 @@ from __future__ import annotations import abc -from typing import List, Optional +from typing import List, Optional, Union from d123.datatypes.detections.detection import BoxDetectionWrapper, DetectionRecording, TrafficLightDetectionWrapper from d123.datatypes.maps.abstract_map import AbstractMap from d123.datatypes.scene.scene_metadata import LogMetadata, SceneExtractionMetadata from d123.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType +from d123.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType from d123.datatypes.sensors.lidar.lidar import LiDAR, LiDARType from d123.datatypes.time.time_point import TimePoint from d123.datatypes.vehicle_state.ego_state import EgoStateSE3 @@ -56,7 +57,7 @@ def get_route_lane_group_ids(self, iteration: int) -> Optional[List[int]]: raise NotImplementedError @abc.abstractmethod - def get_camera_at_iteration(self, iteration: int, camera_type: PinholeCameraType) -> Optional[PinholeCamera]: + def get_camera_at_iteration(self, iteration: int, camera_type: Union[PinholeCameraType, FisheyeMEICameraType]) -> Optional[Union[PinholeCamera, FisheyeMEICamera]]: raise NotImplementedError @abc.abstractmethod @@ -81,7 +82,7 @@ def vehicle_parameters(self) -> VehicleParameters: return self.log_metadata.vehicle_parameters @property - def available_camera_types(self) -> List[PinholeCameraType]: + def available_camera_types(self) -> List[Union[PinholeCameraType, FisheyeMEICameraType]]: return list(self.log_metadata.camera_metadata.keys()) @property diff --git a/d123/datatypes/scene/arrow/arrow_scene.py b/d123/datatypes/scene/arrow/arrow_scene.py index 8aa595db..86904b6c 100644 --- a/d123/datatypes/scene/arrow/arrow_scene.py +++ b/d123/datatypes/scene/arrow/arrow_scene.py @@ -19,6 +19,7 @@ from d123.datatypes.scene.arrow.utils.arrow_metadata_utils import get_log_metadata_from_arrow from d123.datatypes.scene.scene_metadata import LogMetadata, SceneExtractionMetadata from d123.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType +from d123.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType from d123.datatypes.sensors.lidar.lidar import LiDAR, LiDARType from d123.datatypes.time.time_point import TimePoint from d123.datatypes.vehicle_state.ego_state import EgoStateSE3 @@ -124,8 +125,8 @@ def get_route_lane_group_ids(self, iteration: int) -> Optional[List[int]]: route_lane_group_ids = 
table["route_lane_group_ids"][self._get_table_index(iteration)].as_py() return route_lane_group_ids - def get_camera_at_iteration(self, iteration: int, camera_type: PinholeCameraType) -> Optional[PinholeCamera]: - camera: Optional[PinholeCamera] = None + def get_camera_at_iteration(self, iteration: int, camera_type: Union[PinholeCameraType, FisheyeMEICameraType]) -> Optional[Union[PinholeCamera, FisheyeMEICamera]]: + camera: Optional[Union[PinholeCamera, FisheyeMEICamera]] = None if camera_type in self.available_camera_types: camera = get_camera_from_arrow_table( self._get_recording_table(), diff --git a/d123/datatypes/scene/arrow/utils/arrow_getters.py b/d123/datatypes/scene/arrow/utils/arrow_getters.py index 578560b9..92a68f51 100644 --- a/d123/datatypes/scene/arrow/utils/arrow_getters.py +++ b/d123/datatypes/scene/arrow/utils/arrow_getters.py @@ -2,7 +2,7 @@ import os from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union import cv2 import numpy as np @@ -21,6 +21,7 @@ from d123.datatypes.detections.detection_types import DetectionType from d123.datatypes.scene.scene_metadata import LogMetadata from d123.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType +from d123.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType from d123.datatypes.sensors.lidar.lidar import LiDAR, LiDARType from d123.datatypes.time.time_point import TimePoint from d123.datatypes.vehicle_state.ego_state import EgoStateSE3 @@ -95,9 +96,9 @@ def get_traffic_light_detections_from_arrow_table(arrow_table: pa.Table, index: def get_camera_from_arrow_table( arrow_table: pa.Table, index: int, - camera_type: PinholeCameraType, + camera_type: Union[PinholeCameraType, FisheyeMEICameraType], log_metadata: LogMetadata, -) -> PinholeCamera: +) -> Union[PinholeCamera, FisheyeMEICamera]: camera_name = camera_type.serialize() table_data = arrow_table[f"{camera_name}_data"][index].as_py() @@ -121,11 +122,19 @@ def get_camera_from_arrow_table( else: raise NotImplementedError("Only string file paths for camera data are supported.") - return PinholeCamera( - metadata=log_metadata.camera_metadata[camera_type], - image=image, - extrinsic=extrinsic, - ) + camera_metadata = log_metadata.camera_metadata[camera_type] + if hasattr(camera_metadata, 'mirror_parameter') and camera_metadata.mirror_parameter is not None: + return FisheyeMEICamera( + metadata=camera_metadata, + image=image, + extrinsic=extrinsic, + ) + else: + return PinholeCamera( + metadata=camera_metadata, + image=image, + extrinsic=extrinsic, + ) def get_lidar_from_arrow_table( diff --git a/d123/datatypes/scene/scene_filter.py b/d123/datatypes/scene/scene_filter.py index c05073db..3bbe340f 100644 --- a/d123/datatypes/scene/scene_filter.py +++ b/d123/datatypes/scene/scene_filter.py @@ -1,7 +1,9 @@ from dataclasses import dataclass -from typing import List, Optional +from typing import List, Optional, Union from d123.datatypes.sensors.camera.pinhole_camera import PinholeCameraType +from d123.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICameraType +from d123.datatypes.sensors.camera.utils import get_camera_type_by_value, deserialize_camera_type # TODO: Add more filter options (e.g. 
scene tags, ego movement, or whatever appropriate) @@ -23,7 +25,7 @@ class SceneFilter: duration_s: Optional[float] = 10.0 history_s: Optional[float] = 3.0 - camera_types: Optional[List[PinholeCameraType]] = None + camera_types: Optional[List[Union[PinholeCameraType, FisheyeMEICameraType]]] = None max_num_scenes: Optional[int] = None shuffle: bool = False @@ -34,10 +36,12 @@ def __post_init__(self): camera_types = [] for camera_type in self.camera_types: if isinstance(camera_type, str): - camera_type = PinholeCameraType.deserialize[camera_type] + camera_type = deserialize_camera_type(camera_type) camera_types.append(camera_type) elif isinstance(camera_type, int): - camera_type = PinholeCameraType(camera_type) + camera_type = get_camera_type_by_value(camera_type) + camera_types.append(camera_type) + elif isinstance(camera_type, (PinholeCameraType, FisheyeMEICameraType)): camera_types.append(camera_type) else: raise ValueError(f"Invalid camera type: {camera_type}") diff --git a/d123/datatypes/scene/scene_metadata.py b/d123/datatypes/scene/scene_metadata.py index ae40aa4d..0c243de4 100644 --- a/d123/datatypes/scene/scene_metadata.py +++ b/d123/datatypes/scene/scene_metadata.py @@ -1,10 +1,11 @@ from __future__ import annotations from dataclasses import asdict, dataclass -from typing import Dict +from typing import Dict, Union import d123 from d123.datatypes.sensors.camera.pinhole_camera import PinholeCameraMetadata, PinholeCameraType +from d123.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICameraMetadata, FisheyeMEICameraType from d123.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType from d123.datatypes.vehicle_state.vehicle_parameters import VehicleParameters @@ -19,7 +20,7 @@ class LogMetadata: timestep_seconds: float vehicle_parameters: VehicleParameters - camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] + camera_metadata: Union[Dict[PinholeCameraType, PinholeCameraMetadata], Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata]] lidar_metadata: Dict[LiDARType, LiDARMetadata] map_has_z: bool @@ -30,10 +31,15 @@ class LogMetadata: def from_dict(cls, data_dict: Dict) -> LogMetadata: data_dict["vehicle_parameters"] = VehicleParameters(**data_dict["vehicle_parameters"]) - data_dict["camera_metadata"] = { - PinholeCameraType.deserialize(key): PinholeCameraMetadata.from_dict(value) - for key, value in data_dict.get("camera_metadata", {}).items() - } + camera_metadata = {} + for key, value in data_dict.get("camera_metadata", {}).items(): + if value.get("mirror_parameter") is not None: + camera_type = FisheyeMEICameraType.deserialize(key) + camera_metadata[camera_type] = FisheyeMEICameraMetadata.from_dict(value) + else: + camera_type = PinholeCameraType.deserialize(key) + camera_metadata[camera_type] = PinholeCameraMetadata.from_dict(value) + data_dict["camera_metadata"] = camera_metadata data_dict["lidar_metadata"] = { LiDARType.deserialize(key): LiDARMetadata.from_dict(value) for key, value in data_dict.get("lidar_metadata", {}).items() diff --git a/d123/datatypes/sensors/camera.py b/d123/datatypes/sensors/camera.py deleted file mode 100644 index a9cc209e..00000000 --- a/d123/datatypes/sensors/camera.py +++ /dev/null @@ -1,189 +0,0 @@ -from __future__ import annotations - -import json -from dataclasses import dataclass -from typing import Any, Dict, Union -from abc import ABC, abstractmethod - -import numpy as np -import numpy.typing as npt - -from d123.common.utils.enums import SerialIntEnum - - -class CameraType(SerialIntEnum): - """ - Enum for cameras 
in d123. - """ - - CAM_F0 = 0 - CAM_B0 = 1 - CAM_L0 = 2 - CAM_L1 = 3 - CAM_L2 = 4 - CAM_R0 = 5 - CAM_R1 = 6 - CAM_R2 = 7 - CAM_STEREO_L = 8 - CAM_STEREO_R = 9 - -@dataclass -class CameraMetadata(ABC): - camera_type: CameraType - width: int - height: int - - @abstractmethod - def to_dict(self) -> Dict[str, Any]: - ... - - @classmethod - @abstractmethod - def from_dict(cls, json_dict: Dict[str, Any]) -> CameraMetadata: - ... - -@dataclass -class PinholeCameraMetadata(CameraMetadata): - - intrinsic: npt.NDArray[np.float64] # 3x3 matrix # TODO: don't store matrix but values. - distortion: npt.NDArray[np.float64] # 5x1 vector # TODO: don't store matrix but values. - - def to_dict(self) -> Dict[str, Any]: - # TODO: remove None types. Only a placeholder for now. - return { - "camera_type": int(self.camera_type), - "width": self.width, - "height": self.height, - "intrinsic": self.intrinsic.tolist() if self.intrinsic is not None else None, - "distortion": self.distortion.tolist() if self.distortion is not None else None, - } - - @classmethod - def from_dict(cls, json_dict: Dict[str, Any]) -> PinholeCameraMetadata: - # TODO: remove None types. Only a placeholder for now. - return cls( - camera_type=CameraType(json_dict["camera_type"]), - width=json_dict["width"], - height=json_dict["height"], - intrinsic=np.array(json_dict["intrinsic"]) if json_dict["intrinsic"] is not None else None, - distortion=np.array(json_dict["distortion"]) if json_dict["distortion"] is not None else None, - ) - - @property - def aspect_ratio(self) -> float: - return self.width / self.height - - @property - def fov_x(self) -> float: - """ - Calculates the horizontal field of view (FOV) in radian. - """ - fx = self.intrinsic[0, 0] - fov_x_rad = 2 * np.arctan(self.width / (2 * fx)) - return fov_x_rad - - @property - def fov_y(self) -> float: - """ - Calculates the vertical field of view (FOV) in radian. - """ - fy = self.intrinsic[1, 1] - fov_y_rad = 2 * np.arctan(self.height / (2 * fy)) - return fov_y_rad - - -@dataclass -class FisheyeMEICameraMetadata(CameraMetadata): - - mirror_parameters: float - distortion: npt.NDArray[np.float64] # k1,k2,p1,p2 - projection_parameters: npt.NDArray[np.float64] #gamma1,gamma2,u0,v0 - - def to_dict(self) -> Dict[str, Any]: - # TODO: remove None types. Only a placeholder for now. - return { - "camera_type": int(self.camera_type), - "width": self.width, - "height": self.height, - "mirror_parameters": self.mirror_parameters, - "distortion": self.distortion.tolist() if self.distortion is not None else None, - "projection_parameters": self.projection_parameters.tolist() if self.projection_parameters is not None else None, - } - - @classmethod - def from_dict(cls, json_dict: Dict[str, Any]) -> FisheyeMEICameraMetadata: - # TODO: remove None types. Only a placeholder for now. 
-        return cls(
-            camera_type=CameraType(json_dict["camera_type"]),
-            width=json_dict["width"],
-            height=json_dict["height"],
-            mirror_parameters=json_dict["mirror_parameters"],
-            distortion=np.array(json_dict["distortion"]) if json_dict["distortion"] is not None else None,
-            projection_parameters=np.array(json_dict["projection_parameters"]) if json_dict["projection_parameters"] is not None else None,
-        )
-
-    def cam2image(self, points_3d: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]:
-        ''' camera coordinate to image plane '''
-        norm = np.linalg.norm(points_3d, axis=1)
-
-        x = points_3d[:,0] / norm
-        y = points_3d[:,1] / norm
-        z = points_3d[:,2] / norm
-
-        x /= z+self.mirror_parameters
-        y /= z+self.mirror_parameters
-
-        k1 = self.distortion[0]
-        k2 = self.distortion[1]
-        gamma1 = self.projection_parameters[0]
-        gamma2 = self.projection_parameters[1]
-        u0 = self.projection_parameters[2]
-        v0 = self.projection_parameters[3]
-
-        ro2 = x*x + y*y
-        x *= 1 + k1*ro2 + k2*ro2*ro2
-        y *= 1 + k1*ro2 + k2*ro2*ro2
-
-        x = gamma1*x + u0
-        y = gamma2*y + v0
-
-        return x, y, norm * points_3d[:,2] / np.abs(points_3d[:,2])
-
-def camera_metadata_dict_to_json(camera_metadata: Dict[CameraType, CameraMetadata]) -> Dict[str, Dict[str, Any]]:
-    """
-    Converts a dictionary of CameraMetadata to a JSON-serializable format.
-    :param camera_metadata: Dictionary of CameraMetadata.
-    :return: JSON-serializable dictionary.
-    """
-    camera_metadata_dict = {
-        camera_type.serialize(): metadata.to_dict() for camera_type, metadata in camera_metadata.items()
-    }
-    return json.dumps(camera_metadata_dict)
-
-
-def camera_metadata_dict_from_json(json_dict: Dict[str, Dict[str, Any]]) -> Dict[CameraType, Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]]:
-    """
-    Converts a JSON-serializable dictionary back to a dictionary of CameraMetadata.
-    :param json_dict: JSON-serializable dictionary.
-    :return: Dictionary of CameraMetadata.
-    """
-    camera_metadata_dict = json.loads(json_dict)
-    out: Dict[CameraType, Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]] = {}
-    for camera_type, metadata in camera_metadata_dict.items():
-        cam_type = CameraType.deserialize(camera_type)
-        if isinstance(metadata, dict) and "mirror_parameters" in metadata:
-            out[cam_type] = FisheyeMEICameraMetadata.from_dict(metadata)
-        else:
-            out[cam_type] = PinholeCameraMetadata.from_dict(metadata)
-    return out
-
-@dataclass
-class Camera:
-
-    metadata: PinholeCameraMetadata
-    image: npt.NDArray[np.uint8]
-    extrinsic: npt.NDArray[np.float64]  # 4x4 matrix
-
-    def get_view_matrix(self) -> np.ndarray:
-        # Compute the view matrix based on the camera's position and orientation
-        pass
diff --git a/d123/datatypes/sensors/camera/fisheye_mei_camera.py b/d123/datatypes/sensors/camera/fisheye_mei_camera.py
new file mode 100644
index 00000000..061ef6b1
--- /dev/null
+++ b/d123/datatypes/sensors/camera/fisheye_mei_camera.py
@@ -0,0 +1,189 @@
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from typing import Any, Dict, Optional
+
+import numpy as np
+import numpy.typing as npt
+from enum import IntEnum
+
+from d123.common.utils.enums import SerialIntEnum
+from d123.common.utils.mixin import ArrayMixin
+from d123.geometry.se import StateSE3
+
+# optional TODO: merge FisheyeMEICameraType and PinholeCameraType
+class FisheyeMEICameraType(SerialIntEnum):
+    """
+    Enum for fisheye cameras in d123.
+ """ + #NOTE Use higher values to avoid conflicts with PinholeCameraType + CAM_L = 10 + CAM_R = 11 + + +@dataclass +class FisheyeMEICamera: + + metadata: FisheyeMEICameraMetadata + image: npt.NDArray[np.uint8] + extrinsic: StateSE3 + + +class FisheyeMEIDistortionIndex(IntEnum): + + K1 = 0 + K2 = 1 + P1 = 2 + P2 = 3 + + +class FisheyeMEIDistortion(ArrayMixin): + _array: npt.NDArray[np.float64] + + def __init__(self, k1: float, k2: float, p1: float, p2: float) -> None: + array = np.zeros(len(FisheyeMEIDistortionIndex), dtype=np.float64) + array[FisheyeMEIDistortionIndex.K1] = k1 + array[FisheyeMEIDistortionIndex.K2] = k2 + array[FisheyeMEIDistortionIndex.P1] = p1 + array[FisheyeMEIDistortionIndex.P2] = p2 + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> FisheyeMEIDistortion: + assert array.ndim == 1 + assert array.shape[-1] == len(FisheyeMEIDistortionIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def array(self) -> npt.NDArray[np.float64]: + return self._array + + @property + def k1(self) -> float: + return self._array[FisheyeMEIDistortionIndex.K1] + + @property + def k2(self) -> float: + return self._array[FisheyeMEIDistortionIndex.K2] + + @property + def p1(self) -> float: + return self._array[FisheyeMEIDistortionIndex.P1] + + @property + def p2(self) -> float: + return self._array[FisheyeMEIDistortionIndex.P2] + + +class FisheyeMEIProjectionIndex(IntEnum): + + GAMMA1 = 0 + GAMMA2 = 1 + U0 = 2 + V0 = 3 + + +class FisheyeMEIProjection(ArrayMixin): + _array: npt.NDArray[np.float64] + + def __init__(self, gamma1: float, gamma2: float, u0: float, v0: float) -> None: + array = np.zeros(len(FisheyeMEIProjectionIndex), dtype=np.float64) + array[FisheyeMEIProjectionIndex.GAMMA1] = gamma1 + array[FisheyeMEIProjectionIndex.GAMMA2] = gamma2 + array[FisheyeMEIProjectionIndex.U0] = u0 + array[FisheyeMEIProjectionIndex.V0] = v0 + object.__setattr__(self, "_array", array) + + @classmethod + def from_array(cls, array: npt.NDArray[np.float64], copy: bool = True) -> FisheyeMEIProjection: + assert array.ndim == 1 + assert array.shape[-1] == len(FisheyeMEIProjectionIndex) + instance = object.__new__(cls) + object.__setattr__(instance, "_array", array.copy() if copy else array) + return instance + + @property + def array(self) -> npt.NDArray[np.float64]: + return self._array + + @property + def gamma1(self) -> float: + return self._array[FisheyeMEIProjectionIndex.GAMMA1] + + @property + def gamma2(self) -> float: + return self._array[FisheyeMEIProjectionIndex.GAMMA2] + + @property + def u0(self) -> float: + return self._array[FisheyeMEIProjectionIndex.U0] + + @property + def v0(self) -> float: + return self._array[FisheyeMEIProjectionIndex.V0] + + +@dataclass +class FisheyeMEICameraMetadata: + + camera_type: FisheyeMEICameraType + mirror_parameter: Optional[float] + distortion: Optional[FisheyeMEIDistortion] + projection: Optional[FisheyeMEIProjection] + width: int + height: int + + @classmethod + def from_dict(cls, data_dict: Dict[str, Any]) -> FisheyeMEICameraMetadata: + data_dict["camera_type"] = FisheyeMEICameraType(data_dict["camera_type"]) + data_dict["distortion"] = ( + FisheyeMEIDistortion.from_array(np.array(data_dict["distortion"])) if data_dict["distortion"] is not None else None + ) + data_dict["projection"] = ( + FisheyeMEIProjection.from_array(np.array(data_dict["projection"])) if data_dict["projection"] is 
not None else None + ) + return FisheyeMEICameraMetadata(**data_dict) + + def to_dict(self) -> Dict[str, Any]: + data_dict = asdict(self) + data_dict["camera_type"] = int(self.camera_type) + data_dict["distortion"] = self.distortion.array.tolist() if self.distortion is not None else None + data_dict["projection"] = self.projection.array.tolist() if self.projection is not None else None + return data_dict + + def cam2image(self, points_3d: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: + ''' camera coordinate to image plane ''' + norm = np.linalg.norm(points_3d, axis=1) + + x = points_3d[:,0] / norm + y = points_3d[:,1] / norm + z = points_3d[:,2] / norm + + x /= z+self.mirror_parameter + y /= z+self.mirror_parameter + + if self.distortion is not None: + k1 = self.distortion.k1 + k2 = self.distortion.k2 + else: + k1 = k2 = 0.0 + + if self.projection is not None: + gamma1 = self.projection.gamma1 + gamma2 = self.projection.gamma2 + u0 = self.projection.u0 + v0 = self.projection.v0 + else: + gamma1 = gamma2 = 1.0 + u0 = v0 = 0.0 + + ro2 = x*x + y*y + x *= 1 + k1*ro2 + k2*ro2*ro2 + y *= 1 + k1*ro2 + k2*ro2*ro2 + + x = gamma1*x + u0 + y = gamma2*y + v0 + + return x, y, norm * points_3d[:,2] / np.abs(points_3d[:,2]) diff --git a/d123/datatypes/sensors/camera/utils.py b/d123/datatypes/sensors/camera/utils.py new file mode 100644 index 00000000..9e281e55 --- /dev/null +++ b/d123/datatypes/sensors/camera/utils.py @@ -0,0 +1,36 @@ +from typing import Union + +from d123.datatypes.sensors.camera.pinhole_camera import PinholeCameraType +from d123.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICameraType + +def get_camera_type_by_value(value: int) -> Union[PinholeCameraType, FisheyeMEICameraType]: + """Dynamically determine camera type based on value range.""" + pinhole_values = [member.value for member in PinholeCameraType] + fisheye_values = [member.value for member in FisheyeMEICameraType] + + if value in pinhole_values: + return PinholeCameraType(value) + elif value in fisheye_values: + return FisheyeMEICameraType(value) + else: + raise ValueError(f"Invalid camera type value: {value}. " + f"Valid PinholeCameraType values: {pinhole_values}, " + f"Valid FisheyeMEICameraType values: {fisheye_values}") + +def deserialize_camera_type(camera_str: str) -> Union[PinholeCameraType, FisheyeMEICameraType]: + """Deserialize camera type string to appropriate enum.""" + try: + return PinholeCameraType.deserialize(camera_str) + except (ValueError, KeyError): + pass + + try: + return FisheyeMEICameraType.deserialize(camera_str) + except (ValueError, KeyError): + pass + + pinhole_names = [member.name.lower() for member in PinholeCameraType] + fisheye_names = [member.name.lower() for member in FisheyeMEICameraType] + raise ValueError(f"Unknown camera type: '{camera_str}'. 
" + f"Valid PinholeCameraType names: {pinhole_names}, " + f"Valid FisheyeMEICameraType names: {fisheye_names}") \ No newline at end of file diff --git a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml index 0b4b55f1..19bdd168 100644 --- a/d123/script/config/dataset_conversion/default_dataset_conversion.yaml +++ b/d123/script/config/dataset_conversion/default_dataset_conversion.yaml @@ -14,13 +14,10 @@ defaults: - datasets: # - nuplan_private_dataset # - carla_dataset - - wopd_dataset + # - wopd_dataset # - av2_sensor_dataset -<<<<<<< HEAD - kitti360_dataset -======= - _self_ ->>>>>>> dev_v0.0.7 force_map_conversion: False force_log_conversion: True diff --git a/d123/script/config/datasets/kitti360_dataset.yaml b/d123/script/config/datasets/kitti360_dataset.yaml index c5816a29..be7567bf 100644 --- a/d123/script/config/datasets/kitti360_dataset.yaml +++ b/d123/script/config/datasets/kitti360_dataset.yaml @@ -12,5 +12,24 @@ kitti360_dataset: output_path: ${d123_data_root} force_log_conversion: ${force_log_conversion} force_map_conversion: ${force_map_conversion} + + # Ego + include_ego: true + + # Box Detections + include_box_detections: true + + # Traffic Lights + include_traffic_lights: false + + # Cameras + include_cameras: true camera_store_option: "path" + + # LiDARs + include_lidars: true lidar_store_option: "path" + + # Scenario tag / Route + include_scenario_tags: false + include_route: false diff --git a/d123/script/run_viser.py b/d123/script/run_viser.py index ed05d73f..a6d9c526 100644 --- a/d123/script/run_viser.py +++ b/d123/script/run_viser.py @@ -21,13 +21,8 @@ def main(cfg: DictConfig) -> None: scene_filter = build_scene_filter(cfg.scene_filter) scene_builder = build_scene_builder(cfg.scene_builder) scenes = scene_builder.get_scenes(scene_filter, worker=worker) -<<<<<<< HEAD - - ViserVisualizationServer(scenes=scenes) -======= ViserViewer(scenes=scenes) ->>>>>>> dev_v0.0.7 if __name__ == "__main__": From 24f4dc4528354937bd251c466bcfbabb59e90394 Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Wed, 15 Oct 2025 15:47:12 +0800 Subject: [PATCH 23/32] merge dev_v0.0.7 into kitti360 to align codebase --- ...load_sensor.py => kitti360_load_sensor.py} | 0 .../kitti_360/kitti_360_data_converter.py | 251 ++++++++---------- .../datasets/kitti_360/kitti_360_helper.py | 2 +- .../kitti_360/kitti_360_map_conversion.py | 121 ++++++--- .../kitti_360/preprocess_detection.py | 4 +- .../scene/arrow/utils/arrow_getters.py | 2 +- .../datasets/kitti360_dataset.yaml | 9 +- 7 files changed, 203 insertions(+), 186 deletions(-) rename d123/conversion/datasets/kitti_360/{load_sensor.py => kitti360_load_sensor.py} (100%) rename d123/script/config/{ => conversion}/datasets/kitti360_dataset.yaml (73%) diff --git a/d123/conversion/datasets/kitti_360/load_sensor.py b/d123/conversion/datasets/kitti_360/kitti360_load_sensor.py similarity index 100% rename from d123/conversion/datasets/kitti_360/load_sensor.py rename to d123/conversion/datasets/kitti_360/kitti360_load_sensor.py diff --git a/d123/conversion/datasets/kitti_360/kitti_360_data_converter.py b/d123/conversion/datasets/kitti_360/kitti_360_data_converter.py index 69ed6f8b..833493bd 100644 --- a/d123/conversion/datasets/kitti_360/kitti_360_data_converter.py +++ b/d123/conversion/datasets/kitti_360/kitti_360_data_converter.py @@ -13,7 +13,6 @@ import copy from collections import defaultdict import datetime -import hashlib import 
xml.etree.ElementTree as ET import pyarrow as pa from PIL import Image @@ -40,17 +39,24 @@ FisheyeMEIProjection, ) from d123.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType -from d123.datasets.utils.sensor.lidar_index_registry import Kitti360LidarIndex +from d123.conversion.utils.sensor_utils.lidar_index_registry import Kitti360LidarIndex from d123.datatypes.time.time_point import TimePoint from d123.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index from d123.datatypes.vehicle_state.vehicle_parameters import get_kitti360_station_wagon_parameters,rear_axle_se3_to_center_se3 from d123.common.utils.arrow_helper import open_arrow_table, write_arrow_table -from d123.datasets.raw_data_converter import DataConverterConfig, RawDataConverter -from d123.datasets.utils.arrow_ipc_writer import ArrowLogWriter +from d123.common.utils.uuid import create_deterministic_uuid +from d123.conversion.abstract_dataset_converter import AbstractDatasetConverter +from d123.conversion.dataset_converter_config import DatasetConverterConfig +from d123.conversion.log_writer.abstract_log_writer import AbstractLogWriter +from d123.conversion.log_writer.arrow_log_writer import ArrowLogWriter +from d123.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from d123.datatypes.maps.map_metadata import MapMetadata from d123.datatypes.scene.scene_metadata import LogMetadata -from d123.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic -from d123.datasets.kitti_360.labels import KIITI360_DETECTION_NAME_DICT,kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT -from d123.datasets.kitti_360.kitti_360_map_conversion import convert_kitti360_map +from d123.conversion.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic +from d123.conversion.datasets.kitti_360.labels import KIITI360_DETECTION_NAME_DICT,kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT +from d123.conversion.datasets.kitti_360.kitti_360_map_conversion import ( + convert_kitti360_map_with_writer +) from d123.geometry import BoundingBoxSE3, BoundingBoxSE3Index, StateSE3, Vector3D, Vector3DIndex from d123.geometry.rotation import EulerAngles @@ -91,26 +97,38 @@ } D123_DEVKIT_ROOT = Path(os.environ["D123_DEVKIT_ROOT"]) -PREPOCESS_DETECTION_DIR = D123_DEVKIT_ROOT / "d123" / "dataset" / "dataset_specific" / "kitti_360" / "detection_preprocess" - -def create_token(input_data: str) -> str: - # TODO: Refactor this function. - # TODO: Add a general function to create tokens from arbitrary data. - if isinstance(input_data, str): - input_data = input_data.encode("utf-8") - - hash_obj = hashlib.sha256(input_data) - return hash_obj.hexdigest()[:16] +PREPOCESS_DETECTION_DIR = D123_DEVKIT_ROOT / "d123" / "conversion" / "datasets" / "kitti_360" / "detection_preprocess" +def create_token(split: str, log_name: str, timestamp_us: int, misc: str = None) -> str: + """Create a deterministic UUID-based token for KITTI-360 data. 
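+
+    A sketch of the intended call (the log name is a real KITTI-360 sequence; the
+    returned 32-character hex digest is deterministic for fixed inputs, so the
+    value shown below is only a hypothetical placeholder):
+
+        >>> create_token(split="kitti360", log_name="2013_05_28_drive_0000_sync", timestamp_us=0)
+        '3f2a...'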
+ + :param split: The data split (e.g., "kitti360") + :param log_name: The name of the log without file extension + :param timestamp_us: The timestamp in microseconds + :param misc: Any additional information to include in the UUID, defaults to None + :return: The generated deterministic UUID as hex string + """ + uuid_obj = create_deterministic_uuid(split=split, log_name=log_name, timestamp_us=timestamp_us, misc=misc) + return uuid_obj.hex + +def get_kitti360_map_metadata(split: str, log_name: str) -> MapMetadata: + return MapMetadata( + dataset="kitti360", + split=split, + log_name=log_name, + location=log_name, + map_has_z=True, + map_is_local=True, + ) -class Kitti360DataConverter(RawDataConverter): +class Kitti360DataConverter(AbstractDatasetConverter): def __init__( self, splits: List[str], log_path: Union[Path, str], - data_converter_config: DataConverterConfig, + dataset_converter_config: DatasetConverterConfig, ) -> None: - super().__init__(data_converter_config) + super().__init__(dataset_converter_config) for split in splits: assert ( split in self.get_available_splits() @@ -118,13 +136,17 @@ def __init__( self._splits: List[str] = splits self._log_path: Path = Path(log_path) - self._log_paths_per_split: Dict[str, List[Path]] = self._collect_log_paths() + self._log_paths_and_split: List[Tuple[Path, str]] = self._collect_log_paths() + + self._total_maps = len(self._log_paths_and_split) # Each log has its own map + self._total_logs = len(self._log_paths_and_split) - def _collect_log_paths(self) -> Dict[str, List[Path]]: + def _collect_log_paths(self) -> List[Tuple[Path, str]]: """ - Collect candidate sequence folders under data_2d_raw that end with '_sync', - and keep only those sequences that are present in ALL required modality roots - (e.g., data_2d_semantics, data_3d_raw, etc.). + Collect candidate sequence folders under data_2d_raw that end with '_sync', + and keep only those sequences that are present in ALL required modality roots + (e.g., data_2d_semantics, data_3d_raw, etc.). + Returns a list of (log_path, split) tuples. """ missing_roots = [str(p) for p in KITTI360_REQUIRED_MODALITY_ROOTS.values() if not p.exists()] if missing_roots: @@ -141,7 +163,7 @@ def _has_modality(seq_name: str, modality_name: str, root: Path) -> bool: else: return (root / seq_name).exists() - valid_seqs: List[Path] = [] + log_paths_and_split: List[Tuple[Path, str]] = [] for seq_dir in candidates: seq_name = seq_dir.name missing_modalities = [ @@ -150,115 +172,72 @@ def _has_modality(seq_name: str, modality_name: str, root: Path) -> bool: if not _has_modality(seq_name, modality_name, root) ] if not missing_modalities: - valid_seqs.append(seq_dir) #KITTI360_DATA_ROOT / DIR_2D_RAW /seq_name + log_paths_and_split.append((seq_dir, "kitti360")) else: logging.info( f"Sequence '{seq_name}' skipped: missing modalities {missing_modalities}. 
" f"Root: {KITTI360_DATA_ROOT}" ) - logging.info(f"vadid sequences found: {valid_seqs}") - return {"kitti360": valid_seqs} + + logging.info(f"Valid sequences found: {len(log_paths_and_split)}") + return log_paths_and_split def get_available_splits(self) -> List[str]: """Returns a list of available raw data types.""" return ["kitti360"] - - def convert_maps(self, worker: WorkerPool) -> None: - log_args = [ - { - "log_path": log_path, - "split": split, - } - for split, log_paths in self._log_paths_per_split.items() - for log_path in log_paths - ] - worker_map( - worker, - partial( - convert_kitti360_map_to_gpkg, - data_converter_config=self.data_converter_config - ), - log_args, - ) - - def convert_logs(self, worker: WorkerPool) -> None: - log_args = [ - { - "log_path": log_path, - "split": split, - } - for split, log_paths in self._log_paths_per_split.items() - for log_path in log_paths - ] - - worker_map( - worker, - partial( - convert_kitti360_log_to_arrow, - data_converter_config=self.data_converter_config, - ), - log_args, - ) - -def convert_kitti360_map_to_gpkg( - args: List[Dict[str, Union[List[str], List[Path]]]], data_converter_config: DataConverterConfig -) -> List[Any]: - for log_info in args: - log_path: Path = log_info["log_path"] - split: str = log_info["split"] - log_name = log_path.stem - - D123_MAPS_ROOT = Path(os.environ.get("D123_MAPS_ROOT")) - map_path = D123_MAPS_ROOT / split / f"{log_name}.gpkg" - #map_path = data_converter_config.output_path / "maps" / split / f"{log_name}.gpkg" - map_path.parent.mkdir(parents=True, exist_ok=True) - if data_converter_config.force_map_conversion or not map_path.exists(): - map_path.unlink(missing_ok=True) - convert_kitti360_map(log_name, map_path) - return [] - -def convert_kitti360_log_to_arrow( - args: List[Dict[str, Union[List[str], List[Path]]]], data_converter_config: DataConverterConfig -) -> List[Any]: - for log_info in args: - log_path: Path = log_info["log_path"] - split: str = log_info["split"] - log_name = log_path.stem - - if not log_path.exists(): - raise FileNotFoundError(f"Log path {log_path} does not exist.") - log_file_path = data_converter_config.output_path / split / f"{log_name}.arrow" - - if data_converter_config.force_log_conversion or not log_file_path.exists(): - log_file_path.unlink(missing_ok=True) - if not log_file_path.parent.exists(): - log_file_path.parent.mkdir(parents=True, exist_ok=True) - - log_metadata = LogMetadata( - dataset="kitti360", - split=split, - log_name=log_name, - location=log_name, - timestep_seconds=KITTI360_DT, - vehicle_parameters=get_kitti360_station_wagon_parameters(), - camera_metadata=get_kitti360_camera_metadata(), - lidar_metadata=get_kitti360_lidar_metadata(), - map_has_z=True, - map_is_local=True, - ) - - log_writer = ArrowLogWriter( - log_path=log_file_path, - data_converter_config=data_converter_config, - log_metadata=log_metadata, - ) - - _write_recording_table(log_name, log_writer, log_file_path, data_converter_config) - - gc.collect() - return [] - + def get_number_of_maps(self) -> int: + """Returns the number of available raw data maps for conversion.""" + return self._total_maps + + def get_number_of_logs(self) -> int: + """Returns the number of available raw data logs for conversion.""" + return self._total_logs + + def convert_map(self, map_index: int, map_writer: AbstractMapWriter) -> None: + """ + Convert a single map in raw data format to the uniform 123D format. + :param map_index: The index of the map to convert. 
+ :param map_writer: The map writer to use for writing the converted map. + """ + source_log_path, split = self._log_paths_and_split[map_index] + log_name = source_log_path.stem + + map_metadata = get_kitti360_map_metadata(split, log_name) + + map_needs_writing = map_writer.reset(self.dataset_converter_config, map_metadata) + if map_needs_writing: + convert_kitti360_map_with_writer(log_name, map_writer) + + map_writer.close() + + def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: + """ + Convert a single log in raw data format to the uniform 123D format. + :param log_index: The index of the log to convert. + :param log_writer: The log writer to use for writing the converted log. + """ + source_log_path, split = self._log_paths_and_split[log_index] + log_name = source_log_path.stem + + # Create log metadata + log_metadata = LogMetadata( + dataset="kitti360", + split=split, + log_name=log_name, + location=log_name, + timestep_seconds=KITTI360_DT, + vehicle_parameters=get_kitti360_station_wagon_parameters(), + camera_metadata=get_kitti360_camera_metadata(), + lidar_metadata=get_kitti360_lidar_metadata(), + map_metadata=get_kitti360_map_metadata(split, log_name) + ) + + log_needs_writing = log_writer.reset(self.dataset_converter_config, log_metadata) + if log_needs_writing: + _write_recording_table(log_name, log_writer, self.dataset_converter_config) + + log_writer.close() def get_kitti360_camera_metadata() -> Dict[Union[PinholeCameraType, FisheyeMEICameraType], Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]]: @@ -359,9 +338,8 @@ def get_kitti360_lidar_metadata() -> Dict[LiDARType, LiDARMetadata]: def _write_recording_table( log_name: str, - log_writer: ArrowLogWriter, - log_file_path: Path, - data_converter_config: DataConverterConfig + log_writer: AbstractLogWriter, + data_converter_config: DatasetConverterConfig ) -> None: ts_list: List[TimePoint] = _read_timestamps(log_name) @@ -375,8 +353,7 @@ def _write_recording_table( cameras = _extract_cameras(log_name, valid_idx, data_converter_config) lidars = _extract_lidar(log_name, valid_idx, data_converter_config) - log_writer.add_row( - token=create_token(f"{log_name}_{idx}"), + log_writer.write( timestamp=ts_list[valid_idx], ego_state=ego_state_all[idx], box_detections=box_detection_wrapper_all[valid_idx], @@ -387,12 +364,10 @@ def _write_recording_table( route_lane_group_ids=None, ) - log_writer.close() - - if SORT_BY_TIMESTAMP: - recording_table = open_arrow_table(log_file_path) - recording_table = recording_table.sort_by([("timestamp", "ascending")]) - write_arrow_table(recording_table, log_file_path) + # if SORT_BY_TIMESTAMP: + # recording_table = open_arrow_table(log_file_path) + # recording_table = recording_table.sort_by([("timestamp", "ascending")]) + # write_arrow_table(recording_table, log_file_path) def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]: """ @@ -627,7 +602,7 @@ def _extract_detections( box_detection_wrapper_all.append(BoxDetectionWrapper(box_detections=box_detections)) return box_detection_wrapper_all -def _extract_lidar(log_name: str, idx: int, data_converter_config: DataConverterConfig) -> Dict[LiDARType, Optional[str]]: +def _extract_lidar(log_name: str, idx: int, data_converter_config: DatasetConverterConfig) -> Dict[LiDARType, Optional[str]]: #NOTE special case for sequence 2013_05_28_drive_0002_sync which has no lidar data before frame 4391 if log_name == "2013_05_28_drive_0002_sync" and idx <= 4390: @@ -645,7 +620,7 @@ def _extract_lidar(log_name: str, idx: int, 
data_converter_config: DataConverter return {LiDARType.LIDAR_TOP: lidar} def _extract_cameras( - log_name: str, idx: int, data_converter_config: DataConverterConfig + log_name: str, idx: int, data_converter_config: DatasetConverterConfig ) -> Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]]: camera_dict: Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]] = {} diff --git a/d123/conversion/datasets/kitti_360/kitti_360_helper.py b/d123/conversion/datasets/kitti_360/kitti_360_helper.py index 01c3d1fe..608e4352 100644 --- a/d123/conversion/datasets/kitti_360/kitti_360_helper.py +++ b/d123/conversion/datasets/kitti_360/kitti_360_helper.py @@ -9,7 +9,7 @@ from d123.geometry import BoundingBoxSE3, StateSE3 from d123.geometry.polyline import Polyline3D from d123.geometry.rotation import EulerAngles -from d123.datasets.kitti_360.labels import kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT +from d123.conversion.datasets.kitti_360.labels import kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT import os from pathlib import Path diff --git a/d123/conversion/datasets/kitti_360/kitti_360_map_conversion.py b/d123/conversion/datasets/kitti_360/kitti_360_map_conversion.py index 643a13c6..17f047dc 100644 --- a/d123/conversion/datasets/kitti_360/kitti_360_map_conversion.py +++ b/d123/conversion/datasets/kitti_360/kitti_360_map_conversion.py @@ -11,14 +11,20 @@ from shapely.geometry import LineString import shapely.geometry as geom -from d123.datasets.utils.maps.road_edge.road_edge_2d_utils import ( +from d123.conversion.utils.map_utils.road_edge.road_edge_2d_utils import ( get_road_edge_linear_rings, split_line_geometry_by_max_length, ) -from d123.datatypes.maps.gpkg.utils import get_all_rows_with_value, get_row_with_value +from d123.datatypes.maps.gpkg.gpkg_utils import get_all_rows_with_value, get_row_with_value from d123.datatypes.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType from d123.geometry.polyline import Polyline3D -from d123.datasets.kitti_360.kitti_360_helper import KITTI360_MAP_Bbox3D +from d123.conversion.datasets.kitti_360.kitti_360_helper import KITTI360_MAP_Bbox3D +from d123.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from d123.datatypes.maps.cache.cache_map_objects import ( + CacheGenericDrivable, + CacheWalkway, + CacheRoadEdge, +) MAX_ROAD_EDGE_LENGTH = 100.0 # meters, used to filter out very long road edges @@ -28,7 +34,7 @@ PATH_3D_BBOX_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_BBOX -KIITI360_MAP_BBOX = [ +KITTI360_MAP_BBOX = [ "road", "sidewalk", # "railtrack", @@ -36,39 +42,6 @@ # "driveway", ] -def convert_kitti360_map(log_name: str, map_path: Path) -> None: - - xml_path = PATH_3D_BBOX_ROOT / "train_full" / f"{log_name}.xml" - - if not xml_path.exists(): - raise FileNotFoundError(f"BBox 3D file not found: {xml_path}") - - tree = ET.parse(xml_path) - root = tree.getroot() - objs: List[KITTI360_MAP_Bbox3D] = [] - for child in root: - label = child.find('label').text - if child.find("transform") is None or label not in KIITI360_MAP_BBOX: - continue - obj = KITTI360_MAP_Bbox3D() - obj.parseBbox(child) - objs.append(obj) - - dataframes: Dict[MapLayer, gpd.GeoDataFrame] = {} - dataframes[MapLayer.LANE] = _get_none_data() - dataframes[MapLayer.LANE_GROUP] = _get_none_data() - dataframes[MapLayer.INTERSECTION] = _get_none_data() - dataframes[MapLayer.CROSSWALK] = _get_none_data() - dataframes[MapLayer.WALKWAY] = _extract_walkway_df(objs) - 
dataframes[MapLayer.CARPARK] = _get_none_data() - dataframes[MapLayer.GENERIC_DRIVABLE] = _extract_generic_drivable_df(objs) - dataframes[MapLayer.ROAD_EDGE] = _extract_road_edge_df(objs) - dataframes[MapLayer.ROAD_LINE] = _get_none_data() - - map_file_name = map_path - for layer, gdf in dataframes.items(): - gdf.to_file(map_file_name, layer=layer.serialize(), driver="GPKG", mode="a") - def _get_none_data() -> gpd.GeoDataFrame: ids = [] geometries = [] @@ -85,7 +58,7 @@ def _extract_generic_drivable_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoData continue ids.append(obj.id) outlines.append(obj.vertices.linestring) - geometries.append(geom.Polygon(obj.vertices.array[:, :2])) + geometries.append(geom.Polygon(obj.vertices.array[:, :3])) data = pd.DataFrame({"id": ids, "outline": outlines}) gdf = gpd.GeoDataFrame(data, geometry=geometries) return gdf @@ -99,7 +72,7 @@ def _extract_walkway_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: continue ids.append(obj.id) outlines.append(obj.vertices.linestring) - geometries.append(geom.Polygon(obj.vertices.array[:, :2])) + geometries.append(geom.Polygon(obj.vertices.array[:, :3])) data = pd.DataFrame({"id": ids, "outline": outlines}) gdf = gpd.GeoDataFrame(data, geometry=geometries) @@ -110,7 +83,7 @@ def _extract_road_edge_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: for obj in objs: if obj.label != "road": continue - geometries.append(geom.Polygon(obj.vertices.array[:, :2])) + geometries.append(geom.Polygon(obj.vertices.array[:, :3])) road_edge_linear_rings = get_road_edge_linear_rings(geometries) road_edges = split_line_geometry_by_max_length(road_edge_linear_rings, MAX_ROAD_EDGE_LENGTH) @@ -122,4 +95,70 @@ def _extract_road_edge_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: road_edge_types.append(int(RoadEdgeType.ROAD_EDGE_BOUNDARY)) data = pd.DataFrame({"id": ids, "road_edge_type": road_edge_types}) - return gpd.GeoDataFrame(data, geometry=road_edges) \ No newline at end of file + return gpd.GeoDataFrame(data, geometry=road_edges) + + +def convert_kitti360_map_with_writer(log_name: str, map_writer: AbstractMapWriter) -> None: + """ + Convert KITTI-360 map data using the provided map writer. + This function extracts map data from KITTI-360 XML files and writes them using the map writer interface. 
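+
+    A minimal usage sketch, mirroring the flow in Kitti360DataConverter.convert_map
+    (the writer class name and its constructor arguments are assumptions; any
+    AbstractMapWriter implementation, e.g. a GPKG-backed one, works):
+
+        >>> writer = GpkgMapWriter(...)  # hypothetical construction
+        >>> writer.reset(dataset_converter_config, map_metadata)
+        >>> convert_kitti360_map_with_writer("2013_05_28_drive_0000_sync", writer)
+        >>> writer.close()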
+ + :param log_name: The name of the log to convert + :param map_writer: The map writer to use for writing the converted map + """ + xml_path = PATH_3D_BBOX_ROOT / "train_full" / f"{log_name}.xml" + if not xml_path.exists(): + xml_path = PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml" + + if not xml_path.exists(): + raise FileNotFoundError(f"BBox 3D file not found: {xml_path}") + + tree = ET.parse(xml_path) + root = tree.getroot() + objs: List[KITTI360_MAP_Bbox3D] = [] + + for child in root: + label = child.find('label').text + if child.find("transform") is None or label not in KITTI360_MAP_BBOX: + continue + obj = KITTI360_MAP_Bbox3D() + obj.parseBbox(child) + objs.append(obj) + + + generic_drivable_gdf = _extract_generic_drivable_df(objs) + walkway_gdf = _extract_walkway_df(objs) + road_edge_gdf = _extract_road_edge_df(objs) + + for idx, row in generic_drivable_gdf.iterrows(): + if not row.geometry.is_empty: + map_writer.write_generic_drivable( + CacheGenericDrivable( + object_id=idx, + geometry=row.geometry + ) + ) + + for idx, row in walkway_gdf.iterrows(): + if not row.geometry.is_empty: + map_writer.write_walkway( + CacheWalkway( + object_id=idx, + geometry=row.geometry + ) + ) + + for idx, row in road_edge_gdf.iterrows(): + if not row.geometry.is_empty: + if hasattr(row.geometry, 'exterior'): + road_edge_line = row.geometry.exterior + else: + road_edge_line = row.geometry + + map_writer.write_road_edge( + CacheRoadEdge( + object_id=idx, + road_edge_type=RoadEdgeType.ROAD_EDGE_BOUNDARY, + polyline=Polyline3D.from_linestring(road_edge_line) + ) + ) \ No newline at end of file diff --git a/d123/conversion/datasets/kitti_360/preprocess_detection.py b/d123/conversion/datasets/kitti_360/preprocess_detection.py index 92806736..18bbc125 100644 --- a/d123/conversion/datasets/kitti_360/preprocess_detection.py +++ b/d123/conversion/datasets/kitti_360/preprocess_detection.py @@ -31,8 +31,8 @@ PATH_3D_BBOX_ROOT = KITTI360_DATA_ROOT / DIR_3D_BBOX PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES -from d123.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic -from d123.datasets.kitti_360.labels import KIITI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT +from d123.conversion.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic +from d123.conversion.datasets.kitti_360.labels import KIITI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT def _bbox_xml_path(log_name: str) -> Path: if log_name == "2013_05_28_drive_0004_sync": diff --git a/d123/datatypes/scene/arrow/utils/arrow_getters.py b/d123/datatypes/scene/arrow/utils/arrow_getters.py index 03951f07..fb810af4 100644 --- a/d123/datatypes/scene/arrow/utils/arrow_getters.py +++ b/d123/datatypes/scene/arrow/utils/arrow_getters.py @@ -167,7 +167,7 @@ def get_lidar_from_arrow_table( elif log_metadata.dataset == "wopd": raise NotImplementedError elif log_metadata.dataset == "kitti360": - from d123.datasets.kitti_360.load_sensor import load_kitti360_lidar_from_path + from d123.conversion.datasets.kitti_360.kitti360_load_sensor import load_kitti360_lidar_from_path lidar = load_kitti360_lidar_from_path(full_lidar_path, lidar_metadata) else: diff --git a/d123/script/config/datasets/kitti360_dataset.yaml b/d123/script/config/conversion/datasets/kitti360_dataset.yaml similarity index 73% rename from d123/script/config/datasets/kitti360_dataset.yaml rename to 
d123/script/config/conversion/datasets/kitti360_dataset.yaml index be7567bf..c18c7ec3 100644 --- a/d123/script/config/datasets/kitti360_dataset.yaml +++ b/d123/script/config/conversion/datasets/kitti360_dataset.yaml @@ -1,17 +1,20 @@ kitti360_dataset: - _target_: d123.datasets.kitti_360.kitti_360_data_converter.Kitti360DataConverter + _target_: d123.conversion.datasets.kitti_360.kitti_360_data_converter.Kitti360DataConverter _convert_: 'all' splits: ["kitti360"] log_path: ${oc.env:KITTI360_DATA_ROOT} - data_converter_config: - _target_: d123.datasets.raw_data_converter.DataConverterConfig + dataset_converter_config: + _target_: d123.conversion.dataset_converter_config.DatasetConverterConfig _convert_: 'all' output_path: ${d123_data_root} force_log_conversion: ${force_log_conversion} force_map_conversion: ${force_map_conversion} + + # Map + include_map: true # Ego include_ego: true From e189c65b1b5d59c12cb384a55a5e2d1109f7ef4c Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Thu, 16 Oct 2025 20:28:59 +0800 Subject: [PATCH 24/32] refactor kitti360 --- .../kitti_360/kitti_360_data_converter.py | 16 ++++------------ .../kitti_360/kitti_360_map_conversion.py | 9 ++------- d123/conversion/datasets/kitti_360/labels.py | 2 +- .../datasets/kitti_360/preprocess_detection.py | 2 +- 4 files changed, 8 insertions(+), 21 deletions(-) diff --git a/d123/conversion/datasets/kitti_360/kitti_360_data_converter.py b/d123/conversion/datasets/kitti_360/kitti_360_data_converter.py index 833493bd..5a3aea8c 100644 --- a/d123/conversion/datasets/kitti_360/kitti_360_data_converter.py +++ b/d123/conversion/datasets/kitti_360/kitti_360_data_converter.py @@ -1,21 +1,15 @@ -import gc -import json import os import re import yaml from dataclasses import asdict -from functools import partial from pathlib import Path from typing import Any, Dict, Final, List, Optional, Tuple, Union import numpy as np import pickle -import copy from collections import defaultdict import datetime import xml.etree.ElementTree as ET -import pyarrow as pa -from PIL import Image import logging from pyquaternion import Quaternion @@ -43,7 +37,6 @@ from d123.datatypes.time.time_point import TimePoint from d123.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index from d123.datatypes.vehicle_state.vehicle_parameters import get_kitti360_station_wagon_parameters,rear_axle_se3_to_center_se3 -from d123.common.utils.arrow_helper import open_arrow_table, write_arrow_table from d123.common.utils.uuid import create_deterministic_uuid from d123.conversion.abstract_dataset_converter import AbstractDatasetConverter from d123.conversion.dataset_converter_config import DatasetConverterConfig @@ -53,7 +46,7 @@ from d123.datatypes.maps.map_metadata import MapMetadata from d123.datatypes.scene.scene_metadata import LogMetadata from d123.conversion.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic -from d123.conversion.datasets.kitti_360.labels import KIITI360_DETECTION_NAME_DICT,kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT +from d123.conversion.datasets.kitti_360.labels import KITTI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT from d123.conversion.datasets.kitti_360.kitti_360_map_conversion import ( convert_kitti360_map_with_writer ) @@ -61,7 +54,6 @@ from d123.geometry.rotation import EulerAngles KITTI360_DT: Final[float] = 0.1 -SORT_BY_TIMESTAMP: Final[bool] = True KITTI360_DATA_ROOT = 
Path(os.environ["KITTI360_DATA_ROOT"]) @@ -519,7 +511,7 @@ def _extract_detections( else: lable = child.find('label').text name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(lable, 'unknown') - if child.find('transform') is None or name not in KIITI360_DETECTION_NAME_DICT.keys(): + if child.find('transform') is None or name not in KITTI360_DETECTION_NAME_DICT.keys(): continue obj = KITTI360Bbox3D() obj.parseBbox(child) @@ -535,7 +527,7 @@ def _extract_detections( detections_states[frame].append(obj.get_state_array()) detections_velocity[frame].append(np.array([0.0, 0.0, 0.0])) detections_tokens[frame].append(str(obj.globalID)) - detections_types[frame].append(KIITI360_DETECTION_NAME_DICT[obj.name]) + detections_types[frame].append(KITTI360_DETECTION_NAME_DICT[obj.name]) else: global_ID = obj.globalID dynamic_objs[global_ID].append(obj) @@ -572,7 +564,7 @@ def _extract_detections( detections_states[frame].append(obj.get_state_array()) detections_velocity[frame].append(vel) detections_tokens[frame].append(str(obj.globalID)) - detections_types[frame].append(KIITI360_DETECTION_NAME_DICT[obj.name]) + detections_types[frame].append(KITTI360_DETECTION_NAME_DICT[obj.name]) box_detection_wrapper_all: List[BoxDetectionWrapper] = [] for frame in range(ts_len): diff --git a/d123/conversion/datasets/kitti_360/kitti_360_map_conversion.py b/d123/conversion/datasets/kitti_360/kitti_360_map_conversion.py index 17f047dc..23a9d944 100644 --- a/d123/conversion/datasets/kitti_360/kitti_360_map_conversion.py +++ b/d123/conversion/datasets/kitti_360/kitti_360_map_conversion.py @@ -1,22 +1,18 @@ import os -import warnings from pathlib import Path -from typing import Dict, List, Optional +from typing import List import geopandas as gpd import numpy as np import pandas as pd import xml.etree.ElementTree as ET -import pyogrio -from shapely.geometry import LineString import shapely.geometry as geom from d123.conversion.utils.map_utils.road_edge.road_edge_2d_utils import ( get_road_edge_linear_rings, split_line_geometry_by_max_length, ) -from d123.datatypes.maps.gpkg.gpkg_utils import get_all_rows_with_value, get_row_with_value -from d123.datatypes.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType +from d123.datatypes.maps.map_datatypes import RoadEdgeType from d123.geometry.polyline import Polyline3D from d123.conversion.datasets.kitti_360.kitti_360_helper import KITTI360_MAP_Bbox3D from d123.conversion.map_writer.abstract_map_writer import AbstractMapWriter @@ -91,7 +87,6 @@ def _extract_road_edge_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: road_edge_types = [] for idx in range(len(road_edges)): ids.append(idx) - # TODO @DanielDauner: Figure out if other types should/could be assigned here. 
road_edge_types.append(int(RoadEdgeType.ROAD_EDGE_BOUNDARY)) data = pd.DataFrame({"id": ids, "road_edge_type": road_edge_types}) diff --git a/d123/conversion/datasets/kitti_360/labels.py b/d123/conversion/datasets/kitti_360/labels.py index 45e2d315..aae1c397 100644 --- a/d123/conversion/datasets/kitti_360/labels.py +++ b/d123/conversion/datasets/kitti_360/labels.py @@ -184,7 +184,7 @@ def assureSingleInstanceName( name ): "caravan": "caravan", } -KIITI360_DETECTION_NAME_DICT = { +KITTI360_DETECTION_NAME_DICT = { "traffic light": DetectionType.SIGN, "traffic sign": DetectionType.SIGN, "person": DetectionType.PEDESTRIAN, diff --git a/d123/conversion/datasets/kitti_360/preprocess_detection.py b/d123/conversion/datasets/kitti_360/preprocess_detection.py index 18bbc125..803d162c 100644 --- a/d123/conversion/datasets/kitti_360/preprocess_detection.py +++ b/d123/conversion/datasets/kitti_360/preprocess_detection.py @@ -32,7 +32,7 @@ PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES from d123.conversion.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic -from d123.conversion.datasets.kitti_360.labels import KIITI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT +from d123.conversion.datasets.kitti_360.labels import KITTI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT def _bbox_xml_path(log_name: str) -> Path: if log_name == "2013_05_28_drive_0004_sync": From 9b6473f2275a14b02298557d642fc03b228960da Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Thu, 16 Oct 2025 20:30:13 +0800 Subject: [PATCH 25/32] refactor kitti360 --- d123/conversion/datasets/kitti_360/preprocess_detection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/d123/conversion/datasets/kitti_360/preprocess_detection.py b/d123/conversion/datasets/kitti_360/preprocess_detection.py index 803d162c..99e84681 100644 --- a/d123/conversion/datasets/kitti_360/preprocess_detection.py +++ b/d123/conversion/datasets/kitti_360/preprocess_detection.py @@ -65,7 +65,7 @@ def _collect_static_objects(log_name: str) -> List[KITTI360Bbox3D]: lable = child.find('label').text name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(lable, 'unknown') timestamp = int(child.find('timestamp').text) # -1 for static objects - if child.find("transform") is None or name not in KIITI360_DETECTION_NAME_DICT or timestamp != -1: + if child.find("transform") is None or name not in KITTI360_DETECTION_NAME_DICT or timestamp != -1: continue obj = KITTI360Bbox3D() obj.parseBbox(child) From 230c034bef4e9d905b47bad239a8c400d2817365 Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Wed, 29 Oct 2025 10:33:03 +0800 Subject: [PATCH 26/32] merge dev_v0.0.7 into kitti360 --- .../kitti_360/kitti_360_data_converter.py | 35 ++++++++++--------- .../datasets/kitti_360/kitti_360_helper.py | 12 ++++--- .../{labels.py => kitti_360_labels.py} | 28 +++++++-------- ...oad_sensor.py => kitti_360_load_sensor.py} | 6 ++-- .../kitti_360/preprocess_detection.py | 2 +- .../scene/arrow/utils/arrow_getters.py | 30 +++++++++++----- .../config/common/default_dataset_paths.yaml | 3 ++ .../scene_builder/default_scene_builder.yaml | 5 ++- .../conversion/datasets/kitti360_dataset.yaml | 3 +- .../config/conversion/default_conversion.yaml | 2 +- .../visualization/viser/viser_config.py | 2 ++ .../visualization/viser/viser_viewer.py | 2 ++ 12 files changed, 79 insertions(+), 51 deletions(-) rename src/py123d/conversion/datasets/kitti_360/{labels.py => 
kitti_360_labels.py} (95%) rename src/py123d/conversion/datasets/kitti_360/{kitti360_load_sensor.py => kitti_360_load_sensor.py} (92%) diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py b/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py index 1443a37a..d8733e12 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py +++ b/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py @@ -1,7 +1,6 @@ import os import re import yaml -from dataclasses import asdict from pathlib import Path from typing import Any, Dict, Final, List, Optional, Tuple, Union @@ -11,9 +10,6 @@ import datetime import xml.etree.ElementTree as ET import logging -from pyquaternion import Quaternion - -from py123d.common.multithreading.worker_utils import WorkerPool, worker_map from py123d.datatypes.detections.box_detections import ( BoxDetectionMetadata, @@ -32,25 +28,25 @@ FisheyeMEIDistortion, FisheyeMEIProjection, ) -from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata, LiDARType from py123d.conversion.utils.sensor_utils.lidar_index_registry import Kitti360LidarIndex from py123d.datatypes.time.time_point import TimePoint -from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3, EgoStateSE3Index +from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import get_kitti360_station_wagon_parameters,rear_axle_se3_to_center_se3 from py123d.common.utils.uuid_utils import create_deterministic_uuid from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter from py123d.conversion.dataset_converter_config import DatasetConverterConfig from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter -from py123d.conversion.log_writer.arrow_log_writer import ArrowLogWriter from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter from py123d.datatypes.maps.map_metadata import MapMetadata from py123d.datatypes.scene.scene_metadata import LogMetadata from py123d.conversion.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic -from py123d.conversion.datasets.kitti_360.labels import KITTI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT +from py123d.conversion.datasets.kitti_360.kitti_360_labels import KITTI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT from py123d.conversion.datasets.kitti_360.kitti_360_map_conversion import ( convert_kitti360_map_with_writer ) -from py123d.geometry import BoundingBoxSE3, BoundingBoxSE3Index, StateSE3, Vector3D, Vector3DIndex +from py123d.conversion.datasets.kitti_360.kitti_360_load_sensor import load_kitti360_lidar_from_path +from py123d.geometry import BoundingBoxSE3, StateSE3, Vector3D from py123d.geometry.rotation import EulerAngles KITTI360_DT: Final[float] = 0.1 @@ -88,8 +84,8 @@ DIR_3D_BBOX: PATH_3D_BBOX_ROOT / "train", } -D123_DEVKIT_ROOT = Path(os.environ["D123_DEVKIT_ROOT"]) -PREPOCESS_DETECTION_DIR = D123_DEVKIT_ROOT / "d123" / "conversion" / "datasets" / "kitti_360" / "detection_preprocess" +D123_DEVKIT_ROOT = Path(os.environ["PY123D_DEVKIT_ROOT"]) +PREPOCESS_DETECTION_DIR = D123_DEVKIT_ROOT / "src" / "py123d" / "conversion" / "datasets" / "kitti_360" / "detection_preprocess" def create_token(split: str, log_name: str, timestamp_us: int, 
misc: str = None) -> str: """Create a deterministic UUID-based token for KITTI-360 data. @@ -117,7 +113,7 @@ class Kitti360DataConverter(AbstractDatasetConverter): def __init__( self, splits: List[str], - log_path: Union[Path, str], + kitti360_data_root: Union[Path, str], dataset_converter_config: DatasetConverterConfig, ) -> None: super().__init__(dataset_converter_config) @@ -127,7 +123,7 @@ def __init__( ), f"Split {split} is not available. Available splits: {self.available_splits}" self._splits: List[str] = splits - self._log_path: Path = Path(log_path) + self._log_path: Path = Path(kitti360_data_root) self._log_paths_and_split: List[Tuple[Path, str]] = self._collect_log_paths() self._total_maps = len(self._log_paths_and_split) # Each log has its own map @@ -430,9 +426,8 @@ def _extract_ego_state_all(log_name: str) -> Tuple[List[EgoStateSE3], List[int]] [r10, r11, r12], [r20, r21, r22]], dtype=np.float64) R_mat_cali = R_mat @ KITTI3602NUPLAN_IMU_CALIBRATION[:3,:3] - yaw, pitch, roll = Quaternion(matrix=R_mat_cali[:3, :3]).yaw_pitch_roll - ego_quaternion = EulerAngles(roll=roll, pitch=pitch, yaw=yaw).quaternion + ego_quaternion = EulerAngles.from_rotation_matrix(R_mat_cali).quaternion rear_axle_pose = StateSE3( x=poses[pos, 4], y=poses[pos, 8], @@ -578,7 +573,7 @@ def _extract_detections( if state is None: break detection_metadata = BoxDetectionMetadata( - detection_type=detection_type, + box_detection_type=detection_type, timepoint=None, track_token=token, confidence=None, @@ -606,7 +601,13 @@ def _extract_lidar(log_name: str, idx: int, data_converter_config: DatasetConver if data_converter_config.lidar_store_option == "path": lidar = f"data_3d_raw/{log_name}/velodyne_points/data/{idx:010d}.bin" elif data_converter_config.lidar_store_option == "binary": - raise NotImplementedError("Binary lidar storage is not implemented.") + temp_metadata = LiDARMetadata( + lidar_type=LiDARType.LIDAR_TOP, + lidar_index=Kitti360LidarIndex, + extrinsic=StateSE3.from_transformation_matrix(get_lidar_extrinsic()), + ) + lidar_pc: LiDAR = load_kitti360_lidar_from_path(lidar_full_path, temp_metadata) + lidar = lidar_pc.point_cloud else: raise FileNotFoundError(f"LiDAR file not found: {lidar_full_path}") return {LiDARType.LIDAR_TOP: lidar} diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_helper.py b/src/py123d/conversion/datasets/kitti_360/kitti_360_helper.py index 85bc7ecc..8486329c 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti_360_helper.py +++ b/src/py123d/conversion/datasets/kitti_360/kitti_360_helper.py @@ -1,15 +1,14 @@ import numpy as np from collections import defaultdict -from typing import Dict, Optional, Any, List, Tuple +from typing import Dict, Any, List, Tuple import copy from scipy.linalg import polar -from scipy.spatial.transform import Rotation as R from py123d.geometry import BoundingBoxSE3, StateSE3 from py123d.geometry.polyline import Polyline3D from py123d.geometry.rotation import EulerAngles -from py123d.conversion.datasets.kitti_360.labels import kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT +from py123d.conversion.datasets.kitti_360.kitti_360_labels import kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT import os from pathlib import Path @@ -124,8 +123,11 @@ def parse_scale_rotation(self): if np.linalg.det(Rm) < 0: Rm[0] = -Rm[0] scale = np.diag(Sm) - yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', degrees=False) - obj_quaternion = EulerAngles(roll=roll, pitch=pitch, yaw=yaw).quaternion + # yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', 
degrees=False) + euler_angles = EulerAngles.from_rotation_matrix(Rm) + yaw,pitch,roll = euler_angles.yaw, euler_angles.pitch, euler_angles.roll + obj_quaternion = euler_angles.quaternion + # obj_quaternion = EulerAngles(roll=roll, pitch=pitch, yaw=yaw).quaternion self.Rm = np.array(Rm) self.Sm = np.array(Sm) diff --git a/src/py123d/conversion/datasets/kitti_360/labels.py b/src/py123d/conversion/datasets/kitti_360/kitti_360_labels.py similarity index 95% rename from src/py123d/conversion/datasets/kitti_360/labels.py rename to src/py123d/conversion/datasets/kitti_360/kitti_360_labels.py index a7428706..7a58b113 100644 --- a/src/py123d/conversion/datasets/kitti_360/labels.py +++ b/src/py123d/conversion/datasets/kitti_360/kitti_360_labels.py @@ -167,7 +167,7 @@ def assureSingleInstanceName( name ): # all good then return name -from py123d.datatypes.detections.detection_types import DetectionType +from py123d.datatypes.detections.box_detection_types import BoxDetectionType BBOX_LABLES_TO_DETECTION_NAME_DICT = { 'car': 'car', @@ -185,19 +185,19 @@ def assureSingleInstanceName( name ): } KITTI360_DETECTION_NAME_DICT = { - "traffic light": DetectionType.SIGN, - "traffic sign": DetectionType.SIGN, - "person": DetectionType.PEDESTRIAN, - "rider": DetectionType.BICYCLE, - "car": DetectionType.VEHICLE, - "truck": DetectionType.VEHICLE, - "bus": DetectionType.VEHICLE, - "caravan": DetectionType.VEHICLE, - "trailer": DetectionType.VEHICLE, - "train": DetectionType.VEHICLE, - "motorcycle": DetectionType.BICYCLE, - "bicycle": DetectionType.BICYCLE, - "stop": DetectionType.SIGN, + "traffic light": BoxDetectionType.SIGN, + "traffic sign": BoxDetectionType.SIGN, + "person": BoxDetectionType.PEDESTRIAN, + "rider": BoxDetectionType.BICYCLE, + "car": BoxDetectionType.VEHICLE, + "truck": BoxDetectionType.VEHICLE, + "bus": BoxDetectionType.VEHICLE, + "caravan": BoxDetectionType.VEHICLE, + "trailer": BoxDetectionType.VEHICLE, + "train": BoxDetectionType.VEHICLE, + "motorcycle": BoxDetectionType.BICYCLE, + "bicycle": BoxDetectionType.BICYCLE, + "stop": BoxDetectionType.SIGN, } # KIITI360_DETECTION_NAME_DICT = { diff --git a/src/py123d/conversion/datasets/kitti_360/kitti360_load_sensor.py b/src/py123d/conversion/datasets/kitti_360/kitti_360_load_sensor.py similarity index 92% rename from src/py123d/conversion/datasets/kitti_360/kitti360_load_sensor.py rename to src/py123d/conversion/datasets/kitti_360/kitti_360_load_sensor.py index f9a5b6fe..6d021df4 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti360_load_sensor.py +++ b/src/py123d/conversion/datasets/kitti_360/kitti_360_load_sensor.py @@ -9,7 +9,7 @@ def load_kitti360_lidar_from_path(filepath: Path, lidar_metadata: LiDARMetadata) -> LiDAR: if not filepath.exists(): logging.warning(f"LiDAR file does not exist: {filepath}. 
Returning empty point cloud.") - return LiDAR(metadata=lidar_metadata, point_cloud=np.zeros((4, 0), dtype=np.float32)) + return LiDAR(metadata=lidar_metadata, point_cloud=np.zeros((1, 4), dtype=np.float32)) pcd = np.fromfile(filepath, dtype=np.float32) pcd = np.reshape(pcd,[-1,4]) # [N,4] @@ -28,4 +28,6 @@ def load_kitti360_lidar_from_path(filepath: Path, lidar_metadata: LiDARMetadata) point_cloud_4xN = np.vstack([transformed_xyz, intensity_row]).astype(np.float32) # (4,N) - return LiDAR(metadata=lidar_metadata, point_cloud=point_cloud_4xN) + point_cloud_Nx4 = point_cloud_4xN.T # (N,4) + + return LiDAR(metadata=lidar_metadata, point_cloud=point_cloud_Nx4) diff --git a/src/py123d/conversion/datasets/kitti_360/preprocess_detection.py b/src/py123d/conversion/datasets/kitti_360/preprocess_detection.py index 06e47379..2f959b06 100644 --- a/src/py123d/conversion/datasets/kitti_360/preprocess_detection.py +++ b/src/py123d/conversion/datasets/kitti_360/preprocess_detection.py @@ -32,7 +32,7 @@ PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES from py123d.conversion.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic -from py123d.conversion.datasets.kitti_360.labels import KITTI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT +from py123d.conversion.datasets.kitti_360.kitti_360_labels import KITTI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT def _bbox_xml_path(log_name: str) -> Path: if log_name == "2013_05_28_drive_0004_sync": diff --git a/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py b/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py index 440d12fa..215cb95f 100644 --- a/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py +++ b/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py @@ -1,5 +1,5 @@ from pathlib import Path -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union import cv2 import numpy as np @@ -21,6 +21,7 @@ ) from py123d.datatypes.scene.scene_metadata import LogMetadata from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType +from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARType from py123d.datatypes.sensors.lidar.lidar_index import DefaultLidarIndex from py123d.datatypes.time.time_point import TimePoint @@ -35,6 +36,7 @@ "av2-sensor": DATASET_PATHS.av2_sensor_data_root, "wopd": DATASET_PATHS.wopd_data_root, "pandaset": DATASET_PATHS.pandaset_data_root, + "kitti360": DATASET_PATHS.kitti360_data_root, } @@ -102,9 +104,9 @@ def get_traffic_light_detections_from_arrow_table(arrow_table: pa.Table, index: def get_camera_from_arrow_table( arrow_table: pa.Table, index: int, - camera_type: PinholeCameraType, + camera_type: Union[PinholeCameraType, FisheyeMEICameraType], log_metadata: LogMetadata, -) -> PinholeCamera: +) -> Union[PinholeCamera, FisheyeMEICamera]: camera_name = camera_type.serialize() table_data = arrow_table[f"{camera_name}_data"][index].as_py() @@ -131,11 +133,19 @@ def get_camera_from_arrow_table( else: raise NotImplementedError("Only string file paths for camera data are supported.") - return PinholeCamera( - metadata=log_metadata.camera_metadata[camera_type], - image=image, - extrinsic=extrinsic, - ) + camera_metadata = log_metadata.camera_metadata[camera_type] + if hasattr(camera_metadata, 'mirror_parameter') and camera_metadata.mirror_parameter is not 
None: + return FisheyeMEICamera( + metadata=camera_metadata, + image=image, + extrinsic=extrinsic, + ) + else: + return PinholeCamera( + metadata=camera_metadata, + image=image, + extrinsic=extrinsic, + ) def get_lidar_from_arrow_table( @@ -195,6 +205,10 @@ def get_lidar_from_arrow_table( lidar_type in lidar_pc_dict ), f"LiDAR type {lidar_type} not found in Pandaset data at {full_lidar_path}." lidar = LiDAR(metadata=lidar_metadata, point_cloud=lidar_pc_dict[lidar_type]) + elif log_metadata.dataset == "kitti360": + from py123d.conversion.datasets.kitti_360.kitti_360_load_sensor import load_kitti360_lidar_from_path + + lidar = load_kitti360_lidar_from_path(full_lidar_path, lidar_metadata) else: raise NotImplementedError(f"Loading LiDAR data for dataset {log_metadata.dataset} is not implemented.") diff --git a/src/py123d/script/config/common/default_dataset_paths.yaml b/src/py123d/script/config/common/default_dataset_paths.yaml index ded971a6..6a925b06 100644 --- a/src/py123d/script/config/common/default_dataset_paths.yaml +++ b/src/py123d/script/config/common/default_dataset_paths.yaml @@ -21,3 +21,6 @@ dataset_paths: # Pandaset defaults pandaset_data_root: ${oc.env:PANDASET_DATA_ROOT,null} + + # KITTI360 defaults + kitti360_data_root: ${oc.env:KITTI360_DATA_ROOT,null} diff --git a/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml b/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml index 77445192..1fadc982 100644 --- a/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml +++ b/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml @@ -1,4 +1,7 @@ _target_: py123d.datatypes.scene.arrow.arrow_scene_builder.ArrowSceneBuilder _convert_: 'all' -dataset_path: ${dataset_paths.py123d_data_root} +# dataset_path: ${dataset_paths.py123d_data_root} +logs_root: ${dataset_paths.py123d_logs_root} +maps_root: ${dataset_paths.py123d_maps_root} + diff --git a/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml index 2096c991..77cea31c 100644 --- a/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml @@ -3,13 +3,12 @@ kitti360_dataset: _convert_: 'all' splits: ["kitti360"] - log_path: ${oc.env:KITTI360_DATA_ROOT} + kitti360_data_root: ${dataset_paths.kitti360_data_root} dataset_converter_config: _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig _convert_: 'all' - output_path: ${d123_data_root} force_log_conversion: ${force_log_conversion} force_map_conversion: ${force_map_conversion} diff --git a/src/py123d/script/config/conversion/default_conversion.yaml b/src/py123d/script/config/conversion/default_conversion.yaml index daa55f12..4adf788b 100644 --- a/src/py123d/script/config/conversion/default_conversion.yaml +++ b/src/py123d/script/config/conversion/default_conversion.yaml @@ -16,7 +16,7 @@ defaults: - log_writer: arrow_log_writer - map_writer: gpkg_map_writer - datasets: - - ??? 
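+      # default to the KITTI-360 converter; list other dataset configs here as needed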
+ - kitti360_dataset - _self_ diff --git a/src/py123d/visualization/viser/viser_config.py b/src/py123d/visualization/viser/viser_config.py index 330e4504..d4233f5e 100644 --- a/src/py123d/visualization/viser/viser_config.py +++ b/src/py123d/visualization/viser/viser_config.py @@ -14,6 +14,8 @@ PinholeCameraType.CAM_R0, PinholeCameraType.CAM_R1, PinholeCameraType.CAM_R2, + PinholeCameraType.CAM_STEREO_L, + PinholeCameraType.CAM_STEREO_R, ] all_lidar_types: List[LiDARType] = [ diff --git a/src/py123d/visualization/viser/viser_viewer.py b/src/py123d/visualization/viser/viser_viewer.py index b27d6853..dcff0fc7 100644 --- a/src/py123d/visualization/viser/viser_viewer.py +++ b/src/py123d/visualization/viser/viser_viewer.py @@ -31,6 +31,8 @@ PinholeCameraType.CAM_R0, PinholeCameraType.CAM_R1, PinholeCameraType.CAM_R2, + PinholeCameraType.CAM_STEREO_L, + PinholeCameraType.CAM_STEREO_R, ] all_lidar_types: List[LiDARType] = [ From 7e12332a58801b6b3f7f926155740d981e936194 Mon Sep 17 00:00:00 2001 From: jbwang <1159270049@qq.com> Date: Wed, 29 Oct 2025 13:33:21 +0800 Subject: [PATCH 27/32] merge dev_v0.0.7 into kitti360 (lidar related) --- .../scene/arrow/utils/arrow_getters.py | 185 ------------------ .../kitti_360/kitti_360_data_converter.py | 47 ++--- ..._load_sensor.py => kitti_360_sensor_io.py} | 15 +- .../sensor_io/lidar/file_lidar_io.py | 7 + .../datatypes/sensors/lidar/lidar_index.py | 8 + 5 files changed, 47 insertions(+), 215 deletions(-) delete mode 100644 d123/datatypes/scene/arrow/utils/arrow_getters.py rename src/py123d/conversion/datasets/kitti_360/{kitti_360_load_sensor.py => kitti_360_sensor_io.py} (61%) diff --git a/d123/datatypes/scene/arrow/utils/arrow_getters.py b/d123/datatypes/scene/arrow/utils/arrow_getters.py deleted file mode 100644 index fb810af4..00000000 --- a/d123/datatypes/scene/arrow/utils/arrow_getters.py +++ /dev/null @@ -1,185 +0,0 @@ -# TODO: rename this file and potentially move somewhere more appropriate. 
- -import os -from pathlib import Path -from typing import Dict, List, Optional, Union - -import cv2 -import numpy as np -import numpy.typing as npt -import pyarrow as pa - -from d123.datatypes.detections.detection import ( - BoxDetection, - BoxDetectionMetadata, - BoxDetectionSE3, - BoxDetectionWrapper, - TrafficLightDetection, - TrafficLightDetectionWrapper, - TrafficLightStatus, -) -from d123.datatypes.detections.detection_types import DetectionType -from d123.datatypes.scene.scene_metadata import LogMetadata -from d123.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType -from d123.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType -from d123.datatypes.sensors.lidar.lidar import LiDAR, LiDARType -from d123.datatypes.time.time_point import TimePoint -from d123.datatypes.vehicle_state.ego_state import EgoStateSE3 -from d123.datatypes.vehicle_state.vehicle_parameters import VehicleParameters -from d123.geometry import BoundingBoxSE3, StateSE3, Vector3D - -DATASET_SENSOR_ROOT: Dict[str, Path] = { - "nuplan": Path(os.environ["NUPLAN_DATA_ROOT"]) / "nuplan-v1.1" / "sensor_blobs", - "carla": Path(os.environ["CARLA_DATA_ROOT"]) / "sensor_blobs", - # "av2-sensor": Path(os.environ["AV2_SENSOR_DATA_ROOT"]) / "sensor", - "kitti360": Path(os.environ["KITTI360_DATA_ROOT"]), - # # "av2-sensor": Path(os.environ["AV2_SENSOR_DATA_ROOT"]) / "sensor_mini", -} - - -def get_timepoint_from_arrow_table(arrow_table: pa.Table, index: int) -> TimePoint: - return TimePoint.from_us(arrow_table["timestamp"][index].as_py()) - - -def get_ego_vehicle_state_from_arrow_table( - arrow_table: pa.Table, index: int, vehicle_parameters: VehicleParameters -) -> EgoStateSE3: - timepoint = get_timepoint_from_arrow_table(arrow_table, index) - return EgoStateSE3.from_array( - array=pa.array(arrow_table["ego_state"][index]).to_numpy(), - vehicle_parameters=vehicle_parameters, - timepoint=timepoint, - ) - - -def get_box_detections_from_arrow_table(arrow_table: pa.Table, index: int) -> BoxDetectionWrapper: - timepoint = get_timepoint_from_arrow_table(arrow_table, index) - box_detections: List[BoxDetection] = [] - - for detection_state, detection_velocity, detection_token, detection_type in zip( - arrow_table["box_detection_state"][index].as_py(), - arrow_table["box_detection_velocity"][index].as_py(), - arrow_table["box_detection_token"][index].as_py(), - arrow_table["box_detection_type"][index].as_py(), - ): - box_detection = BoxDetectionSE3( - metadata=BoxDetectionMetadata( - detection_type=DetectionType(detection_type), - timepoint=timepoint, - track_token=detection_token, - confidence=None, - ), - bounding_box_se3=BoundingBoxSE3.from_array(np.array(detection_state)), - velocity=Vector3D.from_array(np.array(detection_velocity)) if detection_velocity else None, - ) - box_detections.append(box_detection) - return BoxDetectionWrapper(box_detections=box_detections) - - -def get_traffic_light_detections_from_arrow_table(arrow_table: pa.Table, index: int) -> TrafficLightDetectionWrapper: - timepoint = get_timepoint_from_arrow_table(arrow_table, index) - traffic_light_detections: List[TrafficLightDetection] = [] - - for lane_id, status in zip( - arrow_table["traffic_light_ids"][index].as_py(), - arrow_table["traffic_light_types"][index].as_py(), - ): - traffic_light_detection = TrafficLightDetection( - timepoint=timepoint, - lane_id=lane_id, - status=TrafficLightStatus(status), - ) - traffic_light_detections.append(traffic_light_detection) - - return 
TrafficLightDetectionWrapper(traffic_light_detections=traffic_light_detections) - - -def get_camera_from_arrow_table( - arrow_table: pa.Table, - index: int, - camera_type: Union[PinholeCameraType, FisheyeMEICameraType], - log_metadata: LogMetadata, -) -> Union[PinholeCamera, FisheyeMEICamera]: - - camera_name = camera_type.serialize() - table_data = arrow_table[f"{camera_name}_data"][index].as_py() - extrinsic_values = arrow_table[f"{camera_name}_extrinsic"][index].as_py() - extrinsic = StateSE3.from_list(extrinsic_values) if extrinsic_values is not None else None - - if table_data is None or extrinsic is None: - return None - - image: Optional[npt.NDArray[np.uint8]] = None - - if isinstance(table_data, str): - sensor_root = DATASET_SENSOR_ROOT[log_metadata.dataset] - full_image_path = sensor_root / table_data - assert full_image_path.exists(), f"Camera file not found: {full_image_path}" - image = cv2.imread(str(full_image_path), cv2.IMREAD_COLOR) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - elif isinstance(table_data, bytes): - image = cv2.imdecode(np.frombuffer(table_data, np.uint8), cv2.IMREAD_UNCHANGED) - image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) - else: - raise NotImplementedError("Only string file paths for camera data are supported.") - - camera_metadata = log_metadata.camera_metadata[camera_type] - if hasattr(camera_metadata, 'mirror_parameter') and camera_metadata.mirror_parameter is not None: - return FisheyeMEICamera( - metadata=camera_metadata, - image=image, - extrinsic=extrinsic, - ) - else: - return PinholeCamera( - metadata=camera_metadata, - image=image, - extrinsic=extrinsic, - ) - - -def get_lidar_from_arrow_table( - arrow_table: pa.Table, - index: int, - lidar_type: LiDARType, - log_metadata: LogMetadata, -) -> LiDAR: - assert ( - lidar_type.serialize() in arrow_table.schema.names - ), f'"{lidar_type.serialize()}" field not found in Arrow table schema.' - lidar_data = arrow_table[lidar_type.serialize()][index].as_py() - lidar_metadata = log_metadata.lidar_metadata[lidar_type] - - if isinstance(lidar_data, str): - sensor_root = DATASET_SENSOR_ROOT[log_metadata.dataset] - full_lidar_path = sensor_root / lidar_data - assert full_lidar_path.exists(), f"LiDAR file not found: {full_lidar_path}" - - # NOTE: We move data specific import into if-else block, to avoid data specific import errors - if log_metadata.dataset == "nuplan": - from d123.conversion.datasets.nuplan.nuplan_load_sensor import load_nuplan_lidar_from_path - - lidar = load_nuplan_lidar_from_path(full_lidar_path, lidar_metadata) - elif log_metadata.dataset == "carla": - from d123.conversion.datasets.carla.carla_load_sensor import load_carla_lidar_from_path - - lidar = load_carla_lidar_from_path(full_lidar_path, lidar_metadata) - elif log_metadata.dataset == "wopd": - raise NotImplementedError - elif log_metadata.dataset == "kitti360": - from d123.conversion.datasets.kitti_360.kitti360_load_sensor import load_kitti360_lidar_from_path - - lidar = load_kitti360_lidar_from_path(full_lidar_path, lidar_metadata) - else: - raise NotImplementedError(f"Loading LiDAR data for dataset {log_metadata.dataset} is not implemented.") - - else: - # FIXME: This is a temporary fix for WOPD dataset. The lidar data is stored as a flattened array of float32. - # Ideally the lidar index should handle the dimension. But for now we hardcode it here. 
- lidar_data = np.array(lidar_data, dtype=np.float32).reshape(-1, 3) - lidar_data = np.concatenate([np.zeros_like(lidar_data), lidar_data], axis=-1) - if log_metadata.dataset == "wopd": - lidar = LiDAR(metadata=lidar_metadata, point_cloud=lidar_data.T) - else: - raise NotImplementedError("Only string file paths for lidar data are supported.") - return lidar diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py b/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py index d8733e12..c29c8a13 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py +++ b/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py @@ -29,14 +29,14 @@ FisheyeMEIProjection, ) from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata, LiDARType -from py123d.conversion.utils.sensor_utils.lidar_index_registry import Kitti360LidarIndex +from py123d.conversion.registry.lidar_index_registry import Kitti360LidarIndex from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import get_kitti360_station_wagon_parameters,rear_axle_se3_to_center_se3 from py123d.common.utils.uuid_utils import create_deterministic_uuid from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter from py123d.conversion.dataset_converter_config import DatasetConverterConfig -from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter +from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter from py123d.datatypes.maps.map_metadata import MapMetadata from py123d.datatypes.scene.scene_metadata import LogMetadata @@ -45,7 +45,6 @@ from py123d.conversion.datasets.kitti_360.kitti_360_map_conversion import ( convert_kitti360_map_with_writer ) -from py123d.conversion.datasets.kitti_360.kitti_360_load_sensor import load_kitti360_lidar_from_path from py123d.geometry import BoundingBoxSE3, StateSE3, Vector3D from py123d.geometry.rotation import EulerAngles @@ -589,28 +588,30 @@ def _extract_detections( box_detection_wrapper_all.append(BoxDetectionWrapper(box_detections=box_detections)) return box_detection_wrapper_all -def _extract_lidar(log_name: str, idx: int, data_converter_config: DatasetConverterConfig) -> Dict[LiDARType, Optional[str]]: +def _extract_lidar(log_name: str, idx: int, data_converter_config: DatasetConverterConfig) -> List[LiDARData]: - #NOTE special case for sequence 2013_05_28_drive_0002_sync which has no lidar data before frame 4391 - if log_name == "2013_05_28_drive_0002_sync" and idx <= 4390: - return {LiDARType.LIDAR_TOP: None} - - lidar: Optional[str] = None - lidar_full_path = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{idx:010d}.bin" - if lidar_full_path.exists(): - if data_converter_config.lidar_store_option == "path": - lidar = f"data_3d_raw/{log_name}/velodyne_points/data/{idx:010d}.bin" - elif data_converter_config.lidar_store_option == "binary": - temp_metadata = LiDARMetadata( - lidar_type=LiDARType.LIDAR_TOP, - lidar_index=Kitti360LidarIndex, - extrinsic=StateSE3.from_transformation_matrix(get_lidar_extrinsic()), + lidars: List[LiDARData] = [] + if data_converter_config.include_lidars: + #NOTE special case for sequence 2013_05_28_drive_0002_sync which has no lidar data before frame 4391 + if log_name == "2013_05_28_drive_0002_sync" and 
idx <= 4390: + return lidars + + lidar_full_path = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{idx:010d}.bin" + if lidar_full_path.exists(): + relative_path = f"data_3d_raw/{log_name}/velodyne_points/data/{idx:010d}.bin" + lidars.append( + LiDARData( + lidar_type=LiDARType.LIDAR_TOP, + timestamp=None, + iteration=idx, + dataset_root=PATH_3D_RAW_ROOT, + relative_path=relative_path, + ) ) - lidar_pc: LiDAR = load_kitti360_lidar_from_path(lidar_full_path, temp_metadata) - lidar = lidar_pc.point_cloud - else: - raise FileNotFoundError(f"LiDAR file not found: {lidar_full_path}") - return {LiDARType.LIDAR_TOP: lidar} + else: + raise FileNotFoundError(f"LiDAR file not found: {lidar_full_path}") + + return lidars def _extract_cameras( log_name: str, idx: int, data_converter_config: DatasetConverterConfig diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_load_sensor.py b/src/py123d/conversion/datasets/kitti_360/kitti_360_sensor_io.py similarity index 61% rename from src/py123d/conversion/datasets/kitti_360/kitti_360_load_sensor.py rename to src/py123d/conversion/datasets/kitti_360/kitti_360_sensor_io.py index 6d021df4..46318ea8 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti_360_load_sensor.py +++ b/src/py123d/conversion/datasets/kitti_360/kitti_360_sensor_io.py @@ -1,15 +1,15 @@ from pathlib import Path +from typing import Dict import numpy as np import logging +from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata, LiDARType +from py123d.conversion.datasets.kitti_360.kitti_360_helper import get_lidar_extrinsic -from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata - - -def load_kitti360_lidar_from_path(filepath: Path, lidar_metadata: LiDARMetadata) -> LiDAR: +def load_kitti360_lidar_pcs_from_file(filepath: Path) -> Dict[LiDARType, np.ndarray]: if not filepath.exists(): logging.warning(f"LiDAR file does not exist: {filepath}. 
Returning empty point cloud.") - return LiDAR(metadata=lidar_metadata, point_cloud=np.zeros((1, 4), dtype=np.float32)) + return {LiDARType.LIDAR_TOP: np.zeros((1, 4), dtype=np.float32)} pcd = np.fromfile(filepath, dtype=np.float32) pcd = np.reshape(pcd,[-1,4]) # [N,4] @@ -20,7 +20,8 @@ def load_kitti360_lidar_from_path(filepath: Path, lidar_metadata: LiDARMetadata) ones = np.ones((xyz.shape[0], 1), dtype=pcd.dtype) points_h = np.concatenate([xyz, ones], axis=1) #[N,4] - transformed_h = lidar_metadata.extrinsic.transformation_matrix @ points_h.T #[4,N] + transformed_h = get_lidar_extrinsic() @ points_h.T #[4,N] + # transformed_h = lidar_metadata.extrinsic.transformation_matrix @ points_h.T #[4,N] transformed_xyz = transformed_h[:3, :] # (3,N) @@ -30,4 +31,4 @@ def load_kitti360_lidar_from_path(filepath: Path, lidar_metadata: LiDARMetadata) point_cloud_Nx4 = point_cloud_4xN.T # (N,4) - return LiDAR(metadata=lidar_metadata, point_cloud=point_cloud_Nx4) + return {LiDARType.LIDAR_TOP: point_cloud_Nx4} diff --git a/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py b/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py index edc5c7d5..0753c292 100644 --- a/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py +++ b/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py @@ -15,6 +15,7 @@ "av2-sensor": DATASET_PATHS.av2_sensor_data_root, "wopd": DATASET_PATHS.wopd_data_root, "pandaset": DATASET_PATHS.pandaset_data_root, + "kitti360": DATASET_PATHS.kitti360_data_root, } @@ -56,6 +57,12 @@ def load_lidar_pcs_from_file( from py123d.conversion.datasets.pandaset.pandaset_sensor_io import load_pandaset_lidars_pcs_from_file lidar_pcs_dict = load_pandaset_lidars_pcs_from_file(full_lidar_path, index) + + elif log_metadata.dataset == "kitti360": + from py123d.conversion.datasets.kitti_360.kitti_360_sensor_io import load_kitti360_lidar_pcs_from_file + + lidar_pcs_dict = load_kitti360_lidar_pcs_from_file(full_lidar_path) + else: raise NotImplementedError(f"Loading LiDAR data for dataset {log_metadata.dataset} is not implemented.") diff --git a/src/py123d/datatypes/sensors/lidar/lidar_index.py b/src/py123d/datatypes/sensors/lidar/lidar_index.py index 7684b685..c6322a5f 100644 --- a/src/py123d/datatypes/sensors/lidar/lidar_index.py +++ b/src/py123d/datatypes/sensors/lidar/lidar_index.py @@ -62,6 +62,14 @@ class WOPDLidarIndex(LiDARIndex): Z = 5 +@register_lidar_index +class Kitti360LidarIndex(LiDARIndex): + X = 0 + Y = 1 + Z = 2 + INTENSITY = 3 + + @register_lidar_index class AVSensorLidarIndex(LiDARIndex): """Argoverse Sensor LiDAR Indexing Scheme. From 867bd3ebbdfec692dc2ab23fc7687adeb3b118d8 Mon Sep 17 00:00:00 2001 From: Daniel Dauner Date: Sat, 1 Nov 2025 21:39:06 +0100 Subject: [PATCH 28/32] Reformatting, testing, modifying vehicle parameters, and changing the ego frame from imu to rear axle. 
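For orientation, the frame change named in this commit message can be sketched with plain homogeneous transforms. A minimal example, assuming the rear axle sits at x=+0.05 m, y=-0.32 m relative to the IMU origin in the body frame (the offsets hard-coded in the converter below), with made-up sensor coordinates:

import numpy as np

def pose(x: float, y: float, z: float) -> np.ndarray:
    """4x4 homogeneous transform with identity rotation."""
    T = np.eye(4)
    T[:3, 3] = [x, y, z]
    return T

# Rear-axle origin expressed in the IMU frame (offsets from the converter below).
rear_axle_in_imu = pose(0.05, -0.32, 0.0)

# A sensor extrinsic expressed in the IMU frame (illustrative values only).
sensor_in_imu = pose(0.81, 0.32, 0.80)

# Re-express the sensor extrinsic relative to the rear axle.
sensor_in_rear_axle = np.linalg.inv(rear_axle_in_imu) @ sensor_in_imu
# Translation becomes (0.76, 0.64, 0.80); the rotation is unchanged here.

The converter performs the same re-origin via convert_se3_array_between_origins in _extrinsic_from_imu_to_rear_axle.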
--- notebooks/bev_matplotlib.ipynb | 27 +- scripts/conversion/kitti360_conversion.sh | 3 + scripts/download/download_kitti_360.sh | 86 +++++ .../conversion/datasets/kitti_360/.gitkeep | 0 .../kitti_360/kitti_360_data_converter.py | 354 ++++++++++-------- .../datasets/kitti_360/kitti_360_helper.py | 175 +++++---- .../datasets/kitti_360/kitti_360_labels.py | 208 +++++----- .../kitti_360/kitti_360_map_conversion.py | 56 ++- .../datasets/kitti_360/kitti_360_sensor_io.py | 45 +-- .../kitti_360/preprocess_detection.py | 74 ++-- .../conversion/log_writer/arrow_log_writer.py | 2 +- .../conversion/map_writer/gpkg_map_writer.py | 85 +++-- .../registry/lidar_index_registry.py | 2 + .../sensor_io/lidar/file_lidar_io.py | 12 +- .../datatypes/maps/abstract_map_objects.py | 3 +- src/py123d/datatypes/scene/abstract_scene.py | 6 +- .../datatypes/scene/arrow/arrow_scene.py | 14 +- .../scene/arrow/utils/arrow_getters.py | 4 +- src/py123d/datatypes/scene/scene_filter.py | 4 +- src/py123d/datatypes/scene/scene_metadata.py | 8 +- .../sensors/camera/fisheye_mei_camera.py | 38 +- src/py123d/datatypes/sensors/camera/utils.py | 26 +- .../vehicle_state/vehicle_parameters.py | 25 +- .../geometry/transform/transform_se3.py | 7 +- .../scene_builder/default_scene_builder.yaml | 1 - .../conversion/datasets/kitti360_dataset.yaml | 2 +- src/py123d/script/run_conversion.py | 7 + src/py123d/script/run_viser.py | 2 +- src/py123d/visualization/matplotlib/utils.py | 9 +- test_viser.py | 10 +- 30 files changed, 748 insertions(+), 547 deletions(-) create mode 100644 scripts/conversion/kitti360_conversion.sh create mode 100644 scripts/download/download_kitti_360.sh delete mode 100644 src/py123d/conversion/datasets/kitti_360/.gitkeep diff --git a/notebooks/bev_matplotlib.ipynb b/notebooks/bev_matplotlib.ipynb index 910bf63a..53fdcd15 100644 --- a/notebooks/bev_matplotlib.ipynb +++ b/notebooks/bev_matplotlib.ipynb @@ -25,23 +25,27 @@ "\n", "# splits = [\"wopd_val\"]\n", "# splits = [\"carla_test\"]\n", - "splits = [\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n", + "# splits = [\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n", "# splits = [\"av2-sensor-mini_train\"]\n", "# splits = [\"pandaset_train\"]\n", + "\n", "# log_names = None\n", "\n", + "from py123d.common.multithreading.worker_ray import RayDistributed\n", + "\n", "\n", + "splits = [\"kitti360\"]\n", "\n", - "log_names = None\n", + "log_names = [\"2013_05_28_drive_0000_sync\"]\n", "scene_uuids = None\n", "\n", "scene_filter = SceneFilter(\n", " split_names=splits,\n", " log_names=log_names,\n", " scene_uuids=scene_uuids,\n", - " duration_s=None,\n", + " duration_s=10.0,\n", " history_s=0.0,\n", - " timestamp_threshold_s=20,\n", + " timestamp_threshold_s=30,\n", " shuffle=True,\n", " # camera_types=[CameraType.CAM_F0],\n", ")\n", @@ -117,9 +121,9 @@ ")\n", "\n", "ROAD_EDGE_CONFIG: PlotConfig = PlotConfig(\n", - " fill_color=DARKER_GREY.set_brightness(0.0),\n", + " fill_color=DARKER_GREY,\n", " fill_color_alpha=1.0,\n", - " line_color=DARKER_GREY.set_brightness(0.0),\n", + " line_color=DARKER_GREY,\n", " line_color_alpha=1.0,\n", " line_width=1.0,\n", " line_style=\"-\",\n", @@ -148,10 +152,10 @@ " # MapLayer.LANE,\n", " MapLayer.LANE_GROUP,\n", " MapLayer.GENERIC_DRIVABLE,\n", - " # MapLayer.CARPARK,\n", + " MapLayer.CARPARK,\n", " # MapLayer.CROSSWALK,\n", " # MapLayer.INTERSECTION,\n", - " # MapLayer.WALKWAY,\n", + " MapLayer.WALKWAY,\n", " MapLayer.ROAD_EDGE,\n", " MapLayer.ROAD_LINE,\n", " ]\n", @@ -220,10 +224,10 @@ "\n", " point_2d = 
ego_vehicle_state.bounding_box.center.state_se2.point_2d\n", "    if map_api is not None:\n", -        "        # add_debug_map_on_ax(ax, scene.get_map_api(), point_2d, radius=radius, route_lane_group_ids=None)\n", +        "        add_debug_map_on_ax(ax, scene.get_map_api(), point_2d, radius=radius, route_lane_group_ids=None)\n", "\n", "\n", -        "        add_default_map_on_ax(ax, map_api, point_2d, radius=radius, route_lane_group_ids=None)\n", +        "        # add_default_map_on_ax(ax, map_api, point_2d, radius=radius, route_lane_group_ids=None)\n", "        # add_traffic_lights_to_ax(ax, traffic_light_detections, scene.get_map_api())\n", "\n", "    add_box_detections_to_ax(ax, box_detections)\n", @@ -256,7 +260,8 @@ "scene = np.random.choice(scenes)\n", "_plot_scene_on_ax(ax, scene, iteration, radius=80)\n", "# _plot_scene_on_ax(ax[1], scene, iteration, radius=50)\n", -    "# _plot_scene_on_ax(ax[2], scene, iteration, radius=100)\n", +    "# _plot_scene_on_ax(ax[2], scene, iteration,\n", +    "#                   radius=100)\n", "\n", "plt.show()" ]
diff --git a/scripts/conversion/kitti360_conversion.sh b/scripts/conversion/kitti360_conversion.sh
new file mode 100644
index 00000000..1e939ad5
--- /dev/null
+++ b/scripts/conversion/kitti360_conversion.sh
@@ -0,0 +1,3 @@
+export KITTI360_DATA_ROOT="/home/daniel/kitti_360/KITTI-360"
+
+py123d-conversion datasets=["kitti360_dataset"] map_writer.remap_ids=true
diff --git a/scripts/download/download_kitti_360.sh b/scripts/download/download_kitti_360.sh
new file mode 100644
index 00000000..1cb3e540
--- /dev/null
+++ b/scripts/download/download_kitti_360.sh
@@ -0,0 +1,86 @@
+# 2D data & labels
+# ----------------------------------------------------------------------------------------------------------------------
+
+# Fisheye Images (355G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/a1d81d9f7fc7195c937f9ad12e2a2c66441ecb4e/download_2d_fisheye.zip
+
+# Fisheye Calibration Images (11G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/data_2d_raw/data_fisheye_calibration.zip
+
+
+# Perspective Images for Train & Val (128G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/a1d81d9f7fc7195c937f9ad12e2a2c66441ecb4e/download_2d_perspective.zip
+
+# Test Semantic (1.5G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/data_2d_raw/data_2d_test.zip
+
+# Test NVS 50% Drop (0.3G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/71f967e900f4e7c2e036a542f150effa31909b53/data_2d_nvs_drop50.zip
+
+# Test NVS 90% Drop (0.2G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/71f967e900f4e7c2e036a542f150effa31909b53/data_2d_nvs_drop90.zip
+
+# Test SLAM (14G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/data_2d_raw/data_2d_test_slam.zip
+
+
+# Semantics of Left Perspective Camera (1.8G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ed180d24c0a144f2f1ac71c2c655a3e986517ed8/data_2d_semantics.zip
+
+# Semantics of Right Perspective Camera (1.8G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ed180d24c0a144f2f1ac71c2c655a3e986517ed8/data_2d_semantics_image_01.zip
+
+
+# Confidence of Left Perspective Camera (44G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ed180d24c0a144f2f1ac71c2c655a3e986517ed8/data_2d_confidence.zip
+
+# Confidence of Right Perspective Camera (44G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ed180d24c0a144f2f1ac71c2c655a3e986517ed8/data_2d_confidence_image_01.zip
+
+
+
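Before converting, it is worth checking that the archives above actually arrived. A small sketch, assuming the zips were collected in a hypothetical $KITTI360_DATA_ROOT/archives staging directory:

import os
from pathlib import Path

# Hypothetical staging directory for the archives fetched above.
archive_dir = Path(os.environ["KITTI360_DATA_ROOT"]) / "archives"

# A spot-check subset; names are taken from the wget URLs in this script.
expected = [
    "download_2d_perspective.zip",
    "data_2d_semantics.zip",
    "download_3d_velodyne.zip",
    "data_3d_bboxes.zip",
    "calibration.zip",
    "data_poses.zip",
]
missing = [name for name in expected if not (archive_dir / name).exists()]
print("missing archives:", missing or "none")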
+# 3D data & labels
+# ----------------------------------------------------------------------------------------------------------------------
+
+# Raw Velodyne Scans (119G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/a1d81d9f7fc7195c937f9ad12e2a2c66441ecb4e/download_3d_velodyne.zip
+
+# Test SLAM (12G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/data_3d_raw/data_3d_test_slam.zip
+
+# Test Completion (35M)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/6489aabd632d115c4280b978b2dcf72cb0142ad9/data_3d_ssc_test.zip
+
+
+# Raw SICK Scans (0.4G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/a1d81d9f7fc7195c937f9ad12e2a2c66441ecb4e/download_3d_sick.zip
+
+
+# Accumulated Point Clouds for Train & Val (12G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/6489aabd632d115c4280b978b2dcf72cb0142ad9/data_3d_semantics.zip
+
+# Test Semantic (1.2G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/6489aabd632d115c4280b978b2dcf72cb0142ad9/data_3d_semantics_test.zip
+
+
+# 3D Bounding Boxes (30M)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/ffa164387078f48a20f0188aa31b0384bb19ce60/data_3d_bboxes.zip
+
+
+
+# Calibrations & Poses
+# ----------------------------------------------------------------------------------------------------------------------
+
+# Calibrations (3K)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/384509ed5413ccc81328cf8c55cc6af078b8c444/calibration.zip
+
+
+# Vehicle Poses (8.9M)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/89a6bae3c8a6f789e12de4807fc1e8fdcf182cf4/data_poses.zip
+
+
+# OXTS Sync Measurements (37.3M)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/89a6bae3c8a6f789e12de4807fc1e8fdcf182cf4/data_poses_oxts.zip
+
+# OXTS Raw Measurements (0.4G)
+wget https://s3.eu-central-1.amazonaws.com/avg-projects/KITTI-360/89a6bae3c8a6f789e12de4807fc1e8fdcf182cf4/data_poses_oxts_extract.zip
diff --git a/src/py123d/conversion/datasets/kitti_360/.gitkeep b/src/py123d/conversion/datasets/kitti_360/.gitkeep deleted file mode 100644 index e69de29b..00000000 diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py b/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py index c29c8a13..d4d17d99 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py +++ b/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py @@ -1,62 +1,71 @@ +import datetime +import logging import os +import pickle import re -import yaml +import xml.etree.ElementTree as ET +from collections import defaultdict from pathlib import Path from typing import Any, Dict, Final, List, Optional, Tuple, Union import numpy as np -import pickle -from collections import defaultdict -import datetime -import xml.etree.ElementTree as ET -import logging +import yaml +from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter +from py123d.conversion.dataset_converter_config import DatasetConverterConfig +from py123d.conversion.datasets.kitti_360.kitti_360_helper import ( + KITTI3602NUPLAN_IMU_CALIBRATION, + KITTI360Bbox3D, + get_lidar_extrinsic, +) +from py123d.conversion.datasets.kitti_360.kitti_360_labels import ( + BBOX_LABLES_TO_DETECTION_NAME_DICT, + KITTI360_DETECTION_NAME_DICT, + kittiId2label, +) +from py123d.conversion.datasets.kitti_360.kitti_360_map_conversion import convert_kitti360_map_with_writer +from 
py123d.conversion.datasets.kitti_360.preprocess_detection import process_detection +from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData +from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter +from py123d.conversion.registry.lidar_index_registry import Kitti360LidarIndex from py123d.datatypes.detections.box_detections import ( BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper, ) -from py123d.datatypes.sensors.camera.pinhole_camera import ( - PinholeCameraMetadata, - PinholeCameraType, - PinholeDistortion, - PinholeIntrinsics, -) +from py123d.datatypes.maps.map_metadata import MapMetadata +from py123d.datatypes.scene.scene_metadata import LogMetadata from py123d.datatypes.sensors.camera.fisheye_mei_camera import ( FisheyeMEICameraMetadata, FisheyeMEICameraType, FisheyeMEIDistortion, FisheyeMEIProjection, ) -from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata, LiDARType -from py123d.conversion.registry.lidar_index_registry import Kitti360LidarIndex +from py123d.datatypes.sensors.camera.pinhole_camera import ( + PinholeCameraMetadata, + PinholeCameraType, + PinholeDistortion, + PinholeIntrinsics, +) +from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 -from py123d.datatypes.vehicle_state.vehicle_parameters import get_kitti360_station_wagon_parameters,rear_axle_se3_to_center_se3 -from py123d.common.utils.uuid_utils import create_deterministic_uuid -from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter -from py123d.conversion.dataset_converter_config import DatasetConverterConfig -from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData -from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter -from py123d.datatypes.maps.map_metadata import MapMetadata -from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.conversion.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic -from py123d.conversion.datasets.kitti_360.kitti_360_labels import KITTI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT -from py123d.conversion.datasets.kitti_360.kitti_360_map_conversion import ( - convert_kitti360_map_with_writer +from py123d.datatypes.vehicle_state.vehicle_parameters import ( + get_kitti360_vw_passat_parameters, + rear_axle_se3_to_center_se3, ) -from py123d.geometry import BoundingBoxSE3, StateSE3, Vector3D -from py123d.geometry.rotation import EulerAngles +from py123d.geometry import BoundingBoxSE3, Quaternion, StateSE3, Vector3D +from py123d.geometry.transform.transform_se3 import convert_se3_array_between_origins, translate_se3_along_body_frame KITTI360_DT: Final[float] = 0.1 KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"]) KITTI360_CAMERA_TYPES = { - PinholeCameraType.CAM_STEREO_L: "image_00", - PinholeCameraType.CAM_STEREO_R: "image_01", - FisheyeMEICameraType.CAM_L: "image_02", - FisheyeMEICameraType.CAM_R: "image_03", + PinholeCameraType.CAM_STEREO_L: "image_00", + PinholeCameraType.CAM_STEREO_R: "image_01", + FisheyeMEICameraType.CAM_L: "image_02", + FisheyeMEICameraType.CAM_R: "image_03", } DIR_2D_RAW = "data_2d_raw" @@ -67,8 +76,7 @@ DIR_POSES = "data_poses" DIR_CALIB = "calibration" -# PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_RAW 
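# With PATH_2D_RAW_ROOT anchored at data_2d_raw again, perspective frames resolve
# as, for example (hypothetical sequence and frame index; assumes the standard
# KITTI-360 layout with rectified images under image_00/data_rect):
#
#   PATH_2D_RAW_ROOT / "2013_05_28_drive_0000_sync" / "image_00" / "data_rect" / "0000000000.png"
#   # -> $KITTI360_DATA_ROOT/data_2d_raw/2013_05_28_drive_0000_sync/image_00/data_rect/0000000000.png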
-PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT +PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_RAW PATH_2D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_SMT PATH_3D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_RAW PATH_3D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_SMT @@ -83,20 +91,25 @@ DIR_3D_BBOX: PATH_3D_BBOX_ROOT / "train", } -D123_DEVKIT_ROOT = Path(os.environ["PY123D_DEVKIT_ROOT"]) -PREPOCESS_DETECTION_DIR = D123_DEVKIT_ROOT / "src" / "py123d" / "conversion" / "datasets" / "kitti_360" / "detection_preprocess" - -def create_token(split: str, log_name: str, timestamp_us: int, misc: str = None) -> str: - """Create a deterministic UUID-based token for KITTI-360 data. - - :param split: The data split (e.g., "kitti360") - :param log_name: The name of the log without file extension - :param timestamp_us: The timestamp in microseconds - :param misc: Any additional information to include in the UUID, defaults to None - :return: The generated deterministic UUID as hex string - """ - uuid_obj = create_deterministic_uuid(split=split, log_name=log_name, timestamp_us=timestamp_us, misc=misc) - return uuid_obj.hex +KITTI360_ALL_SEQUENCES: Final[List[str]] = [ + "2013_05_28_drive_0000_sync", + "2013_05_28_drive_0002_sync", + "2013_05_28_drive_0003_sync", + # "2013_05_28_drive_0004_sync", + # "2013_05_28_drive_0005_sync", + # "2013_05_28_drive_0006_sync", + # "2013_05_28_drive_0007_sync", + # "2013_05_28_drive_0008_sync", + # "2013_05_28_drive_0009_sync", + # "2013_05_28_drive_0010_sync", + # "2013_05_28_drive_0018_sync", +] + +# Create a temporary directory for detection preprocessing +# PREPROCESS_DETECTION_DIR = Path(tempfile.mkdtemp(prefix="kitti360_detection_")) + +PREPROCESS_DETECTION_DIR = Path("/home/daniel/kitti360_detection_temp") + def get_kitti360_map_metadata(split: str, log_name: str) -> MapMetadata: return MapMetadata( @@ -108,12 +121,14 @@ def get_kitti360_map_metadata(split: str, log_name: str) -> MapMetadata: map_is_local=True, ) + class Kitti360DataConverter(AbstractDatasetConverter): def __init__( self, splits: List[str], kitti360_data_root: Union[Path, str], dataset_converter_config: DatasetConverterConfig, + kitti36_sequences: List[str] = KITTI360_ALL_SEQUENCES, ) -> None: super().__init__(dataset_converter_config) for split in splits: @@ -123,8 +138,9 @@ def __init__( self._splits: List[str] = splits self._log_path: Path = Path(kitti360_data_root) + self._kitti36_sequences: List[str] = kitti36_sequences self._log_paths_and_split: List[Tuple[Path, str]] = self._collect_log_paths() - + self._total_maps = len(self._log_paths_and_split) # Each log has its own map self._total_logs = len(self._log_paths_and_split) @@ -138,9 +154,13 @@ def _collect_log_paths(self) -> List[Tuple[Path, str]]: missing_roots = [str(p) for p in KITTI360_REQUIRED_MODALITY_ROOTS.values() if not p.exists()] if missing_roots: raise FileNotFoundError(f"KITTI-360 required roots missing: {missing_roots}") - + # Enumerate candidate sequences from data_2d_raw - candidates = sorted(p for p in PATH_2D_RAW_ROOT.iterdir() if p.is_dir() and p.name.endswith("_sync")) + candidates = sorted( + p + for p in PATH_2D_RAW_ROOT.iterdir() + if p.is_dir() and p.name.endswith("_sync") and p.name in self._kitti36_sequences + ) def _has_modality(seq_name: str, modality_name: str, root: Path) -> bool: if modality_name == DIR_3D_BBOX: @@ -165,22 +185,22 @@ def _has_modality(seq_name: str, modality_name: str, root: Path) -> bool: f"Sequence '{seq_name}' skipped: missing modalities {missing_modalities}. 
" f"Root: {KITTI360_DATA_ROOT}" ) - + logging.info(f"Valid sequences found: {len(log_paths_and_split)}") return log_paths_and_split - + def get_available_splits(self) -> List[str]: """Returns a list of available raw data types.""" return ["kitti360"] - + def get_number_of_maps(self) -> int: """Returns the number of available raw data maps for conversion.""" return self._total_maps - + def get_number_of_logs(self) -> int: """Returns the number of available raw data logs for conversion.""" return self._total_logs - + def convert_map(self, map_index: int, map_writer: AbstractMapWriter) -> None: """ Convert a single map in raw data format to the uniform 123D format. @@ -189,15 +209,15 @@ def convert_map(self, map_index: int, map_writer: AbstractMapWriter) -> None: """ source_log_path, split = self._log_paths_and_split[map_index] log_name = source_log_path.stem - + map_metadata = get_kitti360_map_metadata(split, log_name) - + map_needs_writing = map_writer.reset(self.dataset_converter_config, map_metadata) if map_needs_writing: convert_kitti360_map_with_writer(log_name, map_writer) - + map_writer.close() - + def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: """ Convert a single log in raw data format to the uniform 123D format. @@ -206,7 +226,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: """ source_log_path, split = self._log_paths_and_split[log_index] log_name = source_log_path.stem - + # Create log metadata log_metadata = LogMetadata( dataset="kitti360", @@ -214,20 +234,23 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: log_name=log_name, location=log_name, timestep_seconds=KITTI360_DT, - vehicle_parameters=get_kitti360_station_wagon_parameters(), + vehicle_parameters=get_kitti360_vw_passat_parameters(), camera_metadata=get_kitti360_camera_metadata(), lidar_metadata=get_kitti360_lidar_metadata(), - map_metadata=get_kitti360_map_metadata(split, log_name) + map_metadata=get_kitti360_map_metadata(split, log_name), ) - + log_needs_writing = log_writer.reset(self.dataset_converter_config, log_metadata) if log_needs_writing: _write_recording_table(log_name, log_writer, self.dataset_converter_config) - + log_writer.close() -def get_kitti360_camera_metadata() -> Dict[Union[PinholeCameraType, FisheyeMEICameraType], Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]]: - + +def get_kitti360_camera_metadata() -> ( + Dict[Union[PinholeCameraType, FisheyeMEICameraType], Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]] +): + persp = PATH_CALIB_ROOT / "perspective.txt" assert persp.exists() @@ -244,15 +267,17 @@ def get_kitti360_camera_metadata() -> Dict[Union[PinholeCameraType, FisheyeMEICa persp_result[f"image_{cam_id}"]["wh"] = [int(round(float(x))) for x in value.split()] elif key.startswith("D_"): persp_result[f"image_{cam_id}"]["distortion"] = [float(x) for x in value.split()] - + fisheye_camera02_path = PATH_CALIB_ROOT / "image_02.yaml" fisheye_camera03_path = PATH_CALIB_ROOT / "image_03.yaml" assert fisheye_camera02_path.exists() and fisheye_camera03_path.exists() fisheye02 = _readYAMLFile(fisheye_camera02_path) fisheye03 = _readYAMLFile(fisheye_camera03_path) fisheye_result = {"image_02": fisheye02, "image_03": fisheye03} - - log_cam_infos: Dict[Union[PinholeCameraType, FisheyeMEICameraType], Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]] = {} + + log_cam_infos: Dict[ + Union[PinholeCameraType, FisheyeMEICameraType], Union[PinholeCameraMetadata, FisheyeMEICameraMetadata] + ] = {} for 
cam_type, cam_name in KITTI360_CAMERA_TYPES.items(): if cam_name in ["image_00", "image_01"]: log_cam_infos[cam_type] = PinholeCameraMetadata( @@ -262,23 +287,23 @@ def get_kitti360_camera_metadata() -> Dict[Union[PinholeCameraType, FisheyeMEICa intrinsics=PinholeIntrinsics.from_camera_matrix(np.array(persp_result[cam_name]["intrinsic"])), distortion=PinholeDistortion.from_array(np.array(persp_result[cam_name]["distortion"])), ) - elif cam_name in ["image_02","image_03"]: + elif cam_name in ["image_02", "image_03"]: distortion_params = fisheye_result[cam_name]["distortion_parameters"] distortion = FisheyeMEIDistortion( - k1=distortion_params['k1'], - k2=distortion_params['k2'], - p1=distortion_params['p1'], - p2=distortion_params['p2'], + k1=distortion_params["k1"], + k2=distortion_params["k2"], + p1=distortion_params["p1"], + p2=distortion_params["p2"], ) - + projection_params = fisheye_result[cam_name]["projection_parameters"] projection = FisheyeMEIProjection( - gamma1=projection_params['gamma1'], - gamma2=projection_params['gamma2'], - u0=projection_params['u0'], - v0=projection_params['v0'], + gamma1=projection_params["gamma1"], + gamma2=projection_params["gamma2"], + u0=projection_params["u0"], + v0=projection_params["v0"], ) - + log_cam_infos[cam_type] = FisheyeMEICameraMetadata( camera_type=cam_type, width=fisheye_result[cam_name]["image_width"], @@ -290,6 +315,7 @@ def get_kitti360_camera_metadata() -> Dict[Union[PinholeCameraType, FisheyeMEICa return log_cam_infos + def _read_projection_matrix(p_line: str) -> np.ndarray: parts = p_line.split(" ", 1) if len(parts) != 2: @@ -299,44 +325,47 @@ def _read_projection_matrix(p_line: str) -> np.ndarray: K = P[:, :3] return K -def _readYAMLFile(fileName:Path) -> Dict[str, Any]: - '''make OpenCV YAML file compatible with python''' + +def _readYAMLFile(fileName: Path) -> Dict[str, Any]: + """make OpenCV YAML file compatible with python""" ret = {} - skip_lines=1 # Skip the first line which says "%YAML:1.0". Or replace it with "%YAML 1.0" + skip_lines = 1 # Skip the first line which says "%YAML:1.0". Or replace it with "%YAML 1.0" with open(fileName) as fin: for i in range(skip_lines): fin.readline() yamlFileOut = fin.read() - myRe = re.compile(r":([^ ])") # Add space after ":", if it doesn't exist. Python yaml requirement - yamlFileOut = myRe.sub(r': \1', yamlFileOut) + myRe = re.compile(r":([^ ])") # Add space after ":", if it doesn't exist. 
Python yaml requirement + yamlFileOut = myRe.sub(r": \1", yamlFileOut) ret = yaml.safe_load(yamlFileOut) return ret + def get_kitti360_lidar_metadata() -> Dict[LiDARType, LiDARMetadata]: metadata: Dict[LiDARType, LiDARMetadata] = {} extrinsic = get_lidar_extrinsic() extrinsic_state_se3 = StateSE3.from_transformation_matrix(extrinsic) + extrinsic_state_se3 = _extrinsic_from_imu_to_rear_axle(extrinsic_state_se3) metadata[LiDARType.LIDAR_TOP] = LiDARMetadata( lidar_type=LiDARType.LIDAR_TOP, lidar_index=Kitti360LidarIndex, - extrinsic=extrinsic_state_se3, + extrinsic=extrinsic_state_se3, ) return metadata + def _write_recording_table( - log_name: str, - log_writer: AbstractLogWriter, - data_converter_config: DatasetConverterConfig + log_name: str, log_writer: AbstractLogWriter, data_converter_config: DatasetConverterConfig ) -> None: - + ts_list: List[TimePoint] = _read_timestamps(log_name) ego_state_all, valid_timestamp = _extract_ego_state_all(log_name) - ego_states_xyz = np.array([ego_state.center.array[:3] for ego_state in ego_state_all],dtype=np.float64) - box_detection_wrapper_all = _extract_detections(log_name,len(ts_list),ego_states_xyz,valid_timestamp) + ego_states_xyz = np.array([ego_state.center.array[:3] for ego_state in ego_state_all], dtype=np.float64) + box_detection_wrapper_all = _extract_detections(log_name, len(ts_list), ego_states_xyz, valid_timestamp) logging.info(f"Number of valid timestamps with ego states: {len(valid_timestamp)}") + for idx in range(len(valid_timestamp)): valid_idx = valid_timestamp[idx] - + cameras = _extract_cameras(log_name, valid_idx, data_converter_config) lidars = _extract_lidar(log_name, valid_idx, data_converter_config) @@ -351,10 +380,6 @@ def _write_recording_table( route_lane_group_ids=None, ) - # if SORT_BY_TIMESTAMP: - # recording_table = open_arrow_table(log_file_path) - # recording_table = recording_table.sort_by([("timestamp", "ascending")]) - # write_arrow_table(recording_table, log_file_path) def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]: """ @@ -365,7 +390,7 @@ def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]: PATH_2D_RAW_ROOT / log_name / "image_00" / "timestamps.txt", PATH_2D_RAW_ROOT / log_name / "image_01" / "timestamps.txt", ] - + if log_name == "2013_05_28_drive_0002_sync": ts_files = ts_files[1:] @@ -377,22 +402,23 @@ def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]: s = line.strip() if not s: continue - dt_str, ns_str = s.split('.') + dt_str, ns_str = s.split(".") dt_obj = datetime.datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S") dt_obj = dt_obj.replace(tzinfo=datetime.timezone.utc) unix_epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc) - + total_seconds = (dt_obj - unix_epoch).total_seconds() - + ns_value = int(ns_str) us_from_ns = ns_value // 1000 total_us = int(total_seconds * 1_000_000) + us_from_ns - + tps.append(TimePoint.from_us(total_us)) return tps return None + def _extract_ego_state_all(log_name: str) -> Tuple[List[EgoStateSE3], List[int]]: ego_state_all: List[List[float]] = [] @@ -403,31 +429,29 @@ def _extract_ego_state_all(log_name: str) -> Tuple[List[EgoStateSE3], List[int]] poses = np.loadtxt(pose_file) poses_time = poses[:, 0].astype(np.int32) valid_timestamp: List[int] = list(poses_time) - - oxts_path = PATH_POSES_ROOT / log_name / "oxts" / "data" - + + oxts_path = PATH_POSES_ROOT / log_name / "oxts" / "data" + for idx in range(len(valid_timestamp)): oxts_path_file = oxts_path / f"{int(valid_timestamp[idx]):010d}.txt" oxts_data = 
np.loadtxt(oxts_path_file) - vehicle_parameters = get_kitti360_station_wagon_parameters() + vehicle_parameters = get_kitti360_vw_passat_parameters() - pos = idx - if log_name=="2013_05_28_drive_0004_sync" and pos == 0: + pos = idx + if log_name == "2013_05_28_drive_0004_sync" and pos == 0: pos = 1 - + # NOTE you can use oxts_data[3:6] as roll, pitch, yaw for simplicity - #roll, pitch, yaw = oxts_data[3:6] + # roll, pitch, yaw = oxts_data[3:6] r00, r01, r02 = poses[pos, 1:4] r10, r11, r12 = poses[pos, 5:8] r20, r21, r22 = poses[pos, 9:12] - R_mat = np.array([[r00, r01, r02], - [r10, r11, r12], - [r20, r21, r22]], dtype=np.float64) - R_mat_cali = R_mat @ KITTI3602NUPLAN_IMU_CALIBRATION[:3,:3] + R_mat = np.array([[r00, r01, r02], [r10, r11, r12], [r20, r21, r22]], dtype=np.float64) + R_mat_cali = R_mat @ KITTI3602NUPLAN_IMU_CALIBRATION[:3, :3] - ego_quaternion = EulerAngles.from_rotation_matrix(R_mat_cali).quaternion - rear_axle_pose = StateSE3( + ego_quaternion = Quaternion.from_rotation_matrix(R_mat_cali) + imu_pose = StateSE3( x=poses[pos, 4], y=poses[pos, 8], z=poses[pos, 12], @@ -437,6 +461,11 @@ def _extract_ego_state_all(log_name: str) -> Tuple[List[EgoStateSE3], List[int]] qz=ego_quaternion.qz, ) + rear_axle_pose = translate_se3_along_body_frame( + imu_pose, + Vector3D(0.05, -0.32, 0.0), + ) + center = rear_axle_se3_to_center_se3(rear_axle_se3=rear_axle_pose, vehicle_parameters=vehicle_parameters) dynamic_state = DynamicStateSE3( velocity=Vector3D( @@ -449,14 +478,14 @@ def _extract_ego_state_all(log_name: str) -> Tuple[List[EgoStateSE3], List[int]] y=oxts_data[15], z=oxts_data[16], ), - angular_velocity=Vector3D( + angular_velocity=Vector3D( x=oxts_data[20], y=oxts_data[21], z=oxts_data[22], ), ) ego_state_all.append( - EgoStateSE3( + EgoStateSE3( center_se3=center, dynamic_state_se3=dynamic_state, vehicle_parameters=vehicle_parameters, @@ -465,13 +494,14 @@ def _extract_ego_state_all(log_name: str) -> Tuple[List[EgoStateSE3], List[int]] ) return ego_state_all, valid_timestamp + def _extract_detections( log_name: str, ts_len: int, ego_states_xyz: np.ndarray, valid_timestamp: List[int], ) -> List[BoxDetectionWrapper]: - + detections_states: List[List[List[float]]] = [[] for _ in range(ts_len)] detections_velocity: List[List[List[float]]] = [[] for _ in range(ts_len)] detections_tokens: List[List[str]] = [[] for _ in range(ts_len)] @@ -483,37 +513,38 @@ def _extract_detections( bbox_3d_path = PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml" if not bbox_3d_path.exists(): raise FileNotFoundError(f"BBox 3D file not found: {bbox_3d_path}") - + tree = ET.parse(bbox_3d_path) root = tree.getroot() - detection_preprocess_path = PREPOCESS_DETECTION_DIR / f"{log_name}_detection_preprocessed.pkl" - if detection_preprocess_path.exists(): - with open(detection_preprocess_path, "rb") as f: - detection_preprocess_result = pickle.load(f) - static_records_dict = {record_item["global_id"]: record_item for record_item in detection_preprocess_result["static"]} - logging.info(f"Loaded detection preprocess data from {detection_preprocess_path}") - else: - detection_preprocess_result = None + detection_preprocess_path = PREPROCESS_DETECTION_DIR / f"{log_name}_detection_preprocessed.pkl" + if not detection_preprocess_path.exists(): + process_detection(log_name=log_name, radius_m=60.0, output_dir=PREPROCESS_DETECTION_DIR) + with open(detection_preprocess_path, "rb") as f: + detection_preprocess_result = pickle.load(f) + static_records_dict = { + record_item["global_id"]: record_item for record_item in 
detection_preprocess_result["static"] + } + logging.info(f"Loaded detection preprocess data from {detection_preprocess_path}") dynamic_objs: Dict[int, List[KITTI360Bbox3D]] = defaultdict(list) for child in root: - if child.find('semanticId') is not None: - semanticIdKITTI = int(child.find('semanticId').text) + if child.find("semanticId") is not None: + semanticIdKITTI = int(child.find("semanticId").text) name = kittiId2label[semanticIdKITTI].name else: - lable = child.find('label').text - name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(lable, 'unknown') - if child.find('transform') is None or name not in KITTI360_DETECTION_NAME_DICT.keys(): + lable = child.find("label").text + name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(lable, "unknown") + if child.find("transform") is None or name not in KITTI360_DETECTION_NAME_DICT.keys(): continue obj = KITTI360Bbox3D() obj.parseBbox(child) - - #static object + + # static object if obj.timestamp == -1: if detection_preprocess_result is None: - obj.filter_by_radius(ego_states_xyz,valid_timestamp,radius=50.0) + obj.filter_by_radius(ego_states_xyz, valid_timestamp, radius=50.0) else: obj.load_detection_preprocess(static_records_dict) for record in obj.valid_frames["records"]: @@ -521,7 +552,7 @@ def _extract_detections( detections_states[frame].append(obj.get_state_array()) detections_velocity[frame].append(np.array([0.0, 0.0, 0.0])) detections_tokens[frame].append(str(obj.globalID)) - detections_types[frame].append(KITTI360_DETECTION_NAME_DICT[obj.name]) + detections_types[frame].append(KITTI360_DETECTION_NAME_DICT[obj.name]) else: global_ID = obj.globalID dynamic_objs[global_ID].append(obj) @@ -530,22 +561,22 @@ def _extract_detections( for global_id, obj_list in dynamic_objs.items(): obj_list.sort(key=lambda obj: obj.timestamp) num_frames = len(obj_list) - + positions = [obj.get_state_array()[:3] for obj in obj_list] timestamps = [int(obj.timestamp) for obj in obj_list] velocities = [] for i in range(1, num_frames - 1): - dt_frames = timestamps[i+1] - timestamps[i-1] + dt_frames = timestamps[i + 1] - timestamps[i - 1] if dt_frames > 0: dt = dt_frames * KITTI360_DT - vel = (positions[i+1] - positions[i-1]) / dt - vel = KITTI3602NUPLAN_IMU_CALIBRATION[:3,:3] @ obj_list[i].Rm.T @ vel + vel = (positions[i + 1] - positions[i - 1]) / dt + vel = KITTI3602NUPLAN_IMU_CALIBRATION[:3, :3] @ obj_list[i].Rm.T @ vel else: vel = np.zeros(3) velocities.append(vel) - + if num_frames > 1: # first and last frame velocities.insert(0, velocities[0]) @@ -588,35 +619,38 @@ def _extract_detections( box_detection_wrapper_all.append(BoxDetectionWrapper(box_detections=box_detections)) return box_detection_wrapper_all + def _extract_lidar(log_name: str, idx: int, data_converter_config: DatasetConverterConfig) -> List[LiDARData]: - + lidars: List[LiDARData] = [] if data_converter_config.include_lidars: - #NOTE special case for sequence 2013_05_28_drive_0002_sync which has no lidar data before frame 4391 + # NOTE special case for sequence 2013_05_28_drive_0002_sync which has no lidar data before frame 4391 if log_name == "2013_05_28_drive_0002_sync" and idx <= 4390: return lidars - + lidar_full_path = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{idx:010d}.bin" + if lidar_full_path.exists(): - relative_path = f"data_3d_raw/{log_name}/velodyne_points/data/{idx:010d}.bin" + lidars.append( LiDARData( lidar_type=LiDARType.LIDAR_TOP, timestamp=None, iteration=idx, - dataset_root=PATH_3D_RAW_ROOT, - relative_path=relative_path, + dataset_root=KITTI360_DATA_ROOT, + 
relative_path=lidar_full_path.relative_to(KITTI360_DATA_ROOT), ) ) else: raise FileNotFoundError(f"LiDAR file not found: {lidar_full_path}") - + return lidars + def _extract_cameras( log_name: str, idx: int, data_converter_config: DatasetConverterConfig ) -> Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]]: - + camera_dict: Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]] = {} for camera_type, cam_dir_name in KITTI360_CAMERA_TYPES.items(): if cam_dir_name in ["image_00", "image_01"]: @@ -627,9 +661,9 @@ def _extract_cameras( cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt" if not cam2pose_txt.exists(): raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}") - - lastrow = np.array([0,0,0,1]).reshape(1,4) - with open(cam2pose_txt, 'r') as f: + + lastrow = np.array([0, 0, 0, 1]).reshape(1, 4) + with open(cam2pose_txt, "r") as f: for line in f: parts = line.strip().split() key = parts[0][:-1] @@ -639,6 +673,9 @@ def _extract_cameras( cam2pose = np.concatenate((matrix, lastrow)) cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose + camera_extrinsic = StateSE3.from_transformation_matrix(cam2pose) + camera_extrinsic = _extrinsic_from_imu_to_rear_axle(camera_extrinsic) + if img_path_png.exists(): if data_converter_config.camera_store_option == "path": camera_data = str(img_path_png) @@ -647,7 +684,12 @@ def _extract_cameras( camera_data = f.read() else: camera_data = None - - camera_extrinsic = StateSE3.from_transformation_matrix(cam2pose) + camera_dict[camera_type] = camera_data, camera_extrinsic return camera_dict + + +def _extrinsic_from_imu_to_rear_axle(extrinsic: StateSE3) -> StateSE3: + imu_se3 = StateSE3(x=-0.05, y=0.32, z=0.0, qw=1.0, qx=0.0, qy=0.0, qz=0.0) + rear_axle_se3 = StateSE3(x=0.0, y=0.0, z=0.0, qw=1.0, qx=0.0, qy=0.0, qz=0.0) + return StateSE3.from_array(convert_se3_array_between_origins(imu_se3, rear_axle_se3, extrinsic.array)) diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_helper.py b/src/py123d/conversion/datasets/kitti_360/kitti_360_helper.py index 8486329c..09d7d1e4 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti_360_helper.py +++ b/src/py123d/conversion/datasets/kitti_360/kitti_360_helper.py @@ -1,17 +1,15 @@ -import numpy as np - -from collections import defaultdict -from typing import Dict, Any, List, Tuple import copy +import os +from pathlib import Path +from typing import Any, Dict, List, Tuple + +import numpy as np from scipy.linalg import polar +from py123d.conversion.datasets.kitti_360.kitti_360_labels import BBOX_LABLES_TO_DETECTION_NAME_DICT, kittiId2label from py123d.geometry import BoundingBoxSE3, StateSE3 from py123d.geometry.polyline import Polyline3D from py123d.geometry.rotation import EulerAngles -from py123d.conversion.datasets.kitti_360.kitti_360_labels import kittiId2label,BBOX_LABLES_TO_DETECTION_NAME_DICT - -import os -from pathlib import Path KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"]) DIR_CALIB = "calibration" @@ -20,23 +18,29 @@ DEFAULT_ROLL = 0.0 DEFAULT_PITCH = 0.0 -kitti3602nuplan_imu_calibration_ideal = np.array([ +kitti3602nuplan_imu_calibration_ideal = np.array( + [ [1, 0, 0, 0], [0, -1, 0, 0], [0, 0, -1, 0], [0, 0, 0, 1], - ], dtype=np.float64) + ], + dtype=np.float64, +) KITTI3602NUPLAN_IMU_CALIBRATION = kitti3602nuplan_imu_calibration_ideal MAX_N = 1000 -def local2global(semanticId: int, instanceId: int) -> int: - globalId = semanticId*MAX_N + instanceId + + +def 
local2global(semanticId: int, instanceId: int) -> int: + globalId = semanticId * MAX_N + instanceId if isinstance(globalId, np.ndarray): return globalId.astype(np.int32) else: return int(globalId) - + + def global2local(globalId: int) -> Tuple[int, int]: semanticId = globalId // MAX_N instanceId = globalId % MAX_N @@ -45,7 +49,8 @@ def global2local(globalId: int) -> Tuple[int, int]: else: return int(semanticId), int(instanceId) -class KITTI360Bbox3D(): + +class KITTI360Bbox3D: # global id(only used for sequence 0004) dynamic_global_id = 2000000 @@ -68,34 +73,34 @@ def __init__(self): self.timestamp = -1 # name - self.name = '' + self.name = "" + + # label + self.label = "" - #label - self.label = '' - def parseBbox(self, child): - self.timestamp = int(child.find('timestamp').text) + self.timestamp = int(child.find("timestamp").text) - self.annotationId = int(child.find('index').text) + 1 + self.annotationId = int(child.find("index").text) + 1 - self.label = child.find('label').text + self.label = child.find("label").text - if child.find('semanticId') is None: - self.name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(self.label, 'unknown') - self.is_dynamic = int(child.find('dynamic').text) + if child.find("semanticId") is None: + self.name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(self.label, "unknown") + self.is_dynamic = int(child.find("dynamic").text) if self.is_dynamic != 0: - dynamicSeq = int(child.find('dynamicSeq').text) + dynamicSeq = int(child.find("dynamicSeq").text) self.globalID = KITTI360Bbox3D.dynamic_global_id + dynamicSeq else: self.globalID = KITTI360Bbox3D.static_global_id KITTI360Bbox3D.static_global_id += 1 else: - self.start_frame = int(child.find('start_frame').text) - self.end_frame = int(child.find('end_frame').text) - - semanticIdKITTI = int(child.find('semanticId').text) + self.start_frame = int(child.find("start_frame").text) + self.end_frame = int(child.find("end_frame").text) + + semanticIdKITTI = int(child.find("semanticId").text) self.semanticId = kittiId2label[semanticIdKITTI].id - self.instanceId = int(child.find('instanceId').text) + self.instanceId = int(child.find("instanceId").text) self.name = kittiId2label[semanticIdKITTI].name self.globalID = local2global(self.semanticId, self.instanceId) @@ -106,26 +111,26 @@ def parseBbox(self, child): self.parse_scale_rotation() def parseVertices(self, child): - transform = parseOpencvMatrix(child.find('transform')) - R = transform[:3,:3] - T = transform[:3,3] - vertices = parseOpencvMatrix(child.find('vertices')) + transform = parseOpencvMatrix(child.find("transform")) + R = transform[:3, :3] + T = transform[:3, 3] + vertices = parseOpencvMatrix(child.find("vertices")) self.vertices_template = copy.deepcopy(vertices) - + vertices = np.matmul(R, vertices.transpose()).transpose() + T self.vertices = vertices - + self.R = R self.T = T - + def parse_scale_rotation(self): - Rm, Sm = polar(self.R) + Rm, Sm = polar(self.R) if np.linalg.det(Rm) < 0: Rm[0] = -Rm[0] scale = np.diag(Sm) # yaw, pitch, roll = R.from_matrix(Rm).as_euler('zyx', degrees=False) euler_angles = EulerAngles.from_rotation_matrix(Rm) - yaw,pitch,roll = euler_angles.yaw, euler_angles.pitch, euler_angles.roll + yaw, pitch, roll = euler_angles.yaw, euler_angles.pitch, euler_angles.roll obj_quaternion = euler_angles.quaternion # obj_quaternion = EulerAngles(roll=roll, pitch=pitch, yaw=yaw).quaternion @@ -133,13 +138,13 @@ def parse_scale_rotation(self): self.Sm = np.array(Sm) self.scale = scale self.yaw = yaw - self.pitch = pitch + self.pitch = pitch self.roll 
= roll self.qw = obj_quaternion.qw self.qx = obj_quaternion.qx self.qy = obj_quaternion.qy self.qz = obj_quaternion.qz - + def get_state_array(self) -> np.ndarray: center = StateSE3( x=self.T[0], @@ -156,100 +161,110 @@ def get_state_array(self) -> np.ndarray: return bounding_box_se3.array def filter_by_radius(self, ego_state_xyz: np.ndarray, valid_timestamp: List[int], radius: float = 50.0) -> None: - ''' first stage of detection, used to filter out detections by radius ''' + """first stage of detection, used to filter out detections by radius""" d = np.linalg.norm(ego_state_xyz - self.T[None, :], axis=1) idxs = np.where(d <= radius)[0] for idx in idxs: - self.valid_frames["records"].append({ - "timestamp": valid_timestamp[idx], - "points_in_box": None, - }) + self.valid_frames["records"].append( + { + "timestamp": valid_timestamp[idx], + "points_in_box": None, + } + ) def box_visible_in_point_cloud(self, points: np.ndarray) -> Tuple[bool, int]: - ''' points: (N,3) , box: (8,3) ''' + """points: (N,3) , box: (8,3)""" box = self.vertices.copy() # avoid calculating ground point cloud z_offset = 0.1 - box[:,2] += z_offset + box[:, 2] += z_offset O, A, B, C = box[0], box[1], box[2], box[5] OA = A - O OB = B - O OC = C - O POA, POB, POC = (points @ OA[..., None])[:, 0], (points @ OB[..., None])[:, 0], (points @ OC[..., None])[:, 0] - mask = (np.dot(O, OA) < POA) & (POA < np.dot(A, OA)) & \ - (np.dot(O, OB) < POB) & (POB < np.dot(B, OB)) & \ - (np.dot(O, OC) < POC) & (POC < np.dot(C, OC)) - + mask = ( + (np.dot(O, OA) < POA) + & (POA < np.dot(A, OA)) + & (np.dot(O, OB) < POB) + & (POB < np.dot(B, OB)) + & (np.dot(O, OC) < POC) + & (POC < np.dot(C, OC)) + ) + points_in_box = np.sum(mask) visible = True if points_in_box > 40 else False return visible, points_in_box - + def load_detection_preprocess(self, records_dict: Dict[int, Any]): if self.globalID in records_dict: self.valid_frames["records"] = records_dict[self.globalID]["records"] -class KITTI360_MAP_Bbox3D(): + +class KITTI360_MAP_Bbox3D: def __init__(self): self.id = -1 - self.label = ' ' + self.label = " " self.vertices: Polyline3D = None self.R = None self.T = None - + def parseVertices_plane(self, child): - transform = parseOpencvMatrix(child.find('transform')) - R = transform[:3,:3] - T = transform[:3,3] - if child.find("transform_plane").find('rows').text == '0': - vertices = parseOpencvMatrix(child.find('vertices')) + transform = parseOpencvMatrix(child.find("transform")) + R = transform[:3, :3] + T = transform[:3, 3] + if child.find("transform_plane").find("rows").text == "0": + vertices = parseOpencvMatrix(child.find("vertices")) else: - vertices = parseOpencvMatrix(child.find('vertices_plane')) - + vertices = parseOpencvMatrix(child.find("vertices_plane")) + vertices = np.matmul(R, vertices.transpose()).transpose() + T self.vertices = Polyline3D.from_array(vertices) - + self.R = R self.T = T def parseBbox(self, child): - self.id = int(child.find('index').text) - self.label = child.find('label').text + self.id = int(child.find("index").text) + self.label = child.find("label").text self.parseVertices_plane(child) - + def parseOpencvMatrix(node): - rows = int(node.find('rows').text) - cols = int(node.find('cols').text) - data = node.find('data').text.split(' ') + rows = int(node.find("rows").text) + cols = int(node.find("cols").text) + data = node.find("data").text.split(" ") mat = [] for d in data: - d = d.replace('\n', '') - if len(d)<1: + d = d.replace("\n", "") + if len(d) < 1: continue mat.append(float(d)) mat = np.reshape(mat, 
[rows, cols]) return mat + def get_lidar_extrinsic() -> np.ndarray: cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt" if not cam2pose_txt.exists(): raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}") - + cam2velo_txt = PATH_CALIB_ROOT / "calib_cam_to_velo.txt" if not cam2velo_txt.exists(): raise FileNotFoundError(f"calib_cam_to_velo.txt file not found: {cam2velo_txt}") - - lastrow = np.array([0,0,0,1]).reshape(1,4) - with open(cam2pose_txt, 'r') as f: + lastrow = np.array([0, 0, 0, 1]).reshape(1, 4) + + with open(cam2pose_txt, "r") as f: image_00 = next(f) values = list(map(float, image_00.strip().split()[1:])) matrix = np.array(values).reshape(3, 4) cam2pose = np.concatenate((matrix, lastrow)) cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose - - cam2velo = np.concatenate((np.loadtxt(cam2velo_txt).reshape(3,4), lastrow)) - extrinsic = cam2pose @ np.linalg.inv(cam2velo) - return extrinsic \ No newline at end of file + + cam2velo = np.concatenate((np.loadtxt(cam2velo_txt).reshape(3, 4), lastrow)) + extrinsic = cam2pose @ np.linalg.inv(cam2velo) + + return extrinsic diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_labels.py b/src/py123d/conversion/datasets/kitti_360/kitti_360_labels.py index 7a58b113..6feafc1d 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti_360_labels.py +++ b/src/py123d/conversion/datasets/kitti_360/kitti_360_labels.py @@ -5,58 +5,50 @@ from collections import namedtuple - -#-------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------- # Definitions -#-------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------- # a label and all meta information -Label = namedtuple( 'Label' , [ - - 'name' , # The identifier of this label, e.g. 'car', 'person', ... . - # We use them to uniquely name a class - - 'id' , # An integer ID that is associated with this label. - # The IDs are used to represent the label in ground truth images - # An ID of -1 means that this label does not have an ID and thus - # is ignored when creating ground truth images (e.g. license plate). - # Do not modify these IDs, since exactly these IDs are expected by the - # evaluation server. - - 'kittiId' , # An integer ID that is associated with this label for KITTI-360 - # NOT FOR RELEASING - - 'trainId' , # Feel free to modify these IDs as suitable for your method. Then create - # ground truth images with train IDs, using the tools provided in the - # 'preparation' folder. However, make sure to validate or submit results - # to our evaluation server using the regular IDs above! - # For trainIds, multiple labels might have the same ID. Then, these labels - # are mapped to the same class in the ground truth images. For the inverse - # mapping, we use the label that is defined first in the list below. - # For example, mapping all void-type classes to the same ID in training, - # might make sense for some approaches. - # Max value is 255! - - 'category' , # The name of the category that this label belongs to - - 'categoryId' , # The ID of this category. Used to create ground truth images - # on category level. 
- - 'hasInstances', # Whether this label distinguishes between single instances or not - - 'ignoreInEval', # Whether pixels having this class as ground truth label are ignored - # during evaluations or not - - 'ignoreInInst', # Whether pixels having this class as ground truth label are ignored - # during evaluations of instance segmentation or not - - 'color' , # The color of this label - ] ) - - -#-------------------------------------------------------------------------------- +Label = namedtuple( + "Label", + [ + "name", # The identifier of this label, e.g. 'car', 'person', ... . + # We use them to uniquely name a class + "id", # An integer ID that is associated with this label. + # The IDs are used to represent the label in ground truth images + # An ID of -1 means that this label does not have an ID and thus + # is ignored when creating ground truth images (e.g. license plate). + # Do not modify these IDs, since exactly these IDs are expected by the + # evaluation server. + "kittiId", # An integer ID that is associated with this label for KITTI-360 + # NOT FOR RELEASING + "trainId", # Feel free to modify these IDs as suitable for your method. Then create + # ground truth images with train IDs, using the tools provided in the + # 'preparation' folder. However, make sure to validate or submit results + # to our evaluation server using the regular IDs above! + # For trainIds, multiple labels might have the same ID. Then, these labels + # are mapped to the same class in the ground truth images. For the inverse + # mapping, we use the label that is defined first in the list below. + # For example, mapping all void-type classes to the same ID in training, + # might make sense for some approaches. + # Max value is 255! + "category", # The name of the category that this label belongs to + "categoryId", # The ID of this category. Used to create ground truth images + # on category level. + "hasInstances", # Whether this label distinguishes between single instances or not + "ignoreInEval", # Whether pixels having this class as ground truth label are ignored + # during evaluations or not + "ignoreInInst", # Whether pixels having this class as ground truth label are ignored + # during evaluations of instance segmentation or not + "color", # The color of this label + ], +) + + +# -------------------------------------------------------------------------------- # A list of all labels -#-------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------- # Please adapt the train IDs as appropriate for your approach. # Note that you might want to ignore labels with ID 255 during training. 
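To make the table below concrete, a minimal sketch of the lookup dictionaries typically built over this list (the converter relies on a kittiId2label mapping of this kind; the other names are illustrative):

# Lookup tables over the `labels` list defined below.
name2label = {label.name: label for label in labels}
id2label = {label.id: label for label in labels}
kittiId2label = {label.kittiId: label for label in labels}

car = name2label["car"]
print(car.id, car.kittiId, car.color)  # -> 26 13 (0, 0, 142)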
@@ -66,68 +58,68 @@ labels = [ # name id kittiId, trainId category catId hasInstances ignoreInEval ignoreInInst color - Label( 'unlabeled' , 0 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ), - Label( 'ego vehicle' , 1 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ), - Label( 'rectification border' , 2 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ), - Label( 'out of roi' , 3 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ), - Label( 'static' , 4 , -1 , 255 , 'void' , 0 , False , True , True , ( 0, 0, 0) ), - Label( 'dynamic' , 5 , -1 , 255 , 'void' , 0 , False , True , True , (111, 74, 0) ), - Label( 'ground' , 6 , -1 , 255 , 'void' , 0 , False , True , True , ( 81, 0, 81) ), - Label( 'road' , 7 , 1 , 0 , 'flat' , 1 , False , False , False , (128, 64,128) ), - Label( 'sidewalk' , 8 , 3 , 1 , 'flat' , 1 , False , False , False , (244, 35,232) ), - Label( 'parking' , 9 , 2 , 255 , 'flat' , 1 , False , True , True , (250,170,160) ), - Label( 'rail track' , 10 , 10, 255 , 'flat' , 1 , False , True , True , (230,150,140) ), - Label( 'building' , 11 , 11, 2 , 'construction' , 2 , True , False , False , ( 70, 70, 70) ), - Label( 'wall' , 12 , 7 , 3 , 'construction' , 2 , False , False , False , (102,102,156) ), - Label( 'fence' , 13 , 8 , 4 , 'construction' , 2 , False , False , False , (190,153,153) ), - Label( 'guard rail' , 14 , 30, 255 , 'construction' , 2 , False , True , True , (180,165,180) ), - Label( 'bridge' , 15 , 31, 255 , 'construction' , 2 , False , True , True , (150,100,100) ), - Label( 'tunnel' , 16 , 32, 255 , 'construction' , 2 , False , True , True , (150,120, 90) ), - Label( 'pole' , 17 , 21, 5 , 'object' , 3 , True , False , True , (153,153,153) ), - Label( 'polegroup' , 18 , -1 , 255 , 'object' , 3 , False , True , True , (153,153,153) ), - Label( 'traffic light' , 19 , 23, 6 , 'object' , 3 , True , False , True , (250,170, 30) ), - Label( 'traffic sign' , 20 , 24, 7 , 'object' , 3 , True , False , True , (220,220, 0) ), - Label( 'vegetation' , 21 , 5 , 8 , 'nature' , 4 , False , False , False , (107,142, 35) ), - Label( 'terrain' , 22 , 4 , 9 , 'nature' , 4 , False , False , False , (152,251,152) ), - Label( 'sky' , 23 , 9 , 10 , 'sky' , 5 , False , False , False , ( 70,130,180) ), - Label( 'person' , 24 , 19, 11 , 'human' , 6 , True , False , False , (220, 20, 60) ), - Label( 'rider' , 25 , 20, 12 , 'human' , 6 , True , False , False , (255, 0, 0) ), - Label( 'car' , 26 , 13, 13 , 'vehicle' , 7 , True , False , False , ( 0, 0,142) ), - Label( 'truck' , 27 , 14, 14 , 'vehicle' , 7 , True , False , False , ( 0, 0, 70) ), - Label( 'bus' , 28 , 34, 15 , 'vehicle' , 7 , True , False , False , ( 0, 60,100) ), - Label( 'caravan' , 29 , 16, 255 , 'vehicle' , 7 , True , True , True , ( 0, 0, 90) ), - Label( 'trailer' , 30 , 15, 255 , 'vehicle' , 7 , True , True , True , ( 0, 0,110) ), - Label( 'train' , 31 , 33, 16 , 'vehicle' , 7 , True , False , False , ( 0, 80,100) ), - Label( 'motorcycle' , 32 , 17, 17 , 'vehicle' , 7 , True , False , False , ( 0, 0,230) ), - Label( 'bicycle' , 33 , 18, 18 , 'vehicle' , 7 , True , False , False , (119, 11, 32) ), - Label( 'garage' , 34 , 12, 2 , 'construction' , 2 , True , True , True , ( 64,128,128) ), - Label( 'gate' , 35 , 6 , 4 , 'construction' , 2 , False , True , True , (190,153,153) ), - Label( 'stop' , 36 , 29, 255 , 'construction' , 2 , True , True , True , (150,120, 90) ), - Label( 'smallpole' , 37 , 22, 5 , 'object' , 3 , True , True , True , (153,153,153) ), - Label( 'lamp' , 38 , 
25, 255 , 'object' , 3 , True , True , True , (0, 64, 64) ), - Label( 'trash bin' , 39 , 26, 255 , 'object' , 3 , True , True , True , (0, 128,192) ), - Label( 'vending machine' , 40 , 27, 255 , 'object' , 3 , True , True , True , (128, 64, 0) ), - Label( 'box' , 41 , 28, 255 , 'object' , 3 , True , True , True , (64, 64,128) ), - Label( 'unknown construction' , 42 , 35, 255 , 'void' , 0 , False , True , True , (102, 0, 0) ), - Label( 'unknown vehicle' , 43 , 36, 255 , 'void' , 0 , False , True , True , ( 51, 0, 51) ), - Label( 'unknown object' , 44 , 37, 255 , 'void' , 0 , False , True , True , ( 32, 32, 32) ), - Label( 'license plate' , -1 , -1, -1 , 'vehicle' , 7 , False , True , True , ( 0, 0,142) ), + Label("unlabeled", 0, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("ego vehicle", 1, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("rectification border", 2, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("out of roi", 3, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("static", 4, -1, 255, "void", 0, False, True, True, (0, 0, 0)), + Label("dynamic", 5, -1, 255, "void", 0, False, True, True, (111, 74, 0)), + Label("ground", 6, -1, 255, "void", 0, False, True, True, (81, 0, 81)), + Label("road", 7, 1, 0, "flat", 1, False, False, False, (128, 64, 128)), + Label("sidewalk", 8, 3, 1, "flat", 1, False, False, False, (244, 35, 232)), + Label("parking", 9, 2, 255, "flat", 1, False, True, True, (250, 170, 160)), + Label("rail track", 10, 10, 255, "flat", 1, False, True, True, (230, 150, 140)), + Label("building", 11, 11, 2, "construction", 2, True, False, False, (70, 70, 70)), + Label("wall", 12, 7, 3, "construction", 2, False, False, False, (102, 102, 156)), + Label("fence", 13, 8, 4, "construction", 2, False, False, False, (190, 153, 153)), + Label("guard rail", 14, 30, 255, "construction", 2, False, True, True, (180, 165, 180)), + Label("bridge", 15, 31, 255, "construction", 2, False, True, True, (150, 100, 100)), + Label("tunnel", 16, 32, 255, "construction", 2, False, True, True, (150, 120, 90)), + Label("pole", 17, 21, 5, "object", 3, True, False, True, (153, 153, 153)), + Label("polegroup", 18, -1, 255, "object", 3, False, True, True, (153, 153, 153)), + Label("traffic light", 19, 23, 6, "object", 3, True, False, True, (250, 170, 30)), + Label("traffic sign", 20, 24, 7, "object", 3, True, False, True, (220, 220, 0)), + Label("vegetation", 21, 5, 8, "nature", 4, False, False, False, (107, 142, 35)), + Label("terrain", 22, 4, 9, "nature", 4, False, False, False, (152, 251, 152)), + Label("sky", 23, 9, 10, "sky", 5, False, False, False, (70, 130, 180)), + Label("person", 24, 19, 11, "human", 6, True, False, False, (220, 20, 60)), + Label("rider", 25, 20, 12, "human", 6, True, False, False, (255, 0, 0)), + Label("car", 26, 13, 13, "vehicle", 7, True, False, False, (0, 0, 142)), + Label("truck", 27, 14, 14, "vehicle", 7, True, False, False, (0, 0, 70)), + Label("bus", 28, 34, 15, "vehicle", 7, True, False, False, (0, 60, 100)), + Label("caravan", 29, 16, 255, "vehicle", 7, True, True, True, (0, 0, 90)), + Label("trailer", 30, 15, 255, "vehicle", 7, True, True, True, (0, 0, 110)), + Label("train", 31, 33, 16, "vehicle", 7, True, False, False, (0, 80, 100)), + Label("motorcycle", 32, 17, 17, "vehicle", 7, True, False, False, (0, 0, 230)), + Label("bicycle", 33, 18, 18, "vehicle", 7, True, False, False, (119, 11, 32)), + Label("garage", 34, 12, 2, "construction", 2, True, True, True, (64, 128, 128)), + Label("gate", 35, 6, 4, "construction", 2, 
False, True, True, (190, 153, 153)), + Label("stop", 36, 29, 255, "construction", 2, True, True, True, (150, 120, 90)), + Label("smallpole", 37, 22, 5, "object", 3, True, True, True, (153, 153, 153)), + Label("lamp", 38, 25, 255, "object", 3, True, True, True, (0, 64, 64)), + Label("trash bin", 39, 26, 255, "object", 3, True, True, True, (0, 128, 192)), + Label("vending machine", 40, 27, 255, "object", 3, True, True, True, (128, 64, 0)), + Label("box", 41, 28, 255, "object", 3, True, True, True, (64, 64, 128)), + Label("unknown construction", 42, 35, 255, "void", 0, False, True, True, (102, 0, 0)), + Label("unknown vehicle", 43, 36, 255, "void", 0, False, True, True, (51, 0, 51)), + Label("unknown object", 44, 37, 255, "void", 0, False, True, True, (32, 32, 32)), + Label("license plate", -1, -1, -1, "vehicle", 7, False, True, True, (0, 0, 142)), ] -#-------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------- # Create dictionaries for a fast lookup -#-------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------- # Please refer to the main method below for example usages! # name to label object -name2label = { label.name : label for label in labels } +name2label = {label.name: label for label in labels} # id to label object -id2label = { label.id : label for label in labels } +id2label = {label.id: label for label in labels} # trainId to label object -trainId2label = { label.trainId : label for label in reversed(labels) } +trainId2label = {label.trainId: label for label in reversed(labels)} # KITTI-360 ID to cityscapes ID -kittiId2label = { label.kittiId : label for label in labels } +kittiId2label = {label.kittiId: label for label in labels} # category to list of label objects category2labels = {} for label in labels: @@ -137,9 +129,10 @@ else: category2labels[category] = [label] -#-------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------- # Assure single instance name -#-------------------------------------------------------------------------------- +# -------------------------------------------------------------------------------- + # returns the label name that describes a single instance (if possible) # e.g. 
input | output @@ -149,7 +142,7 @@ # foo | None # foogroup | None # skygroup | None -def assureSingleInstanceName( name ): +def assureSingleInstanceName(name): # if the name is known, it is not a group if name in name2label: return name @@ -157,9 +150,9 @@ def assureSingleInstanceName( name ): if not name.endswith("group"): return None # remove group - name = name[:-len("group")] + name = name[: -len("group")] # test if the new name exists - if not name in name2label: + if name not in name2label: return None # test if the new name denotes a label that actually has instances if not name2label[name].hasInstances: @@ -167,11 +160,12 @@ def assureSingleInstanceName( name ): # all good then return name + from py123d.datatypes.detections.box_detection_types import BoxDetectionType BBOX_LABLES_TO_DETECTION_NAME_DICT = { - 'car': 'car', - 'truck': 'truck', + "car": "car", + "truck": "truck", "bicycle": "bicycle", "trafficLight": "traffic light", "trailer": "trailer", diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_map_conversion.py b/src/py123d/conversion/datasets/kitti_360/kitti_360_map_conversion.py index c7653cc2..09975ca5 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti_360_map_conversion.py +++ b/src/py123d/conversion/datasets/kitti_360/kitti_360_map_conversion.py @@ -1,26 +1,25 @@ import os +import xml.etree.ElementTree as ET from pathlib import Path from typing import List import geopandas as gpd -import numpy as np import pandas as pd -import xml.etree.ElementTree as ET import shapely.geometry as geom +from py123d.conversion.datasets.kitti_360.kitti_360_helper import KITTI360_MAP_Bbox3D +from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter from py123d.conversion.utils.map_utils.road_edge.road_edge_2d_utils import ( get_road_edge_linear_rings, split_line_geometry_by_max_length, ) -from py123d.datatypes.maps.map_datatypes import RoadEdgeType -from py123d.geometry.polyline import Polyline3D -from py123d.conversion.datasets.kitti_360.kitti_360_helper import KITTI360_MAP_Bbox3D -from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter from py123d.datatypes.maps.cache.cache_map_objects import ( CacheGenericDrivable, - CacheWalkway, CacheRoadEdge, + CacheWalkway, ) +from py123d.datatypes.maps.map_datatypes import RoadEdgeType +from py123d.geometry.polyline import Polyline3D MAX_ROAD_EDGE_LENGTH = 100.0 # meters, used to filter out very long road edges @@ -38,6 +37,7 @@ # "driveway", ] + def _get_none_data() -> gpd.GeoDataFrame: ids = [] geometries = [] @@ -45,6 +45,7 @@ def _get_none_data() -> gpd.GeoDataFrame: gdf = gpd.GeoDataFrame(data, geometry=geometries) return gdf + def _extract_generic_drivable_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: ids: List[int] = [] outlines: List[geom.LineString] = [] @@ -59,6 +60,7 @@ def _extract_generic_drivable_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoData gdf = gpd.GeoDataFrame(data, geometry=geometries) return gdf + def _extract_walkway_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: ids: List[int] = [] outlines: List[geom.LineString] = [] @@ -74,6 +76,7 @@ def _extract_walkway_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: gdf = gpd.GeoDataFrame(data, geometry=geometries) return gdf + def _extract_road_edge_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: geometries: List[geom.Polygon] = [] for obj in objs: @@ -97,63 +100,52 @@ def convert_kitti360_map_with_writer(log_name: str, map_writer: AbstractMapWrite """ Convert KITTI-360 map data 
using the provided map writer. This function extracts map data from KITTI-360 XML files and writes them using the map writer interface. - + :param log_name: The name of the log to convert :param map_writer: The map writer to use for writing the converted map """ xml_path = PATH_3D_BBOX_ROOT / "train_full" / f"{log_name}.xml" if not xml_path.exists(): xml_path = PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml" - + if not xml_path.exists(): raise FileNotFoundError(f"BBox 3D file not found: {xml_path}") - + tree = ET.parse(xml_path) root = tree.getroot() objs: List[KITTI360_MAP_Bbox3D] = [] - + for child in root: - label = child.find('label').text + label = child.find("label").text if child.find("transform") is None or label not in KITTI360_MAP_BBOX: continue obj = KITTI360_MAP_Bbox3D() obj.parseBbox(child) objs.append(obj) - generic_drivable_gdf = _extract_generic_drivable_df(objs) walkway_gdf = _extract_walkway_df(objs) road_edge_gdf = _extract_road_edge_df(objs) - + for idx, row in generic_drivable_gdf.iterrows(): if not row.geometry.is_empty: - map_writer.write_generic_drivable( - CacheGenericDrivable( - object_id=idx, - geometry=row.geometry - ) - ) - + map_writer.write_generic_drivable(CacheGenericDrivable(object_id=idx, geometry=row.geometry)) + for idx, row in walkway_gdf.iterrows(): if not row.geometry.is_empty: - map_writer.write_walkway( - CacheWalkway( - object_id=idx, - geometry=row.geometry - ) - ) - + map_writer.write_walkway(CacheWalkway(object_id=idx, geometry=row.geometry)) + for idx, row in road_edge_gdf.iterrows(): if not row.geometry.is_empty: - if hasattr(row.geometry, 'exterior'): + if hasattr(row.geometry, "exterior"): road_edge_line = row.geometry.exterior else: road_edge_line = row.geometry - + map_writer.write_road_edge( CacheRoadEdge( object_id=idx, road_edge_type=RoadEdgeType.ROAD_EDGE_BOUNDARY, - polyline=Polyline3D.from_linestring(road_edge_line) + polyline=Polyline3D.from_linestring(road_edge_line), ) - ) \ No newline at end of file + ) diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_sensor_io.py b/src/py123d/conversion/datasets/kitti_360/kitti_360_sensor_io.py index 46318ea8..5a0cf7e1 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti_360_sensor_io.py +++ b/src/py123d/conversion/datasets/kitti_360/kitti_360_sensor_io.py @@ -1,34 +1,29 @@ +import logging from pathlib import Path - from typing import Dict -import numpy as np -import logging -from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata, LiDARType -from py123d.conversion.datasets.kitti_360.kitti_360_helper import get_lidar_extrinsic -def load_kitti360_lidar_pcs_from_file(filepath: Path) -> Dict[LiDARType, np.ndarray]: - if not filepath.exists(): - logging.warning(f"LiDAR file does not exist: {filepath}. 
Returning empty point cloud.") - return {LiDARType.LIDAR_TOP: np.zeros((1, 4), dtype=np.float32)} - - pcd = np.fromfile(filepath, dtype=np.float32) - pcd = np.reshape(pcd,[-1,4]) # [N,4] - - xyz = pcd[:, :3] - intensity = pcd[:, 3] - - ones = np.ones((xyz.shape[0], 1), dtype=pcd.dtype) - points_h = np.concatenate([xyz, ones], axis=1) #[N,4] +import numpy as np - transformed_h = get_lidar_extrinsic() @ points_h.T #[4,N] - # transformed_h = lidar_metadata.extrinsic.transformation_matrix @ points_h.T #[4,N] +from py123d.datatypes.scene.scene_metadata import LogMetadata +from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar.lidar_index import Kitti360LidarIndex +from py123d.geometry.se import StateSE3 +from py123d.geometry.transform.transform_se3 import convert_points_3d_array_between_origins - transformed_xyz = transformed_h[:3, :] # (3,N) - intensity_row = intensity[np.newaxis, :] # (1,N) +def load_kitti360_lidar_pcs_from_file(filepath: Path, log_metadata: LogMetadata) -> Dict[LiDARType, np.ndarray]: + if not filepath.exists(): + logging.warning(f"LiDAR file does not exist: {filepath}. Returning empty point cloud.") + return {LiDARType.LIDAR_TOP: np.zeros((1, len(Kitti360LidarIndex)), dtype=np.float32)} - point_cloud_4xN = np.vstack([transformed_xyz, intensity_row]).astype(np.float32) # (4,N) + lidar_extrinsic = log_metadata.lidar_metadata[LiDARType.LIDAR_TOP].extrinsic + lidar_pc = np.fromfile(filepath, dtype=np.float32) + lidar_pc = np.reshape(lidar_pc, [-1, len(Kitti360LidarIndex)]) - point_cloud_Nx4 = point_cloud_4xN.T # (N,4) + lidar_pc[..., Kitti360LidarIndex.XYZ] = convert_points_3d_array_between_origins( + from_origin=lidar_extrinsic, + to_origin=StateSE3(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0), + points_3d_array=lidar_pc[..., Kitti360LidarIndex.XYZ], + ) - return {LiDARType.LIDAR_TOP: point_cloud_Nx4} + return {LiDARType.LIDAR_TOP: lidar_pc} diff --git a/src/py123d/conversion/datasets/kitti_360/preprocess_detection.py b/src/py123d/conversion/datasets/kitti_360/preprocess_detection.py index 2f959b06..e99d6af5 100644 --- a/src/py123d/conversion/datasets/kitti_360/preprocess_detection.py +++ b/src/py123d/conversion/datasets/kitti_360/preprocess_detection.py @@ -9,18 +9,17 @@ """ from __future__ import annotations + +import concurrent.futures +import logging import os import pickle -import logging -import copy +import xml.etree.ElementTree as ET from pathlib import Path -from typing import Dict, List, Tuple, Optional, Any -from collections import defaultdict -import concurrent.futures +from typing import Any, Dict, List, Optional, Tuple import numpy as np import numpy.typing as npt -import xml.etree.ElementTree as ET KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"]) DIR_3D_RAW = "data_3d_raw" @@ -31,22 +30,34 @@ PATH_3D_BBOX_ROOT = KITTI360_DATA_ROOT / DIR_3D_BBOX PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES -from py123d.conversion.datasets.kitti_360.kitti_360_helper import KITTI360Bbox3D, KITTI3602NUPLAN_IMU_CALIBRATION, get_lidar_extrinsic -from py123d.conversion.datasets.kitti_360.kitti_360_labels import KITTI360_DETECTION_NAME_DICT, kittiId2label, BBOX_LABLES_TO_DETECTION_NAME_DICT +from py123d.conversion.datasets.kitti_360.kitti_360_helper import ( + KITTI3602NUPLAN_IMU_CALIBRATION, + KITTI360Bbox3D, + get_lidar_extrinsic, +) +from py123d.conversion.datasets.kitti_360.kitti_360_labels import ( + BBOX_LABLES_TO_DETECTION_NAME_DICT, + KITTI360_DETECTION_NAME_DICT, + kittiId2label, +) + def _bbox_xml_path(log_name: str) -> Path: if 
log_name == "2013_05_28_drive_0004_sync": return PATH_3D_BBOX_ROOT / "train_full" / f"{log_name}.xml" return PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml" + def _lidar_frame_path(log_name: str, frame_idx: int) -> Path: return PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{frame_idx:010d}.bin" + def _load_lidar_xyz(filepath: Path) -> np.ndarray: """Load one LiDAR frame and return Nx3 xyz.""" arr = np.fromfile(filepath, dtype=np.float32) return arr.reshape(-1, 4)[:, :3] + def _collect_static_objects(log_name: str) -> List[KITTI360Bbox3D]: """Parse XML and collect static objects with valid class names.""" xml_path = _bbox_xml_path(log_name) @@ -58,13 +69,13 @@ def _collect_static_objects(log_name: str) -> List[KITTI360Bbox3D]: static_objs: List[KITTI360Bbox3D] = [] for child in root: - if child.find('semanticId') is not None: - semanticIdKITTI = int(child.find('semanticId').text) + if child.find("semanticId") is not None: + semanticIdKITTI = int(child.find("semanticId").text) name = kittiId2label[semanticIdKITTI].name else: - lable = child.find('label').text - name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(lable, 'unknown') - timestamp = int(child.find('timestamp').text) # -1 for static objects + lable = child.find("label").text + name = BBOX_LABLES_TO_DETECTION_NAME_DICT.get(lable, "unknown") + timestamp = int(child.find("timestamp").text) # -1 for static objects if child.find("transform") is None or name not in KITTI360_DETECTION_NAME_DICT or timestamp != -1: continue obj = KITTI360Bbox3D() @@ -72,17 +83,18 @@ def _collect_static_objects(log_name: str) -> List[KITTI360Bbox3D]: static_objs.append(obj) return static_objs + def _collect_ego_states(log_name: str) -> Tuple[npt.NDArray[np.float64], list[int]]: """Load ego states from poses.txt.""" pose_file = PATH_POSES_ROOT / log_name / "poses.txt" if not pose_file.exists(): raise FileNotFoundError(f"Pose file not found: {pose_file}") - + poses = np.loadtxt(pose_file) poses_time = poses[:, 0].astype(np.int32) valid_timestamp: List[int] = list(poses_time) - + ego_states = [] for time_idx in range(len(valid_timestamp)): pos = time_idx @@ -90,15 +102,15 @@ def _collect_ego_states(log_name: str) -> Tuple[npt.NDArray[np.float64], list[in r00, r01, r02 = poses[pos, 1:4] r10, r11, r12 = poses[pos, 5:8] r20, r21, r22 = poses[pos, 9:12] - R_mat = np.array([[r00, r01, r02], - [r10, r11, r12], - [r20, r21, r22]], dtype=np.float64) - R_mat_cali = R_mat @ KITTI3602NUPLAN_IMU_CALIBRATION[:3,:3] - ego_state_xyz = np.array([ - poses[pos, 4], - poses[pos, 8], - poses[pos, 12], - ]) + R_mat = np.array([[r00, r01, r02], [r10, r11, r12], [r20, r21, r22]], dtype=np.float64) + R_mat_cali = R_mat @ KITTI3602NUPLAN_IMU_CALIBRATION[:3, :3] + ego_state_xyz = np.array( + [ + poses[pos, 4], + poses[pos, 8], + poses[pos, 12], + ] + ) state_item[:3, :3] = R_mat_cali state_item[:3, 3] = ego_state_xyz @@ -147,23 +159,25 @@ def process_one_frame(time_idx: int) -> None: if not lidar_path.exists(): logging.warning(f"[preprocess] {log_name}: LiDAR frame not found: {lidar_path}") return - + lidar_xyz = _load_lidar_xyz(lidar_path) # lidar to pose lidar_h = np.concatenate((lidar_xyz, np.ones((lidar_xyz.shape[0], 1), dtype=lidar_xyz.dtype)), axis=1) lidar_in_imu = lidar_h @ lidar_extrinsic.T - lidar_in_imu = lidar_in_imu[:,:3] + lidar_in_imu = lidar_in_imu[:, :3] # pose to world - lidar_in_world = lidar_in_imu @ ego_states[time_idx][:3,:3].T + ego_states[time_idx][:3,3] + lidar_in_world = lidar_in_imu @ ego_states[time_idx][:3, :3].T + ego_states[time_idx][:3, 3] for 
obj in static_objs: if not any(record["timestamp"] == valid_time_idx for record in obj.valid_frames["records"]): continue visible, points_in_box = obj.box_visible_in_point_cloud(lidar_in_world) if not visible: - obj.valid_frames["records"] = [record for record in obj.valid_frames["records"] if record["timestamp"] != valid_time_idx] + obj.valid_frames["records"] = [ + record for record in obj.valid_frames["records"] if record["timestamp"] != valid_time_idx + ] else: for record in obj.valid_frames["records"]: if record["timestamp"] == valid_time_idx: @@ -172,7 +186,7 @@ def process_one_frame(time_idx: int) -> None: max_workers = os.cpu_count() * 2 with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: - results = list(executor.map(process_one_frame, range(len(valid_timestamp)))) + list(executor.map(process_one_frame, range(len(valid_timestamp)))) # 4) Save pickle static_records: List[Dict[str, Any]] = [] @@ -192,8 +206,10 @@ def process_one_frame(time_idx: int) -> None: pickle.dump(payload, f) logging.info(f"[preprocess] saved: {out_path}") + if __name__ == "__main__": import argparse + logging.basicConfig(level=logging.INFO) parser = argparse.ArgumentParser(description="Precompute KITTI-360 detections filters") parser.add_argument("--log_name", default="2013_05_28_drive_0000_sync") diff --git a/src/py123d/conversion/log_writer/arrow_log_writer.py b/src/py123d/conversion/log_writer/arrow_log_writer.py index 46d39b75..532b7dda 100644 --- a/src/py123d/conversion/log_writer/arrow_log_writer.py +++ b/src/py123d/conversion/log_writer/arrow_log_writer.py @@ -15,8 +15,8 @@ from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper from py123d.datatypes.scene.arrow.utils.arrow_metadata_utils import add_log_metadata_to_arrow_schema from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICameraType +from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType from py123d.datatypes.sensors.lidar.lidar import LiDARType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3, EgoStateSE3Index diff --git a/src/py123d/conversion/map_writer/gpkg_map_writer.py b/src/py123d/conversion/map_writer/gpkg_map_writer.py index 5e68a411..d5acf041 100644 --- a/src/py123d/conversion/map_writer/gpkg_map_writer.py +++ b/src/py123d/conversion/map_writer/gpkg_map_writer.py @@ -188,45 +188,66 @@ def _write_line_layer(self, layer: MapLayer, line_object: AbstractLineMapObject) self._map_data[layer]["geometry"].append(line_object.shapely_linestring) -def _map_ids_to_integer( - map_dfs: Dict[MapLayer, gpd.GeoDataFrame], -) -> None: +def _map_ids_to_integer(map_dfs: Dict[MapLayer, gpd.GeoDataFrame]) -> None: + """Helper function to remap string IDs to integers in the map dataframes.""" # initialize id mappings lane_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.LANE]["id"]) + lane_group_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.LANE_GROUP]["id"]) + intersection_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.INTERSECTION]["id"]) + walkway_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.WALKWAY]["id"]) carpark_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.CARPARK]["id"]) generic_drivable_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.GENERIC_DRIVABLE]["id"]) - lane_group_id_mapping = 
IntIDMapping.from_series(map_dfs[MapLayer.LANE_GROUP]["id"]) - - # Adjust cross reference in map_dfs[MapLayer.LANE] and map_dfs[MapLayer.LANE_GROUP] - map_dfs[MapLayer.LANE]["lane_group_id"] = map_dfs[MapLayer.LANE]["lane_group_id"].map( - lane_group_id_mapping.str_to_int - ) - map_dfs[MapLayer.LANE_GROUP]["lane_ids"] = map_dfs[MapLayer.LANE_GROUP]["lane_ids"].apply( - lambda x: lane_id_mapping.map_list(x) - ) - - # Adjust predecessor/successor in map_dfs[MapLayer.LANE] and map_dfs[MapLayer.LANE_GROUP] - for column in ["predecessor_ids", "successor_ids"]: - map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply(lambda x: lane_id_mapping.map_list(x)) - map_dfs[MapLayer.LANE_GROUP][column] = map_dfs[MapLayer.LANE_GROUP][column].apply( + road_line_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.ROAD_LINE]["id"]) + road_edge_id_mapping = IntIDMapping.from_series(map_dfs[MapLayer.ROAD_EDGE]["id"]) + + # 1. Remap lane ids in LANE layer + if len(map_dfs[MapLayer.LANE]) > 0: + map_dfs[MapLayer.LANE]["id"] = map_dfs[MapLayer.LANE]["id"].map(lane_id_mapping.str_to_int) + map_dfs[MapLayer.LANE]["lane_group_id"] = map_dfs[MapLayer.LANE]["lane_group_id"].map( + lane_group_id_mapping.str_to_int + ) + for column in ["predecessor_ids", "successor_ids"]: + map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply(lambda x: lane_id_mapping.map_list(x)) + for column in ["left_lane_id", "right_lane_id"]: + map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply( + lambda x: str(lane_id_mapping.str_to_int[x]) if pd.notna(x) and x is not None else x + ) + + # 2. Remap lane group ids in LANE_GROUP + if len(map_dfs[MapLayer.LANE_GROUP]) > 0: + map_dfs[MapLayer.LANE_GROUP]["id"] = map_dfs[MapLayer.LANE_GROUP]["id"].map(lane_group_id_mapping.str_to_int) + map_dfs[MapLayer.LANE_GROUP]["lane_ids"] = map_dfs[MapLayer.LANE_GROUP]["lane_ids"].apply( + lambda x: lane_id_mapping.map_list(x) + ) + map_dfs[MapLayer.LANE_GROUP]["intersection_id"] = map_dfs[MapLayer.LANE_GROUP]["intersection_id"].map( + intersection_id_mapping.str_to_int + ) + for column in ["predecessor_ids", "successor_ids"]: + map_dfs[MapLayer.LANE_GROUP][column] = map_dfs[MapLayer.LANE_GROUP][column].apply( + lambda x: lane_group_id_mapping.map_list(x) + ) + + # 3. Remap lane group ids in INTERSECTION + if len(map_dfs[MapLayer.INTERSECTION]) > 0: + map_dfs[MapLayer.INTERSECTION]["id"] = map_dfs[MapLayer.INTERSECTION]["id"].map( + intersection_id_mapping.str_to_int + ) + map_dfs[MapLayer.INTERSECTION]["lane_group_ids"] = map_dfs[MapLayer.INTERSECTION]["lane_group_ids"].apply( lambda x: lane_group_id_mapping.map_list(x) ) - for column in ["left_lane_id", "right_lane_id"]: - map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply( - lambda x: str(lane_id_mapping.str_to_int[x]) if pd.notna(x) and x is not None else x + # 4. 
Remap ids in other layers + if len(map_dfs[MapLayer.WALKWAY]) > 0: + map_dfs[MapLayer.WALKWAY]["id"] = map_dfs[MapLayer.WALKWAY]["id"].map(walkway_id_mapping.str_to_int) + if len(map_dfs[MapLayer.CARPARK]) > 0: + map_dfs[MapLayer.CARPARK]["id"] = map_dfs[MapLayer.CARPARK]["id"].map(carpark_id_mapping.str_to_int) + if len(map_dfs[MapLayer.GENERIC_DRIVABLE]) > 0: + map_dfs[MapLayer.GENERIC_DRIVABLE]["id"] = map_dfs[MapLayer.GENERIC_DRIVABLE]["id"].map( + generic_drivable_id_mapping.str_to_int ) - - map_dfs[MapLayer.LANE]["id"] = map_dfs[MapLayer.LANE]["id"].map(lane_id_mapping.str_to_int) - map_dfs[MapLayer.WALKWAY]["id"] = map_dfs[MapLayer.WALKWAY]["id"].map(walkway_id_mapping.str_to_int) - map_dfs[MapLayer.CARPARK]["id"] = map_dfs[MapLayer.CARPARK]["id"].map(carpark_id_mapping.str_to_int) - map_dfs[MapLayer.GENERIC_DRIVABLE]["id"] = map_dfs[MapLayer.GENERIC_DRIVABLE]["id"].map( - generic_drivable_id_mapping.str_to_int - ) - map_dfs[MapLayer.LANE_GROUP]["id"] = map_dfs[MapLayer.LANE_GROUP]["id"].map(lane_group_id_mapping.str_to_int) - - map_dfs[MapLayer.INTERSECTION]["lane_group_ids"] = map_dfs[MapLayer.INTERSECTION]["lane_group_ids"].apply( - lambda x: lane_group_id_mapping.map_list(x) - ) + if len(map_dfs[MapLayer.ROAD_LINE]) > 0: + map_dfs[MapLayer.ROAD_LINE]["id"] = map_dfs[MapLayer.ROAD_LINE]["id"].map(road_line_id_mapping.str_to_int) + if len(map_dfs[MapLayer.ROAD_EDGE]) > 0: + map_dfs[MapLayer.ROAD_EDGE]["id"] = map_dfs[MapLayer.ROAD_EDGE]["id"].map(road_edge_id_mapping.str_to_int) diff --git a/src/py123d/conversion/registry/lidar_index_registry.py b/src/py123d/conversion/registry/lidar_index_registry.py index bbc97ab4..7a7891f8 100644 --- a/src/py123d/conversion/registry/lidar_index_registry.py +++ b/src/py123d/conversion/registry/lidar_index_registry.py @@ -60,6 +60,7 @@ class WOPDLidarIndex(LiDARIndex): Y = 4 Z = 5 + @register_lidar_index class Kitti360LidarIndex(LiDARIndex): X = 0 @@ -67,6 +68,7 @@ class Kitti360LidarIndex(LiDARIndex): Z = 2 INTENSITY = 3 + @register_lidar_index class AVSensorLidarIndex(LiDARIndex): """Argoverse Sensor LiDAR Indexing Scheme. 
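For orientation, the new `Kitti360LidarIndex` mirrors the raw KITTI-360 Velodyne record layout, so a frame decodes directly against it. A minimal sketch (the `.bin` filename is illustrative; `Kitti360LidarIndex.XYZ` is the same slice helper used by `load_kitti360_lidar_pcs_from_file` above):

    import numpy as np

    from py123d.conversion.registry.lidar_index_registry import Kitti360LidarIndex

    # Raw KITTI-360 velodyne frames are flat float32 buffers of [x, y, z, intensity] records.
    points = np.fromfile("0000000000.bin", dtype=np.float32).reshape(-1, len(Kitti360LidarIndex))
    xyz = points[:, Kitti360LidarIndex.XYZ]              # (N, 3) points in the sensor frame
    intensity = points[:, Kitti360LidarIndex.INTENSITY]  # (N,) reflectance values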
diff --git a/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py b/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py index 2cd2a0e5..cd918c05 100644 --- a/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py +++ b/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py @@ -36,7 +36,7 @@ def load_lidar_pcs_from_file( assert sensor_root is not None, f"Dataset path for sensor loading not found for dataset: {log_metadata.dataset}" full_lidar_path = Path(sensor_root) / relative_path - assert full_lidar_path.exists(), f"LiDAR file not found: {full_lidar_path}" + assert full_lidar_path.exists(), f"LiDAR file not found: {sensor_root} / {relative_path}" # NOTE: We move data specific import into if-else block, to avoid data specific import errors if log_metadata.dataset == "nuplan": @@ -58,17 +58,17 @@ def load_lidar_pcs_from_file( from py123d.conversion.datasets.pandaset.pandaset_sensor_io import load_pandaset_lidars_pcs_from_file lidar_pcs_dict = load_pandaset_lidars_pcs_from_file(full_lidar_path, index) - + elif log_metadata.dataset == "kitti360": from py123d.conversion.datasets.kitti_360.kitti_360_sensor_io import load_kitti360_lidar_pcs_from_file - - lidar_pcs_dict = load_kitti360_lidar_pcs_from_file(full_lidar_path) - + + lidar_pcs_dict = load_kitti360_lidar_pcs_from_file(full_lidar_path, log_metadata) + elif log_metadata.dataset == "nuscenes": from py123d.conversion.datasets.nuscenes.nuscenes_sensor_io import load_nuscenes_lidar_pcs_from_file lidar_pcs_dict = load_nuscenes_lidar_pcs_from_file(full_lidar_path, log_metadata) - + else: raise NotImplementedError(f"Loading LiDAR data for dataset {log_metadata.dataset} is not implemented.") diff --git a/src/py123d/datatypes/maps/abstract_map_objects.py b/src/py123d/datatypes/maps/abstract_map_objects.py index de43bc81..baea8d87 100644 --- a/src/py123d/datatypes/maps/abstract_map_objects.py +++ b/src/py123d/datatypes/maps/abstract_map_objects.py @@ -2,11 +2,10 @@ import abc from typing import List, Optional, Tuple, Union -from typing_extensions import TypeAlias - import shapely.geometry as geom import trimesh +from typing_extensions import TypeAlias from py123d.datatypes.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType from py123d.geometry import Polyline2D, Polyline3D, PolylineSE2 diff --git a/src/py123d/datatypes/scene/abstract_scene.py b/src/py123d/datatypes/scene/abstract_scene.py index 8bb2d381..cdad4033 100644 --- a/src/py123d/datatypes/scene/abstract_scene.py +++ b/src/py123d/datatypes/scene/abstract_scene.py @@ -7,8 +7,8 @@ from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper from py123d.datatypes.maps.abstract_map import AbstractMap from py123d.datatypes.scene.scene_metadata import LogMetadata, SceneExtractionMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType +from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 @@ -54,7 +54,9 @@ def get_route_lane_group_ids(self, iteration: int) -> Optional[List[int]]: raise NotImplementedError @abc.abstractmethod - def get_camera_at_iteration(self, iteration: int, camera_type: Union[PinholeCameraType, FisheyeMEICameraType]) -> Optional[Union[PinholeCamera, 
FisheyeMEICamera]]: + def get_camera_at_iteration( + self, iteration: int, camera_type: Union[PinholeCameraType, FisheyeMEICameraType] + ) -> Optional[Union[PinholeCamera, FisheyeMEICamera]]: raise NotImplementedError @abc.abstractmethod diff --git a/src/py123d/datatypes/scene/arrow/arrow_scene.py b/src/py123d/datatypes/scene/arrow/arrow_scene.py index 87aa038f..a3c4db55 100644 --- a/src/py123d/datatypes/scene/arrow/arrow_scene.py +++ b/src/py123d/datatypes/scene/arrow/arrow_scene.py @@ -19,8 +19,8 @@ ) from py123d.datatypes.scene.arrow.utils.arrow_metadata_utils import get_log_metadata_from_arrow from py123d.datatypes.scene.scene_metadata import LogMetadata, SceneExtractionMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType +from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 @@ -38,11 +38,11 @@ def __init__( self._log_metadata: LogMetadata = get_log_metadata_from_arrow(arrow_file_path) with pa.memory_map(str(self._arrow_file_path), "r") as source: - reader = pa.ipc.open_file(source) - table = reader.read_all() + reader = pa.ipc.open_file(source) + table = reader.read_all() num_rows = table.num_rows - initial_uuid = table['uuid'][0].as_py() - + initial_uuid = table["uuid"][0].as_py() + if scene_extraction_metadata is None: scene_extraction_metadata = SceneExtractionMetadata( initial_uuid=initial_uuid, @@ -128,7 +128,9 @@ def get_route_lane_group_ids(self, iteration: int) -> Optional[List[int]]: route_lane_group_ids = table["route_lane_group_ids"][self._get_table_index(iteration)].as_py() return route_lane_group_ids - def get_camera_at_iteration(self, iteration: int, camera_type: Union[PinholeCameraType, FisheyeMEICameraType]) -> Optional[Union[PinholeCamera, FisheyeMEICamera]]: + def get_camera_at_iteration( + self, iteration: int, camera_type: Union[PinholeCameraType, FisheyeMEICameraType] + ) -> Optional[Union[PinholeCamera, FisheyeMEICamera]]: camera: Optional[Union[PinholeCamera, FisheyeMEICamera]] = None if camera_type in self.available_camera_types: camera = get_camera_from_arrow_table( diff --git a/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py b/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py index 2d3eb1aa..1631b9e3 100644 --- a/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py +++ b/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py @@ -23,8 +23,8 @@ TrafficLightStatus, ) from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType +from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata, LiDARType from py123d.datatypes.sensors.lidar.lidar_index import DefaultLidarIndex from py123d.datatypes.time.time_point import TimePoint @@ -138,7 +138,7 @@ def get_camera_from_arrow_table( raise NotImplementedError("Only string file paths for camera data are supported.") camera_metadata = log_metadata.camera_metadata[camera_type] - if hasattr(camera_metadata, 'mirror_parameter') and 
camera_metadata.mirror_parameter is not None: + if hasattr(camera_metadata, "mirror_parameter") and camera_metadata.mirror_parameter is not None: return FisheyeMEICamera( metadata=camera_metadata, image=image, diff --git a/src/py123d/datatypes/scene/scene_filter.py b/src/py123d/datatypes/scene/scene_filter.py index f3d516a2..d4bada57 100644 --- a/src/py123d/datatypes/scene/scene_filter.py +++ b/src/py123d/datatypes/scene/scene_filter.py @@ -1,9 +1,9 @@ from dataclasses import dataclass from typing import List, Optional, Union -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICameraType -from py123d.datatypes.sensors.camera.utils import get_camera_type_by_value, deserialize_camera_type +from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType +from py123d.datatypes.sensors.camera.utils import deserialize_camera_type, get_camera_type_by_value # TODO: Add more filter options (e.g. scene tags, ego movement, or whatever appropriate) diff --git a/src/py123d/datatypes/scene/scene_metadata.py b/src/py123d/datatypes/scene/scene_metadata.py index ee91c70a..c7f4ae76 100644 --- a/src/py123d/datatypes/scene/scene_metadata.py +++ b/src/py123d/datatypes/scene/scene_metadata.py @@ -1,12 +1,12 @@ from __future__ import annotations from dataclasses import asdict, dataclass, field -from typing import Dict, Union, Optional +from typing import Dict, Optional, Union import py123d from py123d.datatypes.maps.map_metadata import MapMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraMetadata, PinholeCameraType from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICameraMetadata, FisheyeMEICameraType +from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraMetadata, PinholeCameraType from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType from py123d.datatypes.vehicle_state.vehicle_parameters import VehicleParameters @@ -21,7 +21,9 @@ class LogMetadata: timestep_seconds: float vehicle_parameters: Optional[VehicleParameters] = None - camera_metadata: Union[Dict[PinholeCameraType, PinholeCameraMetadata], Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata]] = field(default_factory=dict) + camera_metadata: Union[ + Dict[PinholeCameraType, PinholeCameraMetadata], Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata] + ] = field(default_factory=dict) lidar_metadata: Dict[LiDARType, LiDARMetadata] = field(default_factory=dict) map_metadata: Optional[MapMetadata] = None diff --git a/src/py123d/datatypes/sensors/camera/fisheye_mei_camera.py b/src/py123d/datatypes/sensors/camera/fisheye_mei_camera.py index 038ec2a4..afb27960 100644 --- a/src/py123d/datatypes/sensors/camera/fisheye_mei_camera.py +++ b/src/py123d/datatypes/sensors/camera/fisheye_mei_camera.py @@ -11,12 +11,14 @@ from py123d.common.utils.mixin import ArrayMixin from py123d.geometry.se import StateSE3 + class FisheyeMEICameraType(SerialIntEnum): """ Enum for fisheye cameras in d123. 
""" - #NOTE Use higher values to avoid conflicts with PinholeCameraType - CAM_L = 10 + + # NOTE Use higher values to avoid conflicts with PinholeCameraType + CAM_L = 10 CAM_R = 11 @@ -138,10 +140,14 @@ class FisheyeMEICameraMetadata: def from_dict(cls, data_dict: Dict[str, Any]) -> FisheyeMEICameraMetadata: data_dict["camera_type"] = FisheyeMEICameraType(data_dict["camera_type"]) data_dict["distortion"] = ( - FisheyeMEIDistortion.from_array(np.array(data_dict["distortion"])) if data_dict["distortion"] is not None else None + FisheyeMEIDistortion.from_array(np.array(data_dict["distortion"])) + if data_dict["distortion"] is not None + else None ) data_dict["projection"] = ( - FisheyeMEIProjection.from_array(np.array(data_dict["projection"])) if data_dict["projection"] is not None else None + FisheyeMEIProjection.from_array(np.array(data_dict["projection"])) + if data_dict["projection"] is not None + else None ) return FisheyeMEICameraMetadata(**data_dict) @@ -153,15 +159,15 @@ def to_dict(self) -> Dict[str, Any]: return data_dict def cam2image(self, points_3d: npt.NDArray[np.float64]) -> npt.NDArray[np.float64]: - ''' camera coordinate to image plane ''' + """camera coordinate to image plane""" norm = np.linalg.norm(points_3d, axis=1) - x = points_3d[:,0] / norm - y = points_3d[:,1] / norm - z = points_3d[:,2] / norm + x = points_3d[:, 0] / norm + y = points_3d[:, 1] / norm + z = points_3d[:, 2] / norm - x /= z+self.mirror_parameter - y /= z+self.mirror_parameter + x /= z + self.mirror_parameter + y /= z + self.mirror_parameter if self.distortion is not None: k1 = self.distortion.k1 @@ -178,11 +184,11 @@ def cam2image(self, points_3d: npt.NDArray[np.float64]) -> npt.NDArray[np.float6 gamma1 = gamma2 = 1.0 u0 = v0 = 0.0 - ro2 = x*x + y*y - x *= 1 + k1*ro2 + k2*ro2*ro2 - y *= 1 + k1*ro2 + k2*ro2*ro2 + ro2 = x * x + y * y + x *= 1 + k1 * ro2 + k2 * ro2 * ro2 + y *= 1 + k1 * ro2 + k2 * ro2 * ro2 - x = gamma1*x + u0 - y = gamma2*y + v0 + x = gamma1 * x + u0 + y = gamma2 * y + v0 - return x, y, norm * points_3d[:,2] / np.abs(points_3d[:,2]) + return x, y, norm * points_3d[:, 2] / np.abs(points_3d[:, 2]) diff --git a/src/py123d/datatypes/sensors/camera/utils.py b/src/py123d/datatypes/sensors/camera/utils.py index 504d1e46..9ed591b0 100644 --- a/src/py123d/datatypes/sensors/camera/utils.py +++ b/src/py123d/datatypes/sensors/camera/utils.py @@ -1,21 +1,25 @@ from typing import Union -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICameraType +from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType + def get_camera_type_by_value(value: int) -> Union[PinholeCameraType, FisheyeMEICameraType]: """Dynamically determine camera type based on value range.""" pinhole_values = [member.value for member in PinholeCameraType] fisheye_values = [member.value for member in FisheyeMEICameraType] - + if value in pinhole_values: return PinholeCameraType(value) elif value in fisheye_values: return FisheyeMEICameraType(value) else: - raise ValueError(f"Invalid camera type value: {value}. " - f"Valid PinholeCameraType values: {pinhole_values}, " - f"Valid FisheyeMEICameraType values: {fisheye_values}") + raise ValueError( + f"Invalid camera type value: {value}. 
" + f"Valid PinholeCameraType values: {pinhole_values}, " + f"Valid FisheyeMEICameraType values: {fisheye_values}" + ) + def deserialize_camera_type(camera_str: str) -> Union[PinholeCameraType, FisheyeMEICameraType]: """Deserialize camera type string to appropriate enum.""" @@ -23,14 +27,16 @@ def deserialize_camera_type(camera_str: str) -> Union[PinholeCameraType, Fisheye return PinholeCameraType.deserialize(camera_str) except (ValueError, KeyError): pass - + try: return FisheyeMEICameraType.deserialize(camera_str) except (ValueError, KeyError): pass - + pinhole_names = [member.name.lower() for member in PinholeCameraType] fisheye_names = [member.name.lower() for member in FisheyeMEICameraType] - raise ValueError(f"Unknown camera type: '{camera_str}'. " - f"Valid PinholeCameraType names: {pinhole_names}, " - f"Valid FisheyeMEICameraType names: {fisheye_names}") \ No newline at end of file + raise ValueError( + f"Unknown camera type: '{camera_str}'. " + f"Valid PinholeCameraType names: {pinhole_names}, " + f"Valid FisheyeMEICameraType names: {fisheye_names}" + ) diff --git a/src/py123d/datatypes/vehicle_state/vehicle_parameters.py b/src/py123d/datatypes/vehicle_state/vehicle_parameters.py index b68d1125..ca2a1944 100644 --- a/src/py123d/datatypes/vehicle_state/vehicle_parameters.py +++ b/src/py123d/datatypes/vehicle_state/vehicle_parameters.py @@ -92,19 +92,24 @@ def get_wopd_chrysler_pacifica_parameters() -> VehicleParameters: rear_axle_to_center_longitudinal=1.461, ) -def get_kitti360_station_wagon_parameters() -> VehicleParameters: - #NOTE: Parameters are estimated from the vehicle model. - #https://www.cvlibs.net/datasets/kitti-360/documentation.php + +def get_kitti360_vw_passat_parameters() -> VehicleParameters: + # The KITTI-360 dataset uses a 2006 VW Passat Variant B6. + # https://en.wikipedia.org/wiki/Volkswagen_Passat_(B6) + # [1] https://scispace.com/pdf/team-annieway-s-autonomous-system-18ql8b7kki.pdf + # NOTE: Parameters are estimated from the vehicle model. + # https://www.cvlibs.net/datasets/kitti-360/documentation.php return VehicleParameters( - vehicle_name="kitti360_station_wagon", - width=1.800, - length=3.500, - height=1.400, - wheel_base=2.710, - rear_axle_to_center_vertical=0.45, - rear_axle_to_center_longitudinal=2.71/2 + 0.05, + vehicle_name="kitti360_vw_passat", + width=1.820, + length=4.775, + height=1.516, + wheel_base=2.709, + rear_axle_to_center_vertical=1.516 / 2 - 0.9, + rear_axle_to_center_longitudinal=1.3369, ) + def get_av2_ford_fusion_hybrid_parameters() -> VehicleParameters: # NOTE: Parameters are estimated from the vehicle model. 
# https://en.wikipedia.org/wiki/Ford_Fusion_Hybrid#Second_generation diff --git a/src/py123d/geometry/transform/transform_se3.py b/src/py123d/geometry/transform/transform_se3.py index 8bf907ba..8f394772 100644 --- a/src/py123d/geometry/transform/transform_se3.py +++ b/src/py123d/geometry/transform/transform_se3.py @@ -206,10 +206,11 @@ def convert_points_3d_array_between_origins( assert points_3d_array.ndim >= 1 assert points_3d_array.shape[-1] == len(Point3DIndex) - abs_points = points_3d_array @ R_from.T + t_from - new_rel_points = (abs_points - t_to) @ R_to + R_rel = R_to.T @ R_from # Relative rotation matrix + t_rel = R_to.T @ (t_from - t_to) # Relative translation - return new_rel_points + conv_points_3d_array = (R_rel @ points_3d_array.T).T + t_rel + return conv_points_3d_array def translate_se3_along_z(state_se3: StateSE3, distance: float) -> StateSE3: diff --git a/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml b/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml index 1fadc982..ae1b1033 100644 --- a/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml +++ b/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml @@ -4,4 +4,3 @@ _convert_: 'all' # dataset_path: ${dataset_paths.py123d_data_root} logs_root: ${dataset_paths.py123d_logs_root} maps_root: ${dataset_paths.py123d_maps_root} - diff --git a/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml index 77cea31c..e85cfcab 100644 --- a/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml @@ -14,7 +14,7 @@ kitti360_dataset: # Map include_map: true - + # Ego include_ego: true diff --git a/src/py123d/script/run_conversion.py b/src/py123d/script/run_conversion.py index bb4fe510..c2510b9b 100644 --- a/src/py123d/script/run_conversion.py +++ b/src/py123d/script/run_conversion.py @@ -37,10 +37,17 @@ def main(cfg: DictConfig) -> None: logger.info(f"Processing dataset: {dataset_converter.__class__.__name__}") map_args = [{"map_index": i} for i in range(dataset_converter.get_number_of_maps())] + logger.info( + f"Found maps: {dataset_converter.get_number_of_maps()} for dataset: {dataset_converter.__class__.__name__}" + ) + worker_map(worker, partial(_convert_maps, cfg=cfg, dataset_converter=dataset_converter), map_args) logger.info(f"Finished maps: {dataset_converter.__class__.__name__}") log_args = [{"log_index": i} for i in range(dataset_converter.get_number_of_logs())] + logger.info( + f"Found logs: {dataset_converter.get_number_of_logs()} for dataset: {dataset_converter.__class__.__name__}" + ) worker_map(worker, partial(_convert_logs, cfg=cfg, dataset_converter=dataset_converter), log_args) logger.info(f"Finished logs: {dataset_converter.__class__.__name__}") diff --git a/src/py123d/script/run_viser.py b/src/py123d/script/run_viser.py index 302d3ce7..cc89e024 100644 --- a/src/py123d/script/run_viser.py +++ b/src/py123d/script/run_viser.py @@ -36,4 +36,4 @@ def main(cfg: DictConfig) -> None: if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/src/py123d/visualization/matplotlib/utils.py b/src/py123d/visualization/matplotlib/utils.py index 3c552eac..81c60260 100644 --- a/src/py123d/visualization/matplotlib/utils.py +++ b/src/py123d/visualization/matplotlib/utils.py @@ -34,19 +34,20 @@ def _add_element_helper(element: geom.Polygon): # Create path with 
exterior and interior rings def create_polygon_path(polygon): # Get exterior coordinates - exterior_coords = list(polygon.exterior.coords) + # NOTE: Only take first two dimensions in case of 3D coords + exterior_coords = np.array(polygon.exterior.coords)[:, :2].tolist() # Start with exterior ring - vertices = exterior_coords + vertices_2d = exterior_coords codes = [Path.MOVETO] + [Path.LINETO] * (len(exterior_coords) - 2) + [Path.CLOSEPOLY] # Add interior rings (holes) for interior in polygon.interiors: interior_coords = list(interior.coords) - vertices.extend(interior_coords) + vertices_2d.extend(interior_coords) codes.extend([Path.MOVETO] + [Path.LINETO] * (len(interior_coords) - 2) + [Path.CLOSEPOLY]) - return Path(vertices, codes) + return Path(vertices_2d, codes) path = create_polygon_path(element) diff --git a/test_viser.py b/test_viser.py index d5375bd7..6db46ac4 100644 --- a/test_viser.py +++ b/test_viser.py @@ -6,7 +6,8 @@ # from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType if __name__ == "__main__": - splits = ["nuscenes-mini_val", "nuscenes-mini_train"] + splits = ["kitti360"] + # splits = ["nuscenes-mini_val", "nuscenes-mini_train"] # splits = ["nuplan-mini_test", "nuplan-mini_train", "nuplan-mini_val"] # splits = ["nuplan_private_test"] # splits = ["carla_test"] @@ -14,17 +15,16 @@ # splits = ["av2-sensor_train"] # splits = ["pandaset_test", "pandaset_val", "pandaset_train"] # log_names = ["2021.08.24.13.12.55_veh-45_00386_00472"] - log_names = None - + log_names = ["2013_05_28_drive_0000_sync"] scene_uuids = None scene_filter = SceneFilter( split_names=splits, log_names=log_names, scene_uuids=scene_uuids, - duration_s=None, + duration_s=10.0, history_s=0.0, - timestamp_threshold_s=10.0, + timestamp_threshold_s=30.0, shuffle=True, # camera_types=[PinholeCameraType.CAM_F0], ) From a80de1d5e4e818ef55fd1ba55483e6c0aaddb329 Mon Sep 17 00:00:00 2001 From: Daniel Dauner Date: Sat, 1 Nov 2025 21:52:08 +0100 Subject: [PATCH 29/32] Remove the underscore for consistent `kitti360` naming in the package. 
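Hydra resolves `_target_` by import path, so the dataset config has to mirror the module rename. A minimal sketch of instantiating the renamed converter through the updated target (assuming the remaining constructor arguments of `Kitti360Converter` have defaults; the `splits` value matches `kitti360_dataset.yaml`):

    from hydra.utils import instantiate
    from omegaconf import OmegaConf

    # Hypothetical stand-alone usage; in practice Hydra composes this config from
    # src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml.
    cfg = OmegaConf.create(
        {
            "_target_": "py123d.conversion.datasets.kitti360.kitti360_converter.Kitti360Converter",
            "splits": ["kitti360"],
        }
    )
    converter = instantiate(cfg)  # imports the module, then calls Kitti360Converter(splits=["kitti360"])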
--- .../datasets/{kitti_360 => kitti360}/__init__.py | 0 .../kitti360_converter.py} | 10 +++++----- .../kitti360_map_conversion.py} | 2 +- .../kitti360_sensor_io.py} | 0 .../conversion/datasets/kitti360/utils/__init__.py | 0 .../utils/kitti360_helper.py} | 2 +- .../utils/kitti360_labels.py} | 0 .../utils}/preprocess_detection.py | 4 ++-- src/py123d/conversion/sensor_io/lidar/file_lidar_io.py | 2 +- .../config/conversion/datasets/kitti360_dataset.yaml | 2 +- 10 files changed, 11 insertions(+), 11 deletions(-) rename src/py123d/conversion/datasets/{kitti_360 => kitti360}/__init__.py (100%) rename src/py123d/conversion/datasets/{kitti_360/kitti_360_data_converter.py => kitti360/kitti360_converter.py} (98%) rename src/py123d/conversion/datasets/{kitti_360/kitti_360_map_conversion.py => kitti360/kitti360_map_conversion.py} (98%) rename src/py123d/conversion/datasets/{kitti_360/kitti_360_sensor_io.py => kitti360/kitti360_sensor_io.py} (100%) create mode 100644 src/py123d/conversion/datasets/kitti360/utils/__init__.py rename src/py123d/conversion/datasets/{kitti_360/kitti_360_helper.py => kitti360/utils/kitti360_helper.py} (98%) rename src/py123d/conversion/datasets/{kitti_360/kitti_360_labels.py => kitti360/utils/kitti360_labels.py} (100%) rename src/py123d/conversion/datasets/{kitti_360 => kitti360/utils}/preprocess_detection.py (98%) diff --git a/src/py123d/conversion/datasets/kitti_360/__init__.py b/src/py123d/conversion/datasets/kitti360/__init__.py similarity index 100% rename from src/py123d/conversion/datasets/kitti_360/__init__.py rename to src/py123d/conversion/datasets/kitti360/__init__.py diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py b/src/py123d/conversion/datasets/kitti360/kitti360_converter.py similarity index 98% rename from src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py rename to src/py123d/conversion/datasets/kitti360/kitti360_converter.py index d4d17d99..2bda2124 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti_360_data_converter.py +++ b/src/py123d/conversion/datasets/kitti360/kitti360_converter.py @@ -13,18 +13,18 @@ from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter from py123d.conversion.dataset_converter_config import DatasetConverterConfig -from py123d.conversion.datasets.kitti_360.kitti_360_helper import ( +from py123d.conversion.datasets.kitti360.kitti360_map_conversion import convert_kitti360_map_with_writer +from py123d.conversion.datasets.kitti360.utils.kitti360_helper import ( KITTI3602NUPLAN_IMU_CALIBRATION, KITTI360Bbox3D, get_lidar_extrinsic, ) -from py123d.conversion.datasets.kitti_360.kitti_360_labels import ( +from py123d.conversion.datasets.kitti360.utils.kitti360_labels import ( BBOX_LABLES_TO_DETECTION_NAME_DICT, KITTI360_DETECTION_NAME_DICT, kittiId2label, ) -from py123d.conversion.datasets.kitti_360.kitti_360_map_conversion import convert_kitti360_map_with_writer -from py123d.conversion.datasets.kitti_360.preprocess_detection import process_detection +from py123d.conversion.datasets.kitti360.utils.preprocess_detection import process_detection from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter from py123d.conversion.registry.lidar_index_registry import Kitti360LidarIndex @@ -122,7 +122,7 @@ def get_kitti360_map_metadata(split: str, log_name: str) -> MapMetadata: ) -class Kitti360DataConverter(AbstractDatasetConverter): +class 
Kitti360Converter(AbstractDatasetConverter): def __init__( self, splits: List[str], diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_map_conversion.py b/src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py similarity index 98% rename from src/py123d/conversion/datasets/kitti_360/kitti_360_map_conversion.py rename to src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py index 09975ca5..08562314 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti_360_map_conversion.py +++ b/src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py @@ -7,7 +7,7 @@ import pandas as pd import shapely.geometry as geom -from py123d.conversion.datasets.kitti_360.kitti_360_helper import KITTI360_MAP_Bbox3D +from py123d.conversion.datasets.kitti360.utils.kitti360_helper import KITTI360_MAP_Bbox3D from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter from py123d.conversion.utils.map_utils.road_edge.road_edge_2d_utils import ( get_road_edge_linear_rings, diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_sensor_io.py b/src/py123d/conversion/datasets/kitti360/kitti360_sensor_io.py similarity index 100% rename from src/py123d/conversion/datasets/kitti_360/kitti_360_sensor_io.py rename to src/py123d/conversion/datasets/kitti360/kitti360_sensor_io.py diff --git a/src/py123d/conversion/datasets/kitti360/utils/__init__.py b/src/py123d/conversion/datasets/kitti360/utils/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_helper.py b/src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py similarity index 98% rename from src/py123d/conversion/datasets/kitti_360/kitti_360_helper.py rename to src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py index 09d7d1e4..75eba581 100644 --- a/src/py123d/conversion/datasets/kitti_360/kitti_360_helper.py +++ b/src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py @@ -6,7 +6,7 @@ import numpy as np from scipy.linalg import polar -from py123d.conversion.datasets.kitti_360.kitti_360_labels import BBOX_LABLES_TO_DETECTION_NAME_DICT, kittiId2label +from py123d.conversion.datasets.kitti360.utils.kitti360_labels import BBOX_LABLES_TO_DETECTION_NAME_DICT, kittiId2label from py123d.geometry import BoundingBoxSE3, StateSE3 from py123d.geometry.polyline import Polyline3D from py123d.geometry.rotation import EulerAngles diff --git a/src/py123d/conversion/datasets/kitti_360/kitti_360_labels.py b/src/py123d/conversion/datasets/kitti360/utils/kitti360_labels.py similarity index 100% rename from src/py123d/conversion/datasets/kitti_360/kitti_360_labels.py rename to src/py123d/conversion/datasets/kitti360/utils/kitti360_labels.py diff --git a/src/py123d/conversion/datasets/kitti_360/preprocess_detection.py b/src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py similarity index 98% rename from src/py123d/conversion/datasets/kitti_360/preprocess_detection.py rename to src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py index e99d6af5..324cb337 100644 --- a/src/py123d/conversion/datasets/kitti_360/preprocess_detection.py +++ b/src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py @@ -30,12 +30,12 @@ PATH_3D_BBOX_ROOT = KITTI360_DATA_ROOT / DIR_3D_BBOX PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES -from py123d.conversion.datasets.kitti_360.kitti_360_helper import ( +from py123d.conversion.datasets.kitti360.utils.kitti360_helper import ( 
KITTI3602NUPLAN_IMU_CALIBRATION, KITTI360Bbox3D, get_lidar_extrinsic, ) -from py123d.conversion.datasets.kitti_360.kitti_360_labels import ( +from py123d.conversion.datasets.kitti360.utils.kitti360_labels import ( BBOX_LABLES_TO_DETECTION_NAME_DICT, KITTI360_DETECTION_NAME_DICT, kittiId2label, diff --git a/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py b/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py index cd918c05..ab94e578 100644 --- a/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py +++ b/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py @@ -60,7 +60,7 @@ def load_lidar_pcs_from_file( lidar_pcs_dict = load_pandaset_lidars_pcs_from_file(full_lidar_path, index) elif log_metadata.dataset == "kitti360": - from py123d.conversion.datasets.kitti_360.kitti_360_sensor_io import load_kitti360_lidar_pcs_from_file + from py123d.conversion.datasets.kitti360.kitti360_sensor_io import load_kitti360_lidar_pcs_from_file lidar_pcs_dict = load_kitti360_lidar_pcs_from_file(full_lidar_path, log_metadata) diff --git a/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml index e85cfcab..729b9587 100644 --- a/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml @@ -1,5 +1,5 @@ kitti360_dataset: - _target_: py123d.conversion.datasets.kitti_360.kitti_360_data_converter.Kitti360DataConverter + _target_: py123d.conversion.datasets.kitti360.kitti360_data_converter.Kitti360Converter _convert_: 'all' splits: ["kitti360"] From 0455f90ea19c162f0f483696ca7cb08b0d2c00b4 Mon Sep 17 00:00:00 2001 From: Daniel Dauner Date: Sun, 2 Nov 2025 18:37:48 +0100 Subject: [PATCH 30/32] Extract 3D road edges for KITTI-360, and a few minor general changes.
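The new road edges are built by merging all drivable polygons in 2D, taking the boundary rings of the merged area, and lifting every ring vertex back to 3D with a z value interpolated on the nearest segment of the original 3D road outlines. A minimal stand-alone sketch of the lifting step (plain numpy plus shapely >= 2.0, where STRtree.nearest returns an index; lift_rings_to_3d and its signature are illustrative, not the repository API, and outlines_3d is assumed to be a list of (K, 3) arrays):

    import numpy as np
    from shapely import LineString, Point
    from shapely.strtree import STRtree

    def lift_rings_to_3d(rings_2d, outlines_3d):
        """Assign a z value to every 2D ring vertex from the nearest 3D outline segment."""
        # Split the 3D outlines into 2-point segments; small geometries keep the
        # axis-aligned bounding boxes of the spatial index tight.
        segments = np.concatenate(
            [np.stack([o[:-1], o[1:]], axis=1) for o in outlines_3d], axis=0
        )  # shape (N, 2, 3)
        tree = STRtree([LineString(seg[:, :2]) for seg in segments])

        lifted = []
        for ring in rings_2d:
            pts = np.asarray(ring.coords, dtype=np.float64)  # shape (M, 2)
            out = np.zeros((len(pts), 3))
            out[:, :2] = pts
            for i, (x, y) in enumerate(pts):
                (x0, y0, z0), (x1, y1, z1) = segments[tree.nearest(Point(x, y))]
                d2 = (x1 - x0) ** 2 + (y1 - y0) ** 2
                # Project the vertex onto the segment and interpolate z linearly.
                t = 0.0 if d2 == 0.0 else min(max(((x - x0) * (x1 - x0) + (y - y0) * (y1 - y0)) / d2, 0.0), 1.0)
                out[i, 2] = z0 + t * (z1 - z0)
            lifted.append(out)
        return lifted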
--- notebooks/bev_matplotlib.ipynb | 140 ++------------- notebooks/bev_render.ipynb | 35 ++-- notebooks/camera_render.ipynb | 165 ++++++++++++++++++ pyproject.toml | 3 + .../av2/{utils => }/av2_map_conversion.py | 0 .../datasets/av2/av2_sensor_converter.py | 2 +- .../kitti360/kitti360_map_conversion.py | 135 ++++++-------- .../datasets/pandaset/pandaset_converter.py | 4 +- .../datasets/pandaset/pandaset_sensor_io.py | 2 +- .../{ => utils}/pandaset_constants.py | 0 .../pandaset/{ => utils}/pandaset_utlis.py | 0 .../scene_builder/default_scene_builder.yaml | 1 - .../conversion/datasets/kitti360_dataset.yaml | 2 +- src/py123d/visualization/matplotlib/camera.py | 17 +- src/py123d/visualization/matplotlib/plots.py | 3 +- .../viser/elements/render_elements.py | 51 +++++- .../visualization/viser/viser_viewer.py | 64 +++---- test_viser.py | 11 +- 18 files changed, 349 insertions(+), 286 deletions(-) create mode 100644 notebooks/camera_render.ipynb rename src/py123d/conversion/datasets/av2/{utils => }/av2_map_conversion.py (100%) rename src/py123d/conversion/datasets/pandaset/{ => utils}/pandaset_constants.py (100%) rename src/py123d/conversion/datasets/pandaset/{ => utils}/pandaset_utlis.py (100%) diff --git a/notebooks/bev_matplotlib.ipynb b/notebooks/bev_matplotlib.ipynb index 53fdcd15..44acc015 100644 --- a/notebooks/bev_matplotlib.ipynb +++ b/notebooks/bev_matplotlib.ipynb @@ -22,39 +22,31 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "# splits = [\"wopd_val\"]\n", - "# splits = [\"carla_test\"]\n", - "# splits = [\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n", - "# splits = [\"av2-sensor-mini_train\"]\n", - "# splits = [\"pandaset_train\"]\n", - "\n", - "# log_names = None\n", - "\n", - "from py123d.common.multithreading.worker_ray import RayDistributed\n", - "\n", - "\n", "splits = [\"kitti360\"]\n", - "\n", - "log_names = [\"2013_05_28_drive_0000_sync\"]\n", + "# splits = [\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n", + "# splits = [\"nuplan-mini_test\", \"nuplan-mini_train\", \"nuplan-mini_val\"]\n", + "# splits = [\"nuplan_private_test\"]\n", + "# splits = [\"carla_test\"]\n", + "# splits = [\"wopd_val\"]\n", + "# splits = [\"av2-sensor_train\"]\n", + "# splits = [\"pandaset_test\", \"pandaset_val\", \"pandaset_train\"]\n", + "log_names = None\n", "scene_uuids = None\n", "\n", "scene_filter = SceneFilter(\n", " split_names=splits,\n", " log_names=log_names,\n", " scene_uuids=scene_uuids,\n", - " duration_s=10.0,\n", + " duration_s=30.0,\n", " history_s=0.0,\n", - " timestamp_threshold_s=30,\n", + " timestamp_threshold_s=30.0,\n", " shuffle=True,\n", - " # camera_types=[CameraType.CAM_F0],\n", + " # camera_types=[PinholeCameraType.CAM_F0],\n", ")\n", "scene_builder = ArrowSceneBuilder()\n", "worker = Sequential()\n", - "# worker = RayDistributed()\n", "scenes = scene_builder.get_scenes(scene_filter, worker)\n", - "\n", - "print(f\"Found {len(scenes)} scenes\")" + "print(f\"Found {len(scenes)} scenes\")\n" ] }, { @@ -291,113 +283,7 @@ "metadata": {}, "outputs": [], "source": [ - "import shapely\n", - "from py123d.conversion.utils.map_utils.road_edge.road_edge_2d_utils import get_road_edge_linear_rings\n", - "\n", - "# from py123d.conversion.utils.map_utils.road_edge.road_edge_3d_utils import lift_road_edges_to_3d\n", - "from py123d.conversion.utils.map_utils.road_edge.road_edge_3d_utils import (\n", - " _interpolate_z_on_segment,\n", - " _split_continuous_segments,\n", - ")\n", - "from py123d.geometry.geometry_index import Point3DIndex\n", - "from 
py123d.geometry.occupancy_map import OccupancyMap2D\n", - "from py123d.geometry.polyline import Polyline3D\n", - "\n", - "\n", - "fix, ax = plt.subplots()\n", - "\n", - "\n", - "def lift_outlines_to_3d(\n", - " outlines_2d: List[shapely.LinearRing],\n", - " boundaries: List[Polyline3D],\n", - " max_distance: float = 10.0,\n", - ") -> List[Polyline3D]:\n", - " \"\"\"Lift 2D road edges to 3D by querying elevation from boundary segments.\n", - "\n", - " :param road_edges_2d: List of 2D road edge geometries.\n", - " :param boundaries: List of 3D boundary geometries.\n", - " :param max_distance: Maximum 2D distance for edge-boundary association.\n", - " :return: List of lifted 3D road edge geometries.\n", - " \"\"\"\n", - "\n", - " outlines_3d: List[Polyline3D] = []\n", - "\n", - " if len(outlines_2d) >= 1 and len(boundaries) >= 1:\n", - "\n", - " # 1. Build comprehensive spatial index with all boundary segments\n", - " # NOTE @DanielDauner: We split each boundary polyline into small segments.\n", - " # The spatial indexing uses axis-aligned bounding boxes, where small geometries lead to better performance.\n", - " boundary_segments = []\n", - " for boundary in boundaries:\n", - " coords = boundary.array.reshape(-1, 1, 3)\n", - " segment_coords_boundary = np.concatenate([coords[:-1], coords[1:]], axis=1)\n", - " boundary_segments.append(segment_coords_boundary)\n", - "\n", - " boundary_segments = np.concatenate(boundary_segments, axis=0)\n", - " boundary_segment_linestrings = shapely.creation.linestrings(boundary_segments)\n", - " occupancy_map = OccupancyMap2D(boundary_segment_linestrings)\n", - "\n", - " for linear_ring in outlines_2d:\n", - " points_2d = np.array(linear_ring.coords, dtype=np.float64)\n", - " points_3d = np.zeros((len(points_2d), len(Point3DIndex)), dtype=np.float64)\n", - " points_3d[..., Point3DIndex.XY] = points_2d\n", - "\n", - " # 3. Batch query for all points\n", - " query_points = shapely.creation.points(points_2d)\n", - " results = occupancy_map.query_nearest(query_points, max_distance=max_distance, exclusive=True)\n", - "\n", - " for query_idx, geometry_idx in zip(*results):\n", - " query_point = query_points[query_idx]\n", - " segment_coords = boundary_segments[geometry_idx]\n", - " best_z = _interpolate_z_on_segment(query_point, segment_coords)\n", - " points_3d[query_idx, 2] = best_z\n", - "\n", - " outlines_3d.append(Polyline3D.from_array(points_3d))\n", - "\n", - " return outlines_3d\n", - "\n", - "\n", - "def _extract_intersection_outline(lane_groups: List[AbstractLaneGroup], junction_id: str = 0) -> Polyline3D:\n", - " \"\"\"Helper method to extract intersection outline in 3D from lane group helpers.\"\"\"\n", - "\n", - " # 1. Extract the intersection outlines in 2D\n", - " intersection_polygons: List[shapely.Polygon] = [\n", - " lane_group_helper.shapely_polygon for lane_group_helper in lane_groups\n", - " ]\n", - " # for intersection_polygon in intersection_polygons:\n", - " # ax.plot(*intersection_polygon.exterior.xy)\n", - "\n", - " # for lane_group_helper in lane_groups:\n", - " # ax.plot(*lane_group_helper.outline.linestring.xy, color=\"blue\")\n", - " intersection_edges = get_road_edge_linear_rings(intersection_polygons, add_interiors=False)\n", - "\n", - " # for linear_ring in intersection_edges:\n", - " # ax.plot(*linear_ring.xy, color=\"blue\")\n", - "\n", - " # 2. 
Lift the 2D outlines to 3D\n", - " lane_group_outlines: List[Polyline3D] = [lane_group_helper.outline_3d for lane_group_helper in lane_groups]\n", - " intersection_outlines = lift_outlines_to_3d(intersection_edges, lane_group_outlines)\n", - "\n", - " print(len(intersection_outlines))\n", - "\n", - " # NOTE: When the intersection has multiple non-overlapping outlines, we cannot return a single outline in 3D.\n", - " # For now, we return the longest outline.\n", - "\n", - " valid_outlines = [outline for outline in intersection_outlines if outline.array.shape[0] > 2]\n", - " assert len(valid_outlines) > 0, f\"No valid intersection outlines found for Junction {junction_id}!\"\n", - "\n", - " longest_outline = max(valid_outlines, key=lambda outline: outline.length)\n", - "\n", - " # for linear_ring in intersection_outlines:\n", - " # ax.plot(*linear_ring.linestring.xy, color=\"red\")\n", - "\n", - " # ax.plot(*longest_outline.linestring.xy, color=\"red\")\n", - " # longest_outline.line\n", - " print(longest_outline.array[:, 2])\n", - " return longest_outline\n", - "\n", - "\n", - "_extract_intersection_outline(lane_groups)" + "asd" ] }, { diff --git a/notebooks/bev_render.ipynb b/notebooks/bev_render.ipynb index 6e84c122..c6eb260f 100644 --- a/notebooks/bev_render.ipynb +++ b/notebooks/bev_render.ipynb @@ -21,35 +21,36 @@ "metadata": {}, "outputs": [], "source": [ - "\n", - "# splits = [\"wopd_val\"]\n", - "splits = [\"carla_test\"]\n", - "# splits = [\"nuplan-mini_test\"]\n", - "# splits = [\"av2-sensor-mini_train\"]\n", - "# splits = [\"pandaset_train\"]\n", - "# log_names = None\n", - "\n", - "\n", - "\n", + "# splits = [\"kitti360\"]\n", + "# splits = [\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n", + "# splits = [\"nuplan-mini_test\", \"nuplan-mini_train\", \"nuplan-mini_val\"]\n", + "# splits = [\"nuplan_private_test\"]\n", + "# splits = [\"carla_test\"]\n", + "splits = [\"wopd_val\"]\n", + "# splits = [\"av2-sensor_train\"]\n", + "# splits = [\"pandaset_test\", \"pandaset_val\", \"pandaset_train\"]\n", + "# log_names = [\"2021.08.24.13.12.55_veh-45_00386_00472\"]\n", + "# log_names = [\"2013_05_28_drive_0000_sync\"]\n", + "# log_names = [\"2013_05_28_drive_0000_sync\"]\n", "log_names = None\n", - "scene_uuids = None\n", + "scene_uuids = [\"9727e2b3-46b0-51bd-84a9-c516c0993045\"]\n", "\n", "scene_filter = SceneFilter(\n", " split_names=splits,\n", " log_names=log_names,\n", " scene_uuids=scene_uuids,\n", - " duration_s=20.0,\n", + " duration_s=None,\n", " history_s=0.0,\n", - " timestamp_threshold_s=20,\n", + " timestamp_threshold_s=None,\n", " shuffle=True,\n", - " # camera_types=[CameraType.CAM_F0],\n", + " # camera_types=[PinholeCameraType.CAM_F0],\n", ")\n", "scene_builder = ArrowSceneBuilder()\n", "worker = Sequential()\n", - "# worker = RayDistributed()\n", "scenes = scene_builder.get_scenes(scene_filter, worker)\n", "\n", - "print(f\"Found {len(scenes)} scenes\")" + "scenes = [scene for scene in scenes if scene.uuid in scene_uuids]\n", + "print(f\"Found {len(scenes)} scenes\")\n" ] }, { @@ -61,7 +62,7 @@ "source": [ "from py123d.visualization.matplotlib.plots import render_scene_animation\n", "\n", - "for i in [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]:\n", + "for i in [0]:\n", " render_scene_animation(scenes[i], output_path=\"test\", format=\"mp4\", fps=20, step=1, radius=50)" ] }, diff --git a/notebooks/camera_render.ipynb b/notebooks/camera_render.ipynb new file mode 100644 index 00000000..4cb5fd50 --- /dev/null +++ b/notebooks/camera_render.ipynb @@ -0,0 +1,165 @@ +{ + "cells": [ + { 
+ "cell_type": "code", + "execution_count": null, + "id": "0", + "metadata": {}, + "outputs": [], + "source": [ + "from py123d.datatypes.scene.arrow.arrow_scene_builder import ArrowSceneBuilder\n", + "from py123d.datatypes.scene.scene_filter import SceneFilter\n", + "\n", + "from py123d.common.multithreading.worker_sequential import Sequential\n", + "from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType\n", + "\n", + "KITTI360_DATA_ROOT = \"/home/daniel/kitti_360/KITTI-360\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": {}, + "outputs": [], + "source": [ + "# splits = [\"kitti360\"]\n", + "# splits = [\"nuscenes-mini_val\", \"nuscenes-mini_train\"]\n", + "# splits = [\"nuplan-mini_test\", \"nuplan-mini_train\", \"nuplan-mini_val\"]\n", + "# splits = [\"nuplan_private_test\"]\n", + "# splits = [\"carla_test\"]\n", + "splits = [\"wopd_val\"]\n", + "# splits = [\"av2-sensor_train\"]\n", + "# splits = [\"pandaset_test\", \"pandaset_val\", \"pandaset_train\"]\n", + "# log_names = [\"2021.08.24.13.12.55_veh-45_00386_00472\"]\n", + "# log_names = [\"2013_05_28_drive_0000_sync\"]\n", + "# log_names = [\"2013_05_28_drive_0000_sync\"]\n", + "log_names = None\n", + "scene_uuids = [\"9727e2b3-46b0-51bd-84a9-c516c0993045\"]\n", + "\n", + "scene_filter = SceneFilter(\n", + " split_names=splits,\n", + " log_names=log_names,\n", + " scene_uuids=scene_uuids,\n", + " duration_s=None,\n", + " history_s=0.0,\n", + " timestamp_threshold_s=None,\n", + " shuffle=True,\n", + " # camera_types=[PinholeCameraType.CAM_F0],\n", + ")\n", + "scene_builder = ArrowSceneBuilder()\n", + "worker = Sequential()\n", + "scenes = scene_builder.get_scenes(scene_filter, worker)\n", + "\n", + "scenes = [scene for scene in scenes if scene.uuid in scene_uuids]\n", + "print(f\"Found {len(scenes)} scenes\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": {}, + "outputs": [], + "source": [ + "from matplotlib import pyplot as plt\n", + "from py123d.datatypes.scene.abstract_scene import AbstractScene\n", + "from py123d.visualization.matplotlib.camera import add_box_detections_to_camera_ax, add_camera_ax\n", + "import imageio\n", + "import numpy as np\n", + "\n", + "iteration = 0\n", + "scene = scenes[0]\n", + "\n", + "scene: AbstractScene\n", + "fps = 15 # frames per second\n", + "output_file = f\"camera_{scene.log_metadata.split}_{scene.uuid}.mp4\"\n", + "\n", + "writer = imageio.get_writer(output_file, fps=fps)\n", + "\n", + "scale = 3.0\n", + "fig, ax = plt.subplots(2, 3, figsize=(scale * 6, scale * 2.5))\n", + "\n", + "\n", + "camera_type = PinholeCameraType.CAM_F0\n", + "\n", + "for i in range(scene.number_of_iterations):\n", + " camera = scene.get_camera_at_iteration(i, camera_type)\n", + " box_detections = scene.get_box_detections_at_iteration(i)\n", + " ego_state = scene.get_ego_state_at_iteration(i)\n", + "\n", + " _, image = add_box_detections_to_camera_ax(\n", + " None,\n", + " camera,\n", + " box_detections,\n", + " ego_state,\n", + " return_image=True,\n", + " )\n", + " writer.append_data(image)\n", + "\n", + "writer.close()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": 
"code", + "execution_count": null, + "id": "6", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "py123d", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pyproject.toml b/pyproject.toml index 9284be71..267ec19d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -109,6 +109,9 @@ waymo = [ "tensorflow==2.13.0", "waymo-open-dataset-tf-2-12-0==1.6.6", ] +ffmpeg = [ + "imageio[ffmpeg]", +] [tool.setuptools.packages.find] where = ["src"] diff --git a/src/py123d/conversion/datasets/av2/utils/av2_map_conversion.py b/src/py123d/conversion/datasets/av2/av2_map_conversion.py similarity index 100% rename from src/py123d/conversion/datasets/av2/utils/av2_map_conversion.py rename to src/py123d/conversion/datasets/av2/av2_map_conversion.py diff --git a/src/py123d/conversion/datasets/av2/av2_sensor_converter.py b/src/py123d/conversion/datasets/av2/av2_sensor_converter.py index 172954a9..8fd3fd9c 100644 --- a/src/py123d/conversion/datasets/av2/av2_sensor_converter.py +++ b/src/py123d/conversion/datasets/av2/av2_sensor_converter.py @@ -6,6 +6,7 @@ from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter from py123d.conversion.dataset_converter_config import DatasetConverterConfig +from py123d.conversion.datasets.av2.av2_map_conversion import convert_av2_map from py123d.conversion.datasets.av2.utils.av2_constants import ( AV2_CAMERA_TYPE_MAPPING, AV2_SENSOR_SPLITS, @@ -18,7 +19,6 @@ find_closest_target_fpath, get_slice_with_timestamp_ns, ) -from py123d.conversion.datasets.av2.utils.av2_map_conversion import convert_av2_map from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter from py123d.conversion.registry.lidar_index_registry import AVSensorLidarIndex diff --git a/src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py b/src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py index 08562314..847250eb 100644 --- a/src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py +++ b/src/py123d/conversion/datasets/kitti360/kitti360_map_conversion.py @@ -3,8 +3,7 @@ from pathlib import Path from typing import List -import geopandas as gpd -import pandas as pd +import numpy as np import shapely.geometry as geom from py123d.conversion.datasets.kitti360.utils.kitti360_helper import KITTI360_MAP_Bbox3D @@ -13,7 +12,9 @@ get_road_edge_linear_rings, split_line_geometry_by_max_length, ) +from py123d.conversion.utils.map_utils.road_edge.road_edge_3d_utils import lift_road_edges_to_3d from py123d.datatypes.maps.cache.cache_map_objects import ( + CacheCarpark, CacheGenericDrivable, CacheRoadEdge, CacheWalkway, @@ -34,68 +35,10 @@ "sidewalk", # "railtrack", # "ground", - # "driveway", + "driveway", ] -def _get_none_data() -> gpd.GeoDataFrame: - ids = [] - geometries = [] - data = pd.DataFrame({"id": ids}) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - -def _extract_generic_drivable_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: - 
ids: List[int] = [] - outlines: List[geom.LineString] = [] - geometries: List[geom.Polygon] = [] - for obj in objs: - if obj.label != "road": - continue - ids.append(obj.id) - outlines.append(obj.vertices.linestring) - geometries.append(geom.Polygon(obj.vertices.array[:, :3])) - data = pd.DataFrame({"id": ids, "outline": outlines}) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - -def _extract_walkway_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: - ids: List[int] = [] - outlines: List[geom.LineString] = [] - geometries: List[geom.Polygon] = [] - for obj in objs: - if obj.label != "sidewalk": - continue - ids.append(obj.id) - outlines.append(obj.vertices.linestring) - geometries.append(geom.Polygon(obj.vertices.array[:, :3])) - - data = pd.DataFrame({"id": ids, "outline": outlines}) - gdf = gpd.GeoDataFrame(data, geometry=geometries) - return gdf - - -def _extract_road_edge_df(objs: list[KITTI360_MAP_Bbox3D]) -> gpd.GeoDataFrame: - geometries: List[geom.Polygon] = [] - for obj in objs: - if obj.label != "road": - continue - geometries.append(geom.Polygon(obj.vertices.array[:, :3])) - road_edge_linear_rings = get_road_edge_linear_rings(geometries) - road_edges = split_line_geometry_by_max_length(road_edge_linear_rings, MAX_ROAD_EDGE_LENGTH) - - ids = [] - road_edge_types = [] - for idx in range(len(road_edges)): - ids.append(idx) - road_edge_types.append(int(RoadEdgeType.ROAD_EDGE_BOUNDARY)) - - data = pd.DataFrame({"id": ids, "road_edge_type": road_edge_types}) - return gpd.GeoDataFrame(data, geometry=road_edges) - - def convert_kitti360_map_with_writer(log_name: str, map_writer: AbstractMapWriter) -> None: """ Convert KITTI-360 map data using the provided map writer. @@ -123,29 +66,51 @@ def convert_kitti360_map_with_writer(log_name: str, map_writer: AbstractMapWrite obj.parseBbox(child) objs.append(obj) - generic_drivable_gdf = _extract_generic_drivable_df(objs) - walkway_gdf = _extract_walkway_df(objs) - road_edge_gdf = _extract_road_edge_df(objs) - - for idx, row in generic_drivable_gdf.iterrows(): - if not row.geometry.is_empty: - map_writer.write_generic_drivable(CacheGenericDrivable(object_id=idx, geometry=row.geometry)) - - for idx, row in walkway_gdf.iterrows(): - if not row.geometry.is_empty: - map_writer.write_walkway(CacheWalkway(object_id=idx, geometry=row.geometry)) - - for idx, row in road_edge_gdf.iterrows(): - if not row.geometry.is_empty: - if hasattr(row.geometry, "exterior"): - road_edge_line = row.geometry.exterior - else: - road_edge_line = row.geometry - - map_writer.write_road_edge( - CacheRoadEdge( - object_id=idx, - road_edge_type=RoadEdgeType.ROAD_EDGE_BOUNDARY, - polyline=Polyline3D.from_linestring(road_edge_line), + # 1. 
Write roads, sidewalks, driveways, and collect road geometries + road_outlines_3d: List[Polyline3D] = [] + for obj in objs: + if obj.label == "road": + map_writer.write_generic_drivable( + CacheGenericDrivable( + object_id=obj.id, + outline=obj.vertices, + geometry=geom.Polygon(obj.vertices.array[:, :3]), ) ) + road_outline_array = np.concatenate([obj.vertices.array[:, :3], obj.vertices.array[0:, :3]]) + road_outlines_3d.append(Polyline3D.from_array(road_outline_array)) + elif obj.label == "sidewalk": + map_writer.write_walkway( + CacheWalkway( + object_id=obj.id, + outline=obj.vertices, + geometry=geom.Polygon(obj.vertices.array[:, :3]), + ) + ) + elif obj.label == "driveway": + map_writer.write_carpark( + CacheCarpark( + object_id=obj.id, + outline=obj.vertices, + geometry=geom.Polygon(obj.vertices.array[:, :3]), + ) + ) + + # 2. Use road geometries to create road edges + + # NOTE @DanielDauner: We merge all drivable areas in 2D and lift the outlines to 3D. + # Currently the method assumes that the drivable areas do not overlap and all road surfaces are included. + road_polygons_2d = [geom.Polygon(road_outline.array[:, :2]) for road_outline in road_outlines_3d] + road_edges_2d = get_road_edge_linear_rings(road_polygons_2d) + road_edges_3d = lift_road_edges_to_3d(road_edges_2d, road_outlines_3d) + road_edges_linestrings_3d = [polyline.linestring for polyline in road_edges_3d] + road_edges_linestrings_3d = split_line_geometry_by_max_length(road_edges_linestrings_3d, MAX_ROAD_EDGE_LENGTH) + + for idx in range(len(road_edges_linestrings_3d)): + map_writer.write_road_edge( + CacheRoadEdge( + object_id=idx, + road_edge_type=RoadEdgeType.ROAD_EDGE_BOUNDARY, + polyline=Polyline3D.from_linestring(road_edges_linestrings_3d[idx]), + ) + ) diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_converter.py b/src/py123d/conversion/datasets/pandaset/pandaset_converter.py index 49a81d19..dcefb187 100644 --- a/src/py123d/conversion/datasets/pandaset/pandaset_converter.py +++ b/src/py123d/conversion/datasets/pandaset/pandaset_converter.py @@ -6,7 +6,7 @@ from py123d.conversion.abstract_dataset_converter import AbstractDatasetConverter from py123d.conversion.dataset_converter_config import DatasetConverterConfig -from py123d.conversion.datasets.pandaset.pandaset_constants import ( +from py123d.conversion.datasets.pandaset.utils.pandaset_constants import ( PANDASET_BOX_DETECTION_FROM_STR, PANDASET_BOX_DETECTION_TO_DEFAULT, PANDASET_CAMERA_DISTORTIONS, @@ -16,7 +16,7 @@ PANDASET_LOG_NAMES, PANDASET_SPLITS, ) -from py123d.conversion.datasets.pandaset.pandaset_utlis import ( +from py123d.conversion.datasets.pandaset.utils.pandaset_utlis import ( main_lidar_to_rear_axle, pandaset_pose_dict_to_state_se3, read_json, diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py b/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py index 30fff374..e07ff916 100644 --- a/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py +++ b/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py @@ -4,7 +4,7 @@ import numpy as np import pandas as pd -from py123d.conversion.datasets.pandaset.pandaset_utlis import ( +from py123d.conversion.datasets.pandaset.utils.pandaset_utlis import ( main_lidar_to_rear_axle, pandaset_pose_dict_to_state_se3, read_json, diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_constants.py b/src/py123d/conversion/datasets/pandaset/utils/pandaset_constants.py similarity index 100% rename from 
src/py123d/conversion/datasets/pandaset/pandaset_constants.py rename to src/py123d/conversion/datasets/pandaset/utils/pandaset_constants.py diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_utlis.py b/src/py123d/conversion/datasets/pandaset/utils/pandaset_utlis.py similarity index 100% rename from src/py123d/conversion/datasets/pandaset/pandaset_utlis.py rename to src/py123d/conversion/datasets/pandaset/utils/pandaset_utlis.py diff --git a/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml b/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml index ae1b1033..cf2e553a 100644 --- a/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml +++ b/src/py123d/script/config/common/scene_builder/default_scene_builder.yaml @@ -1,6 +1,5 @@ _target_: py123d.datatypes.scene.arrow.arrow_scene_builder.ArrowSceneBuilder _convert_: 'all' -# dataset_path: ${dataset_paths.py123d_data_root} logs_root: ${dataset_paths.py123d_logs_root} maps_root: ${dataset_paths.py123d_maps_root} diff --git a/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml index 729b9587..5b06890e 100644 --- a/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml @@ -1,5 +1,5 @@ kitti360_dataset: - _target_: py123d.conversion.datasets.kitti360.kitti360_data_converter.Kitti360Converter + _target_: py123d.conversion.datasets.kitti360.kitti360_converter.Kitti360Converter _convert_: 'all' splits: ["kitti360"] diff --git a/src/py123d/visualization/matplotlib/camera.py b/src/py123d/visualization/matplotlib/camera.py index 9126655d..39bf98a3 100644 --- a/src/py123d/visualization/matplotlib/camera.py +++ b/src/py123d/visualization/matplotlib/camera.py @@ -73,20 +73,9 @@ def add_box_detections_to_camera_ax( camera: PinholeCamera, box_detections: BoxDetectionWrapper, ego_state_se3: EgoStateSE3, + return_image: bool = False, ) -> plt.Axes: - # box_labels = annotations.names - # boxes = _transform_annotations_to_camera( - # annotations.boxes, - # camera.sensor2lidar_rotation, - # camera.sensor2lidar_translation, - # ) - # box_positions, box_dimensions, box_heading = ( - # boxes[:, BoundingBoxIndex.POSITION], - # boxes[:, BoundingBoxIndex.DIMENSION], - # boxes[:, BoundingBoxIndex.HEADING], - # ) - box_detection_array = np.zeros((len(box_detections.box_detections), len(BoundingBoxSE3Index)), dtype=np.float64) detection_types = np.array( [detection.metadata.box_detection_type for detection in box_detections.box_detections], dtype=object @@ -123,6 +112,10 @@ def add_box_detections_to_camera_ax( box_corners, detection_types = box_corners[valid_corners], detection_types[valid_corners] image = _plot_rect_3d_on_img(camera.image.copy(), box_corners, detection_types) + if return_image: + # ax.imshow(image) + return ax, image + ax.imshow(image) return ax diff --git a/src/py123d/visualization/matplotlib/plots.py b/src/py123d/visualization/matplotlib/plots.py index cbbdca61..01100f01 100644 --- a/src/py123d/visualization/matplotlib/plots.py +++ b/src/py123d/visualization/matplotlib/plots.py @@ -25,7 +25,8 @@ def _plot_scene_on_ax(ax: plt.Axes, scene: AbstractScene, iteration: int = 0, ra point_2d = ego_vehicle_state.bounding_box.center.state_se2.point_2d if map_api is not None: add_default_map_on_ax(ax, map_api, point_2d, radius=radius, route_lane_group_ids=route_lane_group_ids) - add_traffic_lights_to_ax(ax, 
traffic_light_detections, map_api) + if traffic_light_detections is not None: + add_traffic_lights_to_ax(ax, traffic_light_detections, map_api) add_box_detections_to_ax(ax, box_detections) add_ego_vehicle_to_ax(ax, ego_vehicle_state) diff --git a/src/py123d/visualization/viser/elements/render_elements.py b/src/py123d/visualization/viser/elements/render_elements.py index 6df316b2..f807033e 100644 --- a/src/py123d/visualization/viser/elements/render_elements.py +++ b/src/py123d/visualization/viser/elements/render_elements.py @@ -1,7 +1,10 @@ +import numpy as np + from py123d.conversion.utils.sensor_utils.camera_conventions import convert_camera_convention from py123d.datatypes.scene.abstract_scene import AbstractScene from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 from py123d.geometry.geometry_index import StateSE3Index +from py123d.geometry.rotation import EulerAngles from py123d.geometry.se import StateSE3 from py123d.geometry.transform.transform_se3 import translate_se3_along_body_frame from py123d.geometry.vector import Vector3D @@ -16,10 +19,56 @@ def get_ego_3rd_person_view_position( ego_pose = scene.get_ego_state_at_iteration(iteration).rear_axle_se3.array ego_pose[StateSE3Index.XYZ] -= scene_center_array ego_pose_se3 = StateSE3.from_array(ego_pose) - ego_pose_se3 = translate_se3_along_body_frame(ego_pose_se3, Vector3D(-10.0, 0.0, 5.0)) + ego_pose_se3 = translate_se3_along_body_frame(ego_pose_se3, Vector3D(-15.0, 0.0, 15)) + + # adjust the pitch to -10 degrees. + # euler_angles_array = ego_pose_se3.euler_angles.array + # euler_angles_array[1] += np.deg2rad(30) + # new_quaternion = EulerAngles.from_array(euler_angles_array).quaternion + + ego_pose_se3 = _pitch_se3_by_degrees(ego_pose_se3, 30.0) return convert_camera_convention( ego_pose_se3, from_convention="pXpZmY", to_convention="pZmYpX", ) + + +def get_ego_bev_view_position( + scene: AbstractScene, + iteration: int, + initial_ego_state: EgoStateSE3, +) -> StateSE3: + scene_center_array = initial_ego_state.center.point_3d.array + ego_center = scene.get_ego_state_at_iteration(iteration).center.array + ego_center[StateSE3Index.XYZ] -= scene_center_array + ego_center_planar = StateSE3.from_array(ego_center) + + planar_euler_angles = EulerAngles(0.0, 0.0, ego_center_planar.euler_angles.yaw) + quaternion = planar_euler_angles.quaternion + ego_center_planar._array[StateSE3Index.QUATERNION] = quaternion.array + + ego_center_planar = translate_se3_along_body_frame(ego_center_planar, Vector3D(0.0, 0.0, 50)) + ego_center_planar = _pitch_se3_by_degrees(ego_center_planar, 90.0) + + return convert_camera_convention( + ego_center_planar, + from_convention="pXpZmY", + to_convention="pZmYpX", + ) + + +def _pitch_se3_by_degrees(state_se3: StateSE3, degrees: float) -> StateSE3: + + quaternion = EulerAngles(0.0, np.deg2rad(degrees), state_se3.yaw).quaternion + + return StateSE3( + x=state_se3.x, + y=state_se3.y, + z=state_se3.z, + qw=quaternion.qw, + qx=quaternion.qx, + qy=quaternion.qy, + qz=quaternion.qz, + ) diff --git a/src/py123d/visualization/viser/viser_viewer.py b/src/py123d/visualization/viser/viser_viewer.py index b2008def..89e6d108 100644 --- a/src/py123d/visualization/viser/viser_viewer.py +++ b/src/py123d/visualization/viser/viser_viewer.py @@ -1,3 +1,4 @@ +import io import logging import time from typing import Dict, List, Optional @@ -10,7 +11,6 @@ from py123d.datatypes.maps.map_datatypes import MapLayer from py123d.datatypes.scene.abstract_scene import AbstractScene from py123d.datatypes.sensors.camera.pinhole_camera 
import PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 from py123d.visualization.viser.elements import ( add_box_detections_to_viser_server, @@ -19,35 +19,15 @@ add_lidar_pc_to_viser_server, add_map_to_viser_server, ) -from py123d.visualization.viser.elements.render_elements import get_ego_3rd_person_view_position +from py123d.visualization.viser.elements.render_elements import ( + get_ego_3rd_person_view_position, + get_ego_bev_view_position, +) from py123d.visualization.viser.viser_config import ViserConfig logger = logging.getLogger(__name__) -all_camera_types: List[PinholeCameraType] = [ - PinholeCameraType.CAM_F0, - PinholeCameraType.CAM_B0, - PinholeCameraType.CAM_L0, - PinholeCameraType.CAM_L1, - PinholeCameraType.CAM_L2, - PinholeCameraType.CAM_R0, - PinholeCameraType.CAM_R1, - PinholeCameraType.CAM_R2, - PinholeCameraType.CAM_STEREO_L, - PinholeCameraType.CAM_STEREO_R, -] - -all_lidar_types: List[LiDARType] = [ - LiDARType.LIDAR_MERGED, - LiDARType.LIDAR_TOP, - LiDARType.LIDAR_FRONT, - LiDARType.LIDAR_SIDE_LEFT, - LiDARType.LIDAR_SIDE_RIGHT, - LiDARType.LIDAR_BACK, -] - - def _build_viser_server(viser_config: ViserConfig) -> viser.ViserServer: server = viser.ViserServer( host=viser_config.server_host, @@ -140,7 +120,12 @@ def set_scene(self, scene: AbstractScene) -> None: "FPS options", ("10", "25", "50", "75", "100") ) - button = self._viser_server.gui.add_button("Render Scene") + with self._viser_server.gui.add_folder("Render", expand_by_default=False): + render_format = self._viser_server.gui.add_dropdown("Format", ["gif", "mp4"], initial_value="mp4") + render_view = self._viser_server.gui.add_dropdown( + "View", ["3rd Person", "BEV", "Manual"], initial_value="3rd Person" + ) + button = self._viser_server.gui.add_button("Render Scene") # Frame step buttons. 
@gui_next_frame.on_click @@ -217,6 +202,7 @@ def _(_) -> None: @button.on_click def _(event: viser.GuiEvent) -> None: + nonlocal server_rendering client = event.client assert client is not None @@ -227,12 +213,24 @@ def _(event: viser.GuiEvent) -> None: for i in tqdm(range(scene.number_of_iterations)): gui_timestep.value = i - ego_view = get_ego_3rd_person_view_position(scene, i, initial_ego_state) - client.camera.position = ego_view.point_3d.array - client.camera.wxyz = ego_view.quaternion.array - images.append(client.get_render(height=720, width=1280)) - - client.send_file_download("image.mp4", iio.imwrite("", images, extension=".mp4", fps=30)) + if render_view.value == "BEV": + ego_view = get_ego_bev_view_position(scene, i, initial_ego_state) + client.camera.position = ego_view.point_3d.array + client.camera.wxyz = ego_view.quaternion.array + elif render_view.value == "3rd Person": + ego_view = get_ego_3rd_person_view_position(scene, i, initial_ego_state) + client.camera.position = ego_view.point_3d.array + client.camera.wxyz = ego_view.quaternion.array + images.append(client.get_render(height=1080, width=1920)) + format = render_format.value + buffer = io.BytesIO() + if format == "gif": + iio.imwrite(buffer, images, extension=".gif", loop=False) + elif format == "mp4": + iio.imwrite(buffer, images, extension=".mp4", fps=20) + content = buffer.getvalue() + scene_name = f"{scene.log_metadata.split}_{scene.uuid}" + client.send_file_download(f"{scene_name}.{format}", content, save_immediately=True) server_rendering = False camera_frustum_handles: Dict[PinholeCameraType, viser.CameraFrustumHandle] = {} @@ -284,6 +282,8 @@ def _(event: viser.GuiEvent) -> None: if gui_playing.value and not server_rendering: gui_timestep.value = (gui_timestep.value + 1) % num_frames + else: + time.sleep(0.1) self._viser_server.flush() self.next() diff --git a/test_viser.py b/test_viser.py index 6db46ac4..a2b83796 100644 --- a/test_viser.py +++ b/test_viser.py @@ -3,8 +3,6 @@ from py123d.datatypes.scene.scene_filter import SceneFilter from py123d.visualization.viser.viser_viewer import ViserViewer -# from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType - if __name__ == "__main__": splits = ["kitti360"] # splits = ["nuscenes-mini_val", "nuscenes-mini_train"] @@ -15,16 +13,19 @@ # splits = ["av2-sensor_train"] # splits = ["pandaset_test", "pandaset_val", "pandaset_train"] # log_names = ["2021.08.24.13.12.55_veh-45_00386_00472"] - log_names = ["2013_05_28_drive_0000_sync"] + # log_names = ["2013_05_28_drive_0000_sync"] + # log_names = ["2013_05_28_drive_0000_sync"] + log_names = None + # scene_uuids = ["60a37beb-6df4-5413-b753-9280125020cf"] scene_uuids = None scene_filter = SceneFilter( split_names=splits, log_names=log_names, scene_uuids=scene_uuids, - duration_s=10.0, + duration_s=None, history_s=0.0, - timestamp_threshold_s=30.0, + timestamp_threshold_s=None, shuffle=True, # camera_types=[PinholeCameraType.CAM_F0], ) From fbd431b3d1bb62ed4cb6e3e1e6be6975aa081dde Mon Sep 17 00:00:00 2001 From: Daniel Dauner Date: Mon, 3 Nov 2025 14:10:23 +0100 Subject: [PATCH 31/32] Implement fisheye cameras as a separate modality. General refactorings of sensors. Remove reliance on environment file paths for KITTI-360.
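KITTI-360 calibrates its side fisheye cameras (image_02/image_03) with the MEI unified omnidirectional model, hence the dedicated FisheyeMEIProjection/FisheyeMEIDistortion metadata instead of reusing the pinhole path. For reference, a rough sketch of the forward projection under the usual MEI parameterization (mirror offset xi, generalized focal lengths gamma1/gamma2, principal point u0/v0, radial-tangential coefficients k1/k2/p1/p2); the exact dataclass fields in py123d may differ:

    import numpy as np

    def project_mei(points_cam, xi, gamma1, gamma2, u0, v0, k1, k2, p1, p2):
        """Project Nx3 camera-frame points with the MEI unified camera model."""
        # 1. Project onto the unit sphere, then shift by the mirror offset xi.
        #    Assumes points in front of the camera, i.e. p_z + xi > 0.
        p = points_cam / np.linalg.norm(points_cam, axis=1, keepdims=True)
        x = p[:, 0] / (p[:, 2] + xi)
        y = p[:, 1] / (p[:, 2] + xi)

        # 2. Radial-tangential distortion on the normalized coordinates.
        r2 = x * x + y * y
        radial = 1.0 + k1 * r2 + k2 * r2 * r2
        x_d = x * radial + 2.0 * p1 * x * y + p2 * (r2 + 2.0 * x * x)
        y_d = y * radial + p1 * (r2 + 2.0 * y * y) + 2.0 * p2 * x * y

        # 3. Generalized focal lengths and principal point give pixel coordinates.
        return np.stack([gamma1 * x_d + u0, gamma2 * y_d + v0], axis=1)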
--- notebooks/bev_matplotlib.ipynb | 14 +- notebooks/bev_render.ipynb | 2 +- notebooks/camera_matplotlib.ipynb | 20 +- notebooks/camera_render.ipynb | 2 +- src/py123d/common/utils/enums.py | 14 + .../conversion/dataset_converter_config.py | 18 +- .../datasets/av2/av2_sensor_converter.py | 42 +- .../conversion/datasets/av2/av2_sensor_io.py | 2 +- .../datasets/av2/utils/av2_constants.py | 20 +- .../datasets/kitti360/kitti360_converter.py | 595 ++++++++++-------- .../datasets/kitti360/kitti360_sensor_io.py | 12 +- .../kitti360/utils/kitti360_helper.py | 21 +- .../kitti360/utils/preprocess_detection.py | 53 +- .../datasets/nuplan/nuplan_converter.py | 36 +- .../datasets/nuplan/nuplan_sensor_io.py | 6 +- .../datasets/nuplan/utils/nuplan_constants.py | 2 +- .../datasets/nuscenes/nuscenes_converter.py | 22 +- .../datasets/nuscenes/nuscenes_sensor_io.py | 10 +- .../nuscenes/utils/nuscenes_constants.py | 14 +- .../datasets/pandaset/pandaset_converter.py | 20 +- .../datasets/pandaset/pandaset_sensor_io.py | 8 +- .../pandaset/utils/pandaset_constants.py | 16 +- .../datasets/pandaset/utils/pandaset_utlis.py | 10 +- .../datasets/wopd/utils/wopd_constants.py | 14 +- .../waymo_map_utils/wopd_map_utils copy.py | 390 ------------ .../datasets/wopd/waymo_sensor_io.py | 4 +- .../datasets/wopd/wopd_converter.py | 16 +- .../log_writer/abstract_log_writer.py | 8 +- .../conversion/log_writer/arrow_log_writer.py | 124 +++- .../conversion/log_writer/utils/__init__.py | 0 .../registry/lidar_index_registry.py | 16 +- .../sensor_io/camera/jpeg_camera_io.py | 2 +- .../sensor_io/lidar/draco_lidar_io.py | 2 +- .../sensor_io/lidar/file_lidar_io.py | 2 +- .../sensor_io/lidar/laz_lidar_io.py | 2 +- src/py123d/datatypes/scene/abstract_scene.py | 28 +- .../datatypes/scene/arrow/arrow_scene.py | 29 +- .../scene/arrow/arrow_scene_builder.py | 4 +- .../scene/arrow/utils/arrow_getters.py | 14 +- src/py123d/datatypes/scene/scene_filter.py | 35 +- src/py123d/datatypes/scene/scene_metadata.py | 48 +- src/py123d/datatypes/sensors/__init__.py | 15 +- .../datatypes/sensors/camera/__init__.py | 0 src/py123d/datatypes/sensors/camera/utils.py | 42 -- .../{camera => }/fisheye_mei_camera.py | 5 +- .../datatypes/sensors/{lidar => }/lidar.py | 0 .../datatypes/sensors/lidar/__init__.py | 0 .../datatypes/sensors/lidar/lidar_index.py | 103 --- .../sensors/{camera => }/pinhole_camera.py | 25 +- .../datasets/av2_sensor_dataset.yaml | 14 +- .../conversion/datasets/carla_dataset.yaml | 35 -- .../conversion/datasets/kitti360_dataset.yaml | 46 +- .../conversion/datasets/nuplan_dataset.yaml | 10 +- .../datasets/nuplan_mini_dataset.yaml | 12 +- .../conversion/datasets/nuscenes_dataset.yaml | 17 +- .../datasets/nuscenes_mini_dataset.yaml | 19 +- .../conversion/datasets/pandaset_dataset.yaml | 21 +- .../conversion/datasets/wopd_dataset.yaml | 9 +- src/py123d/visualization/matplotlib/camera.py | 2 +- .../viser/elements/sensor_elements.py | 8 +- .../visualization/viser/viser_config.py | 26 +- .../visualization/viser/viser_viewer.py | 2 +- test_viser.py | 4 +- 63 files changed, 885 insertions(+), 1227 deletions(-) delete mode 100644 src/py123d/conversion/datasets/wopd/waymo_map_utils/wopd_map_utils copy.py delete mode 100644 src/py123d/conversion/log_writer/utils/__init__.py delete mode 100644 src/py123d/datatypes/sensors/camera/__init__.py delete mode 100644 src/py123d/datatypes/sensors/camera/utils.py rename src/py123d/datatypes/sensors/{camera => }/fisheye_mei_camera.py (98%) rename src/py123d/datatypes/sensors/{lidar => }/lidar.py (100%) delete mode 
100644 src/py123d/datatypes/sensors/lidar/__init__.py delete mode 100644 src/py123d/datatypes/sensors/lidar/lidar_index.py rename src/py123d/datatypes/sensors/{camera => }/pinhole_camera.py (96%) delete mode 100644 src/py123d/script/config/conversion/datasets/carla_dataset.yaml diff --git a/notebooks/bev_matplotlib.ipynb b/notebooks/bev_matplotlib.ipynb index 44acc015..94234bd5 100644 --- a/notebooks/bev_matplotlib.ipynb +++ b/notebooks/bev_matplotlib.ipynb @@ -12,7 +12,7 @@ "\n", "\n", "from py123d.common.multithreading.worker_sequential import Sequential\n", - "# from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType " + "# from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType " ] }, { @@ -282,9 +282,7 @@ "id": "4", "metadata": {}, "outputs": [], - "source": [ - "asd" - ] + "source": [] }, { "cell_type": "code", @@ -301,14 +299,6 @@ "metadata": {}, "outputs": [], "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/notebooks/bev_render.ipynb b/notebooks/bev_render.ipynb index c6eb260f..1bc41014 100644 --- a/notebooks/bev_render.ipynb +++ b/notebooks/bev_render.ipynb @@ -11,7 +11,7 @@ "from py123d.datatypes.scene.scene_filter import SceneFilter\n", "\n", "from py123d.common.multithreading.worker_sequential import Sequential\n", - "# from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType " + "# from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType " ] }, { diff --git a/notebooks/camera_matplotlib.ipynb b/notebooks/camera_matplotlib.ipynb index f9a0433a..b33cfdd8 100644 --- a/notebooks/camera_matplotlib.ipynb +++ b/notebooks/camera_matplotlib.ipynb @@ -11,7 +11,7 @@ "from py123d.datatypes.scene.scene_filter import SceneFilter\n", "\n", "from py123d.common.multithreading.worker_sequential import Sequential\n", - "from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType" + "from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType" ] }, { @@ -42,7 +42,7 @@ " history_s=0.0,\n", " timestamp_threshold_s=20,\n", " shuffle=True,\n", - " camera_types=[PinholeCameraType.CAM_F0],\n", + " pinhole_camera_types=[PinholeCameraType.PCAM_F0],\n", ")\n", "scene_builder = ArrowSceneBuilder()\n", "worker = Sequential()\n", @@ -67,24 +67,24 @@ "scene = scenes[0]\n", "\n", "scene: AbstractScene\n", - "print(scene.uuid, scene.available_camera_types)\n", + "print(scene.uuid, scene.available_pinhole_camera_types)\n", "\n", "scale = 3.0\n", "fig, ax = plt.subplots(2, 3, figsize=(scale * 6, scale * 2.5))\n", "\n", "\n", "camera_ax_mapping = {\n", - " PinholeCameraType.CAM_L0: ax[0, 0],\n", - " PinholeCameraType.CAM_F0: ax[0, 1],\n", - " PinholeCameraType.CAM_R0: ax[0, 2],\n", - " PinholeCameraType.CAM_L1: ax[1, 0],\n", - " PinholeCameraType.CAM_B0: ax[1, 1],\n", - " PinholeCameraType.CAM_R1: ax[1, 2],\n", + " PinholeCameraType.PCAM_L0: ax[0, 0],\n", + " PinholeCameraType.PCAM_F0: ax[0, 1],\n", + " PinholeCameraType.PCAM_R0: ax[0, 2],\n", + " PinholeCameraType.PCAM_L1: ax[1, 0],\n", + " PinholeCameraType.PCAM_B0: ax[1, 1],\n", + " PinholeCameraType.PCAM_R1: ax[1, 2],\n", "}\n", "\n", "\n", "for camera_type, ax_ in camera_ax_mapping.items():\n", - " camera = scene.get_camera_at_iteration(iteration, camera_type)\n", + " camera = scene.get_pinhole_camera_at_iteration(iteration, camera_type)\n", " box_detections = scene.get_box_detections_at_iteration(iteration)\n", " ego_state = 
scene.get_ego_state_at_iteration(iteration)\n", "\n", diff --git a/notebooks/camera_render.ipynb b/notebooks/camera_render.ipynb index 4cb5fd50..4365c424 100644 --- a/notebooks/camera_render.ipynb +++ b/notebooks/camera_render.ipynb @@ -11,7 +11,7 @@ "from py123d.datatypes.scene.scene_filter import SceneFilter\n", "\n", "from py123d.common.multithreading.worker_sequential import Sequential\n", - "from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType\n", + "from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType\n", "\n", "KITTI360_DATA_ROOT = \"/home/daniel/kitti_360/KITTI-360\"" ] diff --git a/src/py123d/common/utils/enums.py b/src/py123d/common/utils/enums.py index 33300c00..9f7d233e 100644 --- a/src/py123d/common/utils/enums.py +++ b/src/py123d/common/utils/enums.py @@ -2,6 +2,8 @@ from enum import IntEnum +from pyparsing import Union + class classproperty(object): def __init__(self, f): @@ -27,3 +29,15 @@ def deserialize(cls, key: str) -> SerialIntEnum: def from_int(cls, value: int) -> SerialIntEnum: """Get the enum from an int.""" return cls(value) + + @classmethod + def from_arbitrary(cls, value: Union[int, str, SerialIntEnum]) -> SerialIntEnum: + """Get the enum from an int, string, or enum instance.""" + if isinstance(value, cls): + return value + elif isinstance(value, int): + return cls.from_int(value) + elif isinstance(value, str): + return cls.deserialize(value) + else: + raise ValueError(f"Invalid value for {cls.__name__}: {value}") diff --git a/src/py123d/conversion/dataset_converter_config.py b/src/py123d/conversion/dataset_converter_config.py index 6539e3e4..d4924b01 100644 --- a/src/py123d/conversion/dataset_converter_config.py +++ b/src/py123d/conversion/dataset_converter_config.py @@ -23,9 +23,13 @@ class DatasetConverterConfig: # Traffic Lights include_traffic_lights: bool = False - # Cameras - include_cameras: bool = False - camera_store_option: Literal["path", "binary", "mp4"] = "path" + # Pinhole Cameras + include_pinhole_cameras: bool = False + pinhole_camera_store_option: Literal["path", "binary", "mp4"] = "path" + + # Fisheye MEI Cameras + include_fisheye_mei_cameras: bool = False + fisheye_mei_camera_store_option: Literal["path", "binary", "mp4"] = "path" # LiDARs include_lidars: bool = False @@ -37,11 +41,13 @@ class DatasetConverterConfig: include_route: bool = False def __post_init__(self): - assert self.camera_store_option != "mp4", "MP4 format is not yet supported, but planned for future releases." - assert self.camera_store_option in [ + assert ( + self.pinhole_camera_store_option != "mp4" + ), "MP4 format is not yet supported, but planned for future releases." + assert self.pinhole_camera_store_option in [ "path", "binary", - ], f"Invalid camera store option, got {self.camera_store_option}." + ], f"Invalid camera store option, got {self.pinhole_camera_store_option}." 
assert self.lidar_store_option in [ "path", diff --git a/src/py123d/conversion/datasets/av2/av2_sensor_converter.py b/src/py123d/conversion/datasets/av2/av2_sensor_converter.py index 8fd3fd9c..9891e10c 100644 --- a/src/py123d/conversion/datasets/av2/av2_sensor_converter.py +++ b/src/py123d/conversion/datasets/av2/av2_sensor_converter.py @@ -21,18 +21,18 @@ ) from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter -from py123d.conversion.registry.lidar_index_registry import AVSensorLidarIndex +from py123d.conversion.registry.lidar_index_registry import AVSensorLiDARIndex from py123d.datatypes.detections.box_detection_types import BoxDetectionType from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper from py123d.datatypes.maps.map_metadata import MapMetadata from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import ( +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import ( PinholeCameraMetadata, PinholeCameraType, PinholeDistortion, PinholeIntrinsics, ) -from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import ( @@ -118,7 +118,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: location=map_metadata.location, timestep_seconds=0.1, vehicle_parameters=get_av2_ford_fusion_hybrid_parameters(), - camera_metadata=_get_av2_camera_metadata(source_log_path, self.dataset_converter_config), + pinhole_camera_metadata=_get_av2_pinhole_camera_metadata(source_log_path, self.dataset_converter_config), lidar_metadata=_get_av2_lidar_metadata(source_log_path, self.dataset_converter_config), map_metadata=map_metadata, ) @@ -151,7 +151,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: timestamp=TimePoint.from_ns(int(lidar_timestamp_ns)), ego_state=ego_state, box_detections=_extract_av2_sensor_box_detections(annotations_df, lidar_timestamp_ns, ego_state), - cameras=_extract_av2_sensor_camera( + pinhole_cameras=_extract_av2_sensor_pinhole_cameras( lidar_timestamp_ns, egovehicle_se3_sensor_df, synchronization_df, @@ -185,27 +185,25 @@ def _get_av2_sensor_map_metadata(split: str, source_log_path: Path) -> MapMetada ) -def _get_av2_camera_metadata( +def _get_av2_pinhole_camera_metadata( source_log_path: Path, dataset_converter_config: DatasetConverterConfig ) -> Dict[PinholeCameraType, PinholeCameraMetadata]: - camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {} - - if dataset_converter_config.include_cameras: + pinhole_camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {} + if dataset_converter_config.include_pinhole_cameras: intrinsics_file = source_log_path / "calibration" / "intrinsics.feather" intrinsics_df = pd.read_feather(intrinsics_file) for _, row in intrinsics_df.iterrows(): row = row.to_dict() camera_type = AV2_CAMERA_TYPE_MAPPING[row["sensor_name"]] - camera_metadata[camera_type] = PinholeCameraMetadata( + pinhole_camera_metadata[camera_type] = PinholeCameraMetadata( camera_type=camera_type, width=row["width_px"], height=row["height_px"], intrinsics=PinholeIntrinsics(fx=row["fx_px"], fy=row["fy_px"], 
cx=row["cx_px"], cy=row["cy_px"]), distortion=PinholeDistortion(k1=row["k1"], k2=row["k2"], p1=0.0, p2=0.0, k3=row["k3"]), ) - - return camera_metadata + return pinhole_camera_metadata def _get_av2_lidar_metadata( @@ -226,7 +224,7 @@ def _get_av2_lidar_metadata( # top lidar: metadata[LiDARType.LIDAR_TOP] = LiDARMetadata( lidar_type=LiDARType.LIDAR_TOP, - lidar_index=AVSensorLidarIndex, + lidar_index=AVSensorLiDARIndex, extrinsic=_row_dict_to_state_se3( calibration_df[calibration_df["sensor_name"] == "up_lidar"].iloc[0].to_dict() ), @@ -234,7 +232,7 @@ def _get_av2_lidar_metadata( # down lidar: metadata[LiDARType.LIDAR_DOWN] = LiDARMetadata( lidar_type=LiDARType.LIDAR_DOWN, - lidar_index=AVSensorLidarIndex, + lidar_index=AVSensorLiDARIndex, extrinsic=_row_dict_to_state_se3( calibration_df[calibration_df["sensor_name"] == "down_lidar"].iloc[0].to_dict() ), @@ -321,7 +319,7 @@ def _extract_av2_sensor_ego_state(city_se3_egovehicle_df: pd.DataFrame, lidar_ti ) -def _extract_av2_sensor_camera( +def _extract_av2_sensor_pinhole_cameras( lidar_timestamp_ns: int, egovehicle_se3_sensor_df: pd.DataFrame, synchronization_df: pd.DataFrame, @@ -333,7 +331,7 @@ def _extract_av2_sensor_camera( split = source_log_path.parent.name log_id = source_log_path.name - if dataset_converter_config.include_cameras: + if dataset_converter_config.include_pinhole_cameras: av2_sensor_data_root = source_log_path.parent.parent for _, row in egovehicle_se3_sensor_df.iterrows(): @@ -341,15 +339,15 @@ def _extract_av2_sensor_camera( if row["sensor_name"] not in AV2_CAMERA_TYPE_MAPPING: continue - camera_name = row["sensor_name"] - camera_type = AV2_CAMERA_TYPE_MAPPING[camera_name] + pinhole_camera_name = row["sensor_name"] + pinhole_camera_type = AV2_CAMERA_TYPE_MAPPING[pinhole_camera_name] relative_image_path = find_closest_target_fpath( split=split, log_id=log_id, src_sensor_name="lidar", src_timestamp_ns=lidar_timestamp_ns, - target_sensor_name=camera_name, + target_sensor_name=pinhole_camera_name, synchronization_df=synchronization_df, ) if relative_image_path is not None: @@ -359,12 +357,12 @@ def _extract_av2_sensor_camera( # TODO: Adjust for finer IMU timestamps to correct the camera extrinsic. 
                 camera_extrinsic = _row_dict_to_state_se3(row)
 
                 camera_data = None
-                if dataset_converter_config.camera_store_option == "path":
+                if dataset_converter_config.pinhole_camera_store_option == "path":
                     camera_data = str(relative_image_path)
-                elif dataset_converter_config.camera_store_option == "binary":
+                elif dataset_converter_config.pinhole_camera_store_option == "binary":
                     with open(absolute_image_path, "rb") as f:
                         camera_data = f.read()
-                camera_dict[camera_type] = camera_data, camera_extrinsic
+                camera_dict[pinhole_camera_type] = camera_data, camera_extrinsic
 
     return camera_dict
diff --git a/src/py123d/conversion/datasets/av2/av2_sensor_io.py b/src/py123d/conversion/datasets/av2/av2_sensor_io.py
index a17e4892..81a3de3a 100644
--- a/src/py123d/conversion/datasets/av2/av2_sensor_io.py
+++ b/src/py123d/conversion/datasets/av2/av2_sensor_io.py
@@ -4,7 +4,7 @@
 import numpy as np
 import pandas as pd
 
-from py123d.datatypes.sensors.lidar.lidar import LiDARType
+from py123d.datatypes.sensors.lidar import LiDARType
 
 
 def load_av2_sensor_lidar_pcs_from_file(feather_path: Union[Path, str]) -> Dict[LiDARType, np.ndarray]:
diff --git a/src/py123d/conversion/datasets/av2/utils/av2_constants.py b/src/py123d/conversion/datasets/av2/utils/av2_constants.py
index 7f81f48c..5ac7af9d 100644
--- a/src/py123d/conversion/datasets/av2/utils/av2_constants.py
+++ b/src/py123d/conversion/datasets/av2/utils/av2_constants.py
@@ -3,7 +3,7 @@
 from py123d.common.utils.enums import SerialIntEnum
 from py123d.datatypes.detections.box_detection_types import BoxDetectionType
 from py123d.datatypes.maps.map_datatypes import RoadLineType
-from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType
 
 AV2_SENSOR_SPLITS: Set[str] = {"av2-sensor_train", "av2-sensor_val", "av2-sensor_test"}
 
@@ -80,15 +80,15 @@ class AV2SensorBoxDetectionType(SerialIntEnum):
 
 AV2_CAMERA_TYPE_MAPPING: Dict[str, PinholeCameraType] = {
-    "ring_front_center": PinholeCameraType.CAM_F0,
-    "ring_front_left": PinholeCameraType.CAM_L0,
-    "ring_front_right": PinholeCameraType.CAM_R0,
-    "ring_side_left": PinholeCameraType.CAM_L1,
-    "ring_side_right": PinholeCameraType.CAM_R1,
-    "ring_rear_left": PinholeCameraType.CAM_L2,
-    "ring_rear_right": PinholeCameraType.CAM_R2,
-    "stereo_front_left": PinholeCameraType.CAM_STEREO_L,
-    "stereo_front_right": PinholeCameraType.CAM_STEREO_R,
+    "ring_front_center": PinholeCameraType.PCAM_F0,
+    "ring_front_left": PinholeCameraType.PCAM_L0,
+    "ring_front_right": PinholeCameraType.PCAM_R0,
+    "ring_side_left": PinholeCameraType.PCAM_L1,
+    "ring_side_right": PinholeCameraType.PCAM_R1,
+    "ring_rear_left": PinholeCameraType.PCAM_L2,
+    "ring_rear_right": PinholeCameraType.PCAM_R2,
+    "stereo_front_left": PinholeCameraType.PCAM_STEREO_L,
+    "stereo_front_right": PinholeCameraType.PCAM_STEREO_R,
 }
 
 # AV2_LIDAR_TYPES: Dict[str, str] = {
diff --git a/src/py123d/conversion/datasets/kitti360/kitti360_converter.py b/src/py123d/conversion/datasets/kitti360/kitti360_converter.py
index 2bda2124..d525ab3a 100644
--- a/src/py123d/conversion/datasets/kitti360/kitti360_converter.py
+++ b/src/py123d/conversion/datasets/kitti360/kitti360_converter.py
@@ -1,6 +1,5 @@
 import datetime
 import logging
-import os
 import pickle
 import re
 import xml.etree.ElementTree as ET
@@ -17,7 +16,7 @@
 from py123d.conversion.datasets.kitti360.utils.kitti360_helper import (
     KITTI3602NUPLAN_IMU_CALIBRATION,
     KITTI360Bbox3D,
-    get_lidar_extrinsic,
+    get_kitti360_lidar_extrinsic,
 )
 from py123d.conversion.datasets.kitti360.utils.kitti360_labels import (
     BBOX_LABLES_TO_DETECTION_NAME_DICT,
@@ -27,7 +26,7 @@
 from py123d.conversion.datasets.kitti360.utils.preprocess_detection import process_detection
 from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData
 from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
-from py123d.conversion.registry.lidar_index_registry import Kitti360LidarIndex
+from py123d.conversion.registry.lidar_index_registry import Kitti360LiDARIndex
 from py123d.datatypes.detections.box_detections import (
     BoxDetectionMetadata,
     BoxDetectionSE3,
@@ -35,19 +34,19 @@
 )
 from py123d.datatypes.maps.map_metadata import MapMetadata
 from py123d.datatypes.scene.scene_metadata import LogMetadata
-from py123d.datatypes.sensors.camera.fisheye_mei_camera import (
+from py123d.datatypes.sensors.fisheye_mei_camera import (
     FisheyeMEICameraMetadata,
     FisheyeMEICameraType,
     FisheyeMEIDistortion,
     FisheyeMEIProjection,
 )
-from py123d.datatypes.sensors.camera.pinhole_camera import (
+from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType
+from py123d.datatypes.sensors.pinhole_camera import (
     PinholeCameraMetadata,
     PinholeCameraType,
     PinholeDistortion,
     PinholeIntrinsics,
 )
-from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType
 from py123d.datatypes.time.time_point import TimePoint
 from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3
 from py123d.datatypes.vehicle_state.vehicle_parameters import (
@@ -59,15 +58,32 @@
 
 KITTI360_DT: Final[float] = 0.1
 
-KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"])
+KITTI360_PINHOLE_CAMERA_TYPES = {
+    PinholeCameraType.PCAM_STEREO_L: "image_00",
+    PinholeCameraType.PCAM_STEREO_R: "image_01",
+}
 
-KITTI360_CAMERA_TYPES = {
-    PinholeCameraType.CAM_STEREO_L: "image_00",
-    PinholeCameraType.CAM_STEREO_R: "image_01",
-    FisheyeMEICameraType.CAM_L: "image_02",
-    FisheyeMEICameraType.CAM_R: "image_03",
+KITTI360_FISHEYE_MEI_CAMERA_TYPES = {
+    FisheyeMEICameraType.FCAM_L: "image_02",
+    FisheyeMEICameraType.FCAM_R: "image_03",
 }
 
+KITTI360_SPLITS: List[str] = ["kitti360_train", "kitti360_val", "kitti360_test"]
+KITTI360_ALL_SEQUENCES: Final[List[str]] = [
+    "2013_05_28_drive_0000_sync",
+    "2013_05_28_drive_0002_sync",
+    "2013_05_28_drive_0003_sync",
+    "2013_05_28_drive_0004_sync",
+    "2013_05_28_drive_0005_sync",
+    "2013_05_28_drive_0006_sync",
+    "2013_05_28_drive_0007_sync",
+    "2013_05_28_drive_0008_sync",
+    "2013_05_28_drive_0009_sync",
+    "2013_05_28_drive_0010_sync",
+    "2013_05_28_drive_0018_sync",
+]
+
+DIR_ROOT = "root"
 DIR_2D_RAW = "data_2d_raw"
 DIR_2D_SMT = "data_2d_semantics"
 DIR_3D_RAW = "data_3d_raw"
@@ -76,50 +92,27 @@
 DIR_POSES = "data_poses"
 DIR_CALIB = "calibration"
 
-PATH_2D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_RAW
-PATH_2D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_2D_SMT
-PATH_3D_RAW_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_RAW
-PATH_3D_SMT_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_SMT
-PATH_3D_BBOX_ROOT: Path = KITTI360_DATA_ROOT / DIR_3D_BBOX
-PATH_POSES_ROOT: Path = KITTI360_DATA_ROOT / DIR_POSES
-PATH_CALIB_ROOT: Path = KITTI360_DATA_ROOT / DIR_CALIB
-
-KITTI360_REQUIRED_MODALITY_ROOTS: Dict[str, Path] = {
-    DIR_2D_RAW: PATH_2D_RAW_ROOT,
-    DIR_3D_RAW: PATH_3D_RAW_ROOT,
-    DIR_POSES: PATH_POSES_ROOT,
-    DIR_3D_BBOX: PATH_3D_BBOX_ROOT / "train",
-}
-
-KITTI360_ALL_SEQUENCES: Final[List[str]] = [
-    "2013_05_28_drive_0000_sync",
-    "2013_05_28_drive_0002_sync",
-    "2013_05_28_drive_0003_sync",
-    # "2013_05_28_drive_0004_sync",
-    # "2013_05_28_drive_0005_sync",
-    # "2013_05_28_drive_0006_sync",
-    # "2013_05_28_drive_0007_sync",
-    # "2013_05_28_drive_0008_sync",
-    # "2013_05_28_drive_0009_sync",
-    # "2013_05_28_drive_0010_sync",
-    # "2013_05_28_drive_0018_sync",
-]
-
-# Create a temporary directory for detection preprocessing
-# PREPROCESS_DETECTION_DIR = Path(tempfile.mkdtemp(prefix="kitti360_detection_"))
-PREPROCESS_DETECTION_DIR = Path("/home/daniel/kitti360_detection_temp")
+def _get_kitti360_paths_from_root(kitti_data_root: Path) -> Dict[str, Path]:
+    return {
+        DIR_ROOT: kitti_data_root,
+        DIR_2D_RAW: kitti_data_root / DIR_2D_RAW,
+        DIR_2D_SMT: kitti_data_root / DIR_2D_SMT,
+        DIR_3D_RAW: kitti_data_root / DIR_3D_RAW,
+        DIR_3D_SMT: kitti_data_root / DIR_3D_SMT,
+        DIR_3D_BBOX: kitti_data_root / DIR_3D_BBOX,
+        DIR_POSES: kitti_data_root / DIR_POSES,
+        DIR_CALIB: kitti_data_root / DIR_CALIB,
+    }
 
-def get_kitti360_map_metadata(split: str, log_name: str) -> MapMetadata:
-    return MapMetadata(
-        dataset="kitti360",
-        split=split,
-        log_name=log_name,
-        location=log_name,
-        map_has_z=True,
-        map_is_local=True,
-    )
+def _get_kitti360_required_modality_roots(kitti360_folders: Dict[str, Path]) -> Dict[str, Path]:
+    return {
+        DIR_2D_RAW: kitti360_folders[DIR_2D_RAW],
+        DIR_3D_RAW: kitti360_folders[DIR_3D_RAW],
+        DIR_POSES: kitti360_folders[DIR_POSES],
+        DIR_3D_BBOX: kitti360_folders[DIR_3D_BBOX] / "train",
+    }
 
 
 class Kitti360Converter(AbstractDatasetConverter):
@@ -127,40 +120,41 @@ def __init__(
         self,
         splits: List[str],
         kitti360_data_root: Union[Path, str],
+        detection_cache_root: Union[Path, str],
+        detection_radius: float,
         dataset_converter_config: DatasetConverterConfig,
-        kitti36_sequences: List[str] = KITTI360_ALL_SEQUENCES,
+        train_sequences: List[str],
+        val_sequences: List[str],
+        test_sequences: List[str],
     ) -> None:
         super().__init__(dataset_converter_config)
         for split in splits:
-            assert (
-                split in self.get_available_splits()
-            ), f"Split {split} is not available. Available splits: {self.available_splits}"
+            assert split in KITTI360_SPLITS, f"Split {split} is not available. Available splits: {KITTI360_SPLITS}"
 
         self._splits: List[str] = splits
-        self._log_path: Path = Path(kitti360_data_root)
-        self._kitti36_sequences: List[str] = kitti36_sequences
-        self._log_paths_and_split: List[Tuple[Path, str]] = self._collect_log_paths()
+        self._kitti360_data_root: Path = Path(kitti360_data_root)
+        self._kitti360_folders: Dict[str, Path] = _get_kitti360_paths_from_root(self._kitti360_data_root)
 
-        self._total_maps = len(self._log_paths_and_split)  # Each log has its own map
-        self._total_logs = len(self._log_paths_and_split)
+        # NOTE: We preprocess detections into cache directory to speed up repeated conversions
+        # The bounding boxes are preprocessed into a per-frame format based on the ego distance and
+        # visibility based on the lidar point cloud.
+        self._detection_cache_root: Path = Path(detection_cache_root)
+        self._detection_radius: float = detection_radius
 
-    def _collect_log_paths(self) -> List[Tuple[Path, str]]:
-        """
-        Collect candidate sequence folders under data_2d_raw that end with '_sync',
-        and keep only those sequences that are present in ALL required modality roots
-        (e.g., data_2d_semantics, data_3d_raw, etc.).
-        Returns a list of (log_path, split) tuples.
-        """
-        missing_roots = [str(p) for p in KITTI360_REQUIRED_MODALITY_ROOTS.values() if not p.exists()]
-        if missing_roots:
-            raise FileNotFoundError(f"KITTI-360 required roots missing: {missing_roots}")
+        self._train_sequences: List[str] = train_sequences
+        self._val_sequences: List[str] = val_sequences
+        self._test_sequences: List[str] = test_sequences
 
-        # Enumerate candidate sequences from data_2d_raw
-        candidates = sorted(
-            p
-            for p in PATH_2D_RAW_ROOT.iterdir()
-            if p.is_dir() and p.name.endswith("_sync") and p.name in self._kitti36_sequences
-        )
+        self._log_names_and_split: List[Tuple[str, str]] = self._collect_valid_logs()
+        self._total_maps = len(self._log_names_and_split)  # Each log has its own map
+        self._total_logs = len(self._log_names_and_split)
+
+    def _collect_valid_logs(self) -> List[Tuple[str, str]]:
+        """Helper function to collect valid KITTI sequences ("logs") from the dataset root
+
+        :raises FileNotFoundError: If required modality roots are missing
+        :return: A list of tuples containing the log name and split name
+        """
 
         def _has_modality(seq_name: str, modality_name: str, root: Path) -> bool:
            if modality_name == DIR_3D_BBOX:
@@ -170,29 +164,45 @@ def _has_modality(seq_name: str, modality_name: str, root: Path) -> bool:
            else:
                return (root / seq_name).exists()
 
+        required_modality_roots = _get_kitti360_required_modality_roots(self._kitti360_folders)
+        missing_roots = [str(p) for p in required_modality_roots.values() if not p.exists()]
+        if missing_roots:
+            raise FileNotFoundError(f"KITTI-360 required roots missing: {missing_roots}")
+
+        # Find all sequences in the 2D raw data directory, and add to split
+        split_sequence_candidates: Dict[str, List[str]] = defaultdict(list)
+        for sequence_path in required_modality_roots[DIR_2D_RAW].iterdir():
+            if sequence_path.is_dir() and sequence_path.name.endswith("_sync"):
+                seq_name = sequence_path.name
+                if seq_name in self._train_sequences:
+                    split_sequence_candidates["kitti360_train"].append(seq_name)
+                elif seq_name in self._val_sequences:
+                    split_sequence_candidates["kitti360_val"].append(seq_name)
+                elif seq_name in self._test_sequences:
+                    split_sequence_candidates["kitti360_test"].append(seq_name)
+
+        # Iterate all candidates, check that modalities available, and add to flat list
         log_paths_and_split: List[Tuple[Path, str]] = []
-        for seq_dir in candidates:
-            seq_name = seq_dir.name
-            missing_modalities = [
-                modality_name
-                for modality_name, root in KITTI360_REQUIRED_MODALITY_ROOTS.items()
-                if not _has_modality(seq_name, modality_name, root)
-            ]
-            if not missing_modalities:
-                log_paths_and_split.append((seq_dir, "kitti360"))
-            else:
-                logging.info(
-                    f"Sequence '{seq_name}' skipped: missing modalities {missing_modalities}. "
-                    f"Root: {KITTI360_DATA_ROOT}"
-                )
+        for split, sequence_names in split_sequence_candidates.items():
+            if split not in self._splits:
+                continue
+            for sequence_name in sequence_names:
+                missing_modalities = [
+                    modality_name
+                    for modality_name, root in required_modality_roots.items()
+                    if not _has_modality(sequence_name, modality_name, root)
+                ]
+                if len(missing_modalities) == 0:
+                    log_paths_and_split.append((sequence_name, split))
+                else:
+                    logging.info(
+                        f"Sequence '{sequence_name}' skipped: missing modalities {missing_modalities}. "
+                        f"Root: {self._kitti360_data_root}"
+                    )
 
         logging.info(f"Valid sequences found: {len(log_paths_and_split)}")
         return log_paths_and_split
 
-    def get_available_splits(self) -> List[str]:
-        """Returns a list of available raw data types."""
-        return ["kitti360"]
-
     def get_number_of_maps(self) -> int:
         """Returns the number of available raw data maps for conversion."""
         return self._total_maps
@@ -207,15 +217,11 @@ def convert_map(self, map_index: int, map_writer: AbstractMapWriter) -> None:
         :param map_index: The index of the map to convert.
         :param map_writer: The map writer to use for writing the converted map.
         """
-        source_log_path, split = self._log_paths_and_split[map_index]
-        log_name = source_log_path.stem
-
-        map_metadata = get_kitti360_map_metadata(split, log_name)
-
+        log_name, split = self._log_names_and_split[map_index]
+        map_metadata = _get_kitti360_map_metadata(split, log_name)
         map_needs_writing = map_writer.reset(self.dataset_converter_config, map_metadata)
         if map_needs_writing:
             convert_kitti360_map_with_writer(log_name, map_writer)
-
         map_writer.close()
 
     def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None:
@@ -224,8 +230,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None:
         :param log_index: The index of the log to convert.
         :param log_writer: The log writer to use for writing the converted log.
         """
-        source_log_path, split = self._log_paths_and_split[log_index]
-        log_name = source_log_path.stem
+        log_name, split = self._log_names_and_split[log_index]
 
         # Create log metadata
         log_metadata = LogMetadata(
@@ -235,60 +240,131 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None:
             location=log_name,
             timestep_seconds=KITTI360_DT,
             vehicle_parameters=get_kitti360_vw_passat_parameters(),
-            camera_metadata=get_kitti360_camera_metadata(),
-            lidar_metadata=get_kitti360_lidar_metadata(),
-            map_metadata=get_kitti360_map_metadata(split, log_name),
+            pinhole_camera_metadata=_get_kitti360_pinhole_camera_metadata(
+                self._kitti360_folders,
+                self.dataset_converter_config,
+            ),
+            fisheye_mei_camera_metadata=_get_kitti360_fisheye_mei_camera_metadata(
+                self._kitti360_folders,
+                self.dataset_converter_config,
+            ),
+            lidar_metadata=_get_kitti360_lidar_metadata(
+                self._kitti360_folders,
+                self.dataset_converter_config,
+            ),
+            map_metadata=_get_kitti360_map_metadata(split, log_name),
         )
 
         log_needs_writing = log_writer.reset(self.dataset_converter_config, log_metadata)
+
         if log_needs_writing:
-            _write_recording_table(log_name, log_writer, self.dataset_converter_config)
+            ts_list: List[TimePoint] = _read_timestamps(log_name, self._kitti360_folders)
+            ego_state_all, valid_timestamp = _extract_ego_state_all(log_name, self._kitti360_folders)
+            ego_states_xyz = np.array([ego_state.center.array[:3] for ego_state in ego_state_all], dtype=np.float64)
+            box_detection_wrapper_all = _extract_kitti360_box_detections_all(
+                log_name,
+                len(ts_list),
+                ego_states_xyz,
+                valid_timestamp,
+                self._kitti360_folders,
+                self._detection_cache_root,
+                self._detection_radius,
+            )
+            camera_calibration = _load_kitti_360_calibration(self._kitti360_data_root)
+            logging.info(f"Number of valid timestamps with ego states: {len(valid_timestamp)}")
+
+            for idx in range(len(valid_timestamp)):
+                valid_idx = valid_timestamp[idx]
+
+                pinhole_cameras = _extract_kitti360_pinhole_cameras(
+                    log_name,
+                    valid_idx,
+                    camera_calibration,
+                    self._kitti360_folders,
+                    self.dataset_converter_config,
+                )
+                fisheye_cameras = _extract_kitti360_fisheye_mei_cameras(
+                    log_name,
+                    valid_idx,
+                    camera_calibration,
+                    self._kitti360_folders,
+                    self.dataset_converter_config,
+                )
+                lidars = _extract_kitti360_lidar(
+                    log_name,
+                    valid_idx,
+                    self._kitti360_folders,
+                    self.dataset_converter_config,
+                )
+
+                log_writer.write(
+                    timestamp=ts_list[valid_idx],
+                    ego_state=ego_state_all[idx],
+                    box_detections=box_detection_wrapper_all[valid_idx],
+                    traffic_lights=None,
+                    pinhole_cameras=pinhole_cameras,
+                    fisheye_mei_cameras=fisheye_cameras,
+                    lidars=lidars,
+                    scenario_tags=None,
+                    route_lane_group_ids=None,
+                )
 
         log_writer.close()
 
 
-def get_kitti360_camera_metadata() -> (
-    Dict[Union[PinholeCameraType, FisheyeMEICameraType], Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]]
-):
-
-    persp = PATH_CALIB_ROOT / "perspective.txt"
-
-    assert persp.exists()
-    persp_result = {"image_00": {}, "image_01": {}}
-
-    with open(persp, "r") as f:
-        lines = [ln.strip() for ln in f if ln.strip()]
-        for ln in lines:
-            key, value = ln.split(" ", 1)
-            cam_id = key.split("_")[-1][:2]
-            if key.startswith("P_rect_"):
-                persp_result[f"image_{cam_id}"]["intrinsic"] = _read_projection_matrix(ln)
-            elif key.startswith("S_rect_"):
-                persp_result[f"image_{cam_id}"]["wh"] = [int(round(float(x))) for x in value.split()]
-            elif key.startswith("D_"):
-                persp_result[f"image_{cam_id}"]["distortion"] = [float(x) for x in value.split()]
-
-    fisheye_camera02_path = PATH_CALIB_ROOT / "image_02.yaml"
-    fisheye_camera03_path = PATH_CALIB_ROOT / "image_03.yaml"
-    assert fisheye_camera02_path.exists() and fisheye_camera03_path.exists()
-    fisheye02 = _readYAMLFile(fisheye_camera02_path)
-    fisheye03 = _readYAMLFile(fisheye_camera03_path)
-    fisheye_result = {"image_02": fisheye02, "image_03": fisheye03}
-
-    log_cam_infos: Dict[
-        Union[PinholeCameraType, FisheyeMEICameraType], Union[PinholeCameraMetadata, FisheyeMEICameraMetadata]
-    ] = {}
-    for cam_type, cam_name in KITTI360_CAMERA_TYPES.items():
-        if cam_name in ["image_00", "image_01"]:
-            log_cam_infos[cam_type] = PinholeCameraMetadata(
-                camera_type=cam_type,
-                width=persp_result[cam_name]["wh"][0],
-                height=persp_result[cam_name]["wh"][1],
-                intrinsics=PinholeIntrinsics.from_camera_matrix(np.array(persp_result[cam_name]["intrinsic"])),
-                distortion=PinholeDistortion.from_array(np.array(persp_result[cam_name]["distortion"])),
+def _get_kitti360_pinhole_camera_metadata(
+    kitti360_folders: Dict[str, Path],
+    dataset_converter_config: DatasetConverterConfig,
+) -> Dict[PinholeCameraType, PinholeCameraMetadata]:
+
+    pinhole_cam_metadatas: Dict[PinholeCameraType, PinholeCameraMetadata] = {}
+    if dataset_converter_config.include_pinhole_cameras:
+        persp = kitti360_folders[DIR_CALIB] / "perspective.txt"
+        assert persp.exists()
+        persp_result = {"image_00": {}, "image_01": {}}
+
+        with open(persp, "r") as f:
+            lines = [ln.strip() for ln in f if ln.strip()]
+            for ln in lines:
+                key, value = ln.split(" ", 1)
+                cam_id = key.split("_")[-1][:2]
+                if key.startswith("P_rect_"):
+                    persp_result[f"image_{cam_id}"]["intrinsic"] = _read_projection_matrix(ln)
+                elif key.startswith("S_rect_"):
+                    persp_result[f"image_{cam_id}"]["wh"] = [int(round(float(x))) for x in value.split()]
+                elif key.startswith("D_"):
+                    persp_result[f"image_{cam_id}"]["distortion"] = [float(x) for x in value.split()]
+
+        for pcam_type, pcam_name in KITTI360_PINHOLE_CAMERA_TYPES.items():
+            pinhole_cam_metadatas[pcam_type] = PinholeCameraMetadata(
+                camera_type=pcam_type,
+                width=persp_result[pcam_name]["wh"][0],
+                height=persp_result[pcam_name]["wh"][1],
+                intrinsics=PinholeIntrinsics.from_camera_matrix(np.array(persp_result[pcam_name]["intrinsic"])),
+                distortion=PinholeDistortion.from_array(np.array(persp_result[pcam_name]["distortion"])),
             )
-        elif cam_name in ["image_02", "image_03"]:
-            distortion_params = fisheye_result[cam_name]["distortion_parameters"]
+
+    return pinhole_cam_metadatas
+
+
+def _get_kitti360_fisheye_mei_camera_metadata(
+    kitti360_folders: Dict[str, Path],
+    dataset_converter_config: DatasetConverterConfig,
+) -> Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata]:
+    fisheye_cam_metadatas: Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata] = {}
+    if dataset_converter_config.include_fisheye_mei_cameras:
+
+        fisheye_camera02_path = kitti360_folders[DIR_CALIB] / "image_02.yaml"
+        fisheye_camera03_path = kitti360_folders[DIR_CALIB] / "image_03.yaml"
+
+        assert fisheye_camera02_path.exists() and fisheye_camera03_path.exists()
+        fisheye02 = _readYAMLFile(fisheye_camera02_path)
+        fisheye03 = _readYAMLFile(fisheye_camera03_path)
+        fisheye_result = {"image_02": fisheye02, "image_03": fisheye03}
+
+        for fcam_type, fcam_name in KITTI360_FISHEYE_MEI_CAMERA_TYPES.items():
+
+            distortion_params = fisheye_result[fcam_name]["distortion_parameters"]
             distortion = FisheyeMEIDistortion(
                 k1=distortion_params["k1"],
                 k2=distortion_params["k2"],
@@ -296,7 +372,7 @@
                 p2=distortion_params["p2"],
             )
 
-            projection_params = fisheye_result[cam_name]["projection_parameters"]
+            projection_params = fisheye_result[fcam_name]["projection_parameters"]
             projection = FisheyeMEIProjection(
                 gamma1=projection_params["gamma1"],
                 gamma2=projection_params["gamma2"],
@@ -304,16 +380,27 @@
                 v0=projection_params["v0"],
             )
 
-            log_cam_infos[cam_type] = FisheyeMEICameraMetadata(
-                camera_type=cam_type,
-                width=fisheye_result[cam_name]["image_width"],
-                height=fisheye_result[cam_name]["image_height"],
-                mirror_parameter=fisheye_result[cam_name]["mirror_parameters"],
+            fisheye_cam_metadatas[fcam_type] = FisheyeMEICameraMetadata(
+                camera_type=fcam_type,
+                width=fisheye_result[fcam_name]["image_width"],
+                height=fisheye_result[fcam_name]["image_height"],
+                mirror_parameter=fisheye_result[fcam_name]["mirror_parameters"],
                 distortion=distortion,
                 projection=projection,
             )
 
-    return log_cam_infos
+    return fisheye_cam_metadatas
+
+
+def _get_kitti360_map_metadata(split: str, log_name: str) -> MapMetadata:
+    return MapMetadata(
+        dataset="kitti360",
+        split=split,
+        log_name=log_name,
+        location=log_name,
+        map_has_z=True,
+        map_is_local=True,
+    )
 
 
 def _read_projection_matrix(p_line: str) -> np.ndarray:
@@ -340,55 +427,31 @@ def _readYAMLFile(fileName: Path) -> Dict[str, Any]:
     return ret
 
 
-def get_kitti360_lidar_metadata() -> Dict[LiDARType, LiDARMetadata]:
+def _get_kitti360_lidar_metadata(
+    kitti360_folders: Dict[str, Path],
+    dataset_converter_config: DatasetConverterConfig,
+) -> Dict[LiDARType, LiDARMetadata]:
     metadata: Dict[LiDARType, LiDARMetadata] = {}
-    extrinsic = get_lidar_extrinsic()
-    extrinsic_state_se3 = StateSE3.from_transformation_matrix(extrinsic)
-    extrinsic_state_se3 = _extrinsic_from_imu_to_rear_axle(extrinsic_state_se3)
-    metadata[LiDARType.LIDAR_TOP] = LiDARMetadata(
-        lidar_type=LiDARType.LIDAR_TOP,
-        lidar_index=Kitti360LidarIndex,
-        extrinsic=extrinsic_state_se3,
-    )
-    return metadata
-
-
-def _write_recording_table(
-    log_name: str, log_writer: AbstractLogWriter, data_converter_config: DatasetConverterConfig
-) -> None:
-
-    ts_list: List[TimePoint] = _read_timestamps(log_name)
-    ego_state_all, valid_timestamp = _extract_ego_state_all(log_name)
-    ego_states_xyz = np.array([ego_state.center.array[:3] for ego_state in ego_state_all], dtype=np.float64)
-    box_detection_wrapper_all = _extract_detections(log_name, len(ts_list), ego_states_xyz, valid_timestamp)
-    logging.info(f"Number of valid timestamps with ego states: {len(valid_timestamp)}")
-
-    for idx in range(len(valid_timestamp)):
-        valid_idx = valid_timestamp[idx]
-
-        cameras = _extract_cameras(log_name, valid_idx, data_converter_config)
-        lidars = _extract_lidar(log_name, valid_idx, data_converter_config)
-
-        log_writer.write(
-            timestamp=ts_list[valid_idx],
-            ego_state=ego_state_all[idx],
-            box_detections=box_detection_wrapper_all[valid_idx],
-            traffic_lights=None,
-            cameras=cameras,
-            lidars=lidars,
-            scenario_tags=None,
-            route_lane_group_ids=None,
+    if dataset_converter_config.include_lidars:
+        extrinsic = get_kitti360_lidar_extrinsic(kitti360_folders[DIR_CALIB])
+        extrinsic_state_se3 = StateSE3.from_transformation_matrix(extrinsic)
+        extrinsic_state_se3 = _extrinsic_from_imu_to_rear_axle(extrinsic_state_se3)
+        metadata[LiDARType.LIDAR_TOP] = LiDARMetadata(
+            lidar_type=LiDARType.LIDAR_TOP,
+            lidar_index=Kitti360LiDARIndex,
+            extrinsic=extrinsic_state_se3,
         )
+    return metadata
 
 
-def _read_timestamps(log_name: str) -> Optional[List[TimePoint]]:
+def _read_timestamps(log_name: str, kitti360_folders: Dict[str, Path]) -> Optional[List[TimePoint]]:
     """
     Read KITTI-360 timestamps for the given sequence and return Unix epoch timestamps.
     """
     ts_files = [
-        PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "timestamps.txt",
-        PATH_2D_RAW_ROOT / log_name / "image_00" / "timestamps.txt",
-        PATH_2D_RAW_ROOT / log_name / "image_01" / "timestamps.txt",
+        kitti360_folders[DIR_3D_RAW] / log_name / "velodyne_points" / "timestamps.txt",
+        kitti360_folders[DIR_2D_RAW] / log_name / "image_00" / "timestamps.txt",
+        kitti360_folders[DIR_2D_RAW] / log_name / "image_01" / "timestamps.txt",
     ]
 
     if log_name == "2013_05_28_drive_0002_sync":
@@ -406,31 +469,25 @@
             dt_obj = datetime.datetime.strptime(dt_str, "%Y-%m-%d %H:%M:%S")
             dt_obj = dt_obj.replace(tzinfo=datetime.timezone.utc)
             unix_epoch = datetime.datetime(1970, 1, 1, tzinfo=datetime.timezone.utc)
-
             total_seconds = (dt_obj - unix_epoch).total_seconds()
-
             ns_value = int(ns_str)
             us_from_ns = ns_value // 1000
-
             total_us = int(total_seconds * 1_000_000) + us_from_ns
-
             tps.append(TimePoint.from_us(total_us))
         return tps
     return None
 
 
-def _extract_ego_state_all(log_name: str) -> Tuple[List[EgoStateSE3], List[int]]:
+def _extract_ego_state_all(log_name: str, kitti360_folders: Dict[str, Path]) -> Tuple[List[EgoStateSE3], List[int]]:
     ego_state_all: List[List[float]] = []
-
-    pose_file = PATH_POSES_ROOT / log_name / "poses.txt"
+    pose_file = kitti360_folders[DIR_POSES] / log_name / "poses.txt"
     if not pose_file.exists():
         raise FileNotFoundError(f"Pose file not found: {pose_file}")
 
     poses = np.loadtxt(pose_file)
     poses_time = poses[:, 0].astype(np.int32)
     valid_timestamp: List[int] = list(poses_time)
-
-    oxts_path = PATH_POSES_ROOT / log_name / "oxts" / "data"
+    oxts_path = kitti360_folders[DIR_POSES] / log_name / "oxts" / "data"
 
     for idx in range(len(valid_timestamp)):
         oxts_path_file = oxts_path / f"{int(valid_timestamp[idx]):010d}.txt"
@@ -495,11 +552,14 @@
     return ego_state_all, valid_timestamp
 
 
-def _extract_detections(
+def _extract_kitti360_box_detections_all(
     log_name: str,
     ts_len: int,
     ego_states_xyz: np.ndarray,
     valid_timestamp: List[int],
+    kitti360_folders: Dict[str, Path],
+    detection_cache_root: Path,
+    detection_radius: float,
 ) -> List[BoxDetectionWrapper]:
 
     detections_states: List[List[List[float]]] = [[] for _ in range(ts_len)]
@@ -508,18 +568,23 @@
     detections_types: List[List[int]] = [[] for _ in range(ts_len)]
 
     if log_name == "2013_05_28_drive_0004_sync":
-        bbox_3d_path = PATH_3D_BBOX_ROOT / "train_full" / f"{log_name}.xml"
+        bbox_3d_path = kitti360_folders[DIR_3D_BBOX] / "train_full" / f"{log_name}.xml"
     else:
-        bbox_3d_path = PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml"
+        bbox_3d_path = kitti360_folders[DIR_3D_BBOX] / "train" / f"{log_name}.xml"
     if not bbox_3d_path.exists():
         raise FileNotFoundError(f"BBox 3D file not found: {bbox_3d_path}")
     tree = ET.parse(bbox_3d_path)
     root = tree.getroot()
 
-    detection_preprocess_path = PREPROCESS_DETECTION_DIR / f"{log_name}_detection_preprocessed.pkl"
+    detection_preprocess_path = detection_cache_root / f"{log_name}_detection_preprocessed.pkl"
     if not detection_preprocess_path.exists():
-        process_detection(log_name=log_name, radius_m=60.0, output_dir=PREPROCESS_DETECTION_DIR)
+        process_detection(
+            kitti360_data_root=kitti360_folders[DIR_ROOT],
+            log_name=log_name,
+            radius_m=detection_radius,
+            output_dir=detection_cache_root,
+        )
     with open(detection_preprocess_path, "rb") as f:
         detection_preprocess_result = pickle.load(f)
     static_records_dict = {
@@ -620,7 +685,12 @@
     return box_detection_wrapper_all
 
 
-def _extract_lidar(log_name: str, idx: int, data_converter_config: DatasetConverterConfig) -> List[LiDARData]:
+def _extract_kitti360_lidar(
+    log_name: str,
+    idx: int,
+    kitti360_folders: Dict[str, Path],
+    data_converter_config: DatasetConverterConfig,
+) -> List[LiDARData]:
     lidars: List[LiDARData] = []
 
     if data_converter_config.include_lidars:
@@ -628,17 +698,15 @@
         if log_name == "2013_05_28_drive_0002_sync" and idx <= 4390:
             return lidars
 
-        lidar_full_path = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{idx:010d}.bin"
-
+        lidar_full_path = kitti360_folders[DIR_3D_RAW] / log_name / "velodyne_points" / "data" / f"{idx:010d}.bin"
         if lidar_full_path.exists():
-
             lidars.append(
                 LiDARData(
                     lidar_type=LiDARType.LIDAR_TOP,
                     timestamp=None,
                     iteration=idx,
-                    dataset_root=KITTI360_DATA_ROOT,
-                    relative_path=lidar_full_path.relative_to(KITTI360_DATA_ROOT),
+                    dataset_root=kitti360_folders[DIR_ROOT],
+                    relative_path=lidar_full_path.relative_to(kitti360_folders[DIR_ROOT]),
                 )
             )
         else:
@@ -647,46 +715,77 @@
     return lidars
 
 
-def _extract_cameras(
-    log_name: str, idx: int, data_converter_config: DatasetConverterConfig
+def _extract_kitti360_pinhole_cameras(
+    log_name: str,
+    idx: int,
+    camera_calibration: Dict[str, StateSE3],
+    kitti360_folders: Dict[str, Path],
+    data_converter_config: DatasetConverterConfig,
 ) -> Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]]:
-    camera_dict: Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]] = {}
-    for camera_type, cam_dir_name in KITTI360_CAMERA_TYPES.items():
-        if cam_dir_name in ["image_00", "image_01"]:
-            img_path_png = PATH_2D_RAW_ROOT / log_name / cam_dir_name / "data_rect" / f"{idx:010d}.png"
-        elif cam_dir_name in ["image_02", "image_03"]:
-            img_path_png = PATH_2D_RAW_ROOT / log_name / cam_dir_name / "data_rgb" / f"{idx:010d}.png"
-
-        cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt"
-        if not cam2pose_txt.exists():
-            raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}")
-
-        lastrow = np.array([0, 0, 0, 1]).reshape(1, 4)
-        with open(cam2pose_txt, "r") as f:
-            for line in f:
-                parts = line.strip().split()
-                key = parts[0][:-1]
-                if key == cam_dir_name:
-                    values = list(map(float, parts[1:]))
-                    matrix = np.array(values).reshape(3, 4)
-                    cam2pose = np.concatenate((matrix, lastrow))
-                    cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose
-
-        camera_extrinsic = StateSE3.from_transformation_matrix(cam2pose)
-        camera_extrinsic = _extrinsic_from_imu_to_rear_axle(camera_extrinsic)
+    pinhole_camera_dict: Dict[PinholeCameraType, Optional[Tuple[Union[str, bytes], StateSE3]]] = {}
+    if data_converter_config.include_pinhole_cameras:
+
+        for camera_type, cam_dir_name in KITTI360_PINHOLE_CAMERA_TYPES.items():
+            img_path_png = kitti360_folders[DIR_2D_RAW] / log_name / cam_dir_name / "data_rect" / f"{idx:010d}.png"
+            camera_extrinsic = camera_calibration[cam_dir_name]
+
+            if img_path_png.exists():
+                if data_converter_config.pinhole_camera_store_option == "path":
+                    camera_data = str(img_path_png)
+                elif data_converter_config.pinhole_camera_store_option == "binary":
+                    with open(img_path_png, "rb") as f:
+                        camera_data = f.read()
+            else:
+                camera_data = None
+
+            pinhole_camera_dict[camera_type] = camera_data, camera_extrinsic
+    return pinhole_camera_dict
+
+
+def _extract_kitti360_fisheye_mei_cameras(
+    log_name: str,
+    idx: int,
+    camera_calibration: Dict[str, StateSE3],
+    kitti360_folders: Dict[str, Path],
+    data_converter_config: DatasetConverterConfig,
+) -> Dict[Union[PinholeCameraType, FisheyeMEICameraType], Optional[Tuple[Union[str, bytes], StateSE3]]]:
+    fisheye_camera_dict: Dict[FisheyeMEICameraType, Optional[Tuple[Union[str, bytes], StateSE3]]] = {}
+    for camera_type, cam_dir_name in KITTI360_FISHEYE_MEI_CAMERA_TYPES.items():
+        img_path_png = kitti360_folders[DIR_2D_RAW] / log_name / cam_dir_name / "data_rgb" / f"{idx:010d}.png"
+        camera_extrinsic = camera_calibration[cam_dir_name]
 
         if img_path_png.exists():
-            if data_converter_config.camera_store_option == "path":
+            if data_converter_config.pinhole_camera_store_option == "path":
                 camera_data = str(img_path_png)
-            elif data_converter_config.camera_store_option == "binary":
+            elif data_converter_config.pinhole_camera_store_option == "binary":
                 with open(img_path_png, "rb") as f:
                     camera_data = f.read()
         else:
            camera_data = None
-
-        camera_dict[camera_type] = camera_data, camera_extrinsic
-    return camera_dict
+        fisheye_camera_dict[camera_type] = camera_data, camera_extrinsic
+    return fisheye_camera_dict
+
+
+def _load_kitti_360_calibration(kitti_360_data_root: Path) -> Dict[str, StateSE3]:
+    calib_file = kitti_360_data_root / DIR_CALIB / "calib_cam_to_pose.txt"
+    if not calib_file.exists():
+        raise FileNotFoundError(f"Calibration file not found: {calib_file}")
+
+    lastrow = np.array([0, 0, 0, 1]).reshape(1, 4)
+    calib_dict: Dict[str, StateSE3] = {}
+    with open(calib_file, "r") as f:
+        for line in f:
+            parts = line.strip().split()
+            key = parts[0][:-1]
+            values = list(map(float, parts[1:]))
+            matrix = np.array(values).reshape(3, 4)
+            cam2pose = np.concatenate((matrix, lastrow))
+            cam2pose = KITTI3602NUPLAN_IMU_CALIBRATION @ cam2pose
+            camera_extrinsic = StateSE3.from_transformation_matrix(cam2pose)
+            camera_extrinsic = _extrinsic_from_imu_to_rear_axle(camera_extrinsic)
+            calib_dict[key] = camera_extrinsic
+    return calib_dict
 
 
 def _extrinsic_from_imu_to_rear_axle(extrinsic: StateSE3) -> StateSE3:
diff --git a/src/py123d/conversion/datasets/kitti360/kitti360_sensor_io.py b/src/py123d/conversion/datasets/kitti360/kitti360_sensor_io.py
index 5a0cf7e1..e58b165d 100644
--- a/src/py123d/conversion/datasets/kitti360/kitti360_sensor_io.py
+++ b/src/py123d/conversion/datasets/kitti360/kitti360_sensor_io.py
@@ -4,9 +4,9 @@
 import numpy as np
 
+from py123d.conversion.registry.lidar_index_registry import Kitti360LiDARIndex
 from py123d.datatypes.scene.scene_metadata import LogMetadata
-from py123d.datatypes.sensors.lidar.lidar import LiDARType
-from py123d.datatypes.sensors.lidar.lidar_index import Kitti360LidarIndex
+from py123d.datatypes.sensors.lidar import LiDARType
 from py123d.geometry.se import StateSE3
 from py123d.geometry.transform.transform_se3 import convert_points_3d_array_between_origins
 
@@ -14,16 +14,16 @@
 def load_kitti360_lidar_pcs_from_file(filepath: Path, log_metadata: LogMetadata) -> Dict[LiDARType, np.ndarray]:
     if not filepath.exists():
         logging.warning(f"LiDAR file does not exist: {filepath}. Returning empty point cloud.")
-        return {LiDARType.LIDAR_TOP: np.zeros((1, len(Kitti360LidarIndex)), dtype=np.float32)}
+        return {LiDARType.LIDAR_TOP: np.zeros((1, len(Kitti360LiDARIndex)), dtype=np.float32)}
 
     lidar_extrinsic = log_metadata.lidar_metadata[LiDARType.LIDAR_TOP].extrinsic
 
     lidar_pc = np.fromfile(filepath, dtype=np.float32)
-    lidar_pc = np.reshape(lidar_pc, [-1, len(Kitti360LidarIndex)])
+    lidar_pc = np.reshape(lidar_pc, [-1, len(Kitti360LiDARIndex)])
 
-    lidar_pc[..., Kitti360LidarIndex.XYZ] = convert_points_3d_array_between_origins(
+    lidar_pc[..., Kitti360LiDARIndex.XYZ] = convert_points_3d_array_between_origins(
         from_origin=lidar_extrinsic,
         to_origin=StateSE3(0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0),
-        points_3d_array=lidar_pc[..., Kitti360LidarIndex.XYZ],
+        points_3d_array=lidar_pc[..., Kitti360LiDARIndex.XYZ],
     )
 
     return {LiDARType.LIDAR_TOP: lidar_pc}
diff --git a/src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py b/src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py
index 75eba581..ef3511c4 100644
--- a/src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py
+++ b/src/py123d/conversion/datasets/kitti360/utils/kitti360_helper.py
@@ -1,5 +1,4 @@
 import copy
-import os
 from pathlib import Path
 from typing import Any, Dict, List, Tuple
 
@@ -11,14 +10,11 @@
 from py123d.geometry.polyline import Polyline3D
 from py123d.geometry.rotation import EulerAngles
 
-KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"])
-DIR_CALIB = "calibration"
-PATH_CALIB_ROOT: Path = KITTI360_DATA_ROOT / DIR_CALIB
+# KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"])
+# DIR_CALIB = "calibration"
+# PATH_CALIB_ROOT: Path = KITTI360_DATA_ROOT / DIR_CALIB
 
-DEFAULT_ROLL = 0.0
-DEFAULT_PITCH = 0.0
-
-kitti3602nuplan_imu_calibration_ideal = np.array(
+KITTI3602NUPLAN_IMU_CALIBRATION = np.array(
     [
         [1, 0, 0, 0],
         [0, -1, 0, 0],
@@ -27,9 +23,6 @@
     ],
     dtype=np.float64,
 )
-
-KITTI3602NUPLAN_IMU_CALIBRATION = kitti3602nuplan_imu_calibration_ideal
-
 MAX_N = 1000
 
 
@@ -246,12 +239,12 @@ def parseOpencvMatrix(node):
     return mat
 
 
-def get_lidar_extrinsic() -> np.ndarray:
-    cam2pose_txt = PATH_CALIB_ROOT / "calib_cam_to_pose.txt"
+def get_kitti360_lidar_extrinsic(kitti360_calibration_root: Path) -> np.ndarray:
+    cam2pose_txt = kitti360_calibration_root / "calib_cam_to_pose.txt"
     if not cam2pose_txt.exists():
raise FileNotFoundError(f"calib_cam_to_pose.txt file not found: {cam2pose_txt}") - cam2velo_txt = PATH_CALIB_ROOT / "calib_cam_to_velo.txt" + cam2velo_txt = kitti360_calibration_root / "calib_cam_to_velo.txt" if not cam2velo_txt.exists(): raise FileNotFoundError(f"calib_cam_to_velo.txt file not found: {cam2velo_txt}") diff --git a/src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py b/src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py index 324cb337..3f65b375 100644 --- a/src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py +++ b/src/py123d/conversion/datasets/kitti360/utils/preprocess_detection.py @@ -21,19 +21,10 @@ import numpy as np import numpy.typing as npt -KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"]) -DIR_3D_RAW = "data_3d_raw" -DIR_3D_BBOX = "data_3d_bboxes" -DIR_POSES = "data_poses" - -PATH_3D_RAW_ROOT = KITTI360_DATA_ROOT / DIR_3D_RAW -PATH_3D_BBOX_ROOT = KITTI360_DATA_ROOT / DIR_3D_BBOX -PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES - from py123d.conversion.datasets.kitti360.utils.kitti360_helper import ( KITTI3602NUPLAN_IMU_CALIBRATION, KITTI360Bbox3D, - get_lidar_extrinsic, + get_kitti360_lidar_extrinsic, ) from py123d.conversion.datasets.kitti360.utils.kitti360_labels import ( BBOX_LABLES_TO_DETECTION_NAME_DICT, @@ -41,15 +32,24 @@ kittiId2label, ) +# KITTI360_DATA_ROOT = Path(os.environ["KITTI360_DATA_ROOT"]) +# DIR_3D_RAW = "data_3d_raw" +# DIR_3D_BBOX = "data_3d_bboxes" +# DIR_POSES = "data_poses" + +# PATH_3D_RAW_ROOT = KITTI360_DATA_ROOT / DIR_3D_RAW +# PATH_3D_BBOX_ROOT = KITTI360_DATA_ROOT / DIR_3D_BBOX +# PATH_POSES_ROOT = KITTI360_DATA_ROOT / DIR_POSES + -def _bbox_xml_path(log_name: str) -> Path: +def _bbox_xml_path(kitti360_dataset_root: Path, log_name: str) -> Path: if log_name == "2013_05_28_drive_0004_sync": - return PATH_3D_BBOX_ROOT / "train_full" / f"{log_name}.xml" - return PATH_3D_BBOX_ROOT / "train" / f"{log_name}.xml" + return kitti360_dataset_root / "data_3d_bboxes" / "train_full" / f"{log_name}.xml" + return kitti360_dataset_root / "data_3d_bboxes" / "train" / f"{log_name}.xml" -def _lidar_frame_path(log_name: str, frame_idx: int) -> Path: - return PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data" / f"{frame_idx:010d}.bin" +def _lidar_frame_path(kitti360_dataset_root: Path, log_name: str, frame_idx: int) -> Path: + return kitti360_dataset_root / "data_3d_raw" / log_name / "velodyne_points" / "data" / f"{frame_idx:010d}.bin" def _load_lidar_xyz(filepath: Path) -> np.ndarray: @@ -58,9 +58,9 @@ def _load_lidar_xyz(filepath: Path) -> np.ndarray: return arr.reshape(-1, 4)[:, :3] -def _collect_static_objects(log_name: str) -> List[KITTI360Bbox3D]: +def _collect_static_objects(kitti360_dataset_root: Path, log_name: str) -> List[KITTI360Bbox3D]: """Parse XML and collect static objects with valid class names.""" - xml_path = _bbox_xml_path(log_name) + xml_path = _bbox_xml_path(kitti360_dataset_root, log_name) if not xml_path.exists(): raise FileNotFoundError(f"BBox 3D file not found: {xml_path}") tree = ET.parse(xml_path) @@ -84,10 +84,10 @@ def _collect_static_objects(log_name: str) -> List[KITTI360Bbox3D]: return static_objs -def _collect_ego_states(log_name: str) -> Tuple[npt.NDArray[np.float64], list[int]]: +def _collect_ego_states(kitti360_data_root: Path, log_name: str) -> Tuple[npt.NDArray[np.float64], list[int]]: """Load ego states from poses.txt.""" - pose_file = PATH_POSES_ROOT / log_name / "poses.txt" + pose_file = kitti360_data_root / "data_poses" / log_name / "poses.txt" 
     if not pose_file.exists():
         raise FileNotFoundError(f"Pose file not found: {pose_file}")
 
@@ -121,6 +121,7 @@ def _collect_ego_states(log_name: str) -> Tuple[npt.NDArray[np.float64], list[in
 
 def process_detection(
+    kitti360_data_root: Path,
     log_name: str,
     radius_m: float = 60.0,
     output_dir: Optional[Path] = None,
@@ -133,29 +134,29 @@
     Save per-frame detections to a pickle to avoid recomputation.
     """
 
-    lidar_dir = PATH_3D_RAW_ROOT / log_name / "velodyne_points" / "data"
+    lidar_dir = kitti360_data_root / "data_3d_raw" / log_name / "velodyne_points" / "data"
     if not lidar_dir.exists():
         raise FileNotFoundError(f"LiDAR data folder not found: {lidar_dir}")
     ts_len = len(list(lidar_dir.glob("*.bin")))
     logging.info(f"[preprocess] {log_name}: found {ts_len} lidar frames")
 
     # 1) Parse objects from XML
-    static_objs: List[KITTI360Bbox3D] = _collect_static_objects(log_name)
+    static_objs: List[KITTI360Bbox3D] = _collect_static_objects(kitti360_data_root, log_name)
     logging.info(f"[preprocess] {log_name}: static objects = {len(static_objs)}")
 
     # 2) Filter static objs by ego-centered radius
-    ego_states, valid_timestamp = _collect_ego_states(log_name)
+    ego_states, valid_timestamp = _collect_ego_states(kitti360_data_root, log_name)
     logging.info(f"[preprocess] {log_name}: ego states = {len(ego_states)}")
     for obj in static_objs:
         obj.filter_by_radius(ego_states[:, :3, 3], valid_timestamp, radius_m)
 
     # 3) Filter static objs by LiDAR point cloud visibility
-    lidar_extrinsic = get_lidar_extrinsic()
+    lidar_extrinsic = get_kitti360_lidar_extrinsic(kitti360_data_root / "calibration")
 
     def process_one_frame(time_idx: int) -> None:
         valid_time_idx = valid_timestamp[time_idx]
         logging.info(f"[preprocess] {log_name}: t={valid_time_idx}")
-        lidar_path = _lidar_frame_path(log_name, valid_time_idx)
+        lidar_path = _lidar_frame_path(kitti360_data_root, log_name, valid_time_idx)
         if not lidar_path.exists():
             logging.warning(f"[preprocess] {log_name}: LiDAR frame not found: {lidar_path}")
             return
@@ -194,7 +195,7 @@ def process_one_frame(time_idx: int) -> None:
         static_records.append(obj.valid_frames)
 
     if output_dir is None:
-        output_dir = PATH_3D_BBOX_ROOT / "preprocess"
+        output_dir = kitti360_data_root / "data_3d_bboxes" / "preprocess"
     output_dir.mkdir(parents=True, exist_ok=True)
 
     out_path = output_dir / f"{log_name}_detection_preprocessed.pkl"
@@ -212,12 +213,14 @@
     logging.basicConfig(level=logging.INFO)
 
     parser = argparse.ArgumentParser(description="Precompute KITTI-360 detections filters")
+    parser.add_argument("--kitti360_data_root", type=Path, default=".", help="KITTI-360 data root directory")
     parser.add_argument("--log_name", default="2013_05_28_drive_0000_sync")
    parser.add_argument("--radius", type=float, default=60.0)
     parser.add_argument("--out", type=Path, default="detection_preprocess", help="output directory for pkl")
     args = parser.parse_args()
 
     process_detection(
+        kitti360_data_root=args.kitti360_data_root,
         log_name=args.log_name,
         radius_m=args.radius,
         output_dir=args.out,
diff --git a/src/py123d/conversion/datasets/nuplan/nuplan_converter.py b/src/py123d/conversion/datasets/nuplan/nuplan_converter.py
index c837e559..8f2620ef 100644
--- a/src/py123d/conversion/datasets/nuplan/nuplan_converter.py
+++ b/src/py123d/conversion/datasets/nuplan/nuplan_converter.py
@@ -24,18 +24,18 @@
 )
 from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData
 from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
-from py123d.conversion.registry.lidar_index_registry import NuPlanLidarIndex
+from py123d.conversion.registry.lidar_index_registry import NuPlanLiDARIndex
 from py123d.datatypes.detections.box_detections import BoxDetectionSE3, BoxDetectionWrapper
 from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetection, TrafficLightDetectionWrapper
 from py123d.datatypes.maps.map_metadata import MapMetadata
 from py123d.datatypes.scene.scene_metadata import LogMetadata
-from py123d.datatypes.sensors.camera.pinhole_camera import (
+from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType
+from py123d.datatypes.sensors.pinhole_camera import (
     PinholeCameraMetadata,
     PinholeCameraType,
     PinholeDistortion,
     PinholeIntrinsics,
 )
-from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType
 from py123d.datatypes.time.time_point import TimePoint
 from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3
 from py123d.datatypes.vehicle_state.vehicle_parameters import (
@@ -52,14 +52,14 @@
 
 # NOTE: Leaving this constant here, to avoid having a nuplan dependency in nuplan_constants.py
 NUPLAN_CAMERA_MAPPING = {
-    PinholeCameraType.CAM_F0: CameraChannel.CAM_F0,
-    PinholeCameraType.CAM_B0: CameraChannel.CAM_B0,
-    PinholeCameraType.CAM_L0: CameraChannel.CAM_L0,
-    PinholeCameraType.CAM_L1: CameraChannel.CAM_L1,
-    PinholeCameraType.CAM_L2: CameraChannel.CAM_L2,
-    PinholeCameraType.CAM_R0: CameraChannel.CAM_R0,
-    PinholeCameraType.CAM_R1: CameraChannel.CAM_R1,
-    PinholeCameraType.CAM_R2: CameraChannel.CAM_R2,
+    PinholeCameraType.PCAM_F0: CameraChannel.CAM_F0,
+    PinholeCameraType.PCAM_B0: CameraChannel.CAM_B0,
+    PinholeCameraType.PCAM_L0: CameraChannel.CAM_L0,
+    PinholeCameraType.PCAM_L1: CameraChannel.CAM_L1,
+    PinholeCameraType.PCAM_L2: CameraChannel.CAM_L2,
+    PinholeCameraType.PCAM_R0: CameraChannel.CAM_R0,
+    PinholeCameraType.PCAM_R1: CameraChannel.CAM_R1,
+    PinholeCameraType.PCAM_R2: CameraChannel.CAM_R2,
 }
 
 TARGET_DT: Final[float] = 0.1  # TODO: make configurable
@@ -176,7 +176,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None:
             location=nuplan_log_db.log.map_version,
             timestep_seconds=TARGET_DT,
             vehicle_parameters=get_nuplan_chrysler_pacifica_parameters(),
-            camera_metadata=_get_nuplan_camera_metadata(source_log_path, self.dataset_converter_config),
+            pinhole_camera_metadata=_get_nuplan_camera_metadata(source_log_path, self.dataset_converter_config),
             lidar_metadata=_get_nuplan_lidar_metadata(
                 self._nuplan_sensor_root, log_name, self.dataset_converter_config
             ),
@@ -196,7 +196,7 @@
                 ego_state=_extract_nuplan_ego_state(nuplan_lidar_pc),
                 box_detections=_extract_nuplan_box_detections(nuplan_lidar_pc, source_log_path),
                 traffic_lights=_extract_nuplan_traffic_lights(nuplan_log_db, lidar_pc_token),
-                cameras=_extract_nuplan_cameras(
+                pinhole_cameras=_extract_nuplan_cameras(
                     nuplan_log_db=nuplan_log_db,
                     nuplan_lidar_pc=nuplan_lidar_pc,
                     source_log_path=source_log_path,
@@ -256,7 +256,7 @@ def _get_camera_metadata(camera_type: PinholeCameraType) -> PinholeCameraMetadat
         )
 
     camera_metadata: Dict[str, PinholeCameraMetadata] = {}
-    if dataset_converter_config.include_cameras:
+    if dataset_converter_config.include_pinhole_cameras:
         for camera_type in NUPLAN_CAMERA_MAPPING.keys():
             camera_metadata[camera_type] = _get_camera_metadata(camera_type)
 
@@ -277,7 +277,7 @@ def _get_nuplan_lidar_metadata(
     for lidar_type in NUPLAN_LIDAR_DICT.values():
         metadata[lidar_type] = LiDARMetadata(
             lidar_type=lidar_type,
-            lidar_index=NuPlanLidarIndex,
+            lidar_index=NuPlanLiDARIndex,
             extrinsic=None,  # NOTE: LiDAR extrinsic are unknown
         )
     return metadata
@@ -350,7 +350,7 @@ def _extract_nuplan_cameras(
 
     camera_dict: Dict[str, Union[str, bytes]] = {}
 
-    if dataset_converter_config.include_cameras:
+    if dataset_converter_config.include_pinhole_cameras:
         log_cam_infos = {camera.token: camera for camera in nuplan_log_db.log.cameras}
         for camera_type, camera_channel in NUPLAN_CAMERA_MAPPING.items():
             camera_data: Optional[Union[str, bytes]] = None
@@ -387,9 +387,9 @@
 
             # Store camera data, either as path or binary
             camera_data: Optional[Union[str, bytes]] = None
-            if dataset_converter_config.camera_store_option == "path":
+            if dataset_converter_config.pinhole_camera_store_option == "path":
                 camera_data = str(filename_jpg)
-            elif dataset_converter_config.camera_store_option == "binary":
+            elif dataset_converter_config.pinhole_camera_store_option == "binary":
                 with open(filename_jpg, "rb") as f:
                     camera_data = f.read()
diff --git a/src/py123d/conversion/datasets/nuplan/nuplan_sensor_io.py b/src/py123d/conversion/datasets/nuplan/nuplan_sensor_io.py
index fed2d508..8c2506f0 100644
--- a/src/py123d/conversion/datasets/nuplan/nuplan_sensor_io.py
+++ b/src/py123d/conversion/datasets/nuplan/nuplan_sensor_io.py
@@ -6,8 +6,8 @@
 from py123d.common.utils.dependencies import check_dependencies
 from py123d.conversion.datasets.nuplan.utils.nuplan_constants import NUPLAN_LIDAR_DICT
-from py123d.datatypes.sensors.lidar.lidar import LiDARType
-from py123d.datatypes.sensors.lidar.lidar_index import NuPlanLidarIndex
+from py123d.conversion.registry.lidar_index_registry import NuPlanLiDARIndex
+from py123d.datatypes.sensors.lidar import LiDARType
 
 check_dependencies(["nuplan"], "nuplan")
 from nuplan.database.utils.pointclouds.lidar import LidarPointCloud
@@ -23,7 +23,7 @@ def load_nuplan_lidar_pcs_from_file(pcd_path: Path) -> Dict[LiDARType, np.ndarra
     lidar_pcs_dict: Dict[LiDARType, np.ndarray] = {}
     for lidar_id, lidar_type in NUPLAN_LIDAR_DICT.items():
         mask = merged_lidar_pc[-1, :] == lidar_id
-        lidar_pc = merged_lidar_pc[: len(NuPlanLidarIndex), mask].T.astype(np.float32)
+        lidar_pc = merged_lidar_pc[: len(NuPlanLiDARIndex), mask].T.astype(np.float32)
         lidar_pcs_dict[lidar_type] = lidar_pc
 
     return lidar_pcs_dict
diff --git a/src/py123d/conversion/datasets/nuplan/utils/nuplan_constants.py b/src/py123d/conversion/datasets/nuplan/utils/nuplan_constants.py
index 4b074d53..d1159dc4 100644
--- a/src/py123d/conversion/datasets/nuplan/utils/nuplan_constants.py
+++ b/src/py123d/conversion/datasets/nuplan/utils/nuplan_constants.py
@@ -4,7 +4,7 @@
 from py123d.datatypes.detections.box_detection_types import BoxDetectionType
 from py123d.datatypes.detections.traffic_light_detections import TrafficLightStatus
 from py123d.datatypes.maps.map_datatypes import RoadLineType
-from py123d.datatypes.sensors.lidar.lidar import LiDARType
+from py123d.datatypes.sensors.lidar import LiDARType
 from py123d.datatypes.time.time_point import TimePoint
 
diff --git a/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py b/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py
index c4e1627e..e7cbf2e2 100644
--- a/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py
+++ b/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py
@@ -19,17 +19,17 @@
 )
 from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData
 from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
+from py123d.conversion.registry.lidar_index_registry import NuScenesLiDARIndex
 from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper
 from py123d.datatypes.maps.map_metadata import MapMetadata
 from py123d.datatypes.scene.scene_metadata import LogMetadata
-from py123d.datatypes.sensors.camera.pinhole_camera import (
+from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType
+from py123d.datatypes.sensors.pinhole_camera import (
     PinholeCameraMetadata,
     PinholeCameraType,
     PinholeDistortion,
     PinholeIntrinsics,
 )
-from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType
-from py123d.datatypes.sensors.lidar.lidar_index import NuScenesLidarIndex
 from py123d.datatypes.time.time_point import TimePoint
 from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3
 from py123d.datatypes.vehicle_state.vehicle_parameters import get_nuscenes_renault_zoe_parameters
@@ -148,7 +148,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None:
             location=log_record["location"],
             timestep_seconds=TARGET_DT,
             vehicle_parameters=get_nuscenes_renault_zoe_parameters(),
-            camera_metadata=_get_nuscenes_camera_metadata(nusc, scene, self.dataset_converter_config),
+            pinhole_camera_metadata=_get_nuscenes_pinhole_camera_metadata(nusc, scene, self.dataset_converter_config),
             lidar_metadata=_get_nuscenes_lidar_metadata(nusc, scene, self.dataset_converter_config),
             map_metadata=_get_nuscenes_map_metadata(log_record["location"]),
         )
@@ -172,7 +172,7 @@
                 timestamp=TimePoint.from_us(sample["timestamp"]),
                 ego_state=_extract_nuscenes_ego_state(nusc, sample, can_bus),
                 box_detections=_extract_nuscenes_box_detections(nusc, sample),
-                cameras=_extract_nuscenes_cameras(
+                pinhole_cameras=_extract_nuscenes_cameras(
                     nusc=nusc,
                     sample=sample,
                     dataset_converter_config=self.dataset_converter_config,
@@ -192,14 +192,14 @@
     gc.collect()
 
 
-def _get_nuscenes_camera_metadata(
+def _get_nuscenes_pinhole_camera_metadata(
     nusc: NuScenes,
     scene: Dict[str, Any],
     dataset_converter_config: DatasetConverterConfig,
 ) -> Dict[PinholeCameraType, PinholeCameraMetadata]:
 
     camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {}
-    if dataset_converter_config.include_cameras:
+    if dataset_converter_config.include_pinhole_cameras:
         first_sample_token = scene["first_sample_token"]
         first_sample = nusc.get("sample", first_sample_token)
 
@@ -246,7 +246,7 @@
 
     metadata[LiDARType.LIDAR_TOP] = LiDARMetadata(
         lidar_type=LiDARType.LIDAR_TOP,
-        lidar_index=NuScenesLidarIndex,
+        lidar_index=NuScenesLiDARIndex,
         extrinsic=extrinsic,
     )
 
@@ -389,7 +389,7 @@
 ) -> Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]]:
 
     camera_dict: Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]] = {}
-    if dataset_converter_config.include_cameras:
+    if dataset_converter_config.include_pinhole_cameras:
         for camera_type, camera_channel in NUSCENES_CAMERA_TYPES.items():
             cam_token = sample["data"][camera_channel]
             cam_data = nusc.get("sample_data", cam_token)
@@ -410,9 +410,9 @@
             cam_path = NUSCENES_DATA_ROOT / cam_data["filename"]
             if cam_path.exists() and cam_path.is_file():
-                if dataset_converter_config.camera_store_option == "path":
+                if dataset_converter_config.pinhole_camera_store_option == "path":
                     camera_data = str(cam_path)
-                elif dataset_converter_config.camera_store_option == "binary":
+                elif dataset_converter_config.pinhole_camera_store_option == "binary":
                     with open(cam_path, "rb") as f:
                         camera_data = f.read()
             else:
diff --git a/src/py123d/conversion/datasets/nuscenes/nuscenes_sensor_io.py b/src/py123d/conversion/datasets/nuscenes/nuscenes_sensor_io.py
index eccf0124..e09caae6 100644
--- a/src/py123d/conversion/datasets/nuscenes/nuscenes_sensor_io.py
+++ b/src/py123d/conversion/datasets/nuscenes/nuscenes_sensor_io.py
@@ -3,21 +3,21 @@
 
 import numpy as np
 
+from py123d.conversion.registry.lidar_index_registry import NuScenesLiDARIndex
 from py123d.datatypes.scene.scene_metadata import LogMetadata
-from py123d.datatypes.sensors.lidar.lidar import LiDARType
-from py123d.datatypes.sensors.lidar.lidar_index import NuScenesLidarIndex
+from py123d.datatypes.sensors.lidar import LiDARType
 from py123d.geometry.se import StateSE3
 from py123d.geometry.transform.transform_se3 import convert_points_3d_array_between_origins
 
 
 def load_nuscenes_lidar_pcs_from_file(pcd_path: Path, log_metadata: LogMetadata) -> Dict[LiDARType, np.ndarray]:
-    lidar_pc = np.fromfile(pcd_path, dtype=np.float32).reshape(-1, len(NuScenesLidarIndex))
+    lidar_pc = np.fromfile(pcd_path, dtype=np.float32).reshape(-1, len(NuScenesLiDARIndex))
 
     # convert lidar to ego frame
     lidar_extrinsic = log_metadata.lidar_metadata[LiDARType.LIDAR_TOP].extrinsic
-    lidar_pc[..., NuScenesLidarIndex.XYZ] = convert_points_3d_array_between_origins(
+    lidar_pc[..., NuScenesLiDARIndex.XYZ] = convert_points_3d_array_between_origins(
         from_origin=lidar_extrinsic,
         to_origin=StateSE3(0, 0, 0, 1.0, 0, 0, 0),
-        points_3d_array=lidar_pc[..., NuScenesLidarIndex.XYZ],
+        points_3d_array=lidar_pc[..., NuScenesLiDARIndex.XYZ],
     )
 
     return {LiDARType.LIDAR_TOP: lidar_pc}
diff --git a/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py b/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py
index dd04d91a..9ea29413 100644
--- a/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py
+++ b/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py
@@ -3,7 +3,7 @@
 from typing import Final, List
 
 from py123d.datatypes.detections.box_detection_types import BoxDetectionType
-from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType
+from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType
 
 NUSCENES_MAPS: List[str] = ["boston-seaport", "singapore-hollandvillage", "singapore-onenorth", "singapore-queenstown"]
 
@@ -50,11 +50,11 @@
 }
 
 NUSCENES_CAMERA_TYPES = {
-    PinholeCameraType.CAM_F0: "CAM_FRONT",
-    PinholeCameraType.CAM_B0: "CAM_BACK",
-    PinholeCameraType.CAM_L0: "CAM_FRONT_LEFT",
-    PinholeCameraType.CAM_L1: "CAM_BACK_LEFT",
-    PinholeCameraType.CAM_R0: "CAM_FRONT_RIGHT",
-    PinholeCameraType.CAM_R1: "CAM_BACK_RIGHT",
+    PinholeCameraType.PCAM_F0: "CAM_FRONT",
+    PinholeCameraType.PCAM_B0: "CAM_BACK",
+    PinholeCameraType.PCAM_L0: "CAM_FRONT_LEFT",
+    PinholeCameraType.PCAM_L1: "CAM_BACK_LEFT",
+    PinholeCameraType.PCAM_R0: "CAM_FRONT_RIGHT",
+    PinholeCameraType.PCAM_R1: "CAM_BACK_RIGHT",
 }
 
 NUSCENES_DATA_ROOT = Path(os.environ["NUSCENES_DATA_ROOT"])
diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_converter.py b/src/py123d/conversion/datasets/pandaset/pandaset_converter.py
index dcefb187..9656da00 100644
--- a/src/py123d/conversion/datasets/pandaset/pandaset_converter.py
+++ b/src/py123d/conversion/datasets/pandaset/pandaset_converter.py
@@ -25,15 +25,15 @@
 )
 from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData
 from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter
-from py123d.conversion.registry.lidar_index_registry import PandasetLidarIndex
+from py123d.conversion.registry.lidar_index_registry import PandasetLiDARIndex
 from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper
 from py123d.datatypes.scene.scene_metadata import LogMetadata
-from py123d.datatypes.sensors.camera.pinhole_camera import (
+from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType
+from py123d.datatypes.sensors.pinhole_camera import (
     PinholeCameraMetadata,
     PinholeCameraType,
     PinholeIntrinsics,
 )
-from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType
 from py123d.datatypes.time.time_point import TimePoint
 from py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3
 from py123d.datatypes.vehicle_state.vehicle_parameters import (
@@ -114,7 +114,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None:
             location=None,  # TODO: Add location information.
             timestep_seconds=0.1,
             vehicle_parameters=get_pandaset_chrysler_pacifica_parameters(),
-            camera_metadata=_get_pandaset_camera_metadata(source_log_path, self.dataset_converter_config),
+            pinhole_camera_metadata=_get_pandaset_camera_metadata(source_log_path, self.dataset_converter_config),
             lidar_metadata=_get_pandaset_lidar_metadata(source_log_path, self.dataset_converter_config),
             map_metadata=None,  # NOTE: Pandaset does not have maps.
         )
@@ -142,7 +142,7 @@
                 timestamp=TimePoint.from_s(timestep_s),
                 ego_state=ego_state,
                 box_detections=_extract_pandaset_box_detections(source_log_path, iteration, ego_state),
-                cameras=_extract_pandaset_sensor_camera(
+                pinhole_cameras=_extract_pandaset_sensor_camera(
                     source_log_path,
                     iteration,
                     ego_state,
@@ -167,7 +167,7 @@
 
     camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = {}
 
-    if dataset_config.include_cameras:
+    if dataset_config.include_pinhole_cameras:
         all_cameras_folder = source_log_path / "camera"
         for camera_folder in all_cameras_folder.iterdir():
             camera_name = camera_folder.name
@@ -204,7 +204,7 @@
     for lidar_name, lidar_type in PANDASET_LIDAR_MAPPING.items():
         lidar_metadata[lidar_type] = LiDARMetadata(
             lidar_type=lidar_type,
-            lidar_index=PandasetLidarIndex,
+            lidar_index=PandasetLiDARIndex,
             extrinsic=PANDASET_LIDAR_EXTRINSICS[
                 lidar_name
             ],  # TODO: These extrinsics are incorrect, and need to be transformed correctly.
@@ -343,7 +343,7 @@ def _extract_pandaset_sensor_camera( camera_dict: Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]] = {} iteration_str = f"{iteration:02d}" - if dataset_converter_config.include_cameras: + if dataset_converter_config.include_pinhole_cameras: for camera_name, camera_type in PANDASET_CAMERA_MAPPING.items(): @@ -359,10 +359,10 @@ def _extract_pandaset_sensor_camera( ) camera_data = None - if dataset_converter_config.camera_store_option == "path": + if dataset_converter_config.pinhole_camera_store_option == "path": pandaset_data_root = source_log_path.parent camera_data = str(image_abs_path.relative_to(pandaset_data_root)) - elif dataset_converter_config.camera_store_option == "binary": + elif dataset_converter_config.pinhole_camera_store_option == "binary": with open(image_abs_path, "rb") as f: camera_data = f.read() camera_dict[camera_type] = camera_data, camera_extrinsic diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py b/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py index e07ff916..14f1f236 100644 --- a/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py +++ b/src/py123d/conversion/datasets/pandaset/pandaset_sensor_io.py @@ -10,8 +10,8 @@ read_json, read_pkl_gz, ) -from py123d.conversion.registry.lidar_index_registry import PandasetLidarIndex -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.conversion.registry.lidar_index_registry import PandasetLiDARIndex +from py123d.datatypes.sensors.lidar import LiDARType from py123d.geometry.transform.transform_se3 import convert_absolute_to_relative_points_3d_array @@ -46,9 +46,9 @@ def load_pandaset_lidars_pcs_from_file( ) for lidar_type in lidar_pc_dict.keys(): - lidar_pc_dict[lidar_type][..., PandasetLidarIndex.XYZ] = convert_absolute_to_relative_points_3d_array( + lidar_pc_dict[lidar_type][..., PandasetLiDARIndex.XYZ] = convert_absolute_to_relative_points_3d_array( ego_pose, - lidar_pc_dict[lidar_type][..., PandasetLidarIndex.XYZ], + lidar_pc_dict[lidar_type][..., PandasetLiDARIndex.XYZ], ) return lidar_pc_dict diff --git a/src/py123d/conversion/datasets/pandaset/utils/pandaset_constants.py b/src/py123d/conversion/datasets/pandaset/utils/pandaset_constants.py index 93ef4bc8..1e65b509 100644 --- a/src/py123d/conversion/datasets/pandaset/utils/pandaset_constants.py +++ b/src/py123d/conversion/datasets/pandaset/utils/pandaset_constants.py @@ -2,19 +2,19 @@ from py123d.common.utils.enums import SerialIntEnum from py123d.datatypes.detections.box_detection_types import BoxDetectionType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType, PinholeDistortion, PinholeIntrinsics -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType, PinholeDistortion, PinholeIntrinsics from py123d.geometry import StateSE3 PANDASET_SPLITS: List[str] = ["pandaset_train", "pandaset_val", "pandaset_test"] PANDASET_CAMERA_MAPPING: Dict[str, PinholeCameraType] = { - "front_camera": PinholeCameraType.CAM_F0, - "back_camera": PinholeCameraType.CAM_B0, - "front_left_camera": PinholeCameraType.CAM_L0, - "front_right_camera": PinholeCameraType.CAM_R0, - "left_camera": PinholeCameraType.CAM_L1, - "right_camera": PinholeCameraType.CAM_R1, + "front_camera": PinholeCameraType.PCAM_F0, + "back_camera": PinholeCameraType.PCAM_B0, + "front_left_camera": PinholeCameraType.PCAM_L0, + "front_right_camera": PinholeCameraType.PCAM_R0, + 
"left_camera": PinholeCameraType.PCAM_L1, + "right_camera": PinholeCameraType.PCAM_R1, } PANDASET_LIDAR_MAPPING: Dict[str, LiDARType] = {"main_pandar64": LiDARType.LIDAR_TOP, "front_gt": LiDARType.LIDAR_FRONT} diff --git a/src/py123d/conversion/datasets/pandaset/utils/pandaset_utlis.py b/src/py123d/conversion/datasets/pandaset/utils/pandaset_utlis.py index e179a41c..68575e7e 100644 --- a/src/py123d/conversion/datasets/pandaset/utils/pandaset_utlis.py +++ b/src/py123d/conversion/datasets/pandaset/utils/pandaset_utlis.py @@ -61,10 +61,6 @@ def rotate_pandaset_pose_to_iso_coordinates(pose: StateSE3) -> StateSE3: transformation_matrix = pose.transformation_matrix.copy() transformation_matrix[0:3, 0:3] = transformation_matrix[0:3, 0:3] @ F - # transformation_matrix[0, 3] = pose.y - # transformation_matrix[1, 3] = -pose.x - # transformation_matrix[2, 3] = pose.z - return StateSE3.from_transformation_matrix(transformation_matrix) @@ -78,15 +74,11 @@ def main_lidar_to_rear_axle(pose: StateSE3) -> StateSE3: ], dtype=np.float64, ).T - # F = np.eye(3, dtype=np.float64) transformation_matrix = pose.transformation_matrix.copy() transformation_matrix[0:3, 0:3] = transformation_matrix[0:3, 0:3] @ F rotated_pose = StateSE3.from_transformation_matrix(transformation_matrix) - imu_pose = translate_se3_along_body_frame( - rotated_pose, - vector_3d=Vector3D(x=-0.840, y=0.0, z=0.0), - ) + imu_pose = translate_se3_along_body_frame(rotated_pose, vector_3d=Vector3D(x=-0.840, y=0.0, z=0.0)) return imu_pose diff --git a/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py b/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py index 963a056d..82b0c891 100644 --- a/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py +++ b/src/py123d/conversion/datasets/wopd/utils/wopd_constants.py @@ -2,8 +2,8 @@ from py123d.datatypes.detections.box_detection_types import BoxDetectionType from py123d.datatypes.maps.map_datatypes import LaneType, RoadEdgeType, RoadLineType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType WOPD_AVAILABLE_SPLITS: List[str] = [ "wopd_train", @@ -22,11 +22,11 @@ # https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L50 WOPD_CAMERA_TYPES: Dict[int, PinholeCameraType] = { - 1: PinholeCameraType.CAM_F0, # front_camera - 2: PinholeCameraType.CAM_L0, # front_left_camera - 3: PinholeCameraType.CAM_R0, # front_right_camera - 4: PinholeCameraType.CAM_L1, # left_camera - 5: PinholeCameraType.CAM_R1, # right_camera + 1: PinholeCameraType.PCAM_F0, # front_camera + 2: PinholeCameraType.PCAM_L0, # front_left_camera + 3: PinholeCameraType.PCAM_R0, # front_right_camera + 4: PinholeCameraType.PCAM_L1, # left_camera + 5: PinholeCameraType.PCAM_R1, # right_camera } # https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L66 diff --git a/src/py123d/conversion/datasets/wopd/waymo_map_utils/wopd_map_utils copy.py b/src/py123d/conversion/datasets/wopd/waymo_map_utils/wopd_map_utils copy.py deleted file mode 100644 index 0cc69d25..00000000 --- a/src/py123d/conversion/datasets/wopd/waymo_map_utils/wopd_map_utils copy.py +++ /dev/null @@ -1,390 +0,0 @@ -# from collections import defaultdict -# from pathlib import Path -# from typing import Dict, List, Optional - -# import geopandas as gpd -# import numpy 
as np -# import numpy.typing as npt -# import pandas as pd -# import shapely.geometry as geom - -# from py123d.common.utils.dependencies import check_dependencies -# from py123d.conversion.datasets.wopd.waymo_map_utils.womp_boundary_utils import extract_lane_boundaries -# from py123d.datatypes.maps.map_datatypes import MapLayer, RoadEdgeType, RoadLineType -# from py123d.geometry import Point3DIndex, Polyline3D -# from py123d.geometry.utils.units import mph_to_mps - -# check_dependencies(modules=["waymo_open_dataset"], optional_name="waymo") -# from waymo_open_dataset import dataset_pb2 - -# # TODO: -# # - Implement stop signs -# # - Implement speed bumps -# # - Implement driveways with a different semantic type if needed -# # - Implement intersections and lane group logic - -# WAYMO_ROAD_LINE_CONVERSION = { -# 0: RoadLineType.UNKNOWN, # aka. UNKNOWN -# 1: RoadLineType.DASHED_WHITE, # aka. BROKEN_SINGLE_WHITE -# 2: RoadLineType.SOLID_WHITE, # aka. SOLID_SINGLE_WHITE -# 3: RoadLineType.DOUBLE_SOLID_WHITE, # aka. SOLID_DOUBLE_WHITE -# 4: RoadLineType.DASHED_YELLOW, # aka. BROKEN_SINGLE_YELLOW -# 5: RoadLineType.DOUBLE_DASH_YELLOW, # aka. BROKEN_DOUBLE_YELLOW -# 6: RoadLineType.SOLID_YELLOW, # aka. SOLID_SINGLE_YELLOW -# 7: RoadLineType.DOUBLE_SOLID_YELLOW, # aka. SOLID_DOUBLE_YELLOW -# 8: RoadLineType.DOUBLE_DASH_YELLOW, # aka. PASSING_DOUBLE_YELLOW -# } - -# WAYMO_ROAD_EDGE_CONVERSION = { -# 0: RoadEdgeType.UNKNOWN, -# 1: RoadEdgeType.ROAD_EDGE_BOUNDARY, -# 2: RoadEdgeType.ROAD_EDGE_MEDIAN, -# } - - -# def convert_wopd_map(frame: dataset_pb2.Frame, map_file_path: Path) -> None: - -# def _extract_polyline(data) -> npt.NDArray[np.float64]: -# polyline = np.array([[p.x, p.y, p.z] for p in data.polyline], dtype=np.float64) -# return polyline - -# def _extract_polygon(data) -> npt.NDArray[np.float64]: -# polygon = np.array([[p.x, p.y, p.z] for p in data.polygon], dtype=np.float64) -# assert polygon.shape[0] >= 3, "Polygon must have at least 3 points" -# assert polygon.shape[1] == 3, "Polygon must have 3 coordinates (x, y, z)" -# return polygon - -# def _extract_neighbors(data) -> List[Dict[str, int]]: -# neighbors = [] -# for neighbor in data: -# neighbors.append( -# { -# "lane_id": neighbor.feature_id, -# "self_start_index": neighbor.self_start_index, -# "self_end_index": neighbor.self_end_index, -# "neighbor_start_index": neighbor.neighbor_start_index, -# "neighbor_end_index": neighbor.neighbor_end_index, -# } -# ) -# return neighbors - -# lanes: Dict[int, npt.NDArray[np.float64]] = {} -# lanes_successors = defaultdict(list) -# lanes_predecessors = defaultdict(list) -# lanes_speed_limit_mps: Dict[int, float] = {} -# lanes_type: Dict[int, int] = {} -# lanes_left_neighbors: Dict[int, List[Dict[str, int]]] = {} -# lanes_right_neighbors: Dict[int, List[Dict[str, int]]] = {} - -# road_lines: Dict[int, npt.NDArray[np.float64]] = {} -# road_lines_type: Dict[int, RoadLineType] = {} - -# road_edges: Dict[int, npt.NDArray[np.float64]] = {} -# road_edges_type: Dict[int, int] = {} - -# crosswalks: Dict[int, npt.NDArray[np.float64]] = {} -# carparks: Dict[int, npt.NDArray[np.float64]] = {} - -# for map_feature in frame.map_features: -# if map_feature.HasField("lane"): -# polyline = _extract_polyline(map_feature.lane) -# # Ignore lanes with less than 2 points or not 2D -# if polyline.ndim != 2 or polyline.shape[0] < 2: -# continue -# lanes[map_feature.id] = polyline -# for lane_id_ in map_feature.lane.exit_lanes: -# lanes_successors[map_feature.id].append(lane_id_) -# for lane_id_ in 
map_feature.lane.exit_lanes: -# lanes_predecessors[map_feature.id].append(lane_id_) -# lanes_speed_limit_mps[map_feature.id] = mph_to_mps(map_feature.lane.speed_limit_mph) -# lanes_type[map_feature.id] = map_feature.lane.type -# lanes_left_neighbors[map_feature.id] = _extract_neighbors(map_feature.lane.left_neighbors) -# lanes_right_neighbors[map_feature.id] = _extract_neighbors(map_feature.lane.right_neighbors) -# elif map_feature.HasField("road_line"): -# polyline = _extract_polyline(map_feature.road_line) -# if polyline.ndim != 2 or polyline.shape[0] < 2: -# continue -# road_lines[map_feature.id] = polyline -# road_lines_type[map_feature.id] = WAYMO_ROAD_LINE_CONVERSION.get( -# map_feature.road_line.type, RoadLineType.UNKNOWN -# ) -# elif map_feature.HasField("road_edge"): -# polyline = _extract_polyline(map_feature.road_edge) -# if polyline.ndim != 2 or polyline.shape[0] < 2: -# continue -# road_edges[map_feature.id] = polyline -# road_edges_type[map_feature.id] = WAYMO_ROAD_EDGE_CONVERSION.get( -# map_feature.road_edge.type, RoadEdgeType.UNKNOWN -# ) -# elif map_feature.HasField("stop_sign"): -# # TODO: implement stop signs -# pass -# elif map_feature.HasField("crosswalk"): -# crosswalks[map_feature.id] = _extract_polygon(map_feature.crosswalk) -# elif map_feature.HasField("speed_bump"): -# # TODO: implement speed bumps -# pass -# elif map_feature.HasField("driveway"): -# # NOTE: Determine whether to use a different semantic type for driveways. -# carparks[map_feature.id] = _extract_polygon(map_feature.driveway) - -# lane_left_boundaries_3d, lane_right_boundaries_3d = extract_lane_boundaries( -# lanes, lanes_successors, lanes_predecessors, road_lines, road_edges -# ) - -# lane_df = get_lane_df( -# lanes, -# lanes_successors, -# lanes_predecessors, -# lanes_speed_limit_mps, -# lane_left_boundaries_3d, -# lane_right_boundaries_3d, -# lanes_type, -# lanes_left_neighbors, -# lanes_right_neighbors, -# ) -# lane_group_df = get_lane_group_df( -# lanes, -# lanes_successors, -# lanes_predecessors, -# lane_left_boundaries_3d, -# lane_right_boundaries_3d, -# ) -# intersection_df = get_intersections_df() -# crosswalk_df = get_crosswalk_df(crosswalks) -# walkway_df = get_walkway_df() -# carpark_df = get_carpark_df(carparks) -# generic_drivable_df = get_generic_drivable_df() -# road_edge_df = get_road_edge_df(road_edges, road_edges_type) -# road_line_df = get_road_line_df(road_lines, road_lines_type) - -# map_file_path.unlink(missing_ok=True) -# if not map_file_path.parent.exists(): -# map_file_path.parent.mkdir(parents=True, exist_ok=True) - -# lane_df.to_file(map_file_path, layer=MapLayer.LANE.serialize(), driver="GPKG") -# lane_group_df.to_file(map_file_path, layer=MapLayer.LANE_GROUP.serialize(), driver="GPKG", mode="a") -# intersection_df.to_file(map_file_path, layer=MapLayer.INTERSECTION.serialize(), driver="GPKG", mode="a") -# crosswalk_df.to_file(map_file_path, layer=MapLayer.CROSSWALK.serialize(), driver="GPKG", mode="a") -# walkway_df.to_file(map_file_path, layer=MapLayer.WALKWAY.serialize(), driver="GPKG", mode="a") -# carpark_df.to_file(map_file_path, layer=MapLayer.CARPARK.serialize(), driver="GPKG", mode="a") -# generic_drivable_df.to_file(map_file_path, layer=MapLayer.GENERIC_DRIVABLE.serialize(), driver="GPKG", mode="a") -# road_edge_df.to_file(map_file_path, layer=MapLayer.ROAD_EDGE.serialize(), driver="GPKG", mode="a") -# road_line_df.to_file(map_file_path, layer=MapLayer.ROAD_LINE.serialize(), driver="GPKG", mode="a") - - -# def get_lane_df( -# lanes: Dict[int, 
npt.NDArray[np.float64]], -# lanes_successors: Dict[int, List[int]], -# lanes_predecessors: Dict[int, List[int]], -# lanes_speed_limit_mps: Dict[int, float], -# lanes_left_boundaries_3d: Dict[int, Polyline3D], -# lanes_right_boundaries_3d: Dict[int, Polyline3D], -# lanes_type: Dict[int, int], -# lanes_left_neighbors: Dict[int, List[Dict[str, int]]], -# lanes_right_neighbors: Dict[int, List[Dict[str, int]]], -# ) -> gpd.GeoDataFrame: - -# ids = [] -# lane_types = [] -# lane_group_ids = [] -# speed_limits_mps = [] -# predecessor_ids = [] -# successor_ids = [] -# left_boundaries = [] -# right_boundaries = [] -# left_lane_ids = [] -# right_lane_ids = [] -# baseline_paths = [] -# geometries = [] - -# def _get_majority_neighbor(neighbors: List[Dict[str, int]]) -> Optional[int]: -# if len(neighbors) == 0: -# return None -# length = { -# neighbor["lane_id"]: neighbor["self_end_index"] - neighbor["self_start_index"] for neighbor in neighbors -# } -# return str(max(length, key=length.get)) - -# for lane_id, lane_centerline_array in lanes.items(): -# if lane_id not in lanes_left_boundaries_3d or lane_id not in lanes_right_boundaries_3d: -# continue -# lane_centerline = Polyline3D.from_array(lane_centerline_array) -# lane_speed_limit_mps = lanes_speed_limit_mps[lane_id] if lanes_speed_limit_mps[lane_id] > 0.0 else None - -# ids.append(lane_id) -# lane_types.append(lanes_type[lane_id]) -# lane_group_ids.append([lane_id]) -# speed_limits_mps.append(lane_speed_limit_mps) -# predecessor_ids.append(lanes_predecessors[lane_id]) -# successor_ids.append(lanes_successors[lane_id]) -# left_boundaries.append(lanes_left_boundaries_3d[lane_id].linestring) -# right_boundaries.append(lanes_right_boundaries_3d[lane_id].linestring) -# left_lane_ids.append(_get_majority_neighbor(lanes_left_neighbors[lane_id])) -# right_lane_ids.append(_get_majority_neighbor(lanes_right_neighbors[lane_id])) -# baseline_paths.append(lane_centerline.linestring) - -# geometry = geom.Polygon( -# np.vstack( -# [ -# lanes_left_boundaries_3d[lane_id].array[:, :2], -# lanes_right_boundaries_3d[lane_id].array[:, :2][::-1], -# ] -# ) -# ) -# geometries.append(geometry) - -# data = pd.DataFrame( -# { -# "id": ids, -# "lane_type": lane_types, -# "lane_group_id": lane_group_ids, -# "speed_limit_mps": speed_limits_mps, -# "predecessor_ids": predecessor_ids, -# "successor_ids": successor_ids, -# "left_boundary": left_boundaries, -# "right_boundary": right_boundaries, -# "left_lane_id": left_lane_ids, -# "right_lane_id": right_lane_ids, -# "baseline_path": baseline_paths, -# } -# ) - -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_lane_group_df( -# lanes: Dict[int, npt.NDArray[np.float64]], -# lanes_successors: Dict[int, List[int]], -# lanes_predecessors: Dict[int, List[int]], -# lanes_left_boundaries_3d: Dict[int, Polyline3D], -# lanes_right_boundaries_3d: Dict[int, Polyline3D], -# ) -> gpd.GeoDataFrame: - -# ids = [] -# lane_ids = [] -# intersection_ids = [] -# predecessor_lane_group_ids = [] -# successor_lane_group_ids = [] -# left_boundaries = [] -# right_boundaries = [] -# geometries = [] - -# # NOTE: WOPD does not provide lane groups, so we create a lane group for each lane. 
-# for lane_id in lanes.keys(): -# if lane_id not in lanes_left_boundaries_3d or lane_id not in lanes_right_boundaries_3d: -# continue -# ids.append(lane_id) -# lane_ids.append([lane_id]) -# intersection_ids.append(None) # WOPD does not provide intersections -# predecessor_lane_group_ids.append(lanes_predecessors[lane_id]) -# successor_lane_group_ids.append(lanes_successors[lane_id]) -# left_boundaries.append(lanes_left_boundaries_3d[lane_id].linestring) -# right_boundaries.append(lanes_right_boundaries_3d[lane_id].linestring) -# geometry = geom.Polygon( -# np.vstack( -# [ -# lanes_left_boundaries_3d[lane_id].array[:, :2], -# lanes_right_boundaries_3d[lane_id].array[:, :2][::-1], -# ] -# ) -# ) -# geometries.append(geometry) - -# data = pd.DataFrame( -# { -# "id": ids, -# "lane_ids": lane_ids, -# "intersection_id": intersection_ids, -# "predecessor_lane_group_ids": predecessor_lane_group_ids, -# "successor_lane_group_ids": successor_lane_group_ids, -# "left_boundary": left_boundaries, -# "right_boundary": right_boundaries, -# } -# ) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_intersections_df() -> gpd.GeoDataFrame: -# ids = [] -# lane_group_ids = [] -# geometries = [] - -# # NOTE: WOPD does not provide intersections, so we create an empty DataFrame. -# data = pd.DataFrame({"id": ids, "lane_group_ids": lane_group_ids}) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_carpark_df(carparks) -> gpd.GeoDataFrame: -# ids = list(carparks.keys()) -# outlines = [geom.LineString(outline) for outline in carparks.values()] -# geometries = [geom.Polygon(outline[..., Point3DIndex.XY]) for outline in carparks.values()] - -# data = pd.DataFrame({"id": ids, "outline": outlines}) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_walkway_df() -> gpd.GeoDataFrame: -# ids = [] -# geometries = [] - -# # NOTE: WOPD does not provide walkways, so we create an empty DataFrame. -# data = pd.DataFrame({"id": ids}) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_crosswalk_df(crosswalks: Dict[int, npt.NDArray[np.float64]]) -> gpd.GeoDataFrame: -# ids = list(crosswalks.keys()) -# outlines = [geom.LineString(outline) for outline in crosswalks.values()] -# geometries = [geom.Polygon(outline[..., Point3DIndex.XY]) for outline in crosswalks.values()] - -# data = pd.DataFrame({"id": ids, "outline": outlines}) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_generic_drivable_df() -> gpd.GeoDataFrame: -# ids = [] -# geometries = [] - -# # NOTE: WOPD does not provide generic drivable areas, so we create an empty DataFrame. 
-# data = pd.DataFrame({"id": ids}) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_road_edge_df( -# road_edges: Dict[int, npt.NDArray[np.float64]], road_edges_type: Dict[int, RoadEdgeType] -# ) -> gpd.GeoDataFrame: -# ids = list(road_edges.keys()) -# geometries = [Polyline3D.from_array(road_edge).linestring for road_edge in road_edges.values()] - -# data = pd.DataFrame( -# { -# "id": ids, -# "road_edge_type": [int(road_edge_type) for road_edge_type in road_edges_type.values()], -# } -# ) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf - - -# def get_road_line_df( -# road_lines: Dict[int, npt.NDArray[np.float64]], road_lines_type: Dict[int, RoadLineType] -# ) -> gpd.GeoDataFrame: -# ids = list(road_lines.keys()) -# geometries = [Polyline3D.from_array(road_edge).linestring for road_edge in road_lines.values()] - -# data = pd.DataFrame( -# { -# "id": ids, -# "road_line_type": [int(road_line_type) for road_line_type in road_lines_type.values()], -# } -# ) -# gdf = gpd.GeoDataFrame(data, geometry=geometries) -# return gdf diff --git a/src/py123d/conversion/datasets/wopd/waymo_sensor_io.py b/src/py123d/conversion/datasets/wopd/waymo_sensor_io.py index ca32c3d8..cf25274c 100644 --- a/src/py123d/conversion/datasets/wopd/waymo_sensor_io.py +++ b/src/py123d/conversion/datasets/wopd/waymo_sensor_io.py @@ -6,8 +6,8 @@ from py123d.common.utils.dependencies import check_dependencies from py123d.conversion.datasets.wopd.utils.wopd_constants import WOPD_CAMERA_TYPES, WOPD_LIDAR_TYPES from py123d.conversion.datasets.wopd.utils.wopd_utils import parse_range_image_and_camera_projection -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType check_dependencies(modules=["tensorflow", "waymo_open_dataset"], optional_name="waymo") import tensorflow as tf diff --git a/src/py123d/conversion/datasets/wopd/wopd_converter.py b/src/py123d/conversion/datasets/wopd/wopd_converter.py index c2fe667f..cc42ab4d 100644 --- a/src/py123d/conversion/datasets/wopd/wopd_converter.py +++ b/src/py123d/conversion/datasets/wopd/wopd_converter.py @@ -19,18 +19,18 @@ from py123d.conversion.datasets.wopd.waymo_map_utils.wopd_map_utils import convert_wopd_map from py123d.conversion.log_writer.abstract_log_writer import AbstractLogWriter, LiDARData from py123d.conversion.map_writer.abstract_map_writer import AbstractMapWriter -from py123d.conversion.registry.lidar_index_registry import DefaultLidarIndex, WOPDLidarIndex +from py123d.conversion.registry.lidar_index_registry import DefaultLiDARIndex, WOPDLiDARIndex from py123d.conversion.utils.sensor_utils.camera_conventions import CameraConvention, convert_camera_convention from py123d.datatypes.detections.box_detections import BoxDetectionMetadata, BoxDetectionSE3, BoxDetectionWrapper from py123d.datatypes.maps.map_metadata import MapMetadata from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import ( +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import ( PinholeCameraMetadata, PinholeCameraType, PinholeDistortion, PinholeIntrinsics, ) -from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType from py123d.datatypes.time.time_point import TimePoint from 
py123d.datatypes.vehicle_state.ego_state import DynamicStateSE3, EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import get_wopd_chrysler_pacifica_parameters @@ -143,7 +143,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: location=str(initial_frame.context.stats.location), timestep_seconds=0.1, vehicle_parameters=get_wopd_chrysler_pacifica_parameters(), - camera_metadata=_get_wopd_camera_metadata( + pinhole_camera_metadata=_get_wopd_camera_metadata( initial_frame, self.dataset_converter_config, ), @@ -178,7 +178,7 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: ego_state=_extract_wopd_ego_state(frame, map_pose_offset), box_detections=_extract_wopd_box_detections(frame, map_pose_offset, self._zero_roll_pitch), traffic_lights=None, # TODO: Check if WOPD has traffic light information - cameras=_extract_wopd_cameras(frame, self.dataset_converter_config), + pinhole_cameras=_extract_wopd_cameras(frame, self.dataset_converter_config), lidars=_extract_wopd_lidars( frame, self._keep_polar_features, @@ -232,7 +232,7 @@ def _get_wopd_camera_metadata( camera_metadata_dict: Dict[PinholeCameraType, PinholeCameraMetadata] = {} - if dataset_converter_config.camera_store_option is not None: + if dataset_converter_config.pinhole_camera_store_option is not None: for calibration in initial_frame.context.camera_calibrations: camera_type = WOPD_CAMERA_TYPES[calibration.name] # https://github.com/waymo-research/waymo-open-dataset/blob/master/src/waymo_open_dataset/dataset.proto#L96 @@ -261,7 +261,7 @@ def _get_wopd_lidar_metadata( laser_metadatas: Dict[LiDARType, LiDARMetadata] = {} # NOTE: Using WOPDLiDARIndex keeps the polar features (range, intensity, elongation); DefaultLiDARIndex stores XYZ only. - lidar_index = WOPDLidarIndex if keep_polar_features else DefaultLidarIndex + lidar_index = WOPDLiDARIndex if keep_polar_features else DefaultLiDARIndex if dataset_converter_config.lidar_store_option is not None: for laser_calibration in initial_frame.context.laser_calibrations: @@ -381,7 +381,7 @@ def _extract_wopd_cameras( camera_dict: Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]] = {} - if dataset_converter_config.include_cameras: + if dataset_converter_config.include_pinhole_cameras: # NOTE @DanielDauner: The extrinsic matrix in frame.context.camera_calibration is fixed to model the ego to camera transformation. # The poses in frame.images[idx] are the motion compensated ego poses when the camera triggers.
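For orientation: every converter touched by this patch branches on the renamed pinhole_camera_store_option the same way ("path" stores a path string, "binary" stores the raw image bytes, "mp4" is reserved). A minimal sketch of that shared pattern, with an illustrative helper name that is not part of this patch:

from pathlib import Path
from typing import Optional, Union


def resolve_pinhole_camera_payload(image_path: Path, store_option: Optional[str]) -> Optional[Union[str, bytes]]:
    # "path": persist only a path string; "binary": persist the raw encoded image bytes.
    if store_option == "path":
        return str(image_path)
    elif store_option == "binary":
        with open(image_path, "rb") as f:
            return f.read()
    elif store_option == "mp4":
        raise NotImplementedError("MP4 format is not yet supported, but planned for future releases.")
    return None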
diff --git a/src/py123d/conversion/log_writer/abstract_log_writer.py b/src/py123d/conversion/log_writer/abstract_log_writer.py index 6e5185a2..b367ea69 100644 --- a/src/py123d/conversion/log_writer/abstract_log_writer.py +++ b/src/py123d/conversion/log_writer/abstract_log_writer.py @@ -9,8 +9,9 @@ from py123d.datatypes.detections.box_detections import BoxDetectionWrapper from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 @@ -39,7 +40,8 @@ def write( ego_state: Optional[EgoStateSE3] = None, box_detections: Optional[BoxDetectionWrapper] = None, traffic_lights: Optional[TrafficLightDetectionWrapper] = None, - cameras: Optional[Dict[PinholeCameraType, Tuple[Any, ...]]] = None, + pinhole_cameras: Optional[Dict[PinholeCameraType, Tuple[Any, ...]]] = None, + fisheye_mei_cameras: Optional[Dict[FisheyeMEICameraType, Tuple[Any, ...]]] = None, lidars: Optional[List[LiDARData]] = None, scenario_tags: Optional[List[str]] = None, route_lane_group_ids: Optional[List[int]] = None, diff --git a/src/py123d/conversion/log_writer/arrow_log_writer.py b/src/py123d/conversion/log_writer/arrow_log_writer.py index 532b7dda..446b1126 100644 --- a/src/py123d/conversion/log_writer/arrow_log_writer.py +++ b/src/py123d/conversion/log_writer/arrow_log_writer.py @@ -15,9 +15,9 @@ from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper from py123d.datatypes.scene.arrow.utils.arrow_metadata_utils import add_log_metadata_to_arrow_schema from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICameraType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3, EgoStateSE3Index from py123d.geometry import BoundingBoxSE3Index, StateSE3, StateSE3Index, Vector3DIndex @@ -84,7 +84,8 @@ def write( ego_state: Optional[EgoStateSE3] = None, box_detections: Optional[BoxDetectionWrapper] = None, traffic_lights: Optional[TrafficLightDetectionWrapper] = None, - cameras: Optional[Dict[Union[PinholeCameraType, FisheyeMEICameraType], Tuple[Any, ...]]] = None, + pinhole_cameras: Optional[Dict[PinholeCameraType, Tuple[Any, ...]]] = None, + fisheye_mei_cameras: Optional[Dict[FisheyeMEICameraType, Tuple[Any, ...]]] = None, lidars: Optional[List[LiDARData]] = None, scenario_tags: Optional[List[str]] = None, route_lane_group_ids: Optional[List[int]] = None, @@ -159,34 +160,68 @@ def write( record_batch_data["traffic_light_types"] = [traffic_light_types] # -------------------------------------------------------------------------------------------------------------- - # Cameras + # 
Pinhole Cameras # -------------------------------------------------------------------------------------------------------------- - if self._dataset_converter_config.include_cameras: - assert cameras is not None, "Camera data is required but not provided." - provided_cameras = set(cameras.keys()) - expected_cameras = set(self._log_metadata.camera_metadata.keys()) - for camera_type in expected_cameras: - camera_name = camera_type.serialize() + if self._dataset_converter_config.include_pinhole_cameras: + assert pinhole_cameras is not None, "Pinhole camera data is required but not provided." + provided_pinhole_cameras = set(pinhole_cameras.keys()) + expected_pinhole_cameras = set(self._log_metadata.pinhole_camera_metadata.keys()) + for pinhole_camera_type in expected_pinhole_cameras: + pinhole_camera_name = pinhole_camera_type.serialize() # NOTE @DanielDauner: Missing cameras are allowed, e.g., for synchronization mismatches. # In this case, we write None/null to the arrow table. - camera_data: Optional[Any] = None - camera_pose: Optional[StateSE3] = None - if camera_type in provided_cameras: - camera_data, camera_pose = cameras[camera_type] + pinhole_camera_data: Optional[Any] = None + pinhole_camera_pose: Optional[StateSE3] = None + if pinhole_camera_type in provided_pinhole_cameras: + pinhole_camera_data, pinhole_camera_pose = pinhole_cameras[pinhole_camera_type] # TODO: Refactor how camera data is handed to the writer. # This should be combined with configurations to write to log, sensor_root, or sensor_root as mp4. - if isinstance(camera_data, Path) or isinstance(camera_data, str): - camera_data = str(camera_data) - elif isinstance(camera_data, bytes): - camera_data = camera_data - elif isinstance(camera_data, np.ndarray): - _, encoded_img = cv2.imencode(".jpg", camera_data) - camera_data = encoded_img.tobytes() + if isinstance(pinhole_camera_data, Path) or isinstance(pinhole_camera_data, str): + pinhole_camera_data = str(pinhole_camera_data) + elif isinstance(pinhole_camera_data, bytes): + pinhole_camera_data = pinhole_camera_data + elif isinstance(pinhole_camera_data, np.ndarray): + _, encoded_img = cv2.imencode(".jpg", pinhole_camera_data) + pinhole_camera_data = encoded_img.tobytes() + + record_batch_data[f"{pinhole_camera_name}_data"] = [pinhole_camera_data] + record_batch_data[f"{pinhole_camera_name}_extrinsic"] = [ + pinhole_camera_pose.array if pinhole_camera_pose else None + ] + + # -------------------------------------------------------------------------------------------------------------- + # Fisheye MEI Cameras + # -------------------------------------------------------------------------------------------------------------- + if self._dataset_converter_config.include_fisheye_mei_cameras: + assert fisheye_mei_cameras is not None, "Fisheye MEI camera data is required but not provided." + provided_fisheye_mei_cameras = set(fisheye_mei_cameras.keys()) + expected_fisheye_mei_cameras = set(self._log_metadata.fisheye_mei_camera_metadata.keys()) + for fisheye_mei_camera_type in expected_fisheye_mei_cameras: + fisheye_mei_camera_name = fisheye_mei_camera_type.serialize() + + # NOTE @DanielDauner: Missing cameras are allowed, e.g., for synchronization mismatches. + # In this case, we write None/null to the arrow table.
+ fisheye_mei_camera_data: Optional[Any] = None + fisheye_mei_camera_pose: Optional[StateSE3] = None + if fisheye_mei_camera_type in provided_fisheye_mei_cameras: + fisheye_mei_camera_data, fisheye_mei_camera_pose = fisheye_mei_cameras[fisheye_mei_camera_type] - record_batch_data[f"{camera_name}_data"] = [camera_data] - record_batch_data[f"{camera_name}_extrinsic"] = [camera_pose.array if camera_pose else None] + # TODO: Refactor how camera data is handed to the writer. + # This should be combined with configurations to write to log, sensor_root, or sensor_root as mp4. + if isinstance(fisheye_mei_camera_data, Path) or isinstance(fisheye_mei_camera_data, str): + fisheye_mei_camera_data = str(fisheye_mei_camera_data) + elif isinstance(fisheye_mei_camera_data, bytes): + fisheye_mei_camera_data = fisheye_mei_camera_data + elif isinstance(fisheye_mei_camera_data, np.ndarray): + _, encoded_img = cv2.imencode(".jpg", fisheye_mei_camera_data) + fisheye_mei_camera_data = encoded_img.tobytes() + + record_batch_data[f"{fisheye_mei_camera_name}_data"] = [fisheye_mei_camera_data] + record_batch_data[f"{fisheye_mei_camera_name}_extrinsic"] = [ + fisheye_mei_camera_pose.array if fisheye_mei_camera_pose else None + ] # -------------------------------------------------------------------------------------------------------------- # LiDARs @@ -286,21 +321,44 @@ def _build_schema(dataset_converter_config: DatasetConverterConfig, log_metadata ) # -------------------------------------------------------------------------------------------------------------- - # Cameras + # Pinhole Cameras # -------------------------------------------------------------------------------------------------------------- - if dataset_converter_config.include_cameras: - for camera_type in log_metadata.camera_metadata.keys(): - camera_name = camera_type.serialize() + if dataset_converter_config.include_pinhole_cameras: + for pinhole_camera_type in log_metadata.pinhole_camera_metadata.keys(): + pinhole_camera_name = pinhole_camera_type.serialize() # Depending on the storage option, define the schema for camera data - if dataset_converter_config.camera_store_option == "path": - schema_list.append((f"{camera_name}_data", pa.string())) + if dataset_converter_config.pinhole_camera_store_option == "path": + schema_list.append((f"{pinhole_camera_name}_data", pa.string())) + + elif dataset_converter_config.pinhole_camera_store_option == "binary": + schema_list.append((f"{pinhole_camera_name}_data", pa.binary())) + + elif dataset_converter_config.pinhole_camera_store_option == "mp4": + raise NotImplementedError("MP4 format is not yet supported, but planned for future releases.") + + # Add camera pose + schema_list.append((f"{pinhole_camera_name}_extrinsic", pa.list_(pa.float64(), len(StateSE3Index)))) + + # -------------------------------------------------------------------------------------------------------------- + # Fisheye MEI Cameras + # -------------------------------------------------------------------------------------------------------------- + if dataset_converter_config.include_fisheye_mei_cameras: + for fisheye_mei_camera_type in log_metadata.fisheye_mei_camera_metadata.keys(): + fisheye_mei_camera_name = fisheye_mei_camera_type.serialize() + + # Depending on the storage option, define the schema for camera data + if dataset_converter_config.fisheye_mei_camera_store_option == "path": + schema_list.append((f"{fisheye_mei_camera_name}_data", pa.string())) + + elif dataset_converter_config.fisheye_mei_camera_store_option == "binary": +
schema_list.append((f"{fisheye_mei_camera_name}_data", pa.binary())) - elif dataset_converter_config.camera_store_option == "binary": - schema_list.append((f"{camera_name}_data", pa.binary())) + elif dataset_converter_config.fisheye_mei_camera_store_option == "mp4": + raise NotImplementedError("MP4 format is not yet supported, but planned for future releases.") # Add camera pose - schema_list.append((f"{camera_name}_extrinsic", pa.list_(pa.float64(), len(StateSE3Index)))) + schema_list.append((f"{fisheye_mei_camera_name}_extrinsic", pa.list_(pa.float64(), len(StateSE3Index)))) # -------------------------------------------------------------------------------------------------------------- # LiDARs diff --git a/src/py123d/conversion/log_writer/utils/__init__.py b/src/py123d/conversion/log_writer/utils/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/py123d/conversion/registry/lidar_index_registry.py b/src/py123d/conversion/registry/lidar_index_registry.py index 7a7891f8..a65903b4 100644 --- a/src/py123d/conversion/registry/lidar_index_registry.py +++ b/src/py123d/conversion/registry/lidar_index_registry.py @@ -28,14 +28,14 @@ def XYZ(self) -> slice: @register_lidar_index -class DefaultLidarIndex(LiDARIndex): +class DefaultLiDARIndex(LiDARIndex): X = 0 Y = 1 Z = 2 @register_lidar_index -class NuPlanLidarIndex(LiDARIndex): +class NuPlanLiDARIndex(LiDARIndex): X = 0 Y = 1 Z = 2 @@ -44,7 +44,7 @@ class NuPlanLidarIndex(LiDARIndex): @register_lidar_index -class CARLALidarIndex(LiDARIndex): +class CARLALiDARIndex(LiDARIndex): X = 0 Y = 1 Z = 2 @@ -52,7 +52,7 @@ class CARLALidarIndex(LiDARIndex): @register_lidar_index -class WOPDLidarIndex(LiDARIndex): +class WOPDLiDARIndex(LiDARIndex): RANGE = 0 INTENSITY = 1 ELONGATION = 2 @@ -62,7 +62,7 @@ class WOPDLidarIndex(LiDARIndex): @register_lidar_index -class Kitti360LidarIndex(LiDARIndex): +class Kitti360LiDARIndex(LiDARIndex): X = 0 Y = 1 Z = 2 @@ -70,7 +70,7 @@ class Kitti360LidarIndex(LiDARIndex): @register_lidar_index -class AVSensorLidarIndex(LiDARIndex): +class AVSensorLiDARIndex(LiDARIndex): """Argoverse Sensor LiDAR Indexing Scheme. NOTE: The LiDAR files also include, 'offset_ns', which we do not currently include. 
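As context for the Lidar→LiDAR renames in this registry: each registered index enum fixes the channel layout of one dataset's point clouds, and the XYZ classproperty slices out the spatial columns so downstream geometry code stays agnostic of the scheme. A minimal usage sketch (the random point array is illustrative):

import numpy as np

from py123d.conversion.registry.lidar_index_registry import NuScenesLiDARIndex

# NuScenes stores (x, y, z, intensity, ring) per point, so len(NuScenesLiDARIndex) == 5
# and .XYZ is a slice over the first three columns.
points = np.random.rand(100, len(NuScenesLiDARIndex)).astype(np.float32)
xyz = points[..., NuScenesLiDARIndex.XYZ]
assert xyz.shape == (100, 3)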
@@ -83,7 +83,7 @@ class AVSensorLidarIndex(LiDARIndex): @register_lidar_index -class PandasetLidarIndex(LiDARIndex): +class PandasetLiDARIndex(LiDARIndex): """Pandaset LiDAR Indexing Scheme.""" X = 0 @@ -93,7 +93,7 @@ class PandasetLidarIndex(LiDARIndex): @register_lidar_index -class NuScenesLidarIndex(LiDARIndex): +class NuScenesLiDARIndex(LiDARIndex): X = 0 Y = 1 Z = 2 diff --git a/src/py123d/conversion/sensor_io/camera/jpeg_camera_io.py b/src/py123d/conversion/sensor_io/camera/jpeg_camera_io.py index 327db77c..4e9684e7 100644 --- a/src/py123d/conversion/sensor_io/camera/jpeg_camera_io.py +++ b/src/py123d/conversion/sensor_io/camera/jpeg_camera_io.py @@ -4,7 +4,7 @@ from omegaconf import DictConfig from pyparsing import Union -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraMetadata +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraMetadata from py123d.script.utils.dataset_path_utils import get_dataset_paths DATASET_PATHS: DictConfig = get_dataset_paths() diff --git a/src/py123d/conversion/sensor_io/lidar/draco_lidar_io.py b/src/py123d/conversion/sensor_io/lidar/draco_lidar_io.py index 80948ef3..61473f08 100644 --- a/src/py123d/conversion/sensor_io/lidar/draco_lidar_io.py +++ b/src/py123d/conversion/sensor_io/lidar/draco_lidar_io.py @@ -4,7 +4,7 @@ import numpy as np import numpy.typing as npt -from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata +from py123d.datatypes.sensors.lidar import LiDAR, LiDARMetadata # TODO: add to config DRACO_QUANTIZATION_BITS: Final[int] = 14 diff --git a/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py b/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py index ab94e578..1a9e2583 100644 --- a/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py +++ b/src/py123d/conversion/sensor_io/lidar/file_lidar_io.py @@ -6,7 +6,7 @@ from omegaconf import DictConfig from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType from py123d.script.utils.dataset_path_utils import get_dataset_paths DATASET_PATHS: DictConfig = get_dataset_paths() diff --git a/src/py123d/conversion/sensor_io/lidar/laz_lidar_io.py b/src/py123d/conversion/sensor_io/lidar/laz_lidar_io.py index cedfb2b6..b109c7ca 100644 --- a/src/py123d/conversion/sensor_io/lidar/laz_lidar_io.py +++ b/src/py123d/conversion/sensor_io/lidar/laz_lidar_io.py @@ -4,7 +4,7 @@ import numpy as np import numpy.typing as npt -from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata +from py123d.datatypes.sensors.lidar import LiDAR, LiDARMetadata def encode_lidar_pc_as_laz_binary(point_cloud: npt.NDArray[np.float32], lidar_metadata: LiDARMetadata) -> bytes: diff --git a/src/py123d/datatypes/scene/abstract_scene.py b/src/py123d/datatypes/scene/abstract_scene.py index cdad4033..33611539 100644 --- a/src/py123d/datatypes/scene/abstract_scene.py +++ b/src/py123d/datatypes/scene/abstract_scene.py @@ -1,15 +1,15 @@ from __future__ import annotations import abc -from typing import List, Optional, Union +from typing import List, Optional from py123d.datatypes.detections.box_detections import BoxDetectionWrapper from py123d.datatypes.detections.traffic_light_detections import TrafficLightDetectionWrapper from py123d.datatypes.maps.abstract_map import AbstractMap from py123d.datatypes.scene.scene_metadata import LogMetadata, SceneExtractionMetadata -from py123d.datatypes.sensors.camera.fisheye_mei_camera import 
FisheyeMEICamera, FisheyeMEICameraType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARType +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDAR, LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import VehicleParameters @@ -54,9 +54,15 @@ def get_route_lane_group_ids(self, iteration: int) -> Optional[List[int]]: raise NotImplementedError @abc.abstractmethod - def get_camera_at_iteration( - self, iteration: int, camera_type: Union[PinholeCameraType, FisheyeMEICameraType] - ) -> Optional[Union[PinholeCamera, FisheyeMEICamera]]: + def get_pinhole_camera_at_iteration( + self, iteration: int, camera_type: PinholeCameraType + ) -> Optional[PinholeCamera]: + raise NotImplementedError + + @abc.abstractmethod + def get_fisheye_mei_camera_at_iteration( + self, iteration: int, camera_type: FisheyeMEICameraType + ) -> Optional[FisheyeMEICamera]: raise NotImplementedError @abc.abstractmethod @@ -81,8 +87,12 @@ def vehicle_parameters(self) -> VehicleParameters: return self.log_metadata.vehicle_parameters @property - def available_camera_types(self) -> List[Union[PinholeCameraType, FisheyeMEICameraType]]: - return list(self.log_metadata.camera_metadata.keys()) + def available_pinhole_camera_types(self) -> List[PinholeCameraType]: + return list(self.log_metadata.pinhole_camera_metadata.keys()) + + @property + def available_fisheye_mei_camera_types(self) -> List[FisheyeMEICameraType]: + return list(self.log_metadata.fisheye_mei_camera_metadata.keys()) @property def available_lidar_types(self) -> List[LiDARType]: diff --git a/src/py123d/datatypes/scene/arrow/arrow_scene.py b/src/py123d/datatypes/scene/arrow/arrow_scene.py index a3c4db55..79fd4d87 100644 --- a/src/py123d/datatypes/scene/arrow/arrow_scene.py +++ b/src/py123d/datatypes/scene/arrow/arrow_scene.py @@ -19,9 +19,9 @@ ) from py123d.datatypes.scene.arrow.utils.arrow_metadata_utils import get_log_metadata_from_arrow from py123d.datatypes.scene.scene_metadata import LogMetadata, SceneExtractionMetadata -from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARType +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDAR, LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 @@ -128,18 +128,31 @@ def get_route_lane_group_ids(self, iteration: int) -> Optional[List[int]]: route_lane_group_ids = table["route_lane_group_ids"][self._get_table_index(iteration)].as_py() return route_lane_group_ids - def get_camera_at_iteration( + def get_pinhole_camera_at_iteration( self, iteration: int, camera_type: Union[PinholeCameraType, FisheyeMEICameraType] ) -> Optional[Union[PinholeCamera, FisheyeMEICamera]]: - camera: Optional[Union[PinholeCamera, FisheyeMEICamera]] = None - if camera_type in 
self.available_camera_types: - camera = get_camera_from_arrow_table( + pinhole_camera: Optional[PinholeCamera] = None + if camera_type in self.available_pinhole_camera_types: + pinhole_camera = get_camera_from_arrow_table( self._get_recording_table(), self._get_table_index(iteration), camera_type, self.log_metadata, ) - return camera + return pinhole_camera + + def get_fisheye_mei_camera_at_iteration( + self, iteration: int, camera_type: FisheyeMEICameraType + ) -> Optional[FisheyeMEICamera]: + fisheye_mei_camera: Optional[FisheyeMEICamera] = None + if camera_type in self.available_fisheye_mei_camera_types: + fisheye_mei_camera = get_camera_from_arrow_table( + self._get_recording_table(), + self._get_table_index(iteration), + camera_type, + self.log_metadata, + ) + return fisheye_mei_camera def get_lidar_at_iteration(self, iteration: int, lidar_type: LiDARType) -> Optional[LiDAR]: lidar: Optional[LiDAR] = None diff --git a/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py b/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py index e5840f76..f5f9d067 100644 --- a/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py +++ b/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py @@ -158,10 +158,10 @@ def _get_scene_extraction_metadatas(log_path: Union[str, Path], filter: SceneFilter # Check if camera data is available for the scene, if specified in filter # NOTE: We only check camera availability at the initial index of the scene. - if filter.camera_types is not None: + if filter.pinhole_camera_types is not None: cameras_available = [ recording_table[f"{camera_type.serialize()}_data"][start_idx].as_py() is not None - for camera_type in filter.camera_types + for camera_type in filter.pinhole_camera_types ] if not all(cameras_available): continue diff --git a/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py b/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py index 1631b9e3..946ffe1f 100644 --- a/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py +++ b/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py @@ -7,6 +7,7 @@ import pyarrow as pa from omegaconf import DictConfig +from py123d.conversion.registry.lidar_index_registry import DefaultLiDARIndex from py123d.conversion.sensor_io.lidar.draco_lidar_io import load_lidar_from_draco_binary from py123d.conversion.sensor_io.lidar.file_lidar_io import load_lidar_pcs_from_file from py123d.conversion.sensor_io.lidar.laz_lidar_io import load_lidar_from_laz_binary @@ -23,10 +24,9 @@ TrafficLightStatus, ) from py123d.datatypes.scene.scene_metadata import LogMetadata -from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDAR, LiDARMetadata, LiDARType -from py123d.datatypes.sensors.lidar.lidar_index import DefaultLidarIndex +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICamera, FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDAR, LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraType from py123d.datatypes.time.time_point import TimePoint from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 from py123d.datatypes.vehicle_state.vehicle_parameters import VehicleParameters @@ -137,7 +137,7 @@ def get_camera_from_arrow_table( else: raise NotImplementedError("Only string file paths for camera data are supported.") - camera_metadata = 
log_metadata.camera_metadata[camera_type] + camera_metadata = log_metadata.pinhole_camera_metadata[camera_type] if hasattr(camera_metadata, "mirror_parameter") and camera_metadata.mirror_parameter is not None: return FisheyeMEICamera( metadata=camera_metadata, @@ -178,7 +178,7 @@ def get_lidar_from_arrow_table( lidar = LiDAR( metadata=LiDARMetadata( lidar_type=LiDARType.LIDAR_MERGED, - lidar_index=DefaultLidarIndex, + lidar_index=DefaultLiDARIndex, extrinsic=None, ), point_cloud=merged_pc, @@ -192,7 +192,7 @@ lidar_metadata = log_metadata.lidar_metadata[lidar_type] if lidar_data.startswith(b"DRACO"): # NOTE: DRACO only allows XYZ compression, so we need to override the lidar index here. - lidar_metadata.lidar_index = DefaultLidarIndex + lidar_metadata.lidar_index = DefaultLiDARIndex lidar = load_lidar_from_draco_binary(lidar_data, lidar_metadata) elif lidar_data.startswith(b"LASF"): diff --git a/src/py123d/datatypes/scene/scene_filter.py b/src/py123d/datatypes/scene/scene_filter.py index d4bada57..5aa4ae42 100644 --- a/src/py123d/datatypes/scene/scene_filter.py +++ b/src/py123d/datatypes/scene/scene_filter.py @@ -1,9 +1,9 @@ from dataclasses import dataclass from typing import List, Optional, Union -from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICameraType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType -from py123d.datatypes.sensors.camera.utils import deserialize_camera_type, get_camera_type_by_value +from py123d.common.utils.enums import SerialIntEnum +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType # TODO: Add more filter options (e.g. scene tags, ego movement, or whatever appropriate) @@ -13,7 +13,6 @@ class SceneFilter: split_types: Optional[List[str]] = None split_names: Optional[List[str]] = None - # scene_tags: List[str] = None log_names: Optional[List[str]] = None locations: Optional[List[str]] = None # TODO: @@ -25,24 +24,20 @@ class SceneFilter: duration_s: Optional[float] = 10.0 history_s: Optional[float] = 3.0 - camera_types: Optional[List[Union[PinholeCameraType, FisheyeMEICameraType]]] = None + pinhole_camera_types: Optional[List[PinholeCameraType]] = None + fisheye_mei_camera_types: Optional[List[FisheyeMEICameraType]] = None max_num_scenes: Optional[int] = None shuffle: bool = False def __post_init__(self): - if self.camera_types is not None: - assert isinstance(self.camera_types, list), "camera_types must be a list of CameraType" - camera_types = [] - for camera_type in self.camera_types: - if isinstance(camera_type, str): - camera_type = deserialize_camera_type(camera_type) - camera_types.append(camera_type) - elif isinstance(camera_type, int): - camera_type = get_camera_type_by_value(camera_type) - camera_types.append(camera_type) - elif isinstance(camera_type, (PinholeCameraType, FisheyeMEICameraType)): - camera_types.append(camera_type) - else: - raise ValueError(f"Invalid camera type: {camera_type}") - self.camera_types = camera_types + def _resolve_enum_arguments( + serial_enum_cls: SerialIntEnum, input: List[Union[int, str, SerialIntEnum]] + ) -> List[SerialIntEnum]: + return [serial_enum_cls.from_arbitrary(value) for value in input] + + if self.pinhole_camera_types is not None: + assert isinstance(self.pinhole_camera_types, list), "pinhole_camera_types must be a list of PinholeCameraType" + self.pinhole_camera_types = _resolve_enum_arguments(PinholeCameraType, self.pinhole_camera_types) + + if self.fisheye_mei_camera_types is not None: + 
self.fisheye_mei_camera_types = _resolve_enum_arguments(FisheyeMEICameraType, self.fisheye_mei_camera_types) diff --git a/src/py123d/datatypes/scene/scene_metadata.py b/src/py123d/datatypes/scene/scene_metadata.py index c7f4ae76..751b9e04 100644 --- a/src/py123d/datatypes/scene/scene_metadata.py +++ b/src/py123d/datatypes/scene/scene_metadata.py @@ -1,13 +1,13 @@ from __future__ import annotations from dataclasses import asdict, dataclass, field -from typing import Dict, Optional, Union +from typing import Dict, Optional import py123d from py123d.datatypes.maps.map_metadata import MapMetadata -from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICameraMetadata, FisheyeMEICameraType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraMetadata, PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.fisheye_mei_camera import FisheyeMEICameraMetadata, FisheyeMEICameraType +from py123d.datatypes.sensors.lidar import LiDARMetadata, LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraMetadata, PinholeCameraType from py123d.datatypes.vehicle_state.vehicle_parameters import VehicleParameters @@ -21,9 +21,8 @@ class LogMetadata: timestep_seconds: float vehicle_parameters: Optional[VehicleParameters] = None - camera_metadata: Union[ - Dict[PinholeCameraType, PinholeCameraMetadata], Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata] - ] = field(default_factory=dict) + pinhole_camera_metadata: Dict[PinholeCameraType, PinholeCameraMetadata] = field(default_factory=dict) + fisheye_mei_camera_metadata: Dict[FisheyeMEICameraType, FisheyeMEICameraMetadata] = field(default_factory=dict) lidar_metadata: Dict[LiDARType, LiDARMetadata] = field(default_factory=dict) map_metadata: Optional[MapMetadata] = None @@ -32,23 +31,31 @@ class LogMetadata: @classmethod def from_dict(cls, data_dict: Dict) -> LogMetadata: + # Ego Vehicle Parameters if data_dict["vehicle_parameters"] is not None: data_dict["vehicle_parameters"] = VehicleParameters.from_dict(data_dict["vehicle_parameters"]) - camera_metadata = {} - for key, value in data_dict.get("camera_metadata", {}).items(): - if value.get("mirror_parameter") is not None: - camera_type = FisheyeMEICameraType.deserialize(key) - camera_metadata[camera_type] = FisheyeMEICameraMetadata.from_dict(value) - else: - camera_type = PinholeCameraType.deserialize(key) - camera_metadata[camera_type] = PinholeCameraMetadata.from_dict(value) - data_dict["camera_metadata"] = camera_metadata - + # Pinhole Camera Metadata + pinhole_camera_metadata = {} + for key, value in data_dict.get("pinhole_camera_metadata", {}).items(): + pinhole_camera_metadata[PinholeCameraType.deserialize(key)] = PinholeCameraMetadata.from_dict(value) + data_dict["pinhole_camera_metadata"] = pinhole_camera_metadata + + # Fisheye MEI Camera Metadata + fisheye_mei_camera_metadata = {} + for key, value in data_dict.get("fisheye_mei_camera_metadata", {}).items(): + fisheye_mei_camera_metadata[FisheyeMEICameraType.deserialize(key)] = FisheyeMEICameraMetadata.from_dict( + value + ) + data_dict["fisheye_mei_camera_metadata"] = fisheye_mei_camera_metadata + + # LiDAR Metadata data_dict["lidar_metadata"] = { LiDARType.deserialize(key): LiDARMetadata.from_dict(value) for key, value in data_dict.get("lidar_metadata", {}).items() } + + # Map Metadata if data_dict["map_metadata"] is not None: data_dict["map_metadata"] = MapMetadata.from_dict(data_dict["map_metadata"]) @@ -57,7 +64,12 @@ def 
from_dict(cls, data_dict: Dict) -> LogMetadata: def to_dict(self) -> Dict: data_dict = asdict(self) data_dict["vehicle_parameters"] = self.vehicle_parameters.to_dict() if self.vehicle_parameters else None - data_dict["camera_metadata"] = {key.serialize(): value.to_dict() for key, value in self.camera_metadata.items()} + data_dict["pinhole_camera_metadata"] = { + key.serialize(): value.to_dict() for key, value in self.pinhole_camera_metadata.items() + } + data_dict["fisheye_mei_camera_metadata"] = { + key.serialize(): value.to_dict() for key, value in self.fisheye_mei_camera_metadata.items() + } data_dict["lidar_metadata"] = {key.serialize(): value.to_dict() for key, value in self.lidar_metadata.items()} data_dict["map_metadata"] = self.map_metadata.to_dict() if self.map_metadata else None return data_dict diff --git a/src/py123d/datatypes/sensors/__init__.py b/src/py123d/datatypes/sensors/__init__.py index 89175f33..54cd70a1 100644 --- a/src/py123d/datatypes/sensors/__init__.py +++ b/src/py123d/datatypes/sensors/__init__.py @@ -1,4 +1,4 @@ -from py123d.datatypes.sensors.camera.pinhole_camera import ( +from py123d.datatypes.sensors.pinhole_camera import ( PinholeCameraType, PinholeCamera, PinholeIntrinsicsIndex, @@ -7,3 +7,16 @@ PinholeDistortion, PinholeCameraMetadata, ) +from py123d.datatypes.sensors.fisheye_mei_camera import ( + FisheyeMEICameraType, + FisheyeMEICamera, + FisheyeMEIDistortionIndex, + FisheyeMEIProjectionIndex, + FisheyeMEIProjection, + FisheyeMEICameraMetadata, +) +from py123d.datatypes.sensors.lidar import ( + LiDARType, + LiDARMetadata, + LiDAR, +) diff --git a/src/py123d/datatypes/sensors/camera/__init__.py b/src/py123d/datatypes/sensors/camera/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/py123d/datatypes/sensors/camera/utils.py b/src/py123d/datatypes/sensors/camera/utils.py deleted file mode 100644 index 9ed591b0..00000000 --- a/src/py123d/datatypes/sensors/camera/utils.py +++ /dev/null @@ -1,42 +0,0 @@ -from typing import Union - -from py123d.datatypes.sensors.camera.fisheye_mei_camera import FisheyeMEICameraType -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType - - -def get_camera_type_by_value(value: int) -> Union[PinholeCameraType, FisheyeMEICameraType]: - """Dynamically determine camera type based on value range.""" - pinhole_values = [member.value for member in PinholeCameraType] - fisheye_values = [member.value for member in FisheyeMEICameraType] - - if value in pinhole_values: - return PinholeCameraType(value) - elif value in fisheye_values: - return FisheyeMEICameraType(value) - else: - raise ValueError( - f"Invalid camera type value: {value}. " - f"Valid PinholeCameraType values: {pinhole_values}, " - f"Valid FisheyeMEICameraType values: {fisheye_values}" - ) - - -def deserialize_camera_type(camera_str: str) -> Union[PinholeCameraType, FisheyeMEICameraType]: - """Deserialize camera type string to appropriate enum.""" - try: - return PinholeCameraType.deserialize(camera_str) - except (ValueError, KeyError): - pass - - try: - return FisheyeMEICameraType.deserialize(camera_str) - except (ValueError, KeyError): - pass - - pinhole_names = [member.name.lower() for member in PinholeCameraType] - fisheye_names = [member.name.lower() for member in FisheyeMEICameraType] - raise ValueError( - f"Unknown camera type: '{camera_str}'. 
" - f"Valid PinholeCameraType names: {pinhole_names}, " - f"Valid FisheyeMEICameraType names: {fisheye_names}" - ) diff --git a/src/py123d/datatypes/sensors/camera/fisheye_mei_camera.py b/src/py123d/datatypes/sensors/fisheye_mei_camera.py similarity index 98% rename from src/py123d/datatypes/sensors/camera/fisheye_mei_camera.py rename to src/py123d/datatypes/sensors/fisheye_mei_camera.py index afb27960..d8f53f14 100644 --- a/src/py123d/datatypes/sensors/camera/fisheye_mei_camera.py +++ b/src/py123d/datatypes/sensors/fisheye_mei_camera.py @@ -17,9 +17,8 @@ class FisheyeMEICameraType(SerialIntEnum): Enum for fisheye cameras in d123. """ - # NOTE Use higher values to avoid conflicts with PinholeCameraType - CAM_L = 10 - CAM_R = 11 + FCAM_L = 0 + FCAM_R = 1 @dataclass diff --git a/src/py123d/datatypes/sensors/lidar/lidar.py b/src/py123d/datatypes/sensors/lidar.py similarity index 100% rename from src/py123d/datatypes/sensors/lidar/lidar.py rename to src/py123d/datatypes/sensors/lidar.py diff --git a/src/py123d/datatypes/sensors/lidar/__init__.py b/src/py123d/datatypes/sensors/lidar/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/src/py123d/datatypes/sensors/lidar/lidar_index.py b/src/py123d/datatypes/sensors/lidar/lidar_index.py deleted file mode 100644 index 4a295cce..00000000 --- a/src/py123d/datatypes/sensors/lidar/lidar_index.py +++ /dev/null @@ -1,103 +0,0 @@ -from enum import IntEnum - -from py123d.common.utils.enums import classproperty - -LIDAR_INDEX_REGISTRY = {} - - -def register_lidar_index(enum_class): - LIDAR_INDEX_REGISTRY[enum_class.__name__] = enum_class - return enum_class - - -class LiDARIndex(IntEnum): - - @classproperty - def XY(self) -> slice: - """ - Returns a slice for the XY coordinates of the LiDAR point cloud. - """ - return slice(self.X, self.Y + 1) - - @classproperty - def XYZ(self) -> slice: - """ - Returns a slice for the XYZ coordinates of the LiDAR point cloud. - """ - return slice(self.X, self.Z + 1) - - -@register_lidar_index -class DefaultLidarIndex(LiDARIndex): - X = 0 - Y = 1 - Z = 2 - - -@register_lidar_index -class NuPlanLidarIndex(LiDARIndex): - X = 0 - Y = 1 - Z = 2 - INTENSITY = 3 - RING = 4 - ID = 5 - - -@register_lidar_index -class CARLALidarIndex(LiDARIndex): - X = 0 - Y = 1 - Z = 2 - INTENSITY = 3 - - -@register_lidar_index -class WOPDLidarIndex(LiDARIndex): - RANGE = 0 - INTENSITY = 1 - ELONGATION = 2 - X = 3 - Y = 4 - Z = 5 - - -@register_lidar_index -class Kitti360LidarIndex(LiDARIndex): - X = 0 - Y = 1 - Z = 2 - INTENSITY = 3 - - -@register_lidar_index -class AVSensorLidarIndex(LiDARIndex): - """Argoverse Sensor LiDAR Indexing Scheme. - - NOTE: The LiDAR files also include, 'offset_ns', which we do not currently include. 
- """ - - X = 0 - Y = 1 - Z = 2 - INTENSITY = 3 - - -@register_lidar_index -class PandasetLidarIndex(LiDARIndex): - """Pandaset LiDAR Indexing Scheme.""" - - X = 0 - Y = 1 - Z = 2 - INTENSITY = 3 - - -@register_lidar_index -class NuScenesLidarIndex(LiDARIndex): - - X = 0 - Y = 1 - Z = 2 - INTENSITY = 3 - RING = 4 diff --git a/src/py123d/datatypes/sensors/camera/pinhole_camera.py b/src/py123d/datatypes/sensors/pinhole_camera.py similarity index 96% rename from src/py123d/datatypes/sensors/camera/pinhole_camera.py rename to src/py123d/datatypes/sensors/pinhole_camera.py index 0bb99be6..beefa883 100644 --- a/src/py123d/datatypes/sensors/camera/pinhole_camera.py +++ b/src/py123d/datatypes/sensors/pinhole_camera.py @@ -13,20 +13,17 @@ class PinholeCameraType(SerialIntEnum): - """ - Enum for cameras in py123d. - """ - - CAM_F0 = 0 - CAM_B0 = 1 - CAM_L0 = 2 - CAM_L1 = 3 - CAM_L2 = 4 - CAM_R0 = 5 - CAM_R1 = 6 - CAM_R2 = 7 - CAM_STEREO_L = 8 - CAM_STEREO_R = 9 + + PCAM_F0 = 0 + PCAM_B0 = 1 + PCAM_L0 = 2 + PCAM_L1 = 3 + PCAM_L2 = 4 + PCAM_R0 = 5 + PCAM_R1 = 6 + PCAM_R2 = 7 + PCAM_STEREO_L = 8 + PCAM_STEREO_R = 9 @dataclass diff --git a/src/py123d/script/config/conversion/datasets/av2_sensor_dataset.yaml b/src/py123d/script/config/conversion/datasets/av2_sensor_dataset.yaml index 1a121fd9..ff8a2433 100644 --- a/src/py123d/script/config/conversion/datasets/av2_sensor_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/av2_sensor_dataset.yaml @@ -21,18 +21,16 @@ av2_sensor_dataset: # Box Detections include_box_detections: true - # Traffic Lights - include_traffic_lights: false - - # Cameras - include_cameras: true - camera_store_option: "binary" # "path", "binary", "mp4" + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "binary" # "path", "binary", "mp4" # LiDARs include_lidars: true lidar_store_option: "binary" # "path", "path_merged", "binary" - # Scenario tag / Route - # NOTE: These are only supported for nuPlan. Consider removing or expanding support. + # Not available: + include_traffic_lights: false include_scenario_tags: false include_route: false + include_fisheye_mei_cameras: false diff --git a/src/py123d/script/config/conversion/datasets/carla_dataset.yaml b/src/py123d/script/config/conversion/datasets/carla_dataset.yaml deleted file mode 100644 index c28ccb76..00000000 --- a/src/py123d/script/config/conversion/datasets/carla_dataset.yaml +++ /dev/null @@ -1,35 +0,0 @@ -carla_dataset: - _target_: py123d.conversion.datasets.carla.carla_data_converter.CarlaDataConverter - _convert_: 'all' - - splits: ["carla"] - log_path: "${oc.env:HOME}/carla_workspace/data" - - dataset_converter_config: - _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig - _convert_: 'all' - - force_log_conversion: ${force_log_conversion} - force_map_conversion: ${force_map_conversion} - - # Ego - include_ego: true - - # Box Detections - include_box_detections: true - - # Traffic Lights - include_traffic_lights: true - - # Cameras - include_cameras: true - camera_store_option: "path" # "path", "binary", "mp4" - - # LiDARs - include_lidars: true - lidar_store_option: "path" # "path", "binary" - - # Scenario tag / Route - # NOTE: These are only supported for nuPlan. Consider removing or expanding support. 
-    include_scenario_tags: true
-    include_route: true
diff --git a/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml
index 5b06890e..4919ed79 100644
--- a/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml
+++ b/src/py123d/script/config/conversion/datasets/kitti360_dataset.yaml
@@ -2,9 +2,17 @@ kitti360_dataset:
   _target_: py123d.conversion.datasets.kitti360.kitti360_converter.Kitti360Converter
   _convert_: 'all'
 
-  splits: ["kitti360"]
+  splits: ["kitti360_train", "kitti360_val", "kitti360_test"]
+  kitti360_data_root: ${dataset_paths.kitti360_data_root}
+  # NOTE: We preprocess detections into a cache directory to speed up repeated conversions.
+  # The bounding boxes are preprocessed into a per-frame format, filtered by distance to the
+  # ego vehicle and by visibility in the lidar point cloud.
+  detection_cache_root: ${dataset_paths.kitti360_data_root}/preprocessed_detections
+  detection_radius: 60.0
+
+  # NOTE:
   dataset_converter_config:
     _target_: py123d.conversion.dataset_converter_config.DatasetConverterConfig
     _convert_: 'all'
@@ -21,17 +29,39 @@ kitti360_dataset:
     # Box Detections
     include_box_detections: true
 
-    # Traffic Lights
-    include_traffic_lights: false
+    # Pinhole Cameras
+    include_pinhole_cameras: true
+    pinhole_camera_store_option: "path"
 
-    # Cameras
-    include_cameras: true
-    camera_store_option: "path"
+    # Fisheye Cameras
+    include_fisheye_mei_cameras: false
+    fisheye_mei_camera_store_option: "path"
 
     # LiDARs
     include_lidars: true
     lidar_store_option: "path"
 
-    # Scenario tag / Route
-    include_scenario_tags: false
+    # Not available:
+    include_traffic_lights: false
     include_route: false
+    include_scenario_tags: false
+
+  # NOTE: We define our own train/val/test split here, partitioning the eleven
+  # available KITTI-360 sequences as listed below.
+  train_sequences:
+    - "2013_05_28_drive_0000_sync"
+    - "2013_05_28_drive_0002_sync"
+    - "2013_05_28_drive_0003_sync"
+
+
+  val_sequences:
+    - "2013_05_28_drive_0004_sync"
+    - "2013_05_28_drive_0005_sync"
+    - "2013_05_28_drive_0006_sync"
+    - "2013_05_28_drive_0007_sync"
+
+  test_sequences:
+    - "2013_05_28_drive_0008_sync"
+    - "2013_05_28_drive_0009_sync"
+    - "2013_05_28_drive_0010_sync"
+    - "2013_05_28_drive_0018_sync"
diff --git a/src/py123d/script/config/conversion/datasets/nuplan_dataset.yaml b/src/py123d/script/config/conversion/datasets/nuplan_dataset.yaml
index 671b960c..19b0d0f2 100644
--- a/src/py123d/script/config/conversion/datasets/nuplan_dataset.yaml
+++ b/src/py123d/script/config/conversion/datasets/nuplan_dataset.yaml
@@ -26,15 +26,17 @@ nuplan_dataset:
     # Traffic Lights
     include_traffic_lights: true
 
-    # Cameras
-    include_cameras: true
-    camera_store_option: "path" # "path", "binary", "mp4"
+    # Pinhole Cameras
+    include_pinhole_cameras: true
+    pinhole_camera_store_option: "path" # "path", "binary", "mp4"
 
     # LiDARs
     include_lidars: true
     lidar_store_option: "path_merged" # "path", "path_merged", "binary"
 
     # Scenario tag / Route
-    # NOTE: These are only supported for nuPlan. Consider removing or expanding support.
include_scenario_tags: true include_route: true + + # Not available: + include_fisheye_mei_cameras: false diff --git a/src/py123d/script/config/conversion/datasets/nuplan_mini_dataset.yaml b/src/py123d/script/config/conversion/datasets/nuplan_mini_dataset.yaml index a59e67a7..50aea778 100644 --- a/src/py123d/script/config/conversion/datasets/nuplan_mini_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/nuplan_mini_dataset.yaml @@ -26,15 +26,17 @@ nuplan_mini_dataset: # Traffic Lights include_traffic_lights: true - # Cameras - include_cameras: true - camera_store_option: "path" # "path", "binary", "mp4" + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "path" # "path", "binary", "mp4" # LiDARs include_lidars: true - lidar_store_option: "binary" # "path", "path_merged", "binary" + lidar_store_option: "path_merged" # "path", "path_merged", "binary" # Scenario tag / Route - # NOTE: These are only supported for nuPlan. Consider removing or expanding support. include_scenario_tags: true include_route: true + + # Not available: + include_fisheye_mei_cameras: false diff --git a/src/py123d/script/config/conversion/datasets/nuscenes_dataset.yaml b/src/py123d/script/config/conversion/datasets/nuscenes_dataset.yaml index 0f3ab95e..7ad5834f 100644 --- a/src/py123d/script/config/conversion/datasets/nuscenes_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/nuscenes_dataset.yaml @@ -24,13 +24,16 @@ nuscenes_dataset: # Box Detections include_box_detections: true - # Traffic Lights - include_traffic_lights: false - - # Cameras - include_cameras: true - camera_store_option: "path" + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "path" - #lidar + # LiDARs include_lidars: true lidar_store_option: "path" + + # Not available: + include_fisheye_mei_cameras: false + include_traffic_lights: false + include_scenario_tags: false + include_route: false diff --git a/src/py123d/script/config/conversion/datasets/nuscenes_mini_dataset.yaml b/src/py123d/script/config/conversion/datasets/nuscenes_mini_dataset.yaml index 4c9ba050..e7181c47 100644 --- a/src/py123d/script/config/conversion/datasets/nuscenes_mini_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/nuscenes_mini_dataset.yaml @@ -24,13 +24,16 @@ nuscenes_dataset: # Box Detections include_box_detections: true - # Traffic Lights - include_traffic_lights: false - - # Cameras - include_cameras: true - camera_store_option: "binary" + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "path" - #lidar + # LiDARs include_lidars: true - lidar_store_option: "binary" + lidar_store_option: "path" + + # Not available: + include_fisheye_mei_cameras: false + include_traffic_lights: false + include_scenario_tags: false + include_route: false diff --git a/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml b/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml index 51d8e18c..e3e0b1ec 100644 --- a/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml @@ -12,31 +12,28 @@ pandaset_dataset: force_log_conversion: ${force_log_conversion} force_map_conversion: ${force_map_conversion} - # Map - include_map: false - # Ego include_ego: true # Box Detections include_box_detections: true - # Traffic Lights - include_traffic_lights: false - - # Cameras - include_cameras: true - camera_store_option: "binary" # "path", "binary", "mp4" + # 
Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "path" # LiDARs include_lidars: true - lidar_store_option: "binary" # "path", "path_merged", "binary" + lidar_store_option: "path" - # Scenario tag / Route - # NOTE: These are only supported for nuPlan. Consider removing or expanding support. + # Not available: + include_map: false + include_fisheye_mei_cameras: false + include_traffic_lights: false include_scenario_tags: false include_route: false + # NOTE: Pandaset does not have official splits, so we create our own here. # We use 80% of the logs for training, 10% for validation, and 10% for testing. train_log_names: diff --git a/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml b/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml index 441c4966..ed8a16b7 100644 --- a/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml @@ -28,15 +28,14 @@ wopd_dataset: # Traffic Lights include_traffic_lights: false - # Cameras - include_cameras: true - camera_store_option: "binary" # "path", "binary", "mp4" + # Pinhole Cameras + include_pinhole_cameras: true + pinhole_camera_store_option: "binary" # "path", "binary", "mp4" # LiDARs include_lidars: true lidar_store_option: "binary" # "path", "path_merged", "binary" - # Scenario tag / Route - # NOTE: These are only supported for nuPlan. Consider removing or expanding support. + # Not available: include_scenario_tags: false include_route: false diff --git a/src/py123d/visualization/matplotlib/camera.py b/src/py123d/visualization/matplotlib/camera.py index 39bf98a3..aadd0baf 100644 --- a/src/py123d/visualization/matplotlib/camera.py +++ b/src/py123d/visualization/matplotlib/camera.py @@ -12,7 +12,7 @@ from py123d.datatypes.detections.box_detection_types import BoxDetectionType from py123d.datatypes.detections.box_detections import BoxDetectionSE3, BoxDetectionWrapper -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeIntrinsics +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeIntrinsics from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 from py123d.geometry import BoundingBoxSE3Index, Corners3DIndex from py123d.geometry.transform.transform_se3 import convert_absolute_to_relative_se3_array diff --git a/src/py123d/visualization/viser/elements/sensor_elements.py b/src/py123d/visualization/viser/elements/sensor_elements.py index 410cccb9..2dd02c23 100644 --- a/src/py123d/visualization/viser/elements/sensor_elements.py +++ b/src/py123d/visualization/viser/elements/sensor_elements.py @@ -7,8 +7,8 @@ import viser from py123d.datatypes.scene.abstract_scene import AbstractScene -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCamera, PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCamera, PinholeCameraType from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 from py123d.geometry import StateSE3Index from py123d.geometry.transform.transform_se3 import ( @@ -34,7 +34,7 @@ def add_camera_frustums_to_viser_server( ego_pose[StateSE3Index.XYZ] -= scene_center_array def _add_camera_frustums_to_viser_server(camera_type: PinholeCameraType) -> None: - camera = scene.get_camera_at_iteration(scene_interation, camera_type) + camera = scene.get_pinhole_camera_at_iteration(scene_interation, camera_type) if 
camera is not None: camera_position, camera_quaternion, camera_image = _get_camera_values( camera, @@ -83,7 +83,7 @@ def add_camera_gui_to_viser_server( ) -> None: if viser_config.camera_gui_visible: for camera_type in viser_config.camera_gui_types: - camera = scene.get_camera_at_iteration(scene_interation, camera_type) + camera = scene.get_pinhole_camera_at_iteration(scene_interation, camera_type) if camera is not None: if camera_type in camera_gui_handles: camera_gui_handles[camera_type].image = _rescale_image( diff --git a/src/py123d/visualization/viser/viser_config.py b/src/py123d/visualization/viser/viser_config.py index 77cefd2f..510151f2 100644 --- a/src/py123d/visualization/viser/viser_config.py +++ b/src/py123d/visualization/viser/viser_config.py @@ -1,21 +1,21 @@ from dataclasses import dataclass, field from typing import List, Literal, Optional, Tuple -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType -from py123d.datatypes.sensors.lidar.lidar import LiDARType +from py123d.datatypes.sensors.lidar import LiDARType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType from py123d.visualization.color.color import ELLIS_5 all_camera_types: List[PinholeCameraType] = [ - PinholeCameraType.CAM_F0, - PinholeCameraType.CAM_B0, - PinholeCameraType.CAM_L0, - PinholeCameraType.CAM_L1, - PinholeCameraType.CAM_L2, - PinholeCameraType.CAM_R0, - PinholeCameraType.CAM_R1, - PinholeCameraType.CAM_R2, - PinholeCameraType.CAM_STEREO_L, - PinholeCameraType.CAM_STEREO_R, + PinholeCameraType.PCAM_F0, + PinholeCameraType.PCAM_B0, + PinholeCameraType.PCAM_L0, + PinholeCameraType.PCAM_L1, + PinholeCameraType.PCAM_L2, + PinholeCameraType.PCAM_R0, + PinholeCameraType.PCAM_R1, + PinholeCameraType.PCAM_R2, + PinholeCameraType.PCAM_STEREO_L, + PinholeCameraType.PCAM_STEREO_R, ] all_lidar_types: List[LiDARType] = [ @@ -66,7 +66,7 @@ class ViserConfig: # -> GUI camera_gui_visible: bool = True - camera_gui_types: List[PinholeCameraType] = field(default_factory=lambda: [PinholeCameraType.CAM_F0].copy()) + camera_gui_types: List[PinholeCameraType] = field(default_factory=lambda: [PinholeCameraType.PCAM_F0].copy()) camera_gui_image_scale: float = 0.25 # Resize factor for the camera image shown in the GUI (<1.0 for speed) # LiDAR diff --git a/src/py123d/visualization/viser/viser_viewer.py b/src/py123d/visualization/viser/viser_viewer.py index 89e6d108..e6333f81 100644 --- a/src/py123d/visualization/viser/viser_viewer.py +++ b/src/py123d/visualization/viser/viser_viewer.py @@ -10,7 +10,7 @@ from py123d.datatypes.maps.map_datatypes import MapLayer from py123d.datatypes.scene.abstract_scene import AbstractScene -from py123d.datatypes.sensors.camera.pinhole_camera import PinholeCameraType +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType from py123d.datatypes.vehicle_state.ego_state import EgoStateSE3 from py123d.visualization.viser.elements import ( add_box_detections_to_viser_server, diff --git a/test_viser.py b/test_viser.py index a2b83796..1f467be0 100644 --- a/test_viser.py +++ b/test_viser.py @@ -4,7 +4,7 @@ from py123d.visualization.viser.viser_viewer import ViserViewer if __name__ == "__main__": - splits = ["kitti360"] + splits = ["kitti360_train"] # splits = ["nuscenes-mini_val", "nuscenes-mini_train"] # splits = ["nuplan-mini_test", "nuplan-mini_train", "nuplan-mini_val"] # splits = ["nuplan_private_test"] @@ -27,7 +27,7 @@ history_s=0.0, timestamp_threshold_s=None, shuffle=True, - # camera_types=[PinholeCameraType.CAM_F0], + # 
pinhole_camera_types=[PinholeCameraType.CAM_F0],
     )
     scene_builder = ArrowSceneBuilder()
     worker = Sequential()

From c40ade58152199e1fb00172ebf9e7759860c422f Mon Sep 17 00:00:00 2001
From: Daniel Dauner
Date: Mon, 3 Nov 2025 20:04:53 +0100
Subject: [PATCH 32/32] Fixing a few issues not related to KITTI. Testing all
 datasets (working fine).

---
 pyproject.toml                                |   2 +-
 .../datasets/av2/av2_map_conversion.py        |   8 +-
 .../datasets/av2/av2_sensor_converter.py      |   1 +
 .../datasets/kitti360/kitti360_converter.py   |   1 +
 .../datasets/nuplan/nuplan_converter.py       |  16 ++-
 .../datasets/nuplan/nuplan_map_conversion.py  |   4 +-
 .../datasets/nuscenes/nuscenes_converter.py   |  24 +++-
 .../nuscenes/utils/nuscenes_constants.py      |   3 -
 .../datasets/pandaset/pandaset_converter.py   |   1 +
 .../conversion/map_writer/gpkg_map_writer.py  |  38 +++---
 .../conversion/map_writer/utils/gpkg_utils.py |  23 +++-
 .../scene/arrow/arrow_scene_builder.py        | 122 +++++++++++-------
 .../scene/arrow/utils/arrow_getters.py        |   1 -
 src/py123d/datatypes/scene/scene_filter.py    |  17 +--
 .../conversion/datasets/pandaset_dataset.yaml |   2 +-
 .../conversion/datasets/wopd_dataset.yaml     |   4 +-
 test_viser.py                                 |   7 +-
 17 files changed, 171 insertions(+), 103 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 267ec19d..655a2612 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -105,7 +105,7 @@ nuscenes_expanded = [
     "yourdfpy==0.0.58",
 ]
 waymo = [
-    "protobuf==6.30.2",
+    "protobuf==4.21.0",
     "tensorflow==2.13.0",
     "waymo-open-dataset-tf-2-12-0==1.6.6",
 ]
diff --git a/src/py123d/conversion/datasets/av2/av2_map_conversion.py b/src/py123d/conversion/datasets/av2/av2_map_conversion.py
index 41851c58..a55a9cf4 100644
--- a/src/py123d/conversion/datasets/av2/av2_map_conversion.py
+++ b/src/py123d/conversion/datasets/av2/av2_map_conversion.py
@@ -113,6 +113,10 @@ def _get_centerline_from_boundaries(
         right_boundary=lane_dict["right_lane_boundary"],
     )
 
+    # NOTE @DanielDauner: Some neighbor lane IDs might not be present in the dataset.
+    left_lane_id = lane_dict["left_neighbor_id"] if lane_dict["left_neighbor_id"] in lanes else None
+    right_lane_id = lane_dict["right_neighbor_id"] if lane_dict["right_neighbor_id"] in lanes else None
+
     map_writer.write_lane(
         CacheLane(
             object_id=lane_id,
@@ -120,8 +124,8 @@ def _get_centerline_from_boundaries(
             left_boundary=lane_dict["left_lane_boundary"],
             right_boundary=lane_dict["right_lane_boundary"],
             centerline=lane_centerline,
-            left_lane_id=lane_dict["left_neighbor_id"],
-            right_lane_id=lane_dict["right_neighbor_id"],
+            left_lane_id=left_lane_id,
+            right_lane_id=right_lane_id,
             predecessor_ids=lane_dict["predecessors"],
             successor_ids=lane_dict["successors"],
             speed_limit_mps=None,
diff --git a/src/py123d/conversion/datasets/av2/av2_sensor_converter.py b/src/py123d/conversion/datasets/av2/av2_sensor_converter.py
index 9891e10c..aebebd19 100644
--- a/src/py123d/conversion/datasets/av2/av2_sensor_converter.py
+++ b/src/py123d/conversion/datasets/av2/av2_sensor_converter.py
@@ -52,6 +52,7 @@ def __init__(
         dataset_converter_config: DatasetConverterConfig,
     ) -> None:
         super().__init__(dataset_converter_config)
+        assert av2_data_root is not None, "The variable `av2_data_root` must be provided."
for split in splits: assert ( split in AV2_SENSOR_SPLITS diff --git a/src/py123d/conversion/datasets/kitti360/kitti360_converter.py b/src/py123d/conversion/datasets/kitti360/kitti360_converter.py index d525ab3a..8bb9b497 100644 --- a/src/py123d/conversion/datasets/kitti360/kitti360_converter.py +++ b/src/py123d/conversion/datasets/kitti360/kitti360_converter.py @@ -127,6 +127,7 @@ def __init__( val_sequences: List[str], test_sequences: List[str], ) -> None: + assert kitti360_data_root is not None, "The variable `kitti360_data_root` must be provided." super().__init__(dataset_converter_config) for split in splits: assert split in KITTI360_SPLITS, f"Split {split} is not available. Available splits: {KITTI360_SPLITS}" diff --git a/src/py123d/conversion/datasets/nuplan/nuplan_converter.py b/src/py123d/conversion/datasets/nuplan/nuplan_converter.py index 8f2620ef..8c77169c 100644 --- a/src/py123d/conversion/datasets/nuplan/nuplan_converter.py +++ b/src/py123d/conversion/datasets/nuplan/nuplan_converter.py @@ -84,7 +84,9 @@ def __init__( dataset_converter_config: DatasetConverterConfig, ) -> None: super().__init__(dataset_converter_config) - + assert nuplan_data_root is not None, "The variable `nuplan_data_root` must be provided." + assert nuplan_maps_root is not None, "The variable `nuplan_maps_root` must be provided." + assert nuplan_sensor_root is not None, "The variable `nuplan_sensor_root` must be provided." for split in splits: assert ( split in NUPLAN_DATA_SPLITS @@ -176,7 +178,9 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: location=nuplan_log_db.log.map_version, timestep_seconds=TARGET_DT, vehicle_parameters=get_nuplan_chrysler_pacifica_parameters(), - pinhole_camera_metadata=_get_nuplan_camera_metadata(source_log_path, self.dataset_converter_config), + pinhole_camera_metadata=_get_nuplan_camera_metadata( + source_log_path, self._nuplan_sensor_root, self.dataset_converter_config + ), lidar_metadata=_get_nuplan_lidar_metadata( self._nuplan_sensor_root, log_name, self.dataset_converter_config ), @@ -235,6 +239,7 @@ def _get_nuplan_map_metadata(location: str) -> MapMetadata: def _get_nuplan_camera_metadata( source_log_path: Path, + nuplan_sensor_root: Path, dataset_converter_config: DatasetConverterConfig, ) -> Dict[PinholeCameraType, PinholeCameraMetadata]: @@ -257,8 +262,11 @@ def _get_camera_metadata(camera_type: PinholeCameraType) -> PinholeCameraMetadat camera_metadata: Dict[str, PinholeCameraMetadata] = {} if dataset_converter_config.include_pinhole_cameras: - for camera_type in NUPLAN_CAMERA_MAPPING.keys(): - camera_metadata[camera_type] = _get_camera_metadata(camera_type) + log_name = source_log_path.stem + for camera_type, nuplan_camera_type in NUPLAN_CAMERA_MAPPING.items(): + camera_folder = nuplan_sensor_root / log_name / f"{nuplan_camera_type.value}" + if camera_folder.exists() and camera_folder.is_dir(): + camera_metadata[camera_type] = _get_camera_metadata(camera_type) return camera_metadata diff --git a/src/py123d/conversion/datasets/nuplan/nuplan_map_conversion.py b/src/py123d/conversion/datasets/nuplan/nuplan_map_conversion.py index bff709be..b8b010cb 100644 --- a/src/py123d/conversion/datasets/nuplan/nuplan_map_conversion.py +++ b/src/py123d/conversion/datasets/nuplan/nuplan_map_conversion.py @@ -135,8 +135,8 @@ def _write_nuplan_lane_connectors(nuplan_gdf: Dict[str, gpd.GeoDataFrame], map_w # 1. 
predecessor_ids, successor_ids lane_connector_row = get_row_with_value(nuplan_gdf["lane_connectors"], "fid", str(lane_id)) - predecessor_ids = lane_connector_row["entry_lane_fid"] - successor_ids = lane_connector_row["exit_lane_fid"] + predecessor_ids = [lane_connector_row["entry_lane_fid"]] + successor_ids = [lane_connector_row["exit_lane_fid"]] # 2. left_boundaries, right_boundaries lane_connector_polygons_row = get_row_with_value( diff --git a/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py b/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py index e7cbf2e2..6c22d6ce 100644 --- a/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py +++ b/src/py123d/conversion/datasets/nuscenes/nuscenes_converter.py @@ -12,7 +12,6 @@ from py123d.conversion.datasets.nuscenes.nuscenes_map_conversion import NUSCENES_MAPS, write_nuscenes_map from py123d.conversion.datasets.nuscenes.utils.nuscenes_constants import ( NUSCENES_CAMERA_TYPES, - NUSCENES_DATA_ROOT, NUSCENES_DATA_SPLITS, NUSCENES_DETECTION_NAME_DICT, NUSCENES_DT, @@ -56,11 +55,19 @@ def __init__( ) -> None: super().__init__(dataset_converter_config) + assert nuscenes_data_root is not None, "The variable `nuscenes_data_root` must be provided." + assert nuscenes_map_root is not None, "The variable `nuscenes_map_root` must be provided." for split in splits: assert ( split in NUSCENES_DATA_SPLITS ), f"Split {split} is not available. Available splits: {NUSCENES_DATA_SPLITS}" + if dataset_converter_config.include_lidars: + assert dataset_converter_config.lidar_store_option in ["path", "binary"], ( + f"Invalid lidar_store_option: {dataset_converter_config.lidar_store_option}. " + f"Supported options are 'path' and 'binary'." + ) + self._splits: List[str] = splits self._nuscenes_data_root: Path = Path(nuscenes_data_root) @@ -175,11 +182,13 @@ def convert_log(self, log_index: int, log_writer: AbstractLogWriter) -> None: pinhole_cameras=_extract_nuscenes_cameras( nusc=nusc, sample=sample, + nuscenes_data_root=self._nuscenes_data_root, dataset_converter_config=self.dataset_converter_config, ), lidars=_extract_nuscenes_lidars( nusc=nusc, sample=sample, + nuscenes_data_root=self._nuscenes_data_root, dataset_converter_config=self.dataset_converter_config, ), ) @@ -385,6 +394,7 @@ def _extract_nuscenes_box_detections(nusc: NuScenes, sample: Dict[str, Any]) -> def _extract_nuscenes_cameras( nusc: NuScenes, sample: Dict[str, Any], + nuscenes_data_root: Path, dataset_converter_config: DatasetConverterConfig, ) -> Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]]: camera_dict: Dict[PinholeCameraType, Tuple[Union[str, bytes], StateSE3]] = {} @@ -407,7 +417,7 @@ def _extract_nuscenes_cameras( extrinsic_matrix[:3, 3] = translation extrinsic = StateSE3.from_transformation_matrix(extrinsic_matrix) - cam_path = NUSCENES_DATA_ROOT / cam_data["filename"] + cam_path = nuscenes_data_root / cam_data["filename"] if cam_path.exists() and cam_path.is_file(): if dataset_converter_config.pinhole_camera_store_option == "path": @@ -426,6 +436,7 @@ def _extract_nuscenes_cameras( def _extract_nuscenes_lidars( nusc: NuScenes, sample: Dict[str, Any], + nuscenes_data_root: Path, dataset_converter_config: DatasetConverterConfig, ) -> List[LiDARData]: lidars: List[LiDARData] = [] @@ -433,15 +444,14 @@ def _extract_nuscenes_lidars( if dataset_converter_config.include_lidars: lidar_token = sample["data"]["LIDAR_TOP"] lidar_data = nusc.get("sample_data", lidar_token) - absolute_lidar_path = NUSCENES_DATA_ROOT / lidar_data["filename"] + 
absolute_lidar_path = nuscenes_data_root / lidar_data["filename"] if absolute_lidar_path.exists() and absolute_lidar_path.is_file(): lidar = LiDARData( - lidar_type=LiDARType.LIDAR_MERGED, - relative_path=absolute_lidar_path.relative_to(NUSCENES_DATA_ROOT), - dataset_root=NUSCENES_DATA_ROOT, + lidar_type=LiDARType.LIDAR_TOP, + relative_path=absolute_lidar_path.relative_to(nuscenes_data_root), + dataset_root=nuscenes_data_root, iteration=lidar_data.get("iteration"), ) lidars.append(lidar) - return lidars diff --git a/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py b/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py index 9ea29413..8878401d 100644 --- a/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py +++ b/src/py123d/conversion/datasets/nuscenes/utils/nuscenes_constants.py @@ -1,5 +1,3 @@ -import os -from pathlib import Path from typing import Final, List from py123d.datatypes.detections.box_detection_types import BoxDetectionType @@ -57,4 +55,3 @@ PinholeCameraType.PCAM_R0: "CAM_FRONT_RIGHT", PinholeCameraType.PCAM_R1: "CAM_BACK_RIGHT", } -NUSCENES_DATA_ROOT = Path(os.environ["NUSCENES_DATA_ROOT"]) diff --git a/src/py123d/conversion/datasets/pandaset/pandaset_converter.py b/src/py123d/conversion/datasets/pandaset/pandaset_converter.py index 9656da00..0f177af1 100644 --- a/src/py123d/conversion/datasets/pandaset/pandaset_converter.py +++ b/src/py123d/conversion/datasets/pandaset/pandaset_converter.py @@ -62,6 +62,7 @@ def __init__( super().__init__(dataset_converter_config) for split in splits: assert split in PANDASET_SPLITS, f"Split {split} is not available. Available splits: {PANDASET_SPLITS}" + assert pandaset_data_root is not None, "The variable `pandaset_data_root` must be provided." self._splits: List[str] = splits self._pandaset_data_root: Path = Path(pandaset_data_root) diff --git a/src/py123d/conversion/map_writer/gpkg_map_writer.py b/src/py123d/conversion/map_writer/gpkg_map_writer.py index d5acf041..289e1cc2 100644 --- a/src/py123d/conversion/map_writer/gpkg_map_writer.py +++ b/src/py123d/conversion/map_writer/gpkg_map_writer.py @@ -204,25 +204,25 @@ def _map_ids_to_integer(map_dfs: Dict[MapLayer, gpd.GeoDataFrame]) -> None: # 1. Remap lane ids in LANE layer if len(map_dfs[MapLayer.LANE]) > 0: - map_dfs[MapLayer.LANE]["id"] = map_dfs[MapLayer.LANE]["id"].map(lane_id_mapping.str_to_int) - map_dfs[MapLayer.LANE]["lane_group_id"] = map_dfs[MapLayer.LANE]["lane_group_id"].map( - lane_group_id_mapping.str_to_int + map_dfs[MapLayer.LANE]["id"] = map_dfs[MapLayer.LANE]["id"].apply(lambda x: lane_id_mapping.map(x)) + map_dfs[MapLayer.LANE]["lane_group_id"] = map_dfs[MapLayer.LANE]["lane_group_id"].apply( + lambda x: lane_group_id_mapping.map(x) ) for column in ["predecessor_ids", "successor_ids"]: map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply(lambda x: lane_id_mapping.map_list(x)) for column in ["left_lane_id", "right_lane_id"]: - map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply( - lambda x: str(lane_id_mapping.str_to_int[x]) if pd.notna(x) and x is not None else x - ) + map_dfs[MapLayer.LANE][column] = map_dfs[MapLayer.LANE][column].apply(lambda x: lane_id_mapping.map(x)) # 2. 
Remap lane group ids in LANE_GROUP if len(map_dfs[MapLayer.LANE_GROUP]) > 0: - map_dfs[MapLayer.LANE_GROUP]["id"] = map_dfs[MapLayer.LANE_GROUP]["id"].map(lane_group_id_mapping.str_to_int) + map_dfs[MapLayer.LANE_GROUP]["id"] = map_dfs[MapLayer.LANE_GROUP]["id"].apply( + lambda x: lane_group_id_mapping.map(x) + ) map_dfs[MapLayer.LANE_GROUP]["lane_ids"] = map_dfs[MapLayer.LANE_GROUP]["lane_ids"].apply( lambda x: lane_id_mapping.map_list(x) ) - map_dfs[MapLayer.LANE_GROUP]["intersection_id"] = map_dfs[MapLayer.LANE_GROUP]["intersection_id"].map( - intersection_id_mapping.str_to_int + map_dfs[MapLayer.LANE_GROUP]["intersection_id"] = map_dfs[MapLayer.LANE_GROUP]["intersection_id"].apply( + lambda x: intersection_id_mapping.map(x) ) for column in ["predecessor_ids", "successor_ids"]: map_dfs[MapLayer.LANE_GROUP][column] = map_dfs[MapLayer.LANE_GROUP][column].apply( @@ -231,8 +231,8 @@ def _map_ids_to_integer(map_dfs: Dict[MapLayer, gpd.GeoDataFrame]) -> None: # 3. Remap lane group ids in INTERSECTION if len(map_dfs[MapLayer.INTERSECTION]) > 0: - map_dfs[MapLayer.INTERSECTION]["id"] = map_dfs[MapLayer.INTERSECTION]["id"].map( - intersection_id_mapping.str_to_int + map_dfs[MapLayer.INTERSECTION]["id"] = map_dfs[MapLayer.INTERSECTION]["id"].apply( + lambda x: intersection_id_mapping.map(x) ) map_dfs[MapLayer.INTERSECTION]["lane_group_ids"] = map_dfs[MapLayer.INTERSECTION]["lane_group_ids"].apply( lambda x: lane_group_id_mapping.map_list(x) @@ -240,14 +240,18 @@ def _map_ids_to_integer(map_dfs: Dict[MapLayer, gpd.GeoDataFrame]) -> None: # 4. Remap ids in other layers if len(map_dfs[MapLayer.WALKWAY]) > 0: - map_dfs[MapLayer.WALKWAY]["id"] = map_dfs[MapLayer.WALKWAY]["id"].map(walkway_id_mapping.str_to_int) + map_dfs[MapLayer.WALKWAY]["id"] = map_dfs[MapLayer.WALKWAY]["id"].apply(lambda x: walkway_id_mapping.map(x)) if len(map_dfs[MapLayer.CARPARK]) > 0: - map_dfs[MapLayer.CARPARK]["id"] = map_dfs[MapLayer.CARPARK]["id"].map(carpark_id_mapping.str_to_int) + map_dfs[MapLayer.CARPARK]["id"] = map_dfs[MapLayer.CARPARK]["id"].apply(lambda x: carpark_id_mapping.map(x)) if len(map_dfs[MapLayer.GENERIC_DRIVABLE]) > 0: - map_dfs[MapLayer.GENERIC_DRIVABLE]["id"] = map_dfs[MapLayer.GENERIC_DRIVABLE]["id"].map( - generic_drivable_id_mapping.str_to_int + map_dfs[MapLayer.GENERIC_DRIVABLE]["id"] = map_dfs[MapLayer.GENERIC_DRIVABLE]["id"].apply( + lambda x: generic_drivable_id_mapping.map(x) ) if len(map_dfs[MapLayer.ROAD_LINE]) > 0: - map_dfs[MapLayer.ROAD_LINE]["id"] = map_dfs[MapLayer.ROAD_LINE]["id"].map(road_line_id_mapping.str_to_int) + map_dfs[MapLayer.ROAD_LINE]["id"] = map_dfs[MapLayer.ROAD_LINE]["id"].apply( + lambda x: road_line_id_mapping.map(x) + ) if len(map_dfs[MapLayer.ROAD_EDGE]) > 0: - map_dfs[MapLayer.ROAD_EDGE]["id"] = map_dfs[MapLayer.ROAD_EDGE]["id"].map(road_edge_id_mapping.str_to_int) + map_dfs[MapLayer.ROAD_EDGE]["id"] = map_dfs[MapLayer.ROAD_EDGE]["id"].apply( + lambda x: road_edge_id_mapping.map(x) + ) diff --git a/src/py123d/conversion/map_writer/utils/gpkg_utils.py b/src/py123d/conversion/map_writer/utils/gpkg_utils.py index 6ad4e559..2b9ab334 100644 --- a/src/py123d/conversion/map_writer/utils/gpkg_utils.py +++ b/src/py123d/conversion/map_writer/utils/gpkg_utils.py @@ -1,7 +1,7 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Dict, List, Optional +from typing import Any, Dict, List, Optional import pandas as pd @@ -16,14 +16,29 @@ def __post_init__(self): @classmethod def from_series(cls, series: pd.Series) -> IntIDMapping: - unique_ids = 
series.unique() + # Drop NaN values and convert all to strings + unique_ids = series.dropna().astype(str).unique() str_to_int = {str_id: idx for idx, str_id in enumerate(unique_ids)} return IntIDMapping(str_to_int) - def map_list(self, id_list: Optional[List[str]]) -> pd.Series: + def map(self, str_like: Any) -> Optional[int]: + # Handle NaN and None values + if pd.isna(str_like) or str_like is None: + return None + + # Convert to string for uniform handling + str_key = str(str_like) + return self.str_to_int.get(str_key, None) + + def map_list(self, id_list: Optional[List[str]]) -> List[int]: if id_list is None: return [] - return [self.str_to_int.get(id_str, -1) for id_str in id_list] + list_ = [] + for id_str in id_list: + mapped_id = self.map(id_str) + if mapped_id is not None: + list_.append(mapped_id) + return list_ class IncrementalIntIDMapping: diff --git a/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py b/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py index f5f9d067..2afebbba 100644 --- a/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py +++ b/src/py123d/datatypes/scene/arrow/arrow_scene_builder.py @@ -102,22 +102,23 @@ def _get_scene_extraction_metadatas(log_path: Union[str, Path], filter: SceneFil recording_table = get_lru_cached_arrow_table(log_path) log_metadata = get_log_metadata_from_arrow(log_path) + start_idx = int(filter.history_s / log_metadata.timestep_seconds) if filter.history_s is not None else 0 + end_idx = ( + len(recording_table) - int(filter.duration_s / log_metadata.timestep_seconds) + if filter.duration_s is not None + else len(recording_table) + ) + # 1. Filter location if ( filter.locations is not None and log_metadata.map_metadata is not None and log_metadata.map_metadata.location not in filter.locations ): - return scene_extraction_metadatas + pass - start_idx = int(filter.history_s / log_metadata.timestep_seconds) if filter.history_s is not None else 0 - end_idx = ( - len(recording_table) - int(filter.duration_s / log_metadata.timestep_seconds) - if filter.duration_s is not None - else len(recording_table) - ) - if filter.duration_s is None: - return [ + elif filter.duration_s is None: + scene_extraction_metadatas.append( SceneExtractionMetadata( initial_uuid=str(recording_table["uuid"][start_idx].as_py()), initial_idx=start_idx, @@ -125,48 +126,75 @@ def _get_scene_extraction_metadatas(log_path: Union[str, Path], filter: SceneFil history_s=filter.history_s if filter.history_s is not None else 0.0, iteration_duration_s=log_metadata.timestep_seconds, ) - ] - - scene_uuid_set = set(filter.scene_uuids) if filter.scene_uuids is not None else None - - for idx in range(start_idx, end_idx): - scene_extraction_metadata: Optional[SceneExtractionMetadata] = None - - if scene_uuid_set is None: - scene_extraction_metadata = SceneExtractionMetadata( - initial_uuid=str(recording_table["uuid"][idx].as_py()), - initial_idx=idx, - duration_s=filter.duration_s, - history_s=filter.history_s, - iteration_duration_s=log_metadata.timestep_seconds, - ) - elif str(recording_table["uuid"][idx]) in scene_uuid_set: - scene_extraction_metadata = SceneExtractionMetadata( - initial_uuid=str(recording_table["uuid"][idx].as_py()), - initial_idx=idx, - duration_s=filter.duration_s, - history_s=filter.history_s, - iteration_duration_s=log_metadata.timestep_seconds, - ) + ) + else: + scene_uuid_set = set(filter.scene_uuids) if filter.scene_uuids is not None else None + for idx in range(start_idx, end_idx): + scene_extraction_metadata: Optional[SceneExtractionMetadata] 
= None
+
+            if scene_uuid_set is None:
+                scene_extraction_metadata = SceneExtractionMetadata(
+                    initial_uuid=str(recording_table["uuid"][idx].as_py()),
+                    initial_idx=idx,
+                    duration_s=filter.duration_s,
+                    history_s=filter.history_s,
+                    iteration_duration_s=log_metadata.timestep_seconds,
+                )
+            elif str(recording_table["uuid"][idx]) in scene_uuid_set:
+                scene_extraction_metadata = SceneExtractionMetadata(
+                    initial_uuid=str(recording_table["uuid"][idx].as_py()),
+                    initial_idx=idx,
+                    duration_s=filter.duration_s,
+                    history_s=filter.history_s,
+                    iteration_duration_s=log_metadata.timestep_seconds,
+                )
 
-        if scene_extraction_metadata is not None:
-            # Check of timestamp threshold exceeded between previous scene, if specified in filter
-            if filter.timestamp_threshold_s is not None and len(scene_extraction_metadatas) > 0:
-                iteration_delta = idx - scene_extraction_metadatas[-1].initial_idx
-                if (iteration_delta * log_metadata.timestep_seconds) < filter.timestamp_threshold_s:
+            if scene_extraction_metadata is not None:
+                # Check if the timestamp threshold to the previous scene is exceeded, if specified in the filter
+                if filter.timestamp_threshold_s is not None and len(scene_extraction_metadatas) > 0:
+                    iteration_delta = idx - scene_extraction_metadatas[-1].initial_idx
+                    if (iteration_delta * log_metadata.timestep_seconds) < filter.timestamp_threshold_s:
                         continue
+
+                scene_extraction_metadatas.append(scene_extraction_metadata)
+
+    scene_extraction_metadatas_ = []
+    for scene_extraction_metadata in scene_extraction_metadatas:
+
+        add_scene = True
+        start_idx = scene_extraction_metadata.initial_idx
+        if filter.pinhole_camera_types is not None:
+            for pinhole_camera_type in filter.pinhole_camera_types:
+                column_name = f"{pinhole_camera_type.serialize()}_data"
+
+                if (
+                    pinhole_camera_type in log_metadata.pinhole_camera_metadata
+                    and column_name in recording_table.schema.names
+                    and recording_table[column_name][start_idx].as_py() is not None
+                ):
                     continue
-
-            # Check if camera data is available for the scene, if specified in filter
-            # NOTE: We only check camera availability at the initial index of the scene.
-        if filter.pinhole_camera_types is not None:
-            cameras_available = [
-                recording_table[f"{camera_type.serialize()}_data"][start_idx].as_py() is not None
-                for camera_type in filter.pinhole_camera_types
-            ]
-            if not all(cameras_available):
+                else:
+                    add_scene = False
+                    break
+
+        if filter.fisheye_mei_camera_types is not None:
+            for fisheye_mei_camera_type in filter.fisheye_mei_camera_types:
+                column_name = f"{fisheye_mei_camera_type.serialize()}_data"
+
+                if (
+                    fisheye_mei_camera_type in log_metadata.fisheye_mei_camera_metadata
+                    and column_name in recording_table.schema.names
+                    and recording_table[column_name][start_idx].as_py() is not None
+                ):
                     continue
+                else:
+                    add_scene = False
+                    break
+
+        if add_scene:
+            scene_extraction_metadatas_.append(scene_extraction_metadata)
 
-            scene_extraction_metadatas.append(scene_extraction_metadata)
+    scene_extraction_metadatas = scene_extraction_metadatas_
 
     del recording_table, log_metadata
     return scene_extraction_metadatas
diff --git a/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py b/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py
index 946ffe1f..8aea7801 100644
--- a/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py
+++ b/src/py123d/datatypes/scene/arrow/utils/arrow_getters.py
@@ -169,7 +169,6 @@ def get_lidar_from_arrow_table(
 
     if lidar_column_name in arrow_table.schema.names:
         lidar_data = arrow_table[lidar_column_name][index].as_py()
-
         if isinstance(lidar_data, str):
             lidar_pc_dict = load_lidar_pcs_from_file(relative_path=lidar_data, log_metadata=log_metadata, index=index)
             if lidar_type == LiDARType.LIDAR_MERGED:
diff --git a/src/py123d/datatypes/scene/scene_filter.py b/src/py123d/datatypes/scene/scene_filter.py
index 5aa4ae42..62ad9301 100644
--- a/src/py123d/datatypes/scene/scene_filter.py
+++ b/src/py123d/datatypes/scene/scene_filter.py
@@ -31,13 +31,14 @@ class SceneFilter:
     shuffle: bool = False
 
     def __post_init__(self):
-        if self.pinhole_camera_types is not None:
-            assert isinstance(self.pinhole_camera_types, list), "camera_types must be a list of CameraType"
+        def _resolve_enum_arguments(
+            serial_enum_cls: SerialIntEnum, input: Optional[List[Union[int, str, SerialIntEnum]]]
+        ) -> Optional[List[SerialIntEnum]]:
 
-        def _resolve_enum_arguments(
-            serial_enum_cls: SerialIntEnum, input: List[Union[int, str, SerialIntEnum]]
-        ) -> List[SerialIntEnum]:
-            return [serial_enum_cls.from_arbitrary(value) for value in input]
+            if input is None:
+                return None
+            assert isinstance(input, list), f"input must be a list of {serial_enum_cls.__name__}"
+            return [serial_enum_cls.from_arbitrary(value) for value in input]
 
-        self.pinhole_camera_types = _resolve_enum_arguments(PinholeCameraType, self.pinhole_camera_types)
-        self.fisheye_mei_camera_types = _resolve_enum_arguments(FisheyeMEICameraType, self.fisheye_mei_camera_types)
+        self.pinhole_camera_types = _resolve_enum_arguments(PinholeCameraType, self.pinhole_camera_types)
+        self.fisheye_mei_camera_types = _resolve_enum_arguments(FisheyeMEICameraType, self.fisheye_mei_camera_types)
diff --git a/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml b/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml
index e3e0b1ec..d70a2aab 100644
--- a/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml
+++ b/src/py123d/script/config/conversion/datasets/pandaset_dataset.yaml
@@ -24,7 +24,7 @@ pandaset_dataset:
 
     # LiDARs
     include_lidars: true
-    lidar_store_option: "path"
+    lidar_store_option: "binary"
 
     # Not available:
     include_map: false
diff --git 
a/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml b/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml index ed8a16b7..3fb5acee 100644 --- a/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml +++ b/src/py123d/script/config/conversion/datasets/wopd_dataset.yaml @@ -25,9 +25,6 @@ wopd_dataset: # Box Detections include_box_detections: true - # Traffic Lights - include_traffic_lights: false - # Pinhole Cameras include_pinhole_cameras: true pinhole_camera_store_option: "binary" # "path", "binary", "mp4" @@ -37,5 +34,6 @@ wopd_dataset: lidar_store_option: "binary" # "path", "path_merged", "binary" # Not available: + include_traffic_lights: false include_scenario_tags: false include_route: false diff --git a/test_viser.py b/test_viser.py index 1f467be0..3a1c887e 100644 --- a/test_viser.py +++ b/test_viser.py @@ -1,15 +1,16 @@ from py123d.common.multithreading.worker_sequential import Sequential from py123d.datatypes.scene.arrow.arrow_scene_builder import ArrowSceneBuilder from py123d.datatypes.scene.scene_filter import SceneFilter +from py123d.datatypes.sensors.pinhole_camera import PinholeCameraType from py123d.visualization.viser.viser_viewer import ViserViewer if __name__ == "__main__": - splits = ["kitti360_train"] + # splits = ["kitti360_train"] # splits = ["nuscenes-mini_val", "nuscenes-mini_train"] # splits = ["nuplan-mini_test", "nuplan-mini_train", "nuplan-mini_val"] # splits = ["nuplan_private_test"] # splits = ["carla_test"] - # splits = ["wopd_val"] + splits = ["wopd_val"] # splits = ["av2-sensor_train"] # splits = ["pandaset_test", "pandaset_val", "pandaset_train"] # log_names = ["2021.08.24.13.12.55_veh-45_00386_00472"] @@ -27,7 +28,7 @@ history_s=0.0, timestamp_threshold_s=None, shuffle=True, - # pinhole_camera_types=[PinholeCameraType.CAM_F0], + pinhole_camera_types=[PinholeCameraType.PCAM_F0], ) scene_builder = ArrowSceneBuilder() worker = Sequential()
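
As a quick illustration of the ID-remapping rework in gpkg_utils.py and gpkg_map_writer.py above: the writer now funnels every feature ID through IntIDMapping.map, which normalizes keys to str, turns None/NaN inputs into None, and drops unknown IDs in map_list instead of encoding them as -1. The following is a minimal, self-contained sketch of that pattern; the class body mirrors the patched IntIDMapping, while the sample series and the __main__ driver are illustrative only, not taken from the repository.

from typing import Any, Dict, List, Optional

import pandas as pd


class IntIDMapping:
    """NaN-safe mapping from string-like feature IDs to consecutive integers."""

    def __init__(self, str_to_int: Dict[str, int]) -> None:
        self.str_to_int = str_to_int

    @classmethod
    def from_series(cls, series: pd.Series) -> "IntIDMapping":
        # Drop NaN entries and cast to str, so the int 7 and the string "7" share one key.
        unique_ids = series.dropna().astype(str).unique()
        return cls({str_id: idx for idx, str_id in enumerate(unique_ids)})

    def map(self, str_like: Any) -> Optional[int]:
        # None/NaN inputs (e.g. a lane without a left neighbor) map to None, not NaN.
        if str_like is None or pd.isna(str_like):
            return None
        return self.str_to_int.get(str(str_like))

    def map_list(self, id_list: Optional[List[str]]) -> List[int]:
        # Unknown IDs are dropped instead of being encoded as -1.
        if id_list is None:
            return []
        return [mapped for mapped in (self.map(i) for i in id_list) if mapped is not None]


if __name__ == "__main__":
    ids = pd.Series(["a", 7, None, "a"])       # mixed-type IDs with a missing value
    mapping = IntIDMapping.from_series(ids)
    print(ids.apply(mapping.map).tolist())     # [0, 1, None, 0]
    print(mapping.map_list(["a", "unknown"]))  # [0]

This is also why the Series.map(mapping.str_to_int) calls were replaced with Series.apply(lambda x: mapping.map(x)) throughout gpkg_map_writer.py: Series.map with a plain dict leaves NaN and non-string keys unmapped as NaN floats, whereas the explicit map keeps each column as clean integers or None.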