In [None]:
import matplotlib.pyplot as plt
from shapely.geometry import LineString, Polygon, Point
import numpy as np

from typing import List
import os
from pathlib import Path

from nuplan.database.nuplan_db_orm.nuplandb import LidarBox

import pyarrow as pa
import pyarrow.ipc as ipc

import numpy as np
from tqdm import tqdm

from asim.common.geometry.base import StateSE3
from asim.common.geometry.bounding_box.bounding_box import BoundingBoxSE3
from asim.common.geometry.constants import DEFAULT_ROLL, DEFAULT_PITCH

In [None]:
from asim.dataset.maps.abstract_map import MapSurfaceType

In [None]:
from nuplan.database.nuplan_db_orm.nuplandb import NuPlanDB

In [None]:
from asim.dataset.dataset_specific.nuplan.data_conversion import NuPlanDataset


# Root of the nuPlan data, taken from the environment (raises KeyError if unset).
NUPLAN_DATA_ROOT = Path(os.environ["NUPLAN_DATA_ROOT"])
SPLIT_PATH = NUPLAN_DATA_ROOT / "nuplan-v1.1" / "splits" / "mini"

# Path.iterdir() yields entries in arbitrary, filesystem-dependent order.
# Sort the listing so that a given idx always selects the same log file.
db_files = sorted(SPLIT_PATH.iterdir())

# Index of the log to open; change this (or loop over range(len(db_files)))
# to inspect other logs of the split.
idx = 14

log_db = NuPlanDB(NUPLAN_DATA_ROOT, str(db_files[idx]), None)
print(idx, log_db.log_name, log_db.log.map_version)

In [None]:
from asim.dataset.observation.agent_datatypes import BoundingBoxType


# Lookup from a category name string to the matching BoundingBoxType member.
name_mapping = dict(
    vehicle=BoundingBoxType.VEHICLE,
    bicycle=BoundingBoxType.BICYCLE,
    pedestrian=BoundingBoxType.PEDESTRIAN,
    traffic_cone=BoundingBoxType.TRAFFIC_CONE,
    barrier=BoundingBoxType.BARRIER,
    czone_sign=BoundingBoxType.CZONE_SIGN,
    generic_object=BoundingBoxType.GENERIC_OBJECT,
)

In [None]:
from nuplan.common.geometry.compute import get_pacifica_parameters


# Log-level attributes, read once from the opened log database.
log_name = log_db.log_name
log_token = log_db.log.token
map_location = log_db.log.map_version
vehicle_name = log_db.log.vehicle_name

# Debug cap: stop after this many lidar frames so the cell stays fast.
MAX_FRAMES = 10

# Per-log buffers. Every list receives exactly one entry per processed frame,
# so index i of each buffer refers to the same lidar frame.
time_us_log: List[int] = []

bb_ego_log: List[List[float]] = []
bb_frame_log: List[List[List[float]]] = []
bb_track_log: List[List[str]] = []
bb_types_log: List[List[int]] = []

ego_states_log: List[List[float]] = []

# The ego dimensions do not depend on the frame, so fetch them once instead of
# on every loop iteration.
vehicle_parameters = get_pacifica_parameters()

for lidar_pc in tqdm(log_db.lidar_pc, dynamic_ncols=True):
    # 1. time_us
    time_us_log.append(lidar_pc.timestamp)

    # Per-frame buffers; index j of each list refers to the same lidar box.
    bb_frame: List[List[float]] = []
    bb_track: List[str] = []
    bb_types: List[int] = []

    for lidar_box in lidar_pc.lidar_boxes:
        lidar_box: LidarBox
        # Boxes carry only a yaw angle; roll and pitch use the project defaults.
        center = StateSE3(
            x=lidar_box.x,
            y=lidar_box.y,
            z=lidar_box.z,
            roll=DEFAULT_ROLL,
            pitch=DEFAULT_PITCH,
            yaw=lidar_box.yaw,
        )
        bounding_box_se3 = BoundingBoxSE3(center, lidar_box.length, lidar_box.width, lidar_box.height)

        # Store plain Python floats (matching the List[float] annotations) rather
        # than pa.Array objects nested inside Python lists.
        # Assumes `.array` is a flat numeric sequence — TODO confirm its length
        # matches the fixed-size list used by the table schema.
        bb_frame.append(np.asarray(bounding_box_se3.array, dtype=np.float64).tolist())
        bb_track.append(lidar_box.track_token)
        bb_types.append(int(name_mapping[lidar_box.category.name]))

    bb_frame_log.append(bb_frame)
    bb_track_log.append(bb_track)
    bb_types_log.append(bb_types)

    # 2. ego_states
    yaw, pitch, roll = lidar_pc.ego_pose.quaternion.yaw_pitch_roll
    ego_bounding_box_se3 = BoundingBoxSE3(
        center=StateSE3(
            x=lidar_pc.ego_pose.x,
            y=lidar_pc.ego_pose.y,
            z=lidar_pc.ego_pose.z,
            roll=roll,
            pitch=pitch,
            yaw=yaw,
        ),
        length=vehicle_parameters.length,
        width=vehicle_parameters.width,
        height=vehicle_parameters.height,
    )

    bb_ego_log.append(np.asarray(ego_bounding_box_se3.array, dtype=np.float64).tolist())

    # Checked after all buffers were appended, so they stay the same length.
    if len(bb_ego_log) >= MAX_FRAMES:
        break


# Option 1: list-column layout.
# Each table row is one lidar frame; the boxes of that frame are stored as
# variable-length list columns next to the frame timestamp and the ego box.

# Fixed-size list of 9 float64 values, shared by the per-box and ego columns.
bb_array_type = pa.list_(pa.float64(), 9)

list_schema = pa.schema(
    [
        pa.field("time_us", pa.int64()),
        pa.field("bb_frame", pa.list_(bb_array_type)),
        pa.field("bb_track", pa.list_(pa.string())),
        pa.field("bb_types", pa.list_(pa.int32())),
        pa.field("bb_ego", bb_array_type),
    ]
)

list_data = dict(
    time_us=time_us_log,
    bb_frame=bb_frame_log,
    bb_track=bb_track_log,
    bb_types=bb_types_log,
    bb_ego=bb_ego_log,
)

# Build the PyArrow table, enforcing the explicit schema above.
list_table = pa.Table.from_pydict(list_data, schema=list_schema)


# NOTE(review): these values are hard-coded and not derived from `log_db`;
# they look like placeholders — confirm before relying on them.
metadata = {
    "recording_id": "drive_20250515_001",
    "location": "Mountain View, CA",
    "weather": "sunny",
    "sensor_config": "standard_suite_v3",
}

# One-row table with one column per metadata key. The columns are built
# directly from the Python values: wrapping each value in pa.scalar() and then
# in pa.array([...]) is redundant and only works on pyarrow versions whose
# pa.array() accepts Arrow scalars.
metadata_table = pa.Table.from_pydict({key: [value] for key, value in metadata.items()})

# schema = {
#     "timeseries": list_table.schema,
#     "metadata": metadata_table.schema
# }
# schema_batch = pa.record_batch([pa.array([str(schema)])], ["schema"])

# # Write to Arrow file
# # with pa.OSFile(f"{log_name}.arrow", "wb") as sink:
# #     writer = pa.RecordBatchFileWriter(sink, list_table.schema)
# #     writer.write_table(list_table)
# #     writer.close()
# schema_batch


In [None]:
import pyarrow as pa
import pyarrow.ipc as ipc

# An Arrow IPC stream is bound to the single schema it was opened with; writing
# a table with a different schema raises ArrowInvalid. The two tables therefore
# cannot be written as consecutive "chunks" of one stream. Instead, the one-row
# metadata table is folded into the schema-level key/value metadata of the
# time-series table, and that single table is written to the file.
schema_metadata = {
    name: str(metadata_table[name][0].as_py()) for name in metadata_table.column_names
}
combined_table = list_table.replace_schema_metadata(schema_metadata)

with pa.OSFile("combined.arrow", "wb") as sink:
    # The writer is a context manager, so the stream is closed even if the
    # write fails.
    with ipc.new_stream(sink, combined_table.schema) as writer:
        writer.write_table(combined_table)
