In [None]:

from functools import partial
from pathlib import Path
from typing import Iterator, List, Optional, Set, Union


import pyarrow as pa
from d123.common.multithreading.worker_pool import WorkerPool

from d123.dataset.arrow.helper import open_arrow_arrow_table
from d123.conversion.nuplan.nuplan_data_processor import worker_map
from d123.dataset.logs.log_metadata import LogMetadata
from d123.dataset.scene.abstract_scene import AbstractScene
from d123.dataset.scene.arrow_scene import ArrowScene, SceneExtractionInfo
from d123.dataset.scene.scene_filter import SceneFilter


In [None]:
import numpy as np

# Scene timing constants. DURATION_SECONDS and HISTORY_SECONDS are not referenced by any
# other cell shown in this notebook: _get_scene_extraction_info reads filter.duration_s and
# filter.history_s instead.
DURATION_SECONDS = 10.0  # NOTE(review): presumably the intended scene length in seconds; confirm
HISTORY_SECONDS = 3.0
# Passed as `iteration_duration_s` to every SceneExtractionInfo built in _get_scene_extraction_info.
ITERATION_DURATION_SECONDS = 0.1



# Absolute path to a single .arrow log in the author's home directory; change it before
# running this notebook on another machine.
log_path = "/home/daniel/d123_workspace/data/nuplan_mini_val/2021.06.07.12.54.00_veh-35_01843_02314.arrow"

# Load the log table and its metadata at notebook level for inspection.
# _get_scene_extraction_info opens the same path again and does not use these two objects.
recording_table = open_arrow_arrow_table(log_path)
log_metadata = LogMetadata.from_arrow_table(recording_table)


# scene_tokens = [str(token) for token in np.random.choice(recording_table.column("token").to_pylist(), size=10)]
# A one-element list holding the empty string. The list itself is non-empty (truthy), so
# _get_scene_extraction_info turns it into a token set and only keeps rows whose token is
# a member of that set.
scene_tokens = [""]
# In _get_scene_extraction_info a candidate scene is dropped when the time gap (in seconds)
# to the previously accepted scene is below this value; None disables that check.
timestamp_threshold_s: float = 10.0
# timestamp_threshold_s = None
# NOTE: this name shadows Python's built-in `filter` for every later cell.
filter = SceneFilter(scene_tokens=scene_tokens, timestamp_threshold_s=timestamp_threshold_s)

In [None]:
# Display the token list that was handed to SceneFilter.
scene_tokens

In [None]:
def _duration_to_iterations(duration_s: float, timestep_s: float) -> int:
    """Convert a duration in seconds into a whole number of log iterations.

    Plain ``int(duration_s / timestep_s)`` truncates ratios that land just below
    an integer because of binary floating-point error (``1.2 / 0.1`` evaluates to
    ``11.999999999999998`` and would become 11). A small tolerance is added
    before truncating so such ratios map to the intended count, while durations
    that are genuinely not a multiple of the timestep are still rounded down.

    :param duration_s: duration in seconds.
    :param timestep_s: length of one log iteration in seconds.
    :return: number of iterations covered by ``duration_s``.
    """
    return int(duration_s / timestep_s + 1e-6)


def _get_scene_extraction_info(log_path: str, filter: SceneFilter) -> List[SceneExtractionInfo]:
    """Collect the scene start points of one arrow log that satisfy ``filter``.

    The log is opened from ``log_path``. Rows are considered as scene starts only
    if enough iterations precede them to cover ``filter.history_s`` and enough
    follow them to cover ``filter.duration_s``. The remaining candidates are then
    filtered in this order:

    1. map name: the whole log is rejected if ``filter.map_names`` is set and
       does not contain the log's map name;
    2. token: if ``filter.scene_tokens`` is a non-empty collection, only rows
       whose stringified token is in it are kept;
    3. spacing: if ``filter.timestamp_threshold_s`` is set, a candidate is
       dropped when it starts less than that many seconds after the previously
       accepted scene.

    :param log_path: path of the arrow log file to scan.
    :param filter: scene filter providing ``map_names``, ``scene_tokens``,
        ``history_s``, ``duration_s`` and ``timestamp_threshold_s``.
    :return: accepted scene extraction infos in ascending row order; empty if the
        log is rejected or has no valid candidate.
    """
    scene_extraction_infos: List[SceneExtractionInfo] = []

    recording_table = open_arrow_arrow_table(log_path)
    log_metadata = LogMetadata.from_arrow_table(recording_table)

    # 1. Filter map name
    if filter.map_names is not None and log_metadata.map_name not in filter.map_names:
        return scene_extraction_infos

    timestep_s = log_metadata.timestep_seconds
    start_idx = _duration_to_iterations(filter.history_s, timestep_s)
    end_idx = len(recording_table) - _duration_to_iterations(filter.duration_s, timestep_s)

    # An empty or missing token collection means "no token filtering".
    scene_token_set = set(filter.scene_tokens) if filter.scene_tokens else None

    # Materialise the token column once instead of indexing the arrow table on every iteration.
    tokens = recording_table["token"].to_pylist()

    for idx in range(start_idx, end_idx):
        token = str(tokens[idx])

        # 2. Filter scene tokens
        if scene_token_set is not None and token not in scene_token_set:
            continue

        # 3. Enforce a minimum time gap to the previously accepted scene
        # TODO: add more options
        if filter.timestamp_threshold_s is not None and len(scene_extraction_infos) > 0:
            iteration_delta = idx - scene_extraction_infos[-1].initial_idx
            if (iteration_delta * timestep_s) < filter.timestamp_threshold_s:
                continue

        scene_extraction_infos.append(
            SceneExtractionInfo(
                initial_token=token,
                initial_idx=idx,
                duration_s=filter.duration_s,
                history_s=filter.history_s,
                # NOTE(review): this uses the notebook-level constant, while the index
                # bounds above use log_metadata.timestep_seconds; confirm this is intended.
                iteration_duration_s=ITERATION_DURATION_SECONDS,
            )
        )

    return scene_extraction_infos


# Run the extraction on the configured log and show how many scenes passed the filter.
scenes = _get_scene_extraction_info(log_path, filter)
len(scenes)

# 4580  (NOTE(review): presumably a scene count recorded from an earlier run; confirm which filter settings produced it)

In [None]:
# Run the extraction again and display the returned list itself; the result is not stored.
_get_scene_extraction_info(log_path, filter)