## 2D-to-3D Lanes Outlier Postfilter
Created by Jett Penner<br>
December 2025 <br>


Runs a postprocessing filter over the dataset to identify potential solution outliers (stemming from incorrect data loads or preprocessing). While these could be detected before the main pipeline runs, the current configuration allows analysis of input failure cases. The output is a JSON file listing the outliers; the filter itself does not remove them.

First run the `ONCE-3DLanes Data Loader` for data loading and preprocessing. Then, run the `2D-to-3D Lanes Pipeline` for pipeline output.


In [None]:
import yaml
import os
import numpy as np
from pathlib import Path
import open3d as o3d
from tqdm import tqdm
from collections import defaultdict
import json
import sys


In [None]:
# Parse the YAML configuration, then start the logger for this notebook
config_filename = "once" + "_config.yaml"
with open(config_filename, "r") as config_file:
    once_config = yaml.safe_load(config_file)

from logger import setup_logger, switch_log_file
main_logger = setup_logger("analytics_builder_log")
# The log file is switched using the configured output base path
output_base_path = once_config["output"]["base_path"]
switch_log_file(main_logger, output_base_path)
main_logger.info("Starting Program\n")

In [None]:
def load_pointcloud_from_bin(file_path):
    '''
    Read a raw float32 .bin file and interpret it as XYZ triplets.

    Args:
        file_path (str): Path to the .bin file.

    Returns:
        (N,3) np array: Points in the file (XYZ), or None if the file is
        missing, cannot be read, or its float count is not a multiple of 3.
    '''
    if os.path.exists(file_path):
        try:
            flat = np.fromfile(file_path, dtype=np.float32)
            if flat.size % 3 == 0:
                return flat.reshape(-1, 3)
            # A trailing partial point means the file is not a valid XYZ dump
            main_logger.error(f"Invalid point cloud size in file {file_path} ({flat.size} floats).")
        except Exception as e:
            main_logger.error(f"Failed to load point cloud from {file_path}: {e}")
        return None

    main_logger.error(f"File not found: {file_path}")
    return None
    


def load_pointcloud_from_pcd(pcd_path):
    '''
    Read a .pcd file through Open3D and return its points.

    Args:
        pcd_path (str or Path): Path to the .pcd file.

    Returns:
        (N,3) np array: Points in the file (XYZ) as float32. If the loaded
        cloud has no points, a warning is logged and an empty (0,3) array
        is returned.
    '''
    cloud = o3d.io.read_point_cloud(str(pcd_path))

    if cloud.has_points():
        return np.asarray(cloud.points, dtype=np.float32)

    main_logger.warning(f"No points found in PCD file: {pcd_path}")
    return np.empty((0, 3), dtype=np.float32)



def is_point_error(P_sol, P_gt, max_dist=None):
    '''
    Determines if the point is an outlier.

    A point is an outlier when its Euclidean distance to the ground truth
    exceeds the threshold, or when that distance is not a number (for
    example because a coordinate is NaN).

    Args:
        P_sol (3,) np array: Solution point.
        P_gt (3,) np array: Ground truth point.
        max_dist (float, optional): Maximum allowed Euclidean distance.
            When omitted, once_config["result_analytics"]["max_eucl_dist"]
            is used.

    Returns:
        (bool): whether the point is an outlier.
    '''
    if max_dist is None:
        max_dist = once_config["result_analytics"]["max_eucl_dist"]

    diff = P_sol - P_gt
    sq_dist = np.dot(diff, diff)      # squared Euclidean distance
    # Negated "<=" rather than ">": any comparison with NaN is False, so a
    # plain ">" would let points with NaN coordinates pass as valid.
    return not (sq_dist <= max_dist ** 2)



def get_seqs():
    '''
    Collect the run / sequence ids to process.

    When the configured runtime seq_id is None, every directory directly
    under the output base path is returned. Otherwise only the configured
    id is returned, and the program exits with status 1 if that id is not
    a directory under the output base path.

    Returns:
        (arr): An array of all of the ids.
    '''
    root = once_config["output"]["base_path"]
    requested = once_config["runtime"]["seq_id"]

    if requested is not None:
        candidate = os.path.join(root, requested)
        if not os.path.isdir(candidate):
            main_logger.error(f"Prespecified run id {requested} is not found, or not a run folder.")
            sys.exit(1)
        main_logger.info(f"Running prespecified run id: {requested}")
        return [requested]

    # No id requested: take every sub-directory of the output folder
    found = [
        entry for entry in os.listdir(root)
        if os.path.isdir(os.path.join(root, entry))
    ]
    main_logger.info(f"Running all run ids: {len(found)} found.\n\n\n\n")
    return found



def map_files_by_frame(files):
    '''
    Index files by frame id and lane id, both parsed from the file stem.

    Stems are expected to look like <frameid>_<laneid>, split at the last
    underscore. A stem without an underscore is stored under lane id None.
    Lane ids that parse as integers are stored as int, others as str.

    Args:
        files (arr[Path]): a list of candidate files.

    Returns:
        mapping (dict): a mapping dictionary of   frameid: {laneid: filepath}

    '''
    by_frame = defaultdict(dict)
    for path in files:
        frame_key, separator, lane_token = path.stem.rpartition("_")
        if not separator:
            # No underscore at all: the whole stem is the frame id
            by_frame[path.stem][None] = str(path)
            continue
        try:
            lane_key = int(lane_token)
        except ValueError:
            lane_key = lane_token
        by_frame[frame_key][lane_key] = str(path)
    return by_frame


In [None]:
# Compare every solution lane against its ground truth lane and record the
# lanes / points that fail the check. Nothing is removed from the dataset;
# the result is only written to a JSON file.
base_path = once_config["output"]["base_path"]

ground_truth_base_folder = once_config["data"]["base_path"]
ground_truth_folder = once_config["data"]["lane_position_ground_truth_folder_path"]

out_path = Path(os.path.join(base_path, once_config["result_analytics"]["posfilter_eval_path"] + ".json"))
if out_path.exists() and not once_config["result_analytics"]["override_existing_output"]:
    # Refuse to overwrite an earlier result unless the config allows it
    print("File already exists.")
    sys.exit(1)


def _mixed_sort_key(value):
    # Lane ids can be int, str or None; tag each with its type name so they can be ordered together
    return (type(value).__name__, value)


def _registry_sort_key(entry):
    # Order registry tuples element-wise, tolerating mixed id types
    return tuple(_mixed_sort_key(part) for part in entry)


error_registry = {"lanes": set(), "points": set()}

seq_ids = get_seqs()

for seq_id in tqdm(seq_ids, desc="Processing all run ids"):
    gt_folder = Path(os.path.join(ground_truth_base_folder, seq_id, ground_truth_folder))
    solution_folder = Path(os.path.join(base_path, seq_id, once_config["output"]["thesis_solution_path"]))

    sol_map = map_files_by_frame(sorted(solution_folder.glob("*.bin")))
    gt_map = map_files_by_frame(sorted(gt_folder.glob("*.pcd")))

    # Only frames and lanes present on both sides can be compared
    for frame_id in sorted(sol_map.keys() & gt_map.keys()):
        solution_files_for_frame = sol_map[frame_id]
        gt_files_for_frame = gt_map[frame_id]

        lane_ids = sorted(
            solution_files_for_frame.keys() & gt_files_for_frame.keys(),
            key=_mixed_sort_key,
        )
        for lane_id in lane_ids:
            lane_key = (seq_id, frame_id, lane_id)

            sol_pts = load_pointcloud_from_bin(solution_files_for_frame[lane_id])
            gt_pts = load_pointcloud_from_pcd(gt_files_for_frame[lane_id])

            if sol_pts is None or gt_pts is None:
                error_registry["lanes"].add(lane_key)
                continue

            sol_pts = np.asarray(sol_pts, dtype=float)
            gt_pts = np.asarray(gt_pts, dtype=float)

            # Points are compared index by index, so both clouds must be (N,3) with the same N
            if sol_pts.shape != gt_pts.shape or sol_pts.ndim != 2 or sol_pts.shape[1] != 3:
                error_registry["lanes"].add(lane_key)
                continue

            N = sol_pts.shape[0]
            invalid_point_indices = [
                i for i, (p_sol, p_gt) in enumerate(zip(sol_pts, gt_pts))
                if is_point_error(p_sol, p_gt)
            ]
            if len(invalid_point_indices) == N:
                # All points are invalid, mark the lane.
                # A lane with zero points also lands here (0 == 0).
                error_registry["lanes"].add(lane_key)
            else:
                # Only some points are invalid, mark individual points
                for i in invalid_point_indices:
                    error_registry["points"].add((seq_id, frame_id, lane_id, i))


# Save JSON of invalid lanes / points; sorted so repeated runs produce identical files
serializable_registry = {
    "lanes": sorted(error_registry["lanes"], key=_registry_sort_key),
    "points": sorted(error_registry["points"], key=_registry_sort_key),
}
out_path.parent.mkdir(parents=True, exist_ok=True)
with open(out_path, 'w') as f:
    json.dump(serializable_registry, f, indent=4)