## Results Compiler
Created by Jett Penner<br>
December 2025 <br>


Compiles all of the quantitative results of the proposed and alternative solutions, comparing outputs to the ground truth data and against each other. Compares against identified outliers (from postfiltering). Generates various metric aggregates or data lists, including dataset-wide, run-wide, framewise, lanewise, and pointwise information. Used as a preprocessing optimization step before running quantitative analysis (enabling easy generation of tables and graphs without re-calculating all of the results). Outputs all of the metrics to a summary JSON file.

Again, only needed for thesis analysis, not for real implementation. Run the files in order:
1. Run the `ONCE-3DLanes Data Loader` for data loading and preprocessing.
2. Run the `2D-to-3D Lanes Pipeline` for projection output (for proposed and alternate solutions).
3. Run the `2D-to-3D Lanes Outlier Postfilter` for preprocessing outlier detection.
4. Run this code to generate summary values of output.


In [None]:
import yaml
import os
import numpy as np
from pathlib import Path
import open3d as o3d
from tqdm import tqdm
from collections import defaultdict
from scipy.optimize import minimize_scalar
import json
from scipy.spatial import cKDTree
from scipy import stats
import sys


In [None]:
# Config loading and logger
with open("once" + "_config.yaml", "r") as f:
    once_config = yaml.safe_load(f)

from logger import setup_logger, start_timer, stop_timer, switch_log_file
main_logger = setup_logger("analytics_builder_log")
switch_log_file(main_logger, once_config["output"]["base_path"])
main_logger.info("Starting Program\n")

# Load invalid lanes/points (presumably written by the Outlier Postfilter notebook -- confirm).
# Both sets are always defined, so code reading them can never hit a NameError,
# even when the JSON file does not exist.
invalid_pts_path = Path(os.path.join(once_config["output"]["base_path"], once_config["result_analytics"]["posfilter_eval_path"] + ".json"))
filter_invalid = False
invalid_lanes = set()
invalid_points = set()
if invalid_pts_path.exists():
    with open(invalid_pts_path, 'r') as f:
        invalid_pts_dict = json.load(f)
    # JSON lists are unhashable; convert each entry to a tuple for set membership tests.
    invalid_lanes = set(tuple(x) for x in invalid_pts_dict.get("lanes", []))
    invalid_points = set(tuple(x) for x in invalid_pts_dict.get("points", []))
    filter_invalid = once_config["result_analytics"]["filter_invalid"]
elif once_config["result_analytics"].get("filter_invalid"):
    # Filtering was requested but there is nothing to filter with: say so instead of silently disabling it.
    main_logger.warning(f"Invalid-point file not found at {invalid_pts_path}; outlier filtering is disabled.")


In [None]:
def c_i(a, b, c, d=None, e=None):
    '''
    Dual-mode invalid-data check. The mode is selected by whether `d` is given.

    Op Mode 1 (d is None) -- lane validity:
        Args:
            a (int): lane id,
            b (int/string): frame id,
            c (int/string): sequence/run id.

        Returns:
            (bool), True if the (sequence, frame, lane) triple is listed in the
            global invalid_lanes set; always False when filtering is disabled.

    Op Mode 2 (d is given) -- point filtering:
        Args:
            a (N,3) np array: Point set 1.
            b (N,3) np array: Point set 2.
            c (int): lane id,
            d (int/string): frame id,
            e (int/string): sequence/run id.

        Returns:
            a (M,3) np array, point set 1 with the flagged rows removed,
            b (M,3) np array, point set 2 with the same rows removed.
            Both inputs are returned untouched when filtering is disabled or
            when no point of this lane is flagged.
    '''
    lane_mode = d is None

    # Filtering switched off: nothing is ever reported as invalid.
    if not filter_invalid:
        return False if lane_mode else (a, b)

    if lane_mode:
        # invalid_lanes stores (sequence, frame, lane) tuples.
        return (c, b, a) in invalid_lanes

    n_rows = a.shape[0]

    # Row indices flagged for exactly this sequence / frame / lane.
    flagged = {
        idx
        for seq, frame, lane, idx in invalid_points
        if seq == e and frame == d and lane == c
    }
    if not flagged:
        return a, b

    # Boolean keep-mask: start with everything kept, then knock out flagged rows.
    keep = np.ones(n_rows, dtype=bool)
    keep[list(flagged)] = False

    return a[keep], b[keep]
    
    
    
def load_pointcloud_from_bin(file_path):
    '''
    Read a raw float32 .bin file and interpret it as XYZ triplets.

    Args:
        file_path (str): Path to the .bin file.

    Returns:
        (N,3) np array of float32 points, or None when the file is missing,
        its float count is not a multiple of 3, or reading fails. Every
        failure is reported through main_logger.
    '''
    if os.path.exists(file_path):
        try:
            flat = np.fromfile(file_path, dtype=np.float32)
            if flat.size % 3 == 0:
                return flat.reshape(-1, 3)
            main_logger.error(f"Invalid point cloud size in file {file_path} ({flat.size} floats).")
        except Exception as err:
            main_logger.error(f"Failed to load point cloud from {file_path}: {err}")
        return None

    main_logger.error(f"File not found: {file_path}")
    return None



def load_pointcloud_from_pcd(pcd_path):
    '''
    Read a .pcd file through Open3D and return its points.

    Args:
        pcd_path (str/Path): Path to the .pcd file.

    Returns:
        (N,3) float32 np array of the points (XYZ). When the loaded cloud has
        no points a warning is logged and an empty (0,3) array is returned;
        this function never returns None.
    '''
    cloud = o3d.io.read_point_cloud(str(pcd_path))

    if cloud.has_points():
        return np.asarray(cloud.points, dtype=np.float32)

    main_logger.warning(f"No points found in PCD file: {pcd_path}")
    return np.empty((0, 3), dtype=np.float32)



def fit_line_on_plane(points, precalc_values, curvature_threshold=0.05):
    """
    Fit a 3D line (or a quadratic curve if the line fits poorly) through points assumed to lie on a plane.

    The points are expressed in the plane's 2D frame (x along plane_x, y along plane_y, both
    relative to origin), y is regressed on x, and the fitted model is mapped back to 3D.

    Args:
        points (N,3) np array: 3D points (already on plane).
        precalc_values (dict): dict containing plane_x, plane_y, origin (see fit_line_on_plane_precalc()).
            plane_x / plane_y are presumably orthonormal in-plane axes; this is not checked here.
        curvature_threshold (float, optional): RMS residual of the linear fit above which the
            quadratic model is used instead. Default is 0.05.

    Returns:
        tuple: (
            model_type (str): the used fitting model, either "linear" or "quadratic",
            model_params (dict): {
                coeffs (tuple) of floats:
                    Linear: (a, b) of y = a·x + b,
                    Quadratic: (a, b, c) of y = a·x^2 + b·x + c.
                z_intercept (3,) np array: 3D point of the fitted curve at local x = 0.

                line_dir:
                    Linear: (3,) np array: Unit direction vector along the fitted line.
                    Quadratic: None
                quad_firstorder_dir:
                    Linear: None
                    Quadratic: (3,) np array: first-order direction plane_x + b·plane_y (not normalized).
                quad_secondorder_dir:
                    Linear: None
                    Quadratic: (3,) np array: second-order direction a·plane_y (not normalized).
            }
        )
        The quadratic curve is X(t) = z_intercept + t·quad_firstorder_dir + t^2·quad_secondorder_dir.
    """
    plane_x = precalc_values["plane_x"]
    plane_y = precalc_values["plane_y"]
    origin = precalc_values["origin"]
    z_axis = np.array([0, 0, 1])

    # Convert points to 2D local coordinates on plane
    local_pts = np.asarray(points) - origin
    x = local_pts @ plane_x
    y = local_pts @ plane_y

    # Fit linear model y = a*x + b
    A = np.vstack([x, np.ones_like(x)]).T
    a, b = np.linalg.lstsq(A, y, rcond=None)[0]
    linear_rms = np.sqrt(np.mean((y - (a * x + b)) ** 2))

    # Quadratic fallback if large rms error
    if linear_rms > curvature_threshold:
        # Fit quadratic model y = a*(x^2) + b*x + c
        a2, b2, c2 = np.polyfit(x, y, 2)

        quad_origin = origin + c2 * plane_y
        quad_firstorder_dir = plane_x + b2 * plane_y
        quad_secondorder_dir = a2 * plane_y

        # Negating the first-order direction only reverses the parameter (t -> -t), so the curve
        # is unchanged and the orientation convention (non-negative z component) can be applied.
        if np.dot(quad_firstorder_dir, z_axis) < 0:
            quad_firstorder_dir *= -1

        # The second-order direction must NOT be negated: doing so would turn
        # y = a*x^2 + ... into y = -a*x^2 + ..., i.e. a mirrored parabola that no
        # longer passes through the fitted points.

        return "quadratic", {
            "coeffs": (a2, b2, c2),
            "z_intercept": quad_origin,
            "line_dir": None,
            "quad_firstorder_dir": quad_firstorder_dir,
            "quad_secondorder_dir": quad_secondorder_dir
        }

    # Return linear; a line is the same set of points for either sign of its direction.
    line_dir = plane_x + a * plane_y
    if np.dot(line_dir, z_axis) < 0:
        line_dir *= -1
    line_origin = origin + b * plane_y

    return "linear", {
        "coeffs": (a, b),
        "z_intercept": line_origin,
        "line_dir": line_dir / np.linalg.norm(line_dir),
        "quad_firstorder_dir": None,
        "quad_secondorder_dir": None
    }



def project_point_onto_curve(P, model_type, model_params, t_bounds=(-100, 100)):
    '''
    Project one point or a set of points onto a fitted curve (closest distance).

    Args:
        P (3,) or (N,3) np array: The point(s).
        model_type (str): The used fitting model. "linear" selects the line model; any other
            value is treated as quadratic.
        model_params (dict): Model parameters. Reads 'z_intercept' plus 'line_dir' (linear)
            or 'quad_firstorder_dir' / 'quad_secondorder_dir' (quadratic).
        t_bounds (tuple, optional): (min, max) search interval for the curve parameter of the
            quadratic model. Default is (-100, 100).

    Returns:
        np array with the same leading shape as P ((3,) or (N,3)): the closest point on the
        curve for every input point.

    Raises:
        TypeError: if model_params is None (e.g. when no model could be fitted).
    '''
    P = np.asarray(P)
    p0 = model_params['z_intercept']

    if model_type == "linear":
        direction = model_params['line_dir']
        # Scalar projection onto the (unit) line direction; one value per point.
        t = (P - p0) @ direction
        # outer() yields (3,) for a scalar t and (N,3) for a vector of t values.
        return p0 + np.multiply.outer(t, direction)

    # quadratic: X(t) = p0 + t*first + t^2*second
    first = model_params['quad_firstorder_dir']
    second = model_params['quad_secondorder_dir']

    def closest_to(point):
        # Each point needs its own curve parameter, so minimise per point.
        def sq_dist(t):
            return np.sum((p0 + first * t + second * t**2 - point) ** 2)

        res = minimize_scalar(sq_dist, bounds=t_bounds, method='bounded')
        return p0 + first * res.x + second * res.x**2

    if P.ndim == 1:
        return closest_to(P)

    # reshape keeps an empty input as a well-formed (0,3) array.
    return np.array([closest_to(point) for point in P], dtype=float).reshape(-1, 3)



def per_point_errors_and_curve_dist(P_sol, P_gt, curve_model_type, curve_model_params):
    '''
    Compute the error of one solution point against its ground truth point and against the fitted curve.

    Args:
        P_sol (3,) np array: solution point.
        P_gt (3,) np array: ground truth point.
        curve_model_type (str): "linear" or "quadratic".
        curve_model_params (dict): Various linear and quadratic model parameters.

    Returns:
        dict: {
            'point_dist' (float): euclidean distance between P_sol and P_gt,
            'curve_dist' (float): euclidean distance from P_sol to the fitted curve,
            'gt_to_curve_dist' (float): euclidean distance from P_gt to the fitted curve (diagnostic)
        }
        A curve distance is NaN whenever the projection raises.

    Notes:
        Relies on project_point_onto_curve(P, model_type, model_params) to return the closest 3D point on the curve.
    '''
    def distance_to_curve(pt):
        # Any projection failure is reported as NaN rather than propagated.
        try:
            nearest = project_point_onto_curve(pt, curve_model_type, curve_model_params)
            return float(np.linalg.norm(pt - nearest))
        except Exception:
            return float(np.nan)

    return {
        'point_dist': float(np.linalg.norm(P_sol - P_gt)),
        'curve_dist': distance_to_curve(P_sol),
        'gt_to_curve_dist': distance_to_curve(P_gt),
    }



def compute_chamfer_and_hausdorff(ptsA, ptsB):
    '''
    Nearest-neighbour based set distances between two point sets.

    Args:
        ptsA (N,3) np array: First point set.
        ptsB (M,3) np array: Second point set.

    Returns:
        tuple: (
            chamfer (float): mean squared nearest-neighbour distance B->A plus the same A->B,
            hausdorff (float): largest nearest-neighbour distance in either direction.
        )
        Both values are NaN when either set is empty.
    '''
    if 0 in (ptsA.size, ptsB.size):
        return float(np.nan), float(np.nan)

    index_a = cKDTree(ptsA)
    index_b = cKDTree(ptsB)

    # Distance from every point of one set to its nearest neighbour in the other.
    b_to_a = index_a.query(ptsB)[0]
    a_to_b = index_b.query(ptsA)[0]

    chamfer = float(np.mean(b_to_a**2) + np.mean(a_to_b**2))
    hausdorff = float(max(np.max(b_to_a), np.max(a_to_b)))
    return chamfer, hausdorff



def per_lane_metrics(solution_filepath, gt_filepath, lane_id, frame_id, seq_id):
    '''
    Load matching solution and ground truth files (corresponding to a sequence, frame, and lane). Calculate and return
    various per-lane metrics.

    Args:
       solution_filepath (str): path to solution file (.bin).
       gt_filepath (str): path to ground truth file (.pcd).
       lane_id (int): lane id, used to look up invalid points.
       frame_id (int/str): frame id.
       seq_id (int/str): sequence/run id.

    Returns:
        {
            'lane_id' (int): lane id parsed from the solution file name ("<frame_id>_<lane_id>"), None if unparsable,
            'file_id' (str): stem of the solution file name,
            'n_points' (int): number of valid points in lane,
            'model_type' (str): 'linear', 'quadratic', or 'unknown' when the curve fit failed,
            'metrics_point' (dict of floats): {MAE_point, MSE_point, RMSE_point, Chamfer, Hausdorff},
            'metrics_curve' (dict of floats): {MAE_curve, MSE_curve, RMSE_curve},
            'per_point' (dict of lists): { 'point_distances': [...], 'curve_distances': [...], 'gt_to_curve_distances': [...] },
                where NaN curve distances are stored as None,
            'error' (str): only present when the files could not be compared ("missing_data" or "shape_mismatch ...");
                in that case the metric dicts are empty and 'lane_id' is None.
        }
    '''
    sol_pts = load_pointcloud_from_bin(solution_filepath)
    gt_pts = load_pointcloud_from_pcd(gt_filepath)

    file_id = Path(solution_filepath).stem

    out = {
        'file_id': file_id,
        'lane_id': None,
        'n_points': 0,
        'model_type': None,
        'metrics_point': {},
        'metrics_curve': {},
        'per_point': {}
    }

    if sol_pts is None or gt_pts is None:
        out['error'] = "missing_data"
        return out

    sol_pts = np.asarray(sol_pts, dtype=float)
    gt_pts = np.asarray(gt_pts, dtype=float)

    # Points are compared index-by-index, so both sets must be (N,3) with the same N.
    if sol_pts.shape != gt_pts.shape or sol_pts.ndim != 2 or sol_pts.shape[1] != 3:
        out['error'] = f"shape_mismatch {sol_pts.shape} vs {gt_pts.shape}"
        return out

    # Fit parametric line/curve on GT lane (before invalid points are removed).
    # NOTE(review): fit_line_on_plane(points, precalc_values, curvature_threshold) expects the
    # plane dict as its second argument, but the curvature threshold is passed there. As written
    # the fit appears to fail for every lane; the plane basis is not available in this function,
    # so the call is left unchanged and the failure is now logged instead of hidden -- TODO confirm
    # the intended precalc_values source.
    try:
        model_type, model_params = fit_line_on_plane(gt_pts, once_config["projection_mapping"]["snap_curvature_threshold"])
    except Exception as e:
        main_logger.warning(f"Curve fit failed for {file_id}; curve metrics will be NaN: {e}")
        model_type, model_params = "unknown", None

    # Drop points flagged as invalid from both sets (no-op when filtering is disabled).
    sol_pts, gt_pts = c_i(sol_pts, gt_pts, lane_id, frame_id, seq_id)

    N = sol_pts.shape[0]

    out['n_points'] = int(N)
    out['model_type'] = model_type

    # per-point distances
    diffs = sol_pts - gt_pts
    point_dists = np.linalg.norm(diffs, axis=1)  # Euclidean distance
    sq_dists = np.sum(diffs**2, axis=1)

    # Without a fitted model every projection would fail and yield NaN, so skip the loop.
    curve_dists = np.full(N, np.nan, dtype=float)
    gt_curve_dists = np.full(N, np.nan, dtype=float)
    if model_params is not None:
        for i in range(N):
            res = per_point_errors_and_curve_dist(sol_pts[i], gt_pts[i], model_type, model_params)
            curve_dists[i] = res['curve_dist']
            gt_curve_dists[i] = res['gt_to_curve_dist']

    # Basic point metrics
    mse_points = float(np.mean(sq_dists))
    mae_points = float(np.mean(point_dists))
    rmse_points = float(np.sqrt(np.mean(sq_dists)))

    chamfer_pts, hausdorff_pts = compute_chamfer_and_hausdorff(sol_pts, gt_pts)

    out['metrics_point'] = {
        'MAE_point': mae_points,
        'MSE_point': mse_points,
        'RMSE_point': rmse_points,
        'Chamfer': chamfer_pts,
        'Hausdorff': hausdorff_pts
    }

    # Curve / structure metrics (per-point to curve)
    # Some curve_dists can be nan if projection failed; ignore those in stats but keep raw values
    if np.any(~np.isnan(curve_dists)):
        sq_curve = curve_dists**2
        mae_curve = float(np.nanmean(curve_dists))
        mse_curve = float(np.nanmean(sq_curve))
        rmse_curve = float(np.sqrt(np.nanmean(sq_curve)))
    else:
        mae_curve = mse_curve = rmse_curve = float(np.nan)

    out['metrics_curve'] = {
        'MAE_curve': mae_curve,
        'MSE_curve': mse_curve,
        'RMSE_curve': rmse_curve
    }

    # Save per-point raw arrays (convert to lists for JSON compatibility; NaN becomes None)
    out['per_point'] = {
        'point_distances': point_dists.tolist(),
        'curve_distances': [None if np.isnan(x) else float(x) for x in curve_dists.tolist()],
        'gt_to_curve_distances': [None if np.isnan(x) else float(x) for x in gt_curve_dists.tolist()]
    }

    # parse lane id if available in filename: expecting "<frame_id>_<lane_id>"
    try:
        out['lane_id'] = int(file_id.rsplit('_', 1)[1])
    except Exception:
        out['lane_id'] = None

    return out



def per_frame_aggregate(frame_id, solution_files_for_frame, gt_files_for_frame, seq_id):
    '''
    Evaluate every lane of one frame with per_lane_metrics() and combine the results into frame-level aggregates.

    Only lane ids present in both file dicts are evaluated, and lanes reported as invalid by c_i() are skipped.

    Args:
       frame_id (int/str): frame id.
       solution_files_for_frame (dict): { lane_id (int) : solution_filepath }
       gt_files_for_frame (dict): { lane_id (int) : gt_filepath }
       seq_id (int/str): sequence/run id.

    Returns:
        {
            'frame_id' (int/str): frame id as passed in,
            'n_points' (int): summed 'n_points' of the processed lanes,
            'n_lanes' (int): number of processed (non-skipped) lanes,
            'n_linear' (int): number of lanes with a linear model in frame,
            'n_quadratic' (int): number of lanes with a quadratic model in frame,
            'lanes' (dict): { str(lane_id) : lane_dict }, where lane_dict is the result of per_lane_metrics(),
            'frame_metrics' (dict of floats): { '<metric>_mean', '<metric>_median' } over the lanes (NaN if no values) for:
                MAE_point, MSE_point, RMSE_point, Chamfer, Hausdorff, MAE_curve, MSE_curve, RMSE_curve
        }
    '''
    point_keys = ("MAE_point", "MSE_point", "RMSE_point", "Chamfer", "Hausdorff")
    curve_keys = ("MAE_curve", "MSE_curve", "RMSE_curve")
    metric_groups = (("metrics_point", point_keys), ("metrics_curve", curve_keys))

    collected = {name: [] for name in point_keys + curve_keys}
    lanes = {}
    model_counts = {"linear": 0, "quadratic": 0}
    total_points = 0
    lanes_done = 0

    # Lanes that exist in both the solution and the ground truth.
    shared_ids = sorted(set(solution_files_for_frame.keys()) & set(gt_files_for_frame.keys()))

    for lane_id in shared_ids:
        if c_i(lane_id, frame_id, seq_id):
            continue

        lane_res = per_lane_metrics(
            solution_files_for_frame[lane_id],
            gt_files_for_frame[lane_id],
            lane_id,
            frame_id,
            seq_id,
        )
        lanes[str(lane_id)] = lane_res
        total_points += lane_res.get('n_points', 0)

        fitted = lane_res.get('model_type')
        if fitted in model_counts:
            model_counts[fitted] += 1

        # Gather every usable (present, non-NaN) metric value.
        for group_name, names in metric_groups:
            group = lane_res.get(group_name, {})
            for name in names:
                value = group.get(name)
                if value is None or np.isnan(value):
                    continue
                collected[name].append(float(value))

        lanes_done += 1

    # Mean / median per metric; NaN when no lane contributed a value.
    frame_metrics = {}
    for name, raw in collected.items():
        series = np.asarray(raw, dtype=float)
        has_values = series.size > 0
        frame_metrics[f"{name}_mean"] = float(np.mean(series)) if has_values else float(np.nan)
        frame_metrics[f"{name}_median"] = float(np.median(series)) if has_values else float(np.nan)

    return {
        "frame_id": frame_id,
        "n_points": total_points,
        "n_lanes": lanes_done,
        "n_linear": model_counts["linear"],
        "n_quadratic": model_counts["quadratic"],
        "lanes": lanes,
        "frame_metrics": frame_metrics
    }



def map_files_by_frame(files):
    '''
    Group files by the frame id and lane id encoded in their stem ("<frame_id>_<lane_id>").

    Args:
       files (iterable of Path): input files (each item must expose `.stem`).

    Returns:
        mapping (defaultdict): { frame_id (str) : { lane_id : filepath (str) } }.
        lane_id is an int when the part after the last underscore is numeric, otherwise the
        raw string. A stem without any underscore is stored as { stem : { None : filepath } }.
    '''
    by_frame = defaultdict(dict)  # frame_id -> { lane_id: filepath }
    for path in files:
        name = path.stem
        frame_part, separator, lane_part = name.rpartition('_')

        if not separator:
            # No underscore: the whole stem is the frame id and the lane is unknown.
            by_frame[name][None] = str(path)
            continue

        try:
            lane_key = int(lane_part)
        except ValueError:
            lane_key = lane_part
        by_frame[frame_part][lane_key] = str(path)
    return by_frame



def summarize_numeric_list(arr):
    '''
    Calculate a set of descriptive statistics for an input array, ignoring NaN entries.

    Args:
        arr (arr[int/float]): array of data.

    Returns
        {
            'n' (int): number of non-NaN items,
            'mean' (float): mean value of the data,
            'median' (float): median value of the data,
            'std' (float): population standard deviation of the data (ddof=0),
            'min' (float): min value of the data,
            'max' (float): max value of the data,
            'skew' (float): skew of the data,
            'kurtosis' (float): excess (Fisher) kurtosis of the data,
            'p<#>' (float): #th percentile of the data (one per entry of the global config percentiles),
            'mean_bootstrap_95ci' [(float), (float)]: bootstrap 95% confidence interval of the mean
                (only if enabled in the global config)
        }
        Only {'n': 0} is returned when no non-NaN value remains.

    Note: calculating 95ci greatly increases runtime.
    '''
    data = np.asarray(arr, dtype=float)
    data = data[~np.isnan(data)]
    if data.size == 0:
        return {'n': 0}

    summary = {
        'n': int(data.size),
        'mean': float(np.mean(data)),
        'median': float(np.median(data)),
        'std': float(np.std(data, ddof=0)),
        'min': float(np.min(data)),
        'max': float(np.max(data)),
        'skew': float(stats.skew(data)),
        'kurtosis': float(stats.kurtosis(data, fisher=True)),
    }

    analytics_cfg = once_config["result_analytics"]
    summary.update({f'p{p}': float(np.percentile(data, p)) for p in analytics_cfg["percentiles"]})

    if analytics_cfg["bootstrap_ci"]:
        # Fixed seed so repeated runs give the same interval.
        rng = np.random.default_rng(0)
        resampled_means = [
            np.mean(rng.choice(data, size=data.size, replace=True))
            for _ in range(analytics_cfg["bootstrap_n"])
        ]
        summary['mean_bootstrap_95ci'] = [
            float(np.percentile(resampled_means, 2.5)),
            float(np.percentile(resampled_means, 97.5)),
        ]
    return summary



def process_run_and_save(solution_folder, gt_folder, output_json_path, solution_name, seq_id, overwrite=False, verbose=True):
    '''
    Process an individual run / sequence. Group items by frame and compute per-lane metrics as per_lane_metrics(), per-frame metrics as
    per_frame_aggregate(), and per-run metrics. Saves all run aggregates to a JSON file.

    Args:
        solution_folder (str): File path to the generated solution output (*.bin files).
        gt_folder (str): File path to the 3D ground truth folder (*.pcd files).
        output_json_path (str): File path to save the JSON results data.
        solution_name (str): Method name for debug.
        seq_id (int/str): run / sequence id.
        overwrite (bool): If true, overwrites existing analytics data. If false and the output
            file already exists, that file is loaded and returned without recomputing.
        verbose (bool): If true, logs the cache hit and shows a progress bar.

    Output:
        Generates a JSON file of the analytics data, as described by the return.

    Returns
        {
        'run_info' (dict): {
            'solution_folder' (str): filepath of solution folder
            'gt_folder' (str): filepath of ground truth folder
            'n_frames' (int): number of frames in run/sequence,
            'n_lanes': (int): number of lanes in run/sequence,
            'n_points' (int): number of points in run/sequence
        },
        'frames' (dict): { frame_id : frame_dict }, where frame_dict is the result of per_frame_aggregate(),
        'frame_aggregates' (dict): summarize_numeric_list() of every per-frame metric,
        'lane_aggregates' (dict): summarize_numeric_list() of every per-lane metric, plus
            'per_lane_point_counts' and 'lane_type_counts',
        'point_aggregates' (dict): summarize_numeric_list() of the per-point distance lists.
        }
    '''
    solution_folder = Path(solution_folder)
    gt_folder = Path(gt_folder)
    out_path = Path(output_json_path)

    if out_path.exists() and not overwrite:
        if verbose:
            main_logger.info(f"Found existing output at {out_path}, loading and returning it.")
        with open(out_path, 'r') as f:
            return json.load(f)

    # Determine frames to process: only frames present in both solution and ground truth.
    sol_map = map_files_by_frame(sorted(solution_folder.glob("*.bin")))
    gt_map = map_files_by_frame(sorted(gt_folder.glob("*.pcd")))

    frame_ids = sorted(set(sol_map.keys()) & set(gt_map.keys()))
    if verbose:
        iterator = tqdm(frame_ids, desc=f"Processing output{' for ' + solution_name if solution_name else ''}{' seq ' + str(seq_id) if seq_id else ''}")
    else:
        iterator = frame_ids

    run_results = {
        'run_info': {
            'solution_folder': str(solution_folder),
            'gt_folder': str(gt_folder),
            'n_frames': len(frame_ids),
            'n_lanes': 0,
            'n_points': 0
        },
        'frames': {},
        'frame_aggregates': {},
        'lane_aggregates': {},
        'point_aggregates': {}
    }

    lane_point_keys = ["MAE_point", "MSE_point", "RMSE_point", "Chamfer", "Hausdorff"]
    lane_curve_keys = ["MAE_curve", "MSE_curve", "RMSE_curve"]
    lane_metric_lists = {k: [] for k in lane_point_keys + lane_curve_keys}
    per_lane_point_counts = []
    lane_type_counts = defaultdict(int)

    point_metric_keys = ["point_distances", "curve_distances"]
    point_metric_lists = {k: [] for k in point_metric_keys}

    frame_metric_keys = []  # per-frame metric names, auto-populated from the first frame
    frame_metric_lists = defaultdict(list)

    def is_usable(value):
        # Per-point lists store failed projections as None, which np.isfinite() rejects
        # with a TypeError, so None has to be tested first.
        return value is not None and np.isfinite(value)

    for frame_id in iterator:
        sol_files_for_frame = sol_map.get(frame_id, {})
        gt_files_for_frame = gt_map.get(frame_id, {})

        frame_res = per_frame_aggregate(frame_id, sol_files_for_frame, gt_files_for_frame, seq_id)
        run_results['run_info']['n_lanes'] += frame_res['n_lanes']
        run_results['run_info']['n_points'] += frame_res['n_points']
        run_results['frames'][frame_id] = frame_res

        # Per-lane metrics (lanes that reported an error carry no metrics and are skipped)
        for lane_res in frame_res['lanes'].values():
            if lane_res.get('error') is not None:
                continue

            point_metrics = lane_res.get('metrics_point', {})
            curve_metrics = lane_res.get('metrics_curve', {})

            for k in lane_point_keys:
                val = point_metrics.get(k)
                if is_usable(val):
                    lane_metric_lists[k].append(float(val))

            for k in lane_curve_keys:
                val = curve_metrics.get(k)
                if is_usable(val):
                    lane_metric_lists[k].append(float(val))

            per_lane_point_counts.append(int(lane_res.get('n_points', 0)))
            lane_type_counts[lane_res.get('model_type', 'unknown')] += 1

            per_point_data = lane_res.get('per_point', {})
            for k in point_metric_keys:
                if k in per_point_data:
                    point_metric_lists[k].extend(float(v) for v in per_point_data[k] if is_usable(v))

        # Per-frame metrics
        if not frame_metric_keys and 'frame_metrics' in frame_res:
            frame_metric_keys = list(frame_res['frame_metrics'].keys())

        for k in frame_metric_keys:
            val = frame_res['frame_metrics'].get(k)
            if is_usable(val):
                frame_metric_lists[k].append(float(val))

    # Dynamic aggregates. summarize_numeric_list() takes the data only (no metric name).
    run_results['frame_aggregates'] = {k: summarize_numeric_list(v) for k, v in frame_metric_lists.items()}

    run_results['lane_aggregates'] = {k: summarize_numeric_list(v) for k, v in lane_metric_lists.items()}
    run_results['lane_aggregates']['per_lane_point_counts'] = summarize_numeric_list(per_lane_point_counts)
    run_results['lane_aggregates']['lane_type_counts'] = dict(lane_type_counts)

    run_results['point_aggregates'] = {k: summarize_numeric_list(v) for k, v in point_metric_lists.items()}

    # Save JSON. Metrics may contain NaN, which Python's json module writes as the
    # non-standard literal NaN (and reads back via json.load).
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, 'w') as f:
        json.dump(run_results, f, indent=2)

    return run_results


In [None]:
def get_seqs():
    '''
    Collect the run / sequence ids to process.

    When the config's runtime seq_id is None, every directory directly under the output base
    path is treated as a sequence. Otherwise only the configured id is used, and the program
    exits with status 1 if its folder does not exist.

    Returns:
        (arr): An array of all of the ids.
    '''
    base_path = once_config["output"]["base_path"]
    seq_id = once_config["runtime"]["seq_id"]

    if seq_id is not None:
        candidate = os.path.join(base_path, seq_id)
        if not (os.path.exists(candidate) and os.path.isdir(candidate)):
            main_logger.error(f"Prespecified run id {seq_id} is not found, or not a run folder.")
            sys.exit(1)
        main_logger.info(f"Running prespecified run id: {seq_id}")
        return [seq_id]

    # Get all sequences
    seq_ids = [
        name
        for name in os.listdir(base_path)
        if os.path.isdir(os.path.join(base_path, name))
    ]
    main_logger.info(f"Running all run ids: {len(seq_ids)} found.\n\n\n\n")
    return seq_ids



def main_analysis_per_seq(seq_id):
    '''
    Run the analysis of one sequence for every solution enabled in the config.

    Each enabled solution is compared against the sequence's ground truth folder by
    process_run_and_save(), which writes one evaluation file per solution into
    "<output base>/<seq_id>/evaluation".

    Args:
        seq_id (int/str): sequence/run id.

    Output:
        Writes per-solution JSON files as process_run_and_save().
    '''
    output_cfg = once_config["output"]
    base_path = output_cfg["base_path"]

    ground_truth_base_folder = once_config["data"]["base_path"]
    ground_truth_folder = once_config["data"]["lane_position_ground_truth_folder_path"]
    grounds_truth_folderpath = Path(os.path.join(ground_truth_base_folder, seq_id, ground_truth_folder))

    evaluation_folderpath = Path(os.path.join(base_path, seq_id, "evaluation"))
    evaluation_extension = once_config["result_analytics"]["evaluation_extension"]

    # (enable flag, folder-name key, display name) for every solution, in processing order.
    solutions = (
        ("calc_cl_intrinsic", "cl_intrinsic_path", "Cl Intrinsic"),
        ("calc_cl_intrinsic_ground", "cl_intrinsic_ground_path", "Cl Intrinsic Ground"),
        ("calc_cl_depth", "cl_depth_path", "Cl Depth"),
        ("calc_cl_intrinsic_depth", "cl_intrinsic_depth_path", "Cl Intrinsic + Depth"),
        ("calc_thesis_solution", "thesis_solution_path", "Thesis Solution"),
    )

    for flag_key, path_key, display_name in solutions:
        if not output_cfg[flag_key]:
            continue

        solution_folderpath = Path(os.path.join(base_path, seq_id, output_cfg[path_key]))
        eval_path = Path(os.path.join(evaluation_folderpath, output_cfg[path_key] + evaluation_extension))
        process_run_and_save(
            solution_folderpath,
            grounds_truth_folderpath,
            eval_path,
            solution_name=display_name,
            seq_id=seq_id,
            overwrite=once_config["result_analytics"]["override_existing_output"]
        )


In [None]:
# Resolve which sequences to process (all folders, or the single configured id).
seq_ids = get_seqs()

# Run all sequences
for seq_id in seq_ids:
    main_logger.info(f"Running seq_id: {seq_id}")
    # Time each sequence separately; the timer is started and stopped around its analysis.
    start_timer(main_logger)
    main_analysis_per_seq(seq_id)
    stop_timer(main_logger, f"Seq {seq_id}", calc_lane_time=False)
