## Thesis Analysis
Created by Jett Penner<br>
December 2025 <br>


Performs quantitative (and some qualitative) analysis of the data, automatically generating graphs and tables, where efficient, for use in my thesis paper. This code lacks coherence and organization and may require running code blocks out of order, because it is intended purely for personal output generation and not for future replication (although it may act as a guide to the analyses performed). As a result, comments are inconsistent, and certain graphing modules may be AI-generated.

Again, only needed for thesis analysis, not for real implementation. Run the files in order:
1. Run the `ONCE-3DLanes Data Loader` for data loading and preprocessing.
2. Run the `2D-to-3D Lanes Pipeline` for projection output (for proposed and alternate solutions).
3. Run the `2D-to-3D Lanes Outlier Postfilter` for preprocessing outlier detection.
4. Run the `Results Compiler` to generate aggregate and summary statistics for easier (and faster) generation of analyses.
5. Run this code (sometimes non-sequentially) to generate summary values of output.


In [None]:
import os
import yaml
import json
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
import matplotlib
from scipy import stats
import shutil
import re
import sys
from pathlib import Path
import matplotlib.image as mpimg
import cv2


In [None]:
def make_solution_label_mapper(all_solutions, solution_labels):
    """
    Build a translator from solution identifiers to display labels.

    The i-th entry of all_solutions is paired with the i-th entry of
    solution_labels. The returned callable takes one solution and returns
    its label, raising KeyError for a solution that was never registered.

    Raises:
        ValueError: if the two sequences have different lengths.
    """
    if len(solution_labels) != len(all_solutions):
        raise ValueError("all_solutions and solution_labels must be the same length")

    # Positional pairing of identifiers with labels
    label_by_solution = dict(zip(all_solutions, solution_labels))

    def mapper(solution):
        try:
            label = label_by_solution[solution]
        except KeyError:
            raise KeyError(f"Solution {solution!r} not found in all_solutions")
        return label

    return mapper


def make_is_important_mapper(all_solutions, alternative_solutions):
    """
    Build a predicate telling whether a solution counts as "important".

    A solution from all_solutions is important exactly when it does not
    appear in alternative_solutions. The returned callable raises KeyError
    for a solution that is not part of all_solutions.
    """
    # Importance is decided once, up front, for every known solution
    importance = {}
    for candidate in all_solutions:
        importance[candidate] = candidate not in alternative_solutions

    def is_important(solution):
        try:
            flag = importance[solution]
        except KeyError:
            raise KeyError(f"Solution {solution!r} not found in all_solutions")
        return flag

    return is_important

In [None]:
# Load the analysis configuration used throughout this notebook.
with open("once" + "_config.yaml", "r") as f:
    once_config = yaml.safe_load(f)

base_path = once_config["output"]["base_path"]

# Every sub-directory of base_path is treated as one sequence. os.listdir()
# returns entries in arbitrary order, so sort to keep tables and plots
# reproducible between runs.
seq_ids = sorted(
    name for name in os.listdir(base_path)
    if os.path.isdir(os.path.join(base_path, name))
)
results_eval_folder = once_config["result_analytics"]["results_eval_folder"]

thesis_solution = once_config["output"]["thesis_solution_path"]

# (enable flag, path key, display label) for each alternative solution.
# The label is attached to the solution here so that labels stay aligned
# with all_solutions even when some alternatives are disabled in the config;
# a fixed five-entry label list would make make_solution_label_mapper raise
# ValueError whenever any calc_* flag is off.
_alternative_specs = [
    ("calc_cl_intrinsic", "cl_intrinsic_path", "Pinhole Projection"),
    ("calc_cl_intrinsic_ground", "cl_intrinsic_ground_path", "Ground Pinhole Projection"),
    ("calc_cl_depth", "cl_depth_path", "LiDAR Interpolation"),
    ("calc_cl_intrinsic_depth", "cl_intrinsic_depth_path", "Intrinsic + LiDAR"),
]

alternative_solutions = []
_alternative_labels = []
for _flag, _path_key, _label in _alternative_specs:
    if once_config["output"][_flag]:
        alternative_solutions.append(once_config["output"][_path_key])
        _alternative_labels.append(_label)

# The proposed (thesis) solution comes first, followed by the enabled
# alternatives in the order listed above.
all_solutions = []
solution_labels = []
if once_config["output"]["calc_thesis_solution"]:
    all_solutions.append(thesis_solution)
    solution_labels.append("Proposed")
all_solutions.extend(alternative_solutions)
solution_labels.extend(_alternative_labels)

map_solution = make_solution_label_mapper(all_solutions, solution_labels)
is_important = make_is_important_mapper(all_solutions, alternative_solutions)

def file_map_solution(s):
    """
    Return the display label for solution key s after normalising the key.

    The key is stripped, every non-letter character is replaced by "_",
    runs of "_" are collapsed, and the result is looked up via map_solution.
    """
    # NOTE(review): the normalisation is applied to the lookup key, not to
    # the returned label, so a key containing digits or punctuation will no
    # longer match all_solutions -- confirm this is intended.
    normalised = re.sub(r'[^A-Za-z]', '_', s.strip())
    normalised = re.sub(r'_+', '_', normalised)
    return map_solution(normalised)


# Suffix appended to a solution name to form its evaluation file name
evaluation_extension = once_config["result_analytics"]["evaluation_extension"]

# Location of the runtime JSON, resolved relative to the output base path
runtime_data_path = os.path.join(
    once_config["output"]["base_path"],
    once_config["output"]["runtime_data_path"],
)

In [None]:
# One entry per sequence, each holding an empty slot per solution that the
# runtime cells below fill in.
runtime_dict = {}
for seq_id in seq_ids:
    runtime_dict[seq_id] = dict((solution, {}) for solution in all_solutions)

In [None]:
# left empty for ease of non-sequential running

## Runtime

In [None]:
def populate_runtime(runtime_dict, runtime_data_path, seq_id, performance_path):
    """
    Fill runtime_dict[seq_id] with item counts and per-solution runtimes.

    Args:
        runtime_dict: mapping seq_id -> {solution_name: {...}}; mutated in place.
            runtime_dict[seq_id] must already exist.
        runtime_data_path: JSON file holding a list of entries, each with a
            "seq_id" and a "runtimes" mapping of solution name -> runtime.
        seq_id: the sequence whose entries are extracted.
        performance_path: JSON file whose "run_info" object supplies
            "n_frames", "n_lanes" and "n_points" (each defaults to 1 if absent).

    Side effects:
        Sets runtime_dict[seq_id]["count_data"] and, for every solution that
        already has a slot in runtime_dict[seq_id], its "runtime_data" dict
        (total runtime plus runtime per frame / lane / point).
    """
    # Load runtime JSON data
    with open(runtime_data_path, "r") as f:
        runtime_data = json.load(f)

    # Load performance JSON data to extract counts
    with open(performance_path, "r") as f:
        performance_data = json.load(f)
    run_info = performance_data.get("run_info", {})
    n_frames = run_info.get("n_frames", 1)
    n_lanes = run_info.get("n_lanes", 1)
    n_points = run_info.get("n_points", 1)

    seq_entry = runtime_dict[seq_id]
    seq_entry["count_data"] = {
        "n_frames": n_frames,
        "n_lanes": n_lanes,
        "n_points": n_points
    }

    def per_item(total, count):
        # A sequence with no valid frames/lanes/points has no meaningful
        # per-item runtime; report NaN instead of raising ZeroDivisionError.
        return total / count if count else float("nan")

    for entry in runtime_data:
        if entry.get("seq_id") != seq_id:
            continue

        for solution_name, runtime_value in entry.get("runtimes", {}).items():
            # Only solutions that were registered for this sequence are kept
            if solution_name not in seq_entry:
                continue
            # A missing (null) runtime is recorded as zero
            if runtime_value is None:
                runtime_value = 0
            seq_entry[solution_name]["runtime_data"] = {
                "runtime": runtime_value,
                "runtime_frame": per_item(runtime_value, n_frames),
                "runtime_lanes": per_item(runtime_value, n_lanes),
                "runtime_points": per_item(runtime_value, n_points)
            }

                    
# Counts are read from the evaluation file of the first registered solution
# of each sequence.
for seq_id in tqdm(seq_ids, desc="Processing runtimes"):
    performance_path = os.path.join(
        base_path,
        seq_id,
        results_eval_folder,
        all_solutions[0] + evaluation_extension,
    )
    populate_runtime(runtime_dict, runtime_data_path, seq_id, performance_path)


In [None]:
def generate_runtime_figures(runtime_dict, output_filename, point_divisor=1000):
    """
    Write the runtime tables (Excel) and the runtime-per-frame line plot (PNG).

    Args:
        runtime_dict: mapping seq_id -> {solution_name: {"runtime_data": {...}}}.
            Entries whose "runtime_data" is missing or empty are skipped.
        output_filename: output path without extension; ".xlsx" and ".png"
            are appended.
        point_divisor: per-point runtimes are multiplied by this value so the
            column reads "runtime per <point_divisor> points".

    The Excel file gets a "details" sheet (one row per sequence/solution) and
    a "summary" sheet (one row per sequence, one runtime-per-frame column per
    solution). Uses the module-level map_solution and is_important helpers
    for legend labels and highlighting.
    """
    details_rows = []
    summary_rows = []
    points_column = f"Runtime / {point_divisor} Points (s)"

    # Collect data
    for seq_id, seq_data in runtime_dict.items():
        summary_row = {"Run id": seq_id}
        for solution_name, sol_data in seq_data.items():
            rt_data = sol_data.get("runtime_data", {})
            if not rt_data:
                continue

            details_rows.append({
                "Run id": seq_id,
                "Solution": solution_name,
                "Runtime (s)": rt_data["runtime"],
                "Runtime / Frame (s)": round(rt_data["runtime_frame"], 4),
                "Runtime / Lane (s)": round(rt_data["runtime_lanes"], 4),
                points_column: round(rt_data["runtime_points"] * point_divisor, 4)
            })

            summary_row[solution_name] = round(rt_data["runtime_frame"], 4)

        summary_rows.append(summary_row)

    # Convert to DataFrames; blanks (rather than NaN) read better in Excel
    details_df = pd.DataFrame(details_rows)
    summary_df = pd.DataFrame(summary_rows).fillna("")

    # Ensure output directory exists. os.makedirs("") raises, so skip it when
    # output_filename has no directory component.
    out_dir = os.path.dirname(output_filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Output Excel file
    excel_path = f"{output_filename}.xlsx"
    with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
        details_df.to_excel(writer, index=False, sheet_name="details")
        summary_df.to_excel(writer, index=False, sheet_name="summary")

    print(f"Runtime table written to {os.path.abspath(excel_path)}")

    # With no sequences at all there is no "Run id" column to sort or plot
    if "Run id" not in summary_df.columns:
        print("[INFO] No runtime rows available; skipping runtime plot.")
        return

    # Prepare data
    summary_df_sorted = summary_df.sort_values("Run id")
    run_ids = summary_df_sorted["Run id"].astype(str).str[-3:]  # last 3 chars

    plt.figure(figsize=(10, 6))

    # Get list of solutions excluding Run id
    solution_names = [col for col in summary_df.columns if col != "Run id"]

    # Identify the important solution (first one flagged by is_important)
    important_solution = next((s for s in solution_names if is_important(s)), None)

    # Generate distinct colors for non-important solutions
    non_important_solutions = [s for s in solution_names if s != important_solution]
    cmap = matplotlib.colormaps.get_cmap("BuGn")
    shades = np.linspace(0.3, 1.0, len(non_important_solutions))
    colors = {s: cmap(shade) for s, shade in zip(non_important_solutions, shades)}

    # Assign red to the important solution
    if important_solution is not None:
        colors[important_solution] = (1.0, 0.0, 0.0, 1.0)  # red RGBA

    for solution_name in solution_names:
        # Blank cells written for Excel become NaN again for plotting
        y_vals = pd.to_numeric(summary_df_sorted[solution_name], errors="coerce")

        # Skip solution if entire column is NaN
        if y_vals.isna().all():
            continue

        # The important solution is drawn opaque and thicker; others subdued
        highlighted = solution_name == important_solution
        plt.plot(
            run_ids,
            y_vals,
            marker="o",
            label=map_solution(solution_name),
            color=colors[solution_name],
            alpha=1.0 if highlighted else 0.6,
            linewidth=2.5 if highlighted else 1.5
        )

    plt.xlabel("Run ID")
    plt.ylabel("Runtime per Frame (s)")
    plt.title("Runtime per Frame Across Sequences")
    plt.grid(True, linestyle="--", alpha=0.5)
    plt.legend()

    # Save plot
    fig_path = f"{output_filename}.png"
    plt.tight_layout()
    plt.savefig(fig_path, dpi=300)
    plt.close()

    print(f"Runtime plot written to {os.path.abspath(fig_path)}")


# Runtime table and plot go to <figure_output_base_path>/runtime.{xlsx,png}
runtime_output_stem = os.path.join(
    once_config["result_analytics"]["figure_output_base_path"], "runtime"
)
generate_runtime_figures(
    runtime_dict,
    runtime_output_stem,
    point_divisor=once_config["result_analytics"]["point_divisor"],
)

## Counts of items

In [None]:
def get_raw_frame_counts():
    """
    Count the raw camera frames available for every sequence in seq_ids.

    For each sequence the train/val/test split folders under the loader's
    camera-data directory are searched for "<split>/data/<seq_id>/cam01".
    A frame is a ".jpg" file (case-insensitive) whose stem consists only
    of digits.

    Returns:
        A list with exactly one count per entry of seq_ids, in the same
        order, so it can be indexed in lock-step with the sequences. A
        sequence whose cam01 folder is not found in any split contributes 0
        (and a warning is printed); if it exists in several splits the first
        split found is used.
    """
    once_loader_base = once_config["data"]["once_config_path"]
    with open(os.path.join(once_loader_base, "config.yaml"), "r") as f:
        once_loader_config = yaml.safe_load(f)
    cam_data_root = os.path.join(once_loader_base, once_loader_config["once_data"]["cam_data"])
    splits = ("train", "val", "test")

    total_seq_frames = []
    for seq_id in seq_ids:
        count = 0
        found = False
        for split in splits:
            frames_folder = os.path.join(cam_data_root, split, "data", seq_id, "cam01")
            if not os.path.isdir(frames_folder):
                continue

            found = True
            for fname in os.listdir(frames_folder):
                stem, ext = os.path.splitext(fname)
                # all characters of the stem must be digits
                if ext.lower() == ".jpg" and stem.isdigit():
                    count += 1
            break  # one entry per sequence keeps the list aligned with seq_ids

        if not found:
            print(f"[WARN] No cam01 folder found for sequence {seq_id}; counting 0 frames.")
        total_seq_frames.append(count)
    return total_seq_frames


def generate_n_items_table(runtime_dict, output_filename, total_seq_frames):
    """
    Write an Excel sheet ("counts") with per-sequence frame/lane/point counts.

    Args:
        runtime_dict: mapping seq_id -> dict containing a "count_data" entry
            with "n_frames", "n_lanes" and "n_points".
        output_filename: output path without extension; ".xlsx" is appended.
        total_seq_frames: raw frame counts, matched to runtime_dict by
            position (i-th value belongs to the i-th sequence).

    Averages are left empty when their denominator is zero. If
    total_seq_frames is shorter than runtime_dict the missing totals are
    left empty and a warning is printed instead of raising IndexError.
    """
    if len(total_seq_frames) != len(runtime_dict):
        print(
            f"[WARN] total_seq_frames has {len(total_seq_frames)} entries "
            f"but runtime_dict has {len(runtime_dict)} sequences; "
            "'Total Frames' may be misaligned."
        )

    details_rows = []

    for idx, (seq_id, seq_data) in enumerate(runtime_dict.items()):
        cd = seq_data["count_data"]

        n_frames = cd["n_frames"]
        n_lanes = cd["n_lanes"]
        n_points = cd["n_points"]

        total_frames = total_seq_frames[idx] if idx < len(total_seq_frames) else None

        # details rows (per-seq full record)
        details_rows.append({
            "Run ID": seq_id,
            "Total Frames": total_frames,
            "Valid Frames": n_frames,
            "Total Lanes": n_lanes,
            "Avg Lanes per Valid Frame": n_lanes / n_frames if n_frames > 0 else None,
            "Avg Points per Lane": n_points / n_lanes if n_lanes > 0 else None,
            "Total Points": n_points
        })

    details_df = pd.DataFrame(details_rows)

    # Make output directory. os.makedirs("") raises, so skip it when
    # output_filename has no directory component.
    out_dir = os.path.dirname(output_filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # Write Excel file
    excel_path = f"{output_filename}.xlsx"
    with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
        details_df.to_excel(writer, index=False, sheet_name="counts")

    print(f"Counts table written to {os.path.abspath(excel_path)}")


# Raw frame counts per sequence, then the counts table at
# <figure_output_base_path>/num_items.xlsx
total_seq_frames = get_raw_frame_counts()
generate_n_items_table(
    runtime_dict,
    os.path.join(once_config["result_analytics"]["figure_output_base_path"], "num_items"),
    total_seq_frames,
)

## Generating Per-Solution Metrics

In [None]:
def solution_detail_analytics(solution, percentiles=(95,), max_thresh=None, calc_lane_details=True, seq_grouping_subdict=None):
    """
    Aggregate the evaluation results of one solution over all sequences.

    For every sequence in the module-level seq_ids, the evaluation JSON
    "<base_path>/<seq_id>/<results_eval_folder>/<solution><evaluation_extension>"
    is read and the per-point / per-curve distances and the per-lane metrics
    of every lane in every frame are pooled.

    Args:
        solution: solution name used to build the evaluation file name.
        percentiles: percentiles reported as "p<value>" in every summary.
        max_thresh: distances above this value are discarded (None keeps
            everything finite). Only applies to the per-point lists, not to
            the per-lane metrics.
        calc_lane_details: also return the pooled per-lane metric values and
            their summaries.
        seq_grouping_subdict: optional {"group_dict": seq_id -> group,
            "group_set": iterable of groups}. When given, results are
            computed separately per group.

    Returns:
        Without grouping: one result dict with "all_point_distances",
        "all_curve_distances", "point_stats", "curve_stats" and, if
        calc_lane_details, "all_lane_point_values", "all_curve_point_values",
        "all_lane_point_stats", "all_lane_curve_stats".
        With grouping: a dict mapping each group to such a result dict.
    """
    if max_thresh is None:
        max_thresh = np.inf

    if seq_grouping_subdict is not None:
        group_set = seq_grouping_subdict["group_set"]
    else:
        group_set = ["x"]  # single placeholder group; unwrapped before returning

    def summarize_array(arr, percentiles=(90, 95), calculate_pointwise_metrics=True):
        # Descriptive statistics over the finite entries of arr; an input
        # with no finite entries yields just {'n': 0}.
        arr = np.asarray(arr, dtype=float)
        arr = arr[np.isfinite(arr)]

        if arr.size == 0:
            return {'n': 0}
        summary = {
            'n': int(arr.size),
            'mean': float(np.mean(arr)),
            'median': float(np.median(arr)),
            'std': float(np.std(arr, ddof=0)),
            'min': float(np.min(arr)),
            'max': float(np.max(arr)),
            'skew': float(stats.skew(arr)),
            'kurtosis': float(stats.kurtosis(arr, fisher=True)),
        }

        for p in percentiles:
            summary[f'p{p}'] = float(np.percentile(arr, p))

        # Only meaningful when arr holds raw per-point errors
        if calculate_pointwise_metrics:
            mse = float(np.mean(arr**2))
            summary['MAE'] = float(np.mean(np.abs(arr)))
            summary['MSE'] = mse
            summary['RMSE'] = float(np.sqrt(mse))

        return summary

    def keep_valid(values):
        # Finite distances not exceeding max_thresh, as plain Python floats.
        # Filtering is vectorised: lanes can hold many points and the lists
        # are pooled over every frame of every sequence.
        arr = np.asarray(values, dtype=float)
        arr = arr[np.isfinite(arr)]
        return arr[arr <= max_thresh].tolist()

    all_results = {m: {} for m in group_set}
    for set_item in group_set:
        all_point_distances = []
        all_curve_distances = []
        all_lane_point_metrics = {
            "MAE": [],
            "MSE": [],
            "RMSE": [],
            "Chamfer": [],
            "Hausdorff": []
        }
        all_lane_curve_metrics = {
            "MAE": [],
            "MSE": [],
            "RMSE": []
        }

        for seq_id in tqdm(seq_ids, desc=(f"Per-seq processing for {solution}") + (f" for {set_item}" if seq_grouping_subdict is not None else "")):
            # When grouping, only sequences belonging to the current group count
            if seq_grouping_subdict is not None:
                if not seq_grouping_subdict["group_dict"][seq_id] == set_item:
                    continue
            solution_path = os.path.join(base_path, seq_id, results_eval_folder, solution + evaluation_extension)
            with open(solution_path) as f:
                solution_data = json.load(f)

            frames = solution_data.get("frames", {})

            for frame_data in frames.values():
                lanes = frame_data.get("lanes", {})

                for lane_data in lanes.values():
                    pp = lane_data.get("per_point", {})

                    # point distances
                    if "point_distances" in pp:
                        all_point_distances.extend(keep_valid(pp["point_distances"]))

                    # curve distances
                    if "curve_distances" in pp:
                        all_curve_distances.extend(keep_valid(pp["curve_distances"]))

                    # Per-lane metrics may be stored under the bare name or
                    # with a "_point" / "_curve" suffix
                    mp = lane_data.get("metrics_point", {})
                    for key in all_lane_point_metrics:
                        if key in mp:
                            all_lane_point_metrics[key].append(mp[key])
                        elif (key + "_point") in mp:
                            all_lane_point_metrics[key].append(mp[key + "_point"])

                    mc = lane_data.get("metrics_curve", {})
                    for key in all_lane_curve_metrics:
                        if key in mc:
                            all_lane_curve_metrics[key].append(mc[key])
                        elif (key + "_curve") in mc:
                            all_lane_curve_metrics[key].append(mc[key + "_curve"])

        # Compute full statistics
        point_stats = summarize_array(all_point_distances, percentiles)
        curve_stats = summarize_array(all_curve_distances, percentiles)

        result = {
            "all_point_distances": all_point_distances,
            "all_curve_distances": all_curve_distances,
            "point_stats": point_stats,
            "curve_stats": curve_stats
        }

        if calc_lane_details:
            result["all_lane_point_values"] = all_lane_point_metrics
            # Key name kept as-is because callers read "all_curve_point_values"
            result["all_curve_point_values"] = all_lane_curve_metrics
            result["all_lane_point_stats"] = {}
            result["all_lane_curve_stats"] = {}
            for key, values in all_lane_point_metrics.items():
                result["all_lane_point_stats"][key] = summarize_array(np.array(values), percentiles=percentiles, calculate_pointwise_metrics=False)
            for key, values in all_lane_curve_metrics.items():
                result["all_lane_curve_stats"][key] = summarize_array(np.array(values), percentiles=percentiles, calculate_pointwise_metrics=False)

        all_results[set_item] = result

    if seq_grouping_subdict is None:
        return all_results[group_set[0]]
    else:
        return all_results



import os
import pandas as pd

def generate_solution_metrics_table(solution_dict, output_filename, nested_dict=False):
    """
    Flatten a (nested) metrics dictionary into an Excel table.

    Args:
        solution_dict: either one metrics dict, or (nested_dict=True) a
            mapping sheet_name -> metrics dict.
        output_filename: output path without extension; ".xlsx" is appended.
        nested_dict: write one sheet per top-level key instead of the single
            sheet "solution_metrics".

    Within a metrics dict, every dict-valued entry that directly holds
    numbers becomes one row labelled with its key; every dict one level
    deeper that holds numbers becomes a row labelled "<outer> <inner>".
    Columns are the sorted union of all numeric keys found, plus "Origin"
    for the row label. Non-dict entries (e.g. raw value lists) are ignored.
    """
    def is_number(value):
        return isinstance(value, (int, float))

    def process_single_dict(single_dict):
        """Process one solution_dict and return a DataFrame"""
        sub_dicts = [(key, val) for key, val in single_dict.items() if isinstance(val, dict)]

        # Pass 1: union of every numeric key, one or two levels down
        columns = set()
        for _, outer_val in sub_dicts:
            for mid_key, mid_val in outer_val.items():
                if is_number(mid_val):
                    columns.add(mid_key)
                elif isinstance(mid_val, dict):
                    columns.update(k for k, v in mid_val.items() if is_number(v))
        columns = sorted(columns)

        # Pass 2: decide which dicts become rows
        row_sources = []
        for outer_key, outer_val in sub_dicts:
            for mid_key, mid_val in outer_val.items():
                if is_number(mid_val):
                    # The outer dict itself is a flat stats record: one row,
                    # then stop scanning its remaining entries
                    row_sources.append((outer_key, outer_val))
                    break
                if isinstance(mid_val, dict) and any(is_number(v) for v in mid_val.values()):
                    row_sources.append((f"{outer_key} {mid_key}", mid_val))

        rows = []
        for label, data in row_sources:
            row = {"Origin": label}
            for key in columns:
                row[key] = data.get(key, None)
            rows.append(row)

        return pd.DataFrame(rows)

    # os.makedirs("") raises, so skip it when output_filename has no
    # directory component.
    out_dir = os.path.dirname(output_filename)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    excel_path = f"{output_filename}.xlsx"

    with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
        if nested_dict:
            # Each key in solution_dict is a sub-dictionary -> one sheet per key
            for sheet_name, subdict in solution_dict.items():
                df = process_single_dict(subdict)
                df.to_excel(writer, index=False, sheet_name=str(sheet_name))
        else:
            # Single table
            df = process_single_dict(solution_dict)
            df.to_excel(writer, index=False, sheet_name="solution_metrics")

    print(f"Solution metrics table written to {os.path.abspath(excel_path)}")



def prepare_output_folder(folder_name: str):
    """
    Return an empty output folder below the configured figure base path.

    If the folder does not exist it is created. If it exists, every regular
    file and sub-directory inside it is deleted. The resulting path is
    returned in both cases.
    """
    folder_path = os.path.join(once_config["result_analytics"]["figure_output_base_path"], folder_name)

    if not os.path.exists(folder_path):
        # Nothing there yet: create it and we are done
        os.makedirs(folder_path, exist_ok=True)
        print(f"[INFO] Created new folder: {folder_path}")
        return folder_path

    # Folder already present: wipe its contents but keep the folder itself
    entries = os.listdir(folder_path)
    for entry in entries:
        entry_path = os.path.join(folder_path, entry)
        if os.path.isfile(entry_path):
            os.remove(entry_path)
        elif os.path.isdir(entry_path):
            shutil.rmtree(entry_path)
    print(f"[INFO] Cleared {len(entries)} items in existing folder: {folder_path}")

    return folder_path



def plot_distance_distributions(data1, data2, plot_points=True, bins=50, verbose=True, labels=None, outfolder=None):
    """
    Plot distribution figures for two error series (point vs. curve).

    Two overlaid histograms (linear and log-scaled counts) of data1 and
    data2 are always produced. With verbose=True, five more figures are
    produced for the "active" series (data1 if plot_points else data2):
    histogram, log histogram, CDF, box plot and violin plot.

    Args:
        data1: point distances (array-like of numbers).
        data2: curve distances (array-like of numbers).
        plot_points: selects which series the verbose-only figures use.
        bins: histogram bin count.
        verbose: also draw the five single-series figures.
        labels: optional dict with "titles", "xlabel" and "ylabel" lists;
            must contain 7 entries if verbose else 2, otherwise the built-in
            defaults are used and a message is printed.
        outfolder: directory for the PNGs (named after the figure title);
            when None each figure is shown instead of saved.

    Non-finite values are dropped before plotting.
    """
    # Extract arrays
    all_point = np.asarray(data1, dtype=float)
    all_curve = np.asarray(data2, dtype=float)

    # Select active dataset
    if plot_points:
        data = all_point
        label = "Point Euclidean Distance"
    else:
        data = all_curve
        label = "Curve Euclidean Distance"

    # Remove non-finite values
    data = data[np.isfinite(data)]
    all_point_clean = all_point[np.isfinite(all_point)]
    all_curve_clean = all_curve[np.isfinite(all_curve)]

    # Custom labels are only accepted when they cover every figure drawn;
    # the length of the first list in the dict is what gets checked.
    expected_labels = 7 if verbose else 2
    if labels is not None:
        num_graph_labels = len(labels[next(iter(labels))])
        if num_graph_labels != expected_labels:
            print(f"[Error]: Not enough graph labels, needed {expected_labels}, got {num_graph_labels}.")
            labels = None
    if labels is None:
        labels = {
            "titles": [
                "Overlaid Histogram: Point vs Curve Distances",
                "Overlaid Log-Scaled Histogram: Point vs Curve Distances",
                f"Histogram of {label}",
                f"Log-Scaled Histogram of {label}",
                f"CDF of {label}",
                f"Box Plot of {label}",
                f"Violin Plot of {label}"
            ],
            "xlabel": [
                "Euclidean Distance (m)",
                "Euclidean Distance (m)",
                label + " (m)",
                label + " (m)",
                label,
                label,
                label
            ],
            # One entry per figure. Each entry needs its own comma: adjacent
            # string literals are concatenated, which would shift every
            # later y-label by one figure.
            "ylabel": [
                "Count",
                "Log Count",
                "Count",
                "Log Count",
                "CDF",
                "",
                ""
            ]
        }

    def save_plot_if_needed(title, outfolder):
        # Show interactively when no folder is given; otherwise save under a
        # filesystem-safe version of the title, never overwriting a file.
        if outfolder is None:
            plt.show()
            return
        safe_title = re.sub(r'[<>:"/\\|?*]', '_', title)
        safe_title = re.sub(r'\s+', '_', safe_title)
        safe_title = re.sub(r'_+', '_', safe_title)
        fname = os.path.join(outfolder, safe_title + ".png")
        if os.path.exists(fname):
            i = 1
            while True:
                alt = os.path.join(outfolder, f"{safe_title}{i}.png")
                if not os.path.exists(alt):
                    print(f"[Error] {fname} exists, saving as {safe_title}{i}.png")
                    fname = alt
                    break
                i += 1
        plt.savefig(fname, dpi=300)
        plt.close()

    def finish_figure(i, with_ylabel=True, legend=False, grid=False):
        # Apply the i-th title/axis labels to the current figure and emit it
        plt.title(labels["titles"][i])
        plt.xlabel(labels["xlabel"][i])
        if with_ylabel:
            plt.ylabel(labels["ylabel"][i])
        if legend:
            plt.legend()
        if grid:
            plt.grid(True, alpha=0.3)
        plt.tight_layout()
        save_plot_if_needed(labels["titles"][i], outfolder)

    # Figures 0 and 1: overlaid histograms, linear then log-scaled counts
    for idx, log_scale in enumerate((False, True)):
        plt.figure()
        if log_scale:
            plt.hist(all_point_clean, bins=bins, alpha=0.5, label="Point Distance", log=True)
            plt.hist(all_curve_clean, bins=bins, alpha=0.5, label="Curve Distance", log=True)
        else:
            plt.hist(all_point_clean, bins=bins, alpha=0.5, label="Point Distance")
            plt.hist(all_curve_clean, bins=bins, alpha=0.5, label="Curve Distance")
        finish_figure(idx, legend=True)

    if not verbose:
        return

    # Figure 2: histogram of the active series
    plt.figure()
    plt.hist(data, bins=bins)
    finish_figure(2)

    # Figure 3: log-scaled histogram of the active series
    plt.figure()
    plt.hist(data, bins=bins, log=True)
    finish_figure(3)

    # Figure 4: empirical CDF
    sorted_data = np.sort(data)
    yvals = np.arange(1, len(sorted_data) + 1) / len(sorted_data)
    plt.figure()
    plt.plot(sorted_data, yvals)
    finish_figure(4, grid=True)

    # Figure 5: box plot (no y-label)
    plt.figure()
    plt.boxplot(data, vert=False)
    finish_figure(5, with_ylabel=False)

    # Figure 6: violin plot (no y-label)
    plt.figure()
    plt.violinplot(data, vert=False, showmeans=True)
    finish_figure(6, with_ylabel=False)



### Thesis Solution

In [None]:
# Full-data metrics for the proposed (thesis) solution
percentiles = once_config["result_analytics"]["percentiles"]
thesis_details_dict = solution_detail_analytics(thesis_solution, percentiles, max_thresh=None)
generate_solution_metrics_table(
    thesis_details_dict,
    os.path.join(once_config["result_analytics"]["figure_output_base_path"], "thesis_metrics"),
)

# Toggle: recompute the metrics after discarding distances above a chosen
# percentile of the full data.
if True:
    access_index = 2  # position of the cut-off percentile within `percentiles`
    n_percent_val = str(percentiles[access_index])
    n_percent_label = 'p' + n_percent_val

    point_cutoff = thesis_details_dict["point_stats"][n_percent_label]
    curve_cutoff = thesis_details_dict["curve_stats"][n_percent_label]

    # The larger of the two percentile values becomes the shared threshold
    if point_cutoff > curve_cutoff:
        larger_metric = "point_stats"
    else:
        larger_metric = "curve_stats"
    print(f"{n_percent_val}% thresh for point: {point_cutoff}\n{n_percent_val}% thresh for curve: {curve_cutoff}")
    print(f"Threshold falls to: {thesis_details_dict[larger_metric][n_percent_label]}")

    n_percent_thesis_details_dict = solution_detail_analytics(
        thesis_solution,
        once_config["result_analytics"]["percentiles"],
        max_thresh=thesis_details_dict[larger_metric][n_percent_label],
        calc_lane_details=False,
    )
    generate_solution_metrics_table(
        n_percent_thesis_details_dict,
        os.path.join(
            once_config["result_analytics"]["figure_output_base_path"],
            f"thesis_{n_percent_val}_metrics",
        ),
    )


In [None]:
### Weather/tod comparison
def build_weather_dict():
    """
    Read the "weather" and "period" fields of every sequence's calibration file.

    For each seq_id the JSON file
    "<data base_path>/<seq_id>/<calibration_path>" is loaded. A missing
    calibration file prints an error and exits via sys.exit(1).

    Returns:
        {
            "weather": {"group_dict": seq_id -> weather, "group_set": [...]},
            "period":  {"group_dict": seq_id -> period,  "group_set": [...]},
        }
        Each group_set lists the distinct values once, sorted, so that
        per-group processing and the resulting Excel sheets come out in the
        same order on every run (iteration order of a plain set of strings
        is not stable between interpreter runs).
    """
    weather_dict = {}
    period_dict = {}
    for seq_id in seq_ids:
        calib_file = os.path.join(
            once_config["data"]["base_path"],
            seq_id,
            once_config["data"]["calibration_path"]
        )
        if not os.path.exists(calib_file):
            print(f"[ERROR]: Calibration file not found: {calib_file}")
            sys.exit(1)
        with open(calib_file, 'r') as f:
            calibration_data = json.load(f)

        weather_dict[seq_id] = calibration_data["weather"]
        period_dict[seq_id] = calibration_data["period"]

    seq_grouping_dict = {
        "weather": {
            "group_dict": weather_dict,
            # key=str keeps sorting well-defined even if values are not all strings
            "group_set": sorted(set(weather_dict.values()), key=str)
        },
        "period": {
            "group_dict": period_dict,
            "group_set": sorted(set(period_dict.values()), key=str)
        }
    }
    return seq_grouping_dict
seq_grouping_dict = build_weather_dict()


def print_group_counts(data, print_details=False):
    """
    Print how many ids fall into each group, optionally listing the ids.

    data is expected to provide "group_dict" (id -> group) and "group_set"
    (iterable of known groups); both default to empty. Groups that appear
    only in group_dict are reported as well. Groups are printed in sorted
    order, as are the ids within the optional details section.
    """
    # Start with every declared group so that empty groups show a zero count
    ids_by_item = {item: [] for item in data.get("group_set", [])}
    for gid, item in data.get("group_dict", {}).items():
        ids_by_item.setdefault(item, []).append(gid)

    ordered_items = sorted(ids_by_item)

    # Summary: "<group>: <count>"
    for item in ordered_items:
        print(f"{item}: {len(ids_by_item[item])}")

    if not print_details:
        return

    # Details: each group followed by its indented ids
    print("\n--- Details ---")
    for item in ordered_items:
        print(f"{item}:")
        for gid in sorted(ids_by_item[item]):
            print(f"    {gid}")

seq_grouping_dict = build_weather_dict()

# Thesis-solution metrics split by weather: one Excel sheet per weather value
print_group_counts(seq_grouping_dict["weather"], print_details=False)
thesis_weather_dict = solution_detail_analytics(
    thesis_solution,
    percentiles,
    max_thresh=None,
    seq_grouping_subdict=seq_grouping_dict["weather"],
)
generate_solution_metrics_table(
    thesis_weather_dict,
    os.path.join(once_config["result_analytics"]["figure_output_base_path"], "thesis_weather"),
    nested_dict=True,
)

# Same breakdown by period: one Excel sheet per period value
print_group_counts(seq_grouping_dict["period"], print_details=False)
thesis_period_dict = solution_detail_analytics(
    thesis_solution,
    percentiles,
    max_thresh=None,
    seq_grouping_subdict=seq_grouping_dict["period"],
)
generate_solution_metrics_table(
    thesis_period_dict,
    os.path.join(once_config["result_analytics"]["figure_output_base_path"], "thesis_period"),
    nested_dict=True,
)



In [None]:
# All thesis-solution figures go into a freshly cleared "thesis_graphs" folder
outfolder = prepare_output_folder("thesis_graphs")

# Overlaid point/curve distance histograms with the default titles and axis
# labels (pass a `labels` dict with two entries per list to override them).
plot_distance_distributions(
    thesis_details_dict["all_point_distances"],
    thesis_details_dict["all_curve_distances"],
    verbose=False,
    outfolder=outfolder,
)

# Same histograms for the percentile-thresholded data, when that cell was run
if "n_percent_thesis_details_dict" in globals():
    n_pecent_labels = {
        "titles": [
            f"Overlaid Histogram ({n_percent_val}%): Point vs Curve Distances",
            f"Overlaid Log-Scaled Histogram ({n_percent_val}%): Point vs Curve Distances",
        ],
        "xlabel": ["Euclidean Distance (m)", "Euclidean Distance (m)"],
        "ylabel": ["Count", "Log Count"],
    }
    plot_distance_distributions(
        n_percent_thesis_details_dict["all_point_distances"],
        n_percent_thesis_details_dict["all_curve_distances"],
        verbose=False,
        labels=n_pecent_labels,
        outfolder=outfolder,
    )

# Per-lane RMSE histograms (point-based vs. curve-based)
rmse_labels = {
    "titles": [
        "Overlaid Histogram: Point RMSE vs Curve RMSE",
        "Overlaid Log-Scaled Histogram: Point RMSE vs Curve RMSE",
    ],
    "xlabel": ["Distance (m)", "Distance (m)"],
    "ylabel": ["Count", "Log Count"],
}
plot_distance_distributions(
    thesis_details_dict["all_lane_point_values"]["RMSE"],
    thesis_details_dict["all_curve_point_values"]["RMSE"],
    verbose=False,
    labels=rmse_labels,
    outfolder=outfolder,
)


### Other solutions

In [None]:
### Metrics table and graphs for every solution other than the thesis solution
percentiles = once_config["result_analytics"]["percentiles"]

# Titles and axis labels shared by the per-lane RMSE histograms
rmse_labels = {
    "titles": [
        "Overlaid Histogram: Point RMSE vs Curve RMSE",
        "Overlaid Log-Scaled Histogram: Point RMSE vs Curve RMSE",
    ],
    "xlabel": ["Distance (m)", "Distance (m)"],
    "ylabel": ["Count", "Log Count"],
}

for solution in all_solutions:
    if solution == thesis_solution:
        continue

    solution_details_dict = solution_detail_analytics(solution, percentiles, max_thresh=None)
    solution_name = file_map_solution(solution)
    generate_solution_metrics_table(
        solution_details_dict,
        os.path.join(
            once_config["result_analytics"]["figure_output_base_path"],
            f"{solution_name}_metrics",
        ),
    )

    outfolder = prepare_output_folder(f"{solution_name}_graphs")

    # Point vs. curve distance histograms with the default labels
    plot_distance_distributions(
        solution_details_dict["all_point_distances"],
        solution_details_dict["all_curve_distances"],
        verbose=False,
        outfolder=outfolder,
    )

    # Per-lane RMSE histograms
    plot_distance_distributions(
        solution_details_dict["all_lane_point_values"]["RMSE"],
        solution_details_dict["all_curve_point_values"]["RMSE"],
        verbose=False,
        labels=rmse_labels,
        outfolder=outfolder,
    )


## Comparison Heatmap

In [None]:
def find_first_eval_json(root_folder):
    """Locate the first evaluation JSON that belongs to a known solution.

    Entries of ``root_folder`` whose names consist only of digits are visited
    in sorted order. Inside each one, the configured evaluation sub-folder is
    listed (also sorted) and the first file that ends with the configured
    evaluation extension and whose remaining stem is in ``all_solutions`` is
    returned.

    Args:
        root_folder: Directory containing the numerically named folders.

    Returns:
        The path of the first matching file, or ``None`` when nothing matches.
    """
    analytics_cfg = once_config["result_analytics"]
    eval_folder_name = analytics_cfg["results_eval_folder"]
    extension = analytics_cfg["evaluation_extension"]

    numeric_entries = (name for name in sorted(os.listdir(root_folder)) if name.isdigit())
    for entry in numeric_entries:
        eval_dir = os.path.join(root_folder, entry, eval_folder_name)
        if os.path.isdir(eval_dir):
            for candidate in sorted(os.listdir(eval_dir)):
                # The stem is the file name with the extension sliced off the end.
                if candidate.endswith(extension) and candidate[:-len(extension)] in all_solutions:
                    return os.path.join(eval_dir, candidate)

    return None


def print_eval_json_structure(json_path):
    """Print the key layout of one evaluation JSON file (debugging aid).

    Loads ``json_path`` and prints, as consecutively numbered sections, the
    keys found in:

    1. ``metrics_point`` of the first lane of the first frame
    2. ``metrics_curve`` of the first lane of the first frame
    3. ``frame_metrics`` of the first frame
    4. ``frame_aggregates``
    5. ``lane_aggregates``
    6. ``point_aggregates``
    7. the first entry of ``lane_aggregates`` (its sub-metric keys)

    If the file has no frames, or the first frame has no lanes, a short
    message is printed and no section is listed.

    Args:
        json_path: Path of the evaluation JSON file to inspect.

    Returns:
        None. All output goes to stdout.
    """
    with open(json_path, "r") as f:
        data = json.load(f)

    print(f"\nLoaded JSON: {json_path}\n")

    # Top-level aggregate blocks (missing ones are treated as empty).
    frame_aggregates = data.get("frame_aggregates", {})
    lane_aggregates = data.get("lane_aggregates", {})
    point_aggregates = data.get("point_aggregates", {})

    # The first frame and its first lane stand in for every other frame/lane.
    frames = data.get("frames", {})
    if not frames:
        print("No frames")
        return
    first_frame = next(iter(frames.values()))

    lanes = first_frame.get("lanes", {})
    if not lanes:
        print("No lanes in first frame")
        return
    first_lane = next(iter(lanes.values()))

    # Only the first lane aggregate is inspected for its sub-metric keys.
    first_metric_dict = next(iter(lane_aggregates.values()), {})

    sections = [
        ("Subkeys in metrics_point:", first_lane.get("metrics_point", {})),
        ("Subkeys in metrics_curve:", first_lane.get("metrics_curve", {})),
        ("Subkeys in frame_metrics:", first_frame.get("frame_metrics", {})),
        ("Subkeys in frame_aggregates:", frame_aggregates),
        ("Subkeys in lane_aggregates:", lane_aggregates),
        ("Subkeys in point_aggregates:", point_aggregates),
        ("Subkeys of each lane and point agg sub-metric:", first_metric_dict),
    ]

    # Numbering is derived from the position so it cannot drift out of sync
    # with the list above (previously two sections shared "4)" and two "5)").
    for number, (title, mapping) in enumerate(sections, start=1):
        separator = "" if number == 1 else "\n"
        print(f"{separator}{number}) {title}")
        for key in mapping:
            print(f"   - {key}")


# Debug toggle: change False to True to list the keys available for eval_metrics.
if False:
    json_file = find_first_eval_json(once_config["output"]["base_path"])
    if not json_file:
        print("No matching eval JSON file found.")
    else:
        print_eval_json_structure(json_file)


In [None]:
# Metrics compared between the thesis solution and the alternatives.
# The three *_aggregates lists (and aggregates_submetrics below) are currently
# empty, so only the per-frame and per-lane win rates are evaluated.
eval_metrics = dict(
    frame_aggregates=[],
    lane_aggregates=[],
    point_aggregates=[],
    frame_wins=[
        "MAE_point_mean",
        "MAE_point_median",
        "RMSE_point_mean",
        "RMSE_point_median",
        "MAE_curve_mean",
        "MAE_curve_median",
        "RMSE_curve_mean",
        "RMSE_curve_median",
    ],
    lane_wins_point=[
        "MAE_point",
        "RMSE_point",
    ],
    lane_wins_curve=[
        "MAE_curve",
        "RMSE_curve",
    ],
)
# Sub-metric names looked up inside each aggregate metric.
aggregates_submetrics = list()

def create_wins_dict():
    """Compare the thesis solution against every alternative solution.

    For each solution in ``alternative_solutions`` and each sequence in
    ``seq_ids`` the evaluation JSON of the thesis solution and of the
    alternative are loaded and compared metric by metric. The thesis solution
    "wins" a comparison when its value is smaller than the alternative's value
    multiplied by ``1 + solution_equal_evaluation_percent`` (from the config).

    Per sequence the result holds:
      * ``frame_aggregates`` / ``lane_aggregates`` / ``point_aggregates``:
        1 or 0 per metric and sub-metric,
      * ``frame_wins``: fraction of frames won (divided by ``n_frames``),
      * ``lane_wins_point`` / ``lane_wins_curve``: fraction of lanes won
        (divided by ``n_lanes``).

    The "overall" entry holds the fraction of sequences won for the aggregate
    categories, and the frame-/lane-weighted win fraction across all sequences
    for the other categories.

    NOTE(review): a sequence whose evaluation file is missing for either
    solution is skipped and keeps all-zero entries, yet its frame/lane counts
    still contribute to the overall denominators.

    Returns:
        dict mapping each alternative solution to
        ``{"overall": <dict>, "individual": {seq_id: <dict>}}``.
    """
    frame_agg_metrics = eval_metrics["frame_aggregates"]
    lane_agg_metrics = eval_metrics["lane_aggregates"]
    point_agg_metrics = eval_metrics["point_aggregates"]
    sub_metrics = aggregates_submetrics
    frame_metrics = eval_metrics["frame_wins"]
    lane_metrics_point = eval_metrics["lane_wins_point"]
    lane_metrics_curve = eval_metrics["lane_wins_curve"]

    # Aggregate category -> metric names; the category doubles as the JSON key.
    aggregate_metric_lists = {
        "frame_aggregates": frame_agg_metrics,
        "lane_aggregates": lane_agg_metrics,
        "point_aggregates": point_agg_metrics,
    }
    # Result category -> (key inside each lane's JSON entry, metric names).
    lane_metric_lists = {
        "lane_wins_point": ("metrics_point", lane_metrics_point),
        "lane_wins_curve": ("metrics_curve", lane_metrics_curve),
    }

    def create_seq_dic():
        """Return a zero-initialised result dict for one sequence (or overall)."""
        return {
            "frame_aggregates": {m: {n: 0 for n in sub_metrics} for m in frame_agg_metrics},
            "lane_aggregates": {m: {n: 0 for n in sub_metrics} for m in lane_agg_metrics},
            "point_aggregates": {m: {n: 0 for n in sub_metrics} for m in point_agg_metrics},
            "frame_wins": {m: 0 for m in frame_metrics},
            "lane_wins_point": {m: 0 for m in lane_metrics_point},
            "lane_wins_curve": {m: 0 for m in lane_metrics_curve}
        }

    def fraction(count, total):
        """Return count / total, or 0.0 when there is nothing to divide by."""
        return count / total if total else 0.0

    solution_equal_evaluation_percent = 1 + once_config["result_analytics"]["solution_equal_evaluation_percent"]

    def thesis_wins(thesis_value, other_value):
        """True when the thesis value beats the other value within tolerance."""
        return thesis_value < other_value * solution_equal_evaluation_percent

    compare_solutions_dict = {}
    for solution in alternative_solutions:

        per_seq_dict = {seq_id: create_seq_dic() for seq_id in seq_ids}

        for seq_id in tqdm(seq_ids, desc=f"Per-seq processing for {solution}"):
            seq_eval_dir = os.path.join(base_path, seq_id, results_eval_folder)
            try:
                # Thesis solution results for this sequence
                with open(os.path.join(seq_eval_dir, thesis_solution + evaluation_extension)) as f:
                    thesis_data = json.load(f)

                # Alternative solution results for this sequence
                with open(os.path.join(seq_eval_dir, solution + evaluation_extension)) as f:
                    other_solution = json.load(f)
            except FileNotFoundError:
                continue

            seq_wins = per_seq_dict[seq_id]

            # ---------- Frame / lane / point aggregates ----------
            # The result is stored per sub-metric, inside the innermost loop.
            for category, metrics in aggregate_metric_lists.items():
                for metric in metrics:
                    for sub_metric in sub_metrics:
                        seq_wins[category][metric][sub_metric] = int(thesis_wins(
                            thesis_data[category][metric][sub_metric],
                            other_solution[category][metric][sub_metric]
                        ))

            for frame_id, frame_data in thesis_data["frames"].items():
                # ---------- Per-frame ----------
                for metric in frame_metrics:
                    thesis_value = frame_data["frame_metrics"][metric]
                    other = other_solution["frames"][frame_id]["frame_metrics"][metric]
                    if thesis_wins(thesis_value, other):
                        seq_wins["frame_wins"][metric] += 1

                # ---------- Per-lane ----------
                for lane_id, lane_data in frame_data["lanes"].items():
                    for category, (metrics_key, metrics) in lane_metric_lists.items():
                        for metric in metrics:
                            thesis_value = lane_data[metrics_key][metric]
                            other = other_solution["frames"][frame_id]["lanes"][lane_id][metrics_key][metric]
                            if thesis_wins(thesis_value, other):
                                seq_wins[category][metric] += 1

            # Turn the win counts of this sequence into win fractions.
            for metric in frame_metrics:
                seq_wins["frame_wins"][metric] = fraction(
                    seq_wins["frame_wins"][metric],
                    runtime_dict[seq_id]["count_data"]["n_frames"]
                )
            for category, (_, metrics) in lane_metric_lists.items():
                for metric in metrics:
                    seq_wins[category][metric] = fraction(
                        seq_wins[category][metric],
                        runtime_dict[seq_id]["count_data"]["n_lanes"]
                    )

        overall_seq_dict = create_seq_dic()
        for seq_id in seq_ids:
            seq_wins = per_seq_dict[seq_id]
            # Each aggregate category iterates its OWN metric list.
            for category, metrics in aggregate_metric_lists.items():
                for m in metrics:
                    for n in sub_metrics:
                        overall_seq_dict[category][m][n] += 1 if seq_wins[category][m][n] else 0
            # Fractions are scaled back to counts so the overall value is
            # weighted by the number of frames / lanes in each sequence.
            for m in frame_metrics:
                overall_seq_dict["frame_wins"][m] += seq_wins["frame_wins"][m] * runtime_dict[seq_id]["count_data"]["n_frames"]
            for category, (_, metrics) in lane_metric_lists.items():
                for m in metrics:
                    overall_seq_dict[category][m] += seq_wins[category][m] * runtime_dict[seq_id]["count_data"]["n_lanes"]

        for category, metrics in aggregate_metric_lists.items():
            for m in metrics:
                for n in sub_metrics:
                    overall_seq_dict[category][m][n] = fraction(overall_seq_dict[category][m][n], len(seq_ids))
        total_n_frames = sum(runtime_dict[seq_id]["count_data"]["n_frames"] for seq_id in seq_ids)
        for m in frame_metrics:
            overall_seq_dict["frame_wins"][m] = fraction(overall_seq_dict["frame_wins"][m], total_n_frames)
        total_n_lanes = sum(runtime_dict[seq_id]["count_data"]["n_lanes"] for seq_id in seq_ids)
        for category, (_, metrics) in lane_metric_lists.items():
            for m in metrics:
                overall_seq_dict[category][m] = fraction(overall_seq_dict[category][m], total_n_lanes)

        compare_solutions_dict[solution] = {
            "overall": overall_seq_dict,
            "individual": per_seq_dict
        }

    return compare_solutions_dict

# Build the thesis-vs-alternatives comparison and (toggle below) persist it as JSON.
compare_solutions_dict = create_wins_dict()
if True:
    outpath = os.path.join(
        once_config["result_analytics"]["figure_output_base_path"],
        "metric_comparison.json",
    )
    with open(outpath, "w") as f:
        f.write(json.dumps(compare_solutions_dict, indent=4))
    print(f"Solution comparison data saved to {outpath}")

In [None]:
def get_gt_curve_distance(partial_out_path, output_filename):
    """Summarise all ``gt_to_curve_distances`` of one solution in an Excel file.

    For every sequence in ``seq_ids`` the evaluation JSON named
    ``partial_out_path + evaluation_extension`` is loaded (sequences without
    that file are skipped). The ``per_point -> gt_to_curve_distances`` lists of
    all lanes in all frames are pooled, and mean, median, max, std, MAE and
    RMSE of the pooled values are written as a single row to
    ``<output_filename>.xlsx`` (sheet ``gt_curve_distances``).

    Args:
        partial_out_path: Evaluation file name without the extension.
        output_filename: Output path without the ``.xlsx`` extension. Its
            directory is created when it has one.

    Returns:
        None. Prints a message and writes nothing when no distances are found.
    """
    base_path = once_config["output"]["base_path"]
    all_gt_dists = []

    for seq_id in tqdm(seq_ids, desc=f"Per-seq processing (GT distances) for {partial_out_path}"):
        eval_path = os.path.join(base_path, seq_id, results_eval_folder, partial_out_path + evaluation_extension)
        try:
            with open(eval_path) as f:
                eval_data = json.load(f)
        except FileNotFoundError:
            continue

        # frames -> lanes -> per_point -> gt_to_curve_distances
        for frame_data in eval_data.get("frames", {}).values():
            for lane_data in frame_data.get("lanes", {}).values():
                gt_dists = lane_data.get("per_point", {}).get("gt_to_curve_distances", [])
                # Entries that are not lists are ignored.
                if isinstance(gt_dists, list):
                    all_gt_dists.extend(gt_dists)

    # -------------------------------------------------
    # Compute metrics
    # -------------------------------------------------
    if not all_gt_dists:
        print("No gt_to_curve_distances found.")
        return

    arr = np.array(all_gt_dists)

    metrics = {
        "mean": float(np.mean(arr)),
        "median": float(np.median(arr)),
        "max": float(np.max(arr)),
        "std": float(np.std(arr)),
        "mae": float(np.mean(np.abs(arr))),
        "rmse": float(np.sqrt(np.mean(arr ** 2)))
    }

    # -------------------------------------------------
    # Single-row table with the columns:
    #   Origin | mean | median | max | std | mae | rmse
    # -------------------------------------------------
    df = pd.DataFrame([{
        "Origin": "gt_to_curve_distances",
        **metrics
    }])

    # -------------------------------------------------
    # Save to Excel
    # -------------------------------------------------
    # os.makedirs("") raises, so only create the folder when the output path
    # actually contains a directory component.
    output_dir = os.path.dirname(output_filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    excel_path = f"{output_filename}.xlsx"

    with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="gt_curve_distances")

    print(f"GT→curve distance metrics written to {os.path.abspath(excel_path)}")

# Write the GT-to-curve distance summary of the thesis solution.
get_gt_curve_distance(
    thesis_solution,
    os.path.join(
        once_config["result_analytics"]["figure_output_base_path"],
        "gt_curve_metrics",
    ),
)

        

In [None]:
def generate_compare_metric_figures(
    main_dict,
    output_filename,
    y_labels=None
):
    """Write the solution-comparison table (Excel) and heatmap (PNG).

    One table row is built per metric listed in ``eval_metrics`` (aggregate
    categories get one row per metric and sub-metric), with one column per
    entry of ``alternative_solutions`` holding the "overall" value from
    ``main_dict``; missing values become empty cells. The table is written to
    ``<output_filename>.xlsx`` (sheet ``metrics``). The same values, times 100
    and rounded to whole percent, are drawn as a heatmap saved to
    ``<output_filename>.png``.

    Args:
        main_dict: Mapping solution -> {"overall": {...}, ...} as produced by
            ``create_wins_dict``.
        output_filename: Output path without extension. Its directory is
            created when it has one.
        y_labels: Optional row labels for the heatmap. Used only when the
            length matches the number of rows; otherwise labels are derived
            from the metric category and metric name.

    Returns:
        None.
    """
    from matplotlib.colors import LinearSegmentedColormap

    rows = []

    for metric_category, submetric_list in eval_metrics.items():
        is_aggregate = metric_category.endswith("aggregates")

        for submetric in submetric_list:

            # Case 1: aggregate metrics → one row per sub-metric (e.g. mean/median)
            if is_aggregate:
                for agg_sub in aggregates_submetrics:
                    row = {
                        "Metric Category": metric_category,
                        "Metric": f"{submetric} {agg_sub}"
                    }
                    for ok in alternative_solutions:
                        row[ok] = (
                            main_dict.get(ok, {})
                                    .get("overall", {})
                                    .get(metric_category, {})
                                    .get(submetric, {})
                                    .get(agg_sub, None)
                        )
                    rows.append(row)

            # Case 2: simple one-level metrics
            else:
                row = {
                    "Metric Category": metric_category,
                    "Metric": submetric
                }
                for ok in alternative_solutions:
                    row[ok] = (
                        main_dict.get(ok, {})
                                .get("overall", {})
                                .get(metric_category, {})
                                .get(submetric, None)
                    )
                rows.append(row)

    df = pd.DataFrame(rows)

    # os.makedirs("") raises, so only create the folder when the output path
    # actually contains a directory component.
    output_dir = os.path.dirname(output_filename)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    excel_path = f"{output_filename}.xlsx"
    with pd.ExcelWriter(excel_path, engine="openpyxl") as writer:
        df.to_excel(writer, index=False, sheet_name="metrics")

    print(f"Evaluation metrics table written to {os.path.abspath(excel_path)}")

    # Keep only the per-solution columns and convert fractions → percent.
    # Missing values (None) become NaN in the float matrix.
    heat_df = df[alternative_solutions].copy()
    percent_matrix = heat_df.to_numpy(dtype=float) * 100.0

    def safe_round(x):
        """Round to whole percent; values strictly inside (0, 1) show 0 and
        values strictly inside (99, 100) show 99. NaN stays NaN."""
        if np.isnan(x):
            return np.nan
        if 0 < x < 1:
            return 0.0
        if 99 < x < 100:
            return 99.0
        return float(round(x))

    # otypes pins a float result: without it np.vectorize infers the dtype from
    # the first cell, so an int first cell followed by a NaN cell would raise,
    # and a NaN first cell would change how every value is printed.
    percent_matrix = np.vectorize(safe_round, otypes=[float])(percent_matrix)

    if y_labels is not None:
        if len(y_labels) == len(df):
            final_y_labels = y_labels
        else:
            print(f"[Error]: Provided y_labels length {len(y_labels)} "
                  f"does not match number of rows {len(df)}. Falling back to auto-generated labels.")
            y_labels = None  # Fall through to default behavior

    if y_labels is None:
        # "<Metric Category> <Metric>", underscores replaced by spaces and
        # every word capitalised.
        combined = (df["Metric Category"] + " " + df["Metric"]).tolist()
        final_y_labels = [
            " ".join(word.capitalize() for word in label.replace("_", " ").split())
            for label in combined
        ]

    x_labels = [map_solution(ok) for ok in alternative_solutions]

    # ---- Custom red→white→green colormap ----
    cmap = LinearSegmentedColormap.from_list(
        "red_to_green",
        [
            (0.0, "red"),      # 0%
            (0.5, "white"),    # 50%
            (1.0, "green")     # 100%
        ]
    )

    plt.figure(figsize=(12, max(6, len(df) * 0.35)))
    plt.imshow(percent_matrix, aspect="auto", cmap=cmap, vmin=0, vmax=100)

    # Overlay the whole-percent value on every cell that has one.
    for i in range(percent_matrix.shape[0]):
        for j in range(percent_matrix.shape[1]):
            val = percent_matrix[i, j]
            if not np.isnan(val):
                plt.text(
                    j, i, f"{int(val)}%",
                    ha="center", va="center", color="black"
                )

    # -------- X / Y tick labels ----------
    plt.xticks(
        ticks=np.arange(len(x_labels)),
        labels=x_labels,
        rotation=0,       # upright
        ha="center"
    )

    plt.yticks(
        ticks=np.arange(len(final_y_labels)),
        labels=final_y_labels
    )

    plt.title("Proposed Solution vs. Alternatives: Percentage of Frames/Lanes Outperforming")
    plt.xlabel("Alternative Solution")
    plt.ylabel("")
    plt.tight_layout()

    # ------------------------
    # SAVE FIGURE
    # ------------------------
    fig_path = f"{output_filename}.png"
    plt.savefig(fig_path, dpi=300)
    plt.close()

    print(f"Heatmap saved to {os.path.abspath(fig_path)}")

# Heatmap row labels, in the row order produced from eval_metrics:
# eight per-frame metrics followed by four per-lane metrics.
y_labels = [
    "Per-Frame Mean Point MAE",
    "Per-Frame Median Point MAE",
    "Per-Frame Mean Point RMSE",
    "Per-Frame Median Point RMSE",
    "Per-Frame Mean Curve MAE",
    "Per-Frame Median Curve MAE",
    "Per-Frame Mean Curve RMSE",
    "Per-Frame Median Curve RMSE",
    "Per-Lane Point MAE",
    "Per-Lane Point RMSE",
    "Per-Lane Curve MAE",
    "Per-Lane Curve RMSE",
]
generate_compare_metric_figures(
    compare_solutions_dict,
    os.path.join(
        once_config["result_analytics"]["figure_output_base_path"],
        "metric_comparison",
    ),
    y_labels,
)

## Qualitative Analysis Visuals

In [None]:
def display_2d_aligmnent(seq_id, frame_id, solution):
    """Show the LiDAR cloud of one frame projected into its camera image.

    All input files of the frame (2D lane detections, the solution's
    ``<frame_id>_*.bin`` outputs, calibration, LiDAR scan, camera image) are
    checked first; a missing one prints an ``[ERROR]`` line and exits the
    interpreter with status 1. The LiDAR points are then transformed into the
    camera frame, projected, and displayed twice: once on an empty canvas and
    once on top of the camera image.

    Args:
        seq_id: Sequence folder name.
        frame_id: Frame name without file extension.
        solution: Solution folder name (used here only for the input checks
            and the unused output file name).

    Returns:
        None. The figures are shown with ``plt.show()``; nothing is saved.
    """
    data_cfg = once_config["data"]

    def abort(message):
        print(message)
        sys.exit(1)

    def require_path(path, description):
        # Stop immediately when a required input is missing.
        if not os.path.exists(path):
            abort(f"[ERROR]: {description} does not exist: {path}")
        return path

    outfolder = os.path.join(once_config["result_analytics"]["figure_output_base_path"], "qualitative")
    os.makedirs(outfolder, exist_ok=True)
    # Computed but not used by this function (no figure is written to disk).
    outfilename = os.path.join(outfolder, "_".join((str(solution), str(seq_id), str(frame_id))))

    # ---------------- Input validation / loading ----------------
    gt_2d_output_path = require_path(
        os.path.join(data_cfg["base_path"], seq_id, data_cfg["lane_detections_folder_path"], frame_id + ".txt"),
        "GT 2D path",
    )

    solution_output_folder_path = require_path(
        os.path.join(once_config["output"]["base_path"], seq_id, solution),
        "Solutoin folder path",
    )
    solution_files = [
        str(found) for found in sorted(Path(solution_output_folder_path).glob(f"{frame_id}_*.bin"))
    ]
    if not solution_files:
        abort(f"[ERROR]: No such files exist in {solution_output_folder_path} called {frame_id}_*.bin")

    solution_calib_file = require_path(
        os.path.join(data_cfg["base_path"], seq_id, data_cfg["calibration_path"]),
        "Calib file",
    )
    lidar_file = require_path(
        os.path.join(data_cfg["base_path"], seq_id, data_cfg["lidar_path"], frame_id + ".bin"),
        "Lidar file",
    )
    camera_file = require_path(
        os.path.join(data_cfg["base_path"], seq_id, data_cfg["camera_path"], frame_id + ".png"),
        "Camera file",
    )

    # Helpers come from the pipeline notebook (imported through nbimporter).
    import nbimporter
    from main_runner import load_calibration, load_2d_lanes, get_lidar_points

    calibration = load_calibration(solution_calib_file, log=False)
    T_camera_lidar, _, fx, fy, cx, cy, img_width, img_height, distortion = calibration

    lanes = load_2d_lanes(gt_2d_output_path, once_config, img_width, img_height, log=False)
    if lanes is None or len(lanes) == 0:
        abort(f"[ERROR]: No lanes were found in the desired file.")

    # ---------------- LiDAR → camera frame ----------------
    lidar_points, _, _, _ = get_lidar_points(lidar_file)
    homogeneous = np.hstack((lidar_points, np.ones((lidar_points.shape[0], 1))))
    points_lidar_camera = (T_camera_lidar @ homogeneous.T).T[:, :3]  # lidar -> camera

    def project_points_onto_image(
        points,
        fx, fy, cx, cy, img_width, img_height, distortion
    ):
        """Project camera-frame points; return (u, flipped v, depth, size, min depth, max depth)."""
        no_rotation = np.zeros((3, 1), dtype=np.float32)
        no_translation = np.zeros((3, 1), dtype=np.float32)
        camera_matrix = np.array(
            [[fx, 0, cx], [0, fy, cy], [0, 0, 1]],
            dtype=np.float64,
        )
        projected, _ = cv2.projectPoints(points, no_rotation, no_translation, camera_matrix, distortion)

        pixels = projected.reshape(-1, 2)
        xyz = points.reshape(-1, 3)
        # First filter: distorted projection inside the image and z > 0.
        inside_image = (
            (pixels[:, 0] >= 0) & (pixels[:, 0] < img_width)
            & (pixels[:, 1] >= 0) & (pixels[:, 1] < img_height)
            & (xyz[:, 2] > 0)
        )
        visible = xyz[inside_image]

        depth_cfg = once_config["depth_map"]
        depth_cutoff_min = depth_cfg["depth_cutoff_min"]
        depth_cutoff_max = depth_cfg["depth_cutoff_max"]  # read but not used below
        depth_max_point_scaling_distance = depth_cfg["depth_max_point_scaling_distance"]
        lidar_point_size_min = depth_cfg["lidar_point_size_min"]
        lidar_point_size_max = depth_cfg["lidar_point_size_max"]

        # Pixel coordinates recomputed with the pinhole model only (no distortion).
        z = np.maximum(visible[:, 2], 1e-6)
        u = fx * (visible[:, 0] / z) + cx
        v = fy * (visible[:, 1] / z) + cy

        # Second filter: the recomputed pixel must also be inside the image.
        keep = (u >= 0) & (u < img_width) & (v >= 0) & (v < img_height)
        u, v, depth = u[keep], v[keep], z[keep]

        # Marker size shrinks linearly with the clipped depth.
        depth_clipped = np.clip(depth, depth_cutoff_min, depth_max_point_scaling_distance)
        depth_normalized = (depth_clipped - depth_cutoff_min) / (depth_max_point_scaling_distance - depth_cutoff_min)
        sizes = lidar_point_size_max - (depth_normalized * (lidar_point_size_max - lidar_point_size_min))

        # Largest depth first, so nearer points come last in the arrays.
        order = np.argsort(depth)[::-1]
        depth_sorted = depth[order]
        return (
            u[order],
            img_height - v[order],  # v axis flipped
            depth_sorted,
            sizes[order],
            np.min(depth_sorted),
            np.max(depth_sorted),
        )

    img = mpimg.imread(camera_file)
    img_flipped = np.flipud(img)  # flipped vertically, like the v coordinates above
    lidar_u, lidar_v, lidar_d, lidar_s, range_min, range_max = project_points_onto_image(
        points_lidar_camera,
        fx, fy, cx, cy, img_width, img_height, distortion
    )

    def show_projection(title, background, show_grid):
        plt.figure(figsize=(12, 8))
        if background is not None:
            plt.imshow(background)
        plt.scatter(lidar_u, lidar_v, c=lidar_d, cmap='viridis', s=lidar_s**2, vmin=range_min, vmax=range_max)
        plt.colorbar(label='Depth (euclidean distance in meters)')
        plt.title(title)
        plt.xlabel("u (pixels)")
        plt.ylabel("v (pixels)")
        plt.xlim([0, img_width])
        plt.ylim([0, img_height])
        plt.grid(show_grid)
        plt.gca().set_aspect('equal', adjustable='box')
        plt.show()

    # Points only, then points over the camera image.
    show_projection("Camera View: Projection of LiDAR Points", None, True)
    show_projection("LiDAR Points Projected on Camera Image", img_flipped, False)


def display_2d(seq_id, frame_id, solution):
    outfolder = os.path.join(once_config["result_analytics"]["figure_output_base_path"], "qualitative")
    os.makedirs(outfolder, exist_ok=True)
    outfilename = os.path.join(outfolder, str(solution) + "_" + str(seq_id) + "_" + str(frame_id))

    # Data loading
    if True:
        gt_2d_output_path = os.path.join(
            once_config["data"]["base_path"],
            seq_id,
            once_config["data"]["lane_detections_folder_path"],
            frame_id + ".txt"
        )
        if not os.path.exists(gt_2d_output_path):
            print(f"[ERROR]: GT 2D path does not exist: {gt_2d_output_path}")
            sys.exit(1)
        
        solution_output_folder_path = os.path.join(
            once_config["output"]["base_path"],
            seq_id,
            solution
        )
        if not os.path.exists(solution_output_folder_path):
            print(f"[ERROR]: Solutoin folder path does not exist: {solution_output_folder_path}")
            sys.exit(1)
        if not any(Path(solution_output_folder_path).glob(f"{frame_id}_*.bin")):
            print(f"[ERROR]: No such files exist in {solution_output_folder_path} called {frame_id}_*.bin")
            sys.exit(1)
        solution_files = [str(f) for f in sorted(Path(solution_output_folder_path).glob(f"{frame_id}_*.bin"))]

        solution_calib_file = os.path.join(
            once_config["data"]["base_path"],
            seq_id,
            once_config["data"]["calibration_path"]
        )
        if not os.path.exists(solution_calib_file):
            print(f"[ERROR]: Calib file does not exist: {solution_calib_file}")
            sys.exit(1)

        lidar_file = os.path.join(
            once_config["data"]["base_path"],
            seq_id, 
            once_config["data"]["lidar_path"],
            frame_id + ".bin"
        )
        if not os.path.exists(lidar_file):
            print(f"[ERROR]: Lidar file does not exist: {lidar_file}")
            sys.exit(1)

        camera_file = os.path.join(
            once_config["data"]["base_path"],
            seq_id,
            once_config["data"]["camera_path"],
            frame_id + ".png"
        )
        if not os.path.exists(camera_file):
            print(f"[ERROR]: Camera file does not exist: {camera_file}")
            sys.exit(1)

        # Load calib
        import nbimporter
        from main_runner import load_calibration, load_2d_lanes, get_lidar_points
        (
            T_camera_lidar,
            _,
            fx, fy, cx, cy,
            img_width, img_height,
            distortion
        ) = load_calibration(solution_calib_file, log=False)

        lanes = load_2d_lanes(gt_2d_output_path, once_config, img_width, img_height, log=False)
        if lanes is None or len(lanes) == 0:
            print(f"[ERROR]: No lanes were found in the desired file.")
            sys.exit(1)

    # Load LiDAR
    def RotateCamera(lidar_points, T_camera_lidar):
        ones = np.ones((lidar_points.shape[0], 1))
        points_lidar_hom = np.hstack((lidar_points, ones))
        points_transformed_hom = (T_camera_lidar @ points_lidar_hom.T).T # lidar -> camera
        return points_transformed_hom[:,:3]
    lidar_points, _, _, _ = get_lidar_points(lidar_file)
    points_lidar_camera = RotateCamera(lidar_points, T_camera_lidar)
    
    def project_points_onto_image(
        points, 
        fx, fy, cx, cy, img_width, img_height, distortion
    ):
        points_rescaled, _ = cv2.projectPoints(
            points,
            np.zeros((3, 1), dtype=np.float32),
            np.zeros((3, 1), dtype=np.float32),
            np.array([
                [fx,   0,  cx],
                [0,   fy,  cy],
                [0,    0,   1]
            ], dtype=np.float64),
            distortion
        )

        points_2d = points_rescaled.reshape(-1, 2)
        points_3d = points.reshape(-1, 3)
        mask = (
            (points_2d[:, 0] >= 0) &
            (points_2d[:, 0] < img_width) &
            (points_2d[:, 1] >= 0) &
            (points_2d[:, 1] < img_height) &
            (points_3d[:, 2] > 0)
        )
        points_visible = points_3d[mask]
        config = once_config
        depth_cutoff_min = config["depth_map"]["depth_cutoff_min"]
        depth_cutoff_max = config["depth_map"]["depth_cutoff_max"]
        depth_max_point_scaling_distance = config["depth_map"]["depth_max_point_scaling_distance"]
        lidar_point_size_min = config["depth_map"]["lidar_point_size_min"]
        lidar_point_size_max = config["depth_map"]["lidar_point_size_max"]

        # Create depth map    
        X = points_visible[:, 0]
        Y = points_visible[:, 1]
        Z = points_visible[:, 2]
        eps = 1e-6
        Z = np.maximum(Z, eps)
        u = fx * (X / Z) + cx
        v = fy * (Y / Z) + cy

        mask = (
            (u >= 0) & (u < img_width) &
            (v >= 0) & (v < img_height)
        )
        u = u[mask]
        v = v[mask]
        c = Z[mask]
        depth_clipped = np.clip(c, depth_cutoff_min, depth_max_point_scaling_distance)  # Clip to fixed range
        depth_normalized = (depth_clipped - depth_cutoff_min) / (depth_max_point_scaling_distance - depth_cutoff_min)
        sizes = lidar_point_size_max - (depth_normalized * (lidar_point_size_max - lidar_point_size_min))  # Linear interpolation

        sorted_indices = np.argsort(c)[::-1]  # Sort descending (closer points last)
        u_sorted = u[sorted_indices]
        v_sorted = v[sorted_indices]
        d_sorted = c[sorted_indices]
        range_min = np.min(d_sorted)
        range_max = np.max(d_sorted)
        sizes_sorted = sizes[sorted_indices]
        v_flipped = img_height - v_sorted

        return (
            u_sorted, 
            v_flipped,
            d_sorted,
            sizes_sorted,
            range_min,
            range_max
        )


    img = mpimg.imread(camera_file)
    img_flipped = np.flipud(img)  # Flip image vertically

    output_lanes = []
    for file in solution_files:
        points = np.fromfile(file, dtype=np.float32).reshape(-1, 3)
        output_lanes.append(points )

    def plot_lane_over_image(points=None, lanes_2d=None, title="", outfile=None):
        """
        Overlay projected solution lanes and/or 2D lanes on the vertically
        flipped camera image, optionally save the figure, then show it.

        points: iterable of per-lane point arrays handed to
                project_points_onto_image (None to skip the solution overlay)
        lanes_2d: iterable of 2D lane arrays indexed as lane[:, 0] / lane[:, 1]
                  (None to skip the 2D lanes)
        title: figure title
        outfile: output path without extension; ".png" is appended and the
                 figure is saved at 300 dpi when this is not None
        """
        solution_color = "red"
        gt_color = "#F0E442"
        line_color = "#2596be"

        has_points = points is not None
        has_lanes = lanes_2d is not None

        plt.figure(figsize=(12, 8))
        plt.imshow(img_flipped)

        if has_points:
            for lane_idx, lane_points in enumerate(points):
                # Only pixel coordinates and marker sizes are needed here
                u, v, _, s, _, _ = project_points_onto_image(
                    lane_points,
                    fx, fy, cx, cy, img_width, img_height, distortion
                )
                # Legend entries are attached to the first lane only, and only
                # when the 2D lanes (and therefore the legend) are drawn too
                first_with_legend = lane_idx == 0 and has_lanes
                scatter_label = {"label": "Output Points"} if first_with_legend else {}
                line_label = {"label": "Solution Line"} if first_with_legend else {}
                plt.scatter(u, v, s=s, color=solution_color, zorder=3, **scatter_label)
                plt.plot(u, v, color=line_color, linewidth=2, zorder=1, **line_label)

            if has_lanes:
                for lane_idx, lane in enumerate(lanes_2d):
                    gt_label = {"label": "Ground Truth Points"} if lane_idx == 0 else {}
                    plt.scatter(lane[:, 0], lane[:, 1], color=gt_color, s=2, **gt_label)
                plt.legend(
                    loc='upper right',
                    facecolor='#aaa',    # legend background color
                    edgecolor='black',   # border color
                    framealpha=1
                )
        elif has_lanes:
            # No solution points: draw the 2D lanes as marked lines instead
            marker_style = dict(
                marker='.',
                markersize=2,
                markerfacecolor=gt_color,
                markeredgecolor=gt_color,
            )
            for lane in lanes_2d:
                plt.plot(
                    lane[:, 0],
                    lane[:, 1],
                    color=line_color,
                    linewidth=2,
                    **marker_style
                )

        # Axes cosmetics: crop to the image extent and hide ticks/frame
        plt.title(title)
        plt.xlim([0, img_width])
        plt.ylim([0, img_height])
        plt.grid(False)
        plt.gca().set_aspect('equal', adjustable='box')
        plt.axis('off')

        if outfile is not None:
            plt.savefig(f"{outfile}.png", dpi=300)
        plt.show()
    
    plot_lane_over_image(points=output_lanes, lanes_2d=lanes, outfile=outfilename)  
    # plot_lane_over_image(points=output_lanes)
    # plot_lane_over_image(lanes_2d=lanes)


# Run the alignment display for one frame with the thesis solution.
# The first two arguments look like (seq_id, frame_id), matching the pairs
# passed to display_2d_batch later in the file -- TODO confirm against the
# function definition.
# NOTE(review): the commented-out calls below use the name `display_2d`, while
# the active call uses `display_2d_aligmnent`; check the name before
# re-enabling any of them.
display_2d_aligmnent("000076", "1616343539199", thesis_solution)
# display_2d("000027", "1616101262900", thesis_solution)
# display_2d("000027", "1616100803399", thesis_solution)
# display_2d("000027", "1616100953900", thesis_solution)
# display_2d("000076", "1616344463199", thesis_solution)
# display_2d("000076", "1616343539199", thesis_solution)
# display_2d("000076", "1616343988199", thesis_solution) # reflective bus
# display_2d("000077", "1616344940400", thesis_solution)
# display_2d("000168", "1618716889299", thesis_solution)
# display_2d("000334", "1619406822299", thesis_solution)

# display_2d("000080", "1616348625300", thesis_solution)
# display_2d("000200", "1618797316299", thesis_solution)


# Failures
# display_2d("000077", "1616344656900", thesis_solution)
# display_2d("000034", "1616175279299", thesis_solution)
# display_2d("000034", "1616175475800", thesis_solution) #dataset failure
# display_2d("000076", "1616343733200", thesis_solution)
# display_2d("000076", "1616343783200", thesis_solution) #force filter on car
# display_2d("000168", "1618716973799", thesis_solution) #cant detect other ramp



In [None]:
import math
def display_2d_batch(pairs, solution, n_cols=3):
    """
    Draw a grid of camera frames, each overlaid with the projected 3D
    solution lanes and the 2D ground-truth lanes, and save it as one PNG.

    pairs: list of (seq_id, frame_id)
    solution: solution folder name
    n_cols: number of columns in the grid

    The figure is written to
    <figure_output_base_path>/qualitative_fail/batch_<solution>.png and is
    closed afterwards; nothing is returned.

    Raises ValueError when pairs is empty or n_cols is smaller than 1.
    """
    # --- Validate Input (before any folder or figure is created) ---
    n_items = len(pairs)
    if n_items == 0:
        raise ValueError("pairs must contain at least one (seq_id, frame_id) entry")
    if n_cols < 1:
        raise ValueError(f"n_cols must be at least 1, got {n_cols!r}")

    # Project-local loaders, imported once here instead of once per subplot
    from main_runner import load_calibration, load_2d_lanes

    # --- Compute Grid Size ---
    n_rows = math.ceil(n_items / n_cols)

    # --- Create Output Folder ---
    outfolder = os.path.join(
        once_config["result_analytics"]["figure_output_base_path"],
        "qualitative_fail"
    )
    os.makedirs(outfolder, exist_ok=True)
    outfile = os.path.join(outfolder, f"batch_{solution}.png")

    # --- Create Figure ---
    # squeeze=False keeps axs two-dimensional even for a single row/column
    fig, axs = plt.subplots(
        n_rows, n_cols,
        figsize=(6*n_cols, 5*n_rows),
        squeeze=False
    )

    # --- Helper: Single Subplot Drawing ---
    def draw_single(ax, seq_id, frame_id):
        """Draw the image, projected solution lanes and GT lanes of one frame on ax."""
        # ---- Paths ----
        base = once_config["data"]["base_path"]
        gt_2d_output_path = os.path.join(base, seq_id,
                                         once_config["data"]["lane_detections_folder_path"],
                                         frame_id + ".txt")

        solution_output_folder_path = os.path.join(
            once_config["output"]["base_path"], seq_id, solution
        )

        # One .bin file per output lane of this frame, in sorted order
        solution_files = [str(f) for f in sorted(
            Path(solution_output_folder_path).glob(f"{frame_id}_*.bin")
        )]

        solution_calib_file = os.path.join(base, seq_id,
                                           once_config["data"]["calibration_path"])

        camera_file = os.path.join(base, seq_id,
                                   once_config["data"]["camera_path"],
                                   frame_id + ".png")

        # ---- Load calibration + GT lanes ----
        # The first two returned values are not needed for drawing
        (
            _,
            _,
            fx, fy, cx, cy,
            img_width, img_height,
            distortion
        ) = load_calibration(solution_calib_file, log=False)

        lanes = load_2d_lanes(gt_2d_output_path, once_config, img_width, img_height, log=False)

        img = mpimg.imread(camera_file)

        # ---- Load 3D solution lanes ----
        output_lanes = [
            np.fromfile(f, dtype=np.float32).reshape(-1, 3)
            for f in solution_files
        ]

        # ---- Project and Draw ----
        ax.imshow(img)

        # Intrinsic matrix is the same for every lane of this frame
        camera_matrix = np.array([[fx, 0, cx],
                                  [0, fy, cy],
                                  [0, 0, 1]], dtype=np.float64)
        no_rotation = np.zeros((3, 1), dtype=np.float32)
        no_translation = np.zeros((3, 1), dtype=np.float32)

        def project_points(points):
            """Return the (N, 2) pixel positions of the points that fall inside the image."""
            if points.shape[0] == 0:
                # Empty lane file: nothing to project, so skip the OpenCV call
                return np.empty((0, 2), dtype=np.float32)
            pts_2d, _ = cv2.projectPoints(
                points,
                no_rotation,
                no_translation,
                camera_matrix,
                distortion
            )
            pts_2d = pts_2d.reshape(-1, 2)
            # Keep points inside the image bounds with a positive third coordinate
            mask = (
                (pts_2d[:,0] >= 0) & (pts_2d[:,0] < img_width) &
                (pts_2d[:,1] >= 0) & (pts_2d[:,1] < img_height) &
                (points[:,2] > 0)
            )
            return pts_2d[mask]

        # --- Draw 3D solution lanes ---
        for lane3d in output_lanes:
            pts2d = project_points(lane3d)
            # A lane needs at least two visible points to be drawn
            if pts2d.shape[0] > 1:
                ax.scatter(pts2d[:,0], pts2d[:,1], color="red", s=1, zorder=3,  label="Output Points")
                ax.plot(pts2d[:,0], pts2d[:,1], color="#2596be", zorder=1, linewidth=2)

        # --- Draw GT 2D lanes ---
        # y is mirrored here; presumably load_2d_lanes returns y measured from
        # the bottom edge of the image -- TODO confirm
        for lane in lanes:
            ax.scatter(lane[:,0], img_height - lane[:,1], s=1, color="#F0E442")

        ax.set_xlim([0, img_width])
        ax.set_ylim([img_height, 0])  # invert y for images
        ax.set_aspect("equal")
        ax.set_title(f"{seq_id[-3:]} / {frame_id}")
        ax.axis("off")

    # ---- Draw all images into grid ----
    # Cells beyond the last pair are left blank with their axes hidden
    for idx, ax in enumerate(axs.flat):
        if idx < n_items:
            seq_id, frame_id = pairs[idx]
            draw_single(ax, seq_id, frame_id)
        else:
            ax.axis("off")

    # ---- Final Save (only once) ----
    # Lay out this figure explicitly rather than whichever figure is current
    fig.tight_layout()
    fig.savefig(outfile, dpi=200)
    plt.close(fig)
    print(f"[SAVED] {outfile}")

# (seq_id, frame_id) pairs for the batch grid.
# NOTE(review): `pairs` is defined but not passed to any call in this cell;
# only `pairs_fail` is rendered below.
pairs = [
    ("000027", "1616101761900"),
    ("000027", "1616101262900"),
    ("000027", "1616100803399"),
    ("000027", "1616100953900"),
    ("000076", "1616344463199"),
    ("000076", "1616343539199"),
    ("000076", "1616343988199"),
    ("000077", "1616344940400"),
    ("000080", "1616348625300"),
    ("000168", "1618716889299"),
    ("000200", "1618797316299"),
    ("000334", "1619406822299"),
]
# (seq_id, frame_id) pairs presumably showing failure cases, judging by the
# list name and the inline notes.
pairs_fail = [
    ("000077", "1616344656900"),
    ("000034", "1616175279299"),
    ("000034", "1616175475800"), #dataset failure
    ("000076", "1616343733200"),
    ("000076", "1616343783200"), #force filter on car
    ("000168", "1618716973799")
]
# Render the failure grid. display_2d_batch always saves to
# <figure_output_base_path>/qualitative_fail/batch_<solution>.png, so passing
# `pairs` instead would overwrite the same file.
display_2d_batch(pairs_fail, thesis_solution, n_cols=3)