In [None]:
import pandas as pd
import pickle
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np

# Render figures at 120 DPI and use seaborn's "whitegrid" theme for all plots
plt.rcParams.update({"figure.dpi": 120})
sns.set_theme(style="whitegrid")

In [None]:
# Base Directories
# NOTE(review): machine-specific absolute path; point output_root at your own
# results tree when running this notebook elsewhere.
output_root = Path("/home/guillemc/dev/LuPNT-private/output/2025_FeatureMatching")
models_dir = output_root / "eval_results_models"
legacy_dir = output_root / "eval_results"

# Accumulators, reset on every run of this cell so re-running it is idempotent.
all_data = []
all_per_pair_list = []

# Dataset Name Mapping (Prettify)
dataset_map = {
    "short_base": "Baseline",
    "short_camera_effects": "Camera Effects",
    "short_higher_elevation": "Higher Sun Elevation",
    "short_no_lights": "No Lights",
    "long_base": "Long Base",
    "long_camera_effects": "Long Camera Effects",
    "long_higher_elevation": "Long Higher Elevation",
    "long_no_lights": "Long No Lights",
    "rover_0": "Spirals",
}

# Model Name Mapping (Renaming); keys are matched as substrings of the
# model directory name, first match wins.
model_name_map = {
    "SuperPoint+LightGlue_Spirals": "Finetuned",
    # We will filter out the others, but mapping them just in case
    "lightglue_spirals_v1": "SuperPoint+LightGlue (Spirals)",
    "lightglue_unreal_base1": "SuperPoint+LightGlue (Traverse)",
    "spirals_seg_rover0": "SuperPoint+LightGlue (Spirals+Segmentation)",
}

# Skip list (Dumb/Test models AND models user wants to hide)
skip_patterns = [
    "semantic_test",
    "local_traverse_fov90",
    "spirals_20251211",
    # User requested ONLY "Spirals Legacy" (now "Finetuned")
    "lightglue_spirals_v1",
    "lightglue_unreal_base1",
    "spirals_seg_rover0",
]

# Extra display columns copied from raw summary keys (raw key -> display column).
FINETUNED_ALIASES = {
    "abs_loc_t_error": "Abs Trans Error (m)",
    "rel_pose_r_error": "Rel Rot Error (deg)",
}
BASELINE_ALIASES = {
    "rel_pose_r_error": "Rel Rot Error (deg)",
    "rel_pose_t_error": "Trans Error (m)",
}


def _pretty_dataset_name(raw_dataset_name):
    """Return the display name for a dataset directory (title-cased fallback)."""
    return dataset_map.get(
        raw_dataset_name, raw_dataset_name.replace("_", " ").title()
    )


def _model_display_name(raw_model_name):
    """Return the display name for a model directory.

    The first model_name_map key contained in the directory name wins; any
    other name containing "spirals" is shown as "Finetuned (<raw name>)";
    everything else keeps its raw name.
    """
    for key, val in model_name_map.items():
        if key in raw_model_name:
            return val
    if "spirals" in raw_model_name.lower():
        return f"Finetuned ({raw_model_name})"
    return raw_model_name


def _parse_step(step_file):
    """Return the integer N from a ``step_<N>[_...]`` file stem, or None."""
    parts = step_file.stem.split("_")
    if len(parts) < 2 or parts[0] != "step":
        return None
    try:
        return int(parts[1])
    except ValueError:
        return None


def _tag(target, model_type, model, method, dataset_name, step):
    """Attach the identifying columns to a summary row or per-pair frame."""
    target["Model Type"] = model_type
    target["Model"] = model
    target["Method"] = method
    target["Dataset"] = dataset_name
    target["Step"] = step


def _ingest_step_file(
    step_file, model_type, dataset_name, alias_columns, model_name=None
):
    """Load one step pickle into all_data / all_per_pair_list.

    Args:
        step_file: Path of the pickle to read.
        model_type: Value stored in the "Model Type" column.
        dataset_name: Display name stored in the "Dataset" column.
        alias_columns: Mapping of summary key -> extra display column that
            receives a copy of that value when the key is present.
        model_name: Value for the "Model" column; when None, the method name
            found in the results dict is used instead.

    Files whose name does not encode an integer step are ignored. Any error
    while reading or unpacking a file is reported and that file is skipped,
    so a single bad file cannot abort the scan (rows appended before the
    error are kept).
    """
    step = _parse_step(step_file)
    if step is None:
        return

    try:
        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file; only load result files from a trusted source.
        with open(step_file, "rb") as f:
            data = pickle.load(f)

        for method, res in data.get("results", {}).items():
            model = method if model_name is None else model_name

            # Summary
            if "summary" in res:
                row = res["summary"].copy()
                _tag(row, model_type, model, method, dataset_name, step)
                for source_key, display_col in alias_columns.items():
                    if source_key in row:
                        row[display_col] = row[source_key]
                all_data.append(row)

            # Per Pair
            if "per_pair" not in res:
                continue
            pp_data = res["per_pair"]
            if not (
                "num_matches" in pp_data
                and isinstance(pp_data["num_matches"], list)
            ):
                continue

            # Keep only list-valued entries aligned with num_matches so they
            # can form the columns of a single frame.
            target_len = len(pp_data["num_matches"])
            filtered_pp = {
                k: v
                for k, v in pp_data.items()
                if isinstance(v, list) and len(v) == target_len
            }
            try:
                pp_df = pd.DataFrame(filtered_pp)
                _tag(pp_df, model_type, model, method, dataset_name, step)
                if "Frame" not in pp_df.columns:
                    pp_df["Frame"] = pp_df.index * step
                all_per_pair_list.append(pp_df)
            except Exception as exc:
                print(
                    f"  Skipping per-pair data for {method} in {step_file}: "
                    f"{type(exc).__name__}: {exc}"
                )
    except Exception as exc:
        print(f"  Skipping {step_file}: {type(exc).__name__}: {exc}")


# --- 1. Load Trained Models ---
# Layout: <models_dir>/<model>/<dataset>/step_<N>.pkl
print(f"Scanning models in {models_dir}...")
if models_dir.is_dir():
    # Sorted iteration keeps row order (and thus plot order) reproducible.
    for model_path in sorted(models_dir.iterdir()):
        if not model_path.is_dir():
            continue
        raw_model_name = model_path.name

        # Filtering Models
        if any(skip in raw_model_name for skip in skip_patterns):
            continue

        model_display_name = _model_display_name(raw_model_name)

        for dataset_path in sorted(model_path.iterdir()):
            if not dataset_path.is_dir():
                continue
            raw_dataset_name = dataset_path.name

            # Filter Datasets: ONLY short_*, exclude rover_0/Spirals
            if "short" not in raw_dataset_name:
                continue

            dataset_name = _pretty_dataset_name(raw_dataset_name)
            for step_file in sorted(dataset_path.glob("step_*.pkl")):
                _ingest_step_file(
                    step_file,
                    model_type="Finetuned",
                    dataset_name=dataset_name,
                    alias_columns=FINETUNED_ALIASES,
                    model_name=model_display_name,
                )
else:
    print(f"  WARNING: {models_dir} is not a directory; no finetuned results loaded.")

# --- 2. Load Legacy Results (Baselines) ---
# Layout: <legacy_dir>/<dataset>/<agent>/step_<N>.pkl
print(f"Scanning legacy results in {legacy_dir}...")
if legacy_dir.is_dir():
    for dataset_path in sorted(legacy_dir.iterdir()):
        if not dataset_path.is_dir():
            continue
        raw_dataset_name = dataset_path.name

        # Filter Datasets: ONLY short_*, exclude rover_0/Spirals
        if "short" not in raw_dataset_name:
            continue

        dataset_name = _pretty_dataset_name(raw_dataset_name)

        for agent_path in sorted(dataset_path.iterdir()):  # e.g. rover_0
            if not agent_path.is_dir():
                continue
            for step_file in sorted(agent_path.glob("*.pkl")):
                # model_name=None: the method name (e.g. SuperPoint+LightGlue)
                # is used directly as the Model label for baselines.
                _ingest_step_file(
                    step_file,
                    model_type="Baseline",
                    dataset_name=dataset_name,
                    alias_columns=BASELINE_ALIASES,
                )
else:
    print(f"  WARNING: {legacy_dir} is not a directory; no baseline results loaded.")

df_summary = pd.DataFrame(all_data)
if all_per_pair_list:
    df_per_pair = pd.concat(all_per_pair_list, ignore_index=True)
    # UNIFIED LABEL COLUMN
    # Just use 'Model' for the label. It now contains the pretty name or the method name.
    df_per_pair["Labels"] = df_per_pair["Model"]
else:
    df_per_pair = pd.DataFrame()

print(f"Loaded {len(df_summary)} total summary records.")

# Always define the completeness table so later cells can reference it even
# when nothing was loaded.
completeness = pd.DataFrame()
if not df_summary.empty:
    # Also for summary df
    df_summary["Labels"] = df_summary["Model"]

    # 2b. Completeness Table
    # Pivot showing which Steps exist for each Dataset (rows) x Label (columns)
    print("Generating Completeness Table...")
    completeness = (
        df_summary.groupby(["Dataset", "Labels"])["Step"]
        .apply(lambda steps: sorted(set(steps)))
        .unstack(fill_value="-")
    )

    display(completeness)

    display(df_summary.head())

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import torch


def plot_metric_per_step(
    df, metric, title_prefix, ylabel, kind="box", log_scale=False, share_y=True
):
    """Plot one metric as a row of per-step panels and print per-step mean tables.

    One subplot is drawn for every distinct value of ``df["Step"]``; within a
    panel the x axis is ``Dataset`` and the hue is ``Labels``. Category and
    hue orders are fixed across panels so colours stay consistent.

    Args:
        df: DataFrame with "Step", "Dataset", "Labels" and ``metric`` columns.
        metric: Column to plot. torch tensors are unwrapped with ``.item()``
            and values that cannot be parsed as numbers become NaN.
        title_prefix: Figure-level title.
        ylabel: Y-axis label.
        kind: "box" for box plots (outliers hidden) or "bar" for median bars.
        log_scale: Use a logarithmic y axis on panels that contain data.
        share_y: Share the y axis across panels.

    Returns:
        None. Returns without plotting when ``df`` is empty or lacks ``metric``.

    Raises:
        ValueError: If ``kind`` is neither "box" nor "bar".
    """
    if df.empty or metric not in df.columns:
        return

    unique_steps = sorted(df["Step"].unique())
    num_steps = len(unique_steps)

    if num_steps == 0:
        return

    if kind not in ("box", "bar"):
        raise ValueError(f"Unsupported plot kind: {kind!r} (expected 'box' or 'bar')")

    # 1. Establish global order for consistent colors across subplots
    global_hue_order = sorted(df["Labels"].dropna().unique())
    global_x_order = sorted(df["Dataset"].dropna().unique())

    # 2. squeeze=False always yields a 2-D array, so a single step needs no
    #    special casing.
    fig, axes = plt.subplots(
        1, num_steps, figsize=(num_steps * 6, 6), sharey=share_y, squeeze=False
    )
    axes = axes[0]

    # Legend entries are taken from the first panel that has any and drawn
    # once, after the loop.
    legend_handles, legend_labels = [], []

    for i, step in enumerate(unique_steps):
        ax = axes[i]
        subset = df[df["Step"] == step].copy()

        # Convert metric to numeric. Assigning the whole column (rather than
        # through .loc[:, metric]) lets it take the numeric dtype.
        subset[metric] = pd.to_numeric(
            subset[metric].apply(
                lambda x: x.item() if isinstance(x, torch.Tensor) else x
            ),
            errors="coerce",
        )

        # Drop NaNs for plotting
        subset_plot = subset.dropna(subset=[metric])

        if subset_plot.empty:
            ax.set_title(rf"$\Delta t={step}$ (No Valid Data)")
            ax.set_xticks([])

            # If not sharing Y, we still want the label on the empty plot to maintain layout
            if i == 0 or not share_y:
                ax.set_ylabel(ylabel)
            else:
                # Only hide the labels: set_yticks([]) on a shared axis would
                # remove the ticks from every panel.
                ax.tick_params(axis="y", labelleft=False)
            continue

        if kind == "box":
            sns.boxplot(
                data=subset_plot,
                x="Dataset",
                y=metric,
                hue="Labels",
                order=global_x_order,
                hue_order=global_hue_order,
                showfliers=False,
                ax=ax,
            )
        else:  # kind == "bar"
            sns.barplot(
                data=subset_plot,
                x="Dataset",
                y=metric,
                hue="Labels",
                order=global_x_order,
                hue_order=global_hue_order,
                estimator=np.median,
                errorbar=None,
                ax=ax,
            )

        ax.set_title(rf"$\Delta t={step}$")
        ax.set_xlabel("Dataset")

        # --- Logic for Y-Axis Labels based on share_y ---
        if share_y:
            if i == 0:
                ax.set_ylabel(ylabel)
            else:
                ax.set_ylabel("")
                ax.tick_params(
                    axis="y", labelleft=False
                )  # Hide tick labels on subsequent plots
        else:
            # If NOT sharing Y, every plot gets the label and keeps its ticks
            ax.set_ylabel(ylabel)

        if log_scale:
            ax.set_yscale("log")

        plt.setp(ax.get_xticklabels(), rotation=45, ha="right")

        # Legend handling: remember the entries, drop the per-panel legend
        handles, labels = ax.get_legend_handles_labels()
        if handles and not legend_handles:
            legend_handles, legend_labels = handles, labels
        if ax.get_legend() is not None:
            ax.get_legend().remove()

        # Table printing
        mean_table = subset.groupby(["Dataset", "Labels"])[metric].mean().unstack()
        print(f"\n{metric} for step {step}")
        print(mean_table.to_string(na_rep="-", float_format=lambda x: "%.2g" % x))

    # Single legend outside the right-most panel, so it is shown for any
    # number of steps (including one) and never covers a neighbouring panel.
    if legend_handles:
        axes[-1].legend(
            legend_handles,
            legend_labels,
            bbox_to_anchor=(1.05, 1),
            loc="upper left",
            borderaxespad=0.0,
        )

    fig.suptitle(title_prefix, fontsize=16, y=1.02)
    plt.tight_layout(rect=[0, 0, 1, 0.98])
    plt.show()
    plt.close(fig)

## 1. Keypoints & Matches

In [None]:
# Box plots of keypoint / match / covisibility figures, one figure per metric.
# Metrics absent from the loaded per-pair data are skipped.
if not df_per_pair.empty:
    count_metrics = (
        ("num_matches", "Number of Matches", "Count"),
        ("num_keypoints", "Number of Keypoints (Total)", "Count"),
        ("num_keypoints0", "Number of Keypoints (Img0)", "Count"),
        ("num_keypoints1", "Number of Keypoints (Img1)", "Count"),
        ("covisible", "Covisible Points", "Count"),
        ("covisible_percent", "Covisible Percent", "%"),
    )
    for column, heading, axis_label in count_metrics:
        if column not in df_per_pair.columns:
            continue
        plot_metric_per_step(df_per_pair, column, heading, axis_label, kind="box")

## 2. Match Precision & Recall

In [None]:
# Box plots of the match-quality ratios (epipolar / reprojection precision,
# ground-truth recall and precision, matching score).
if not df_per_pair.empty:
    quality_metrics = (
        ("epi_prec@1e-4", "Epipolar Precision @ 1e-4", "Precision"),
        ("epi_prec@5e-4", "Epipolar Precision @ 5e-4", "Precision"),
        ("epi_prec@1e-3", "Epipolar Precision @ 1e-3", "Precision"),
        ("reproj_prec@1px", "Reprojection Precision @ 1px", "Precision"),
        ("reproj_prec@3px", "Reprojection Precision @ 3px", "Precision"),
        ("reproj_prec@5px", "Reprojection Precision @ 5px", "Precision"),
        ("gt_match_recall@3px", "GT Match Recall @ 3px", "Recall"),
        ("gt_match_precision@3px", "GT Match Precision @ 3px", "Precision"),
        ("mean_matching_score", "Mean Matching Score", "Score"),
    )
    for column, heading, axis_label in quality_metrics:
        if column not in df_per_pair.columns:
            continue
        plot_metric_per_step(df_per_pair, column, heading, axis_label, kind="box")

## 3. RANSAC Statistics

In [None]:
# Box plots of the RANSAC inlier count and inlier percentage per step.
if not df_per_pair.empty:
    ransac_metrics = (
        ("ransac_inl", "RANSAC Inliers", "Count"),
        ("ransac_inl%", "RANSAC Inlier %", "Percent"),
    )
    for column, heading, axis_label in ransac_metrics:
        if column not in df_per_pair.columns:
            continue
        plot_metric_per_step(df_per_pair, column, heading, axis_label, kind="box")

## 4. Relative Pose Errors

In [None]:
# Median bars (kind="bar") of the relative pose errors: the aggregate errors
# first, then the per-axis breakdown, in that order.
if not df_per_pair.empty:
    rel_main_metrics = [
        ("rel_pose_error", "Relative Pose Error (Max)", "Error"),
        ("rel_pose_t_error", "Rel Translation Error", "Meters"),
        ("rel_pose_r_error", "Rel Rotation Error", "Deg"),
    ]
    rel_axis_metrics = [
        ("rel_pose_t_error_x", "Rel Trans Error X", "Meters"),
        ("rel_pose_t_error_y", "Rel Trans Error Y", "Meters"),
        ("rel_pose_t_error_z", "Rel Trans Error Z", "Meters"),
        ("rel_pose_r_error_roll", "Rel Rot Error Roll", "Deg"),
        ("rel_pose_r_error_pitch", "Rel Rot Error Pitch", "Deg"),
        ("rel_pose_r_error_yaw", "Rel Rot Error Yaw", "Deg"),
    ]
    for column, heading, axis_label in rel_main_metrics + rel_axis_metrics:
        if column not in df_per_pair.columns:
            continue
        plot_metric_per_step(df_per_pair, column, heading, axis_label, kind="bar")

## 5. Absolute Localization Errors

In [None]:
# Median bars (kind="bar") of the absolute localization errors: the aggregate
# translation / rotation errors first, then the per-axis breakdown.
if not df_per_pair.empty:
    abs_main_metrics = [
        ("abs_loc_t_error", "Abs Translation Error", "Meters"),
        ("abs_loc_r_error", "Abs Rotation Error", "Deg"),
    ]
    abs_axis_metrics = [
        ("abs_loc_t_error_x", "Abs Trans Error X", "Meters"),
        ("abs_loc_t_error_y", "Abs Trans Error Y", "Meters"),
        ("abs_loc_t_error_z", "Abs Trans Error Z", "Meters"),
        ("abs_loc_r_error_roll", "Abs Rot Error Roll", "Deg"),
        ("abs_loc_r_error_pitch", "Abs Rot Error Pitch", "Deg"),
        ("abs_loc_r_error_yaw", "Abs Rot Error Yaw", "Deg"),
    ]
    for column, heading, axis_label in abs_main_metrics + abs_axis_metrics:
        if column not in df_per_pair.columns:
            continue
        plot_metric_per_step(df_per_pair, column, heading, axis_label, kind="bar")

## 6. Localization Accuracy

In [None]:
# Localization accuracy: one figure per (threshold, step). Bar heights are the
# MEAN of the per-pair accuracy column (the title/ylabel treat it as a 0-1
# success flag), which is why this cell does not reuse plot_metric_per_step's
# median bars.
if not df_per_pair.empty:
    # Fix the category orders once so each dataset keeps its x position and
    # each label keeps its colour in every figure.
    dataset_order = sorted(df_per_pair["Dataset"].dropna().unique())
    label_order = sorted(df_per_pair["Labels"].dropna().unique())
    unique_steps = sorted(df_per_pair["Step"].unique())

    for col, title, ylabel in [
        ("abs_loc_acc@0.25m_2deg", "Loc Accuracy @ 0.25m 2deg", "Success Rate (0-1)"),
        ("abs_loc_acc@0.5m_5deg", "Loc Accuracy @ 0.5m 5deg", "Success Rate (0-1)"),
        ("abs_loc_acc@1.0m_10deg", "Loc Accuracy @ 1.0m 10deg", "Success Rate (0-1)"),
    ]:
        if col not in df_per_pair.columns:
            continue

        for step in unique_steps:
            # Rows without a value for this metric cannot contribute to the mean
            subset = df_per_pair[df_per_pair["Step"] == step].dropna(subset=[col])
            if subset.empty:
                continue

            fig, ax = plt.subplots(figsize=(10, 6))
            sns.barplot(
                data=subset,
                x="Dataset",
                y=col,
                hue="Labels",
                order=dataset_order,
                hue_order=label_order,
                estimator=np.mean,
                errorbar=None,
                ax=ax,
            )
            ax.set_title(f"{title} (Step {step})")
            ax.set_ylabel(ylabel)
            ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
            plt.setp(ax.get_xticklabels(), rotation=45, ha="right")
            fig.tight_layout()
            plt.show()
            plt.close(fig)

## 7. Runtime Performance

In [None]:
# Median bars (kind="bar") of the extraction, matching and total times.
if not df_per_pair.empty:
    timing_metrics = (
        ("extraction_time", "Extraction Time", "Seconds"),
        ("matching_time", "Matching Time", "Seconds"),
        ("total_time", "Total Time", "Seconds"),
    )
    for column, heading, axis_label in timing_metrics:
        if column not in df_per_pair.columns:
            continue
        plot_metric_per_step(df_per_pair, column, heading, axis_label, kind="bar")

## 8. Summary Heatmaps (Detailed Absolute Metrics)

In [None]:
# (summary column, heatmap title, value kind) for every heatmap to draw.
summary_metrics = [
    # Main Accuracies
    ("mabs_loc_acc@0.25m_2deg", "Loc Acc @ 0.25m 2deg", "float"),
    ("mabs_loc_acc@0.5m_5deg", "Loc Acc @ 0.5m 5deg", "float"),
    ("mabs_loc_acc@1.0m_10deg", "Loc Acc @ 1.0m 10deg", "float"),
    # Absolute Errors
    ("mabs_loc_t_error", "Mean Abs Trans Error (m)", "float"),
    ("mabs_loc_r_error", "Mean Abs Rot Error (deg)", "float"),
    # Specific Axis Errors (if you want deep dive)
    ("mabs_loc_t_error_z", "Mean Abs Z Error (m)", "float"),
    # Matching
    ("mean_matching_score", "Mean Matching Score", "float"),
]


def _plot_summary_heatmap(step_rows, metric, title, step, fmt, cmap):
    """Draw one Dataset x Model heatmap of ``metric`` for a single step.

    Values are unwrapped with ``.item()`` when they expose it (the same
    treatment plot_metric_per_step gives torch tensors) and coerced to
    numbers. Rows sharing a (Dataset, Model) pair are averaged; a plain
    ``pivot`` would raise ValueError on such duplicates. Nothing is drawn
    when no numeric value is available.
    """
    values = pd.to_numeric(
        step_rows[metric].apply(lambda v: v.item() if hasattr(v, "item") else v),
        errors="coerce",
    )
    pivot_df = (
        values.groupby([step_rows["Dataset"], step_rows["Model"]]).mean().unstack()
    )
    if pivot_df.empty or pivot_df.isna().all().all():
        return

    fig, ax = plt.subplots(figsize=(10, len(pivot_df) * 0.8 + 2))
    sns.heatmap(pivot_df, annot=True, fmt=fmt, cmap=cmap, linewidths=0.5, ax=ax)
    ax.set_title(f"{title} (Step {step})")
    fig.tight_layout()
    plt.show()
    plt.close(fig)


if not df_summary.empty:
    # Split the metrics once. Accuracy/score heatmaps (higher is better) use
    # viridis with 2 decimals; error heatmaps (lower is better) use magma
    # with 3 decimals so the two families are visually distinct.
    acc_metrics = [m for m in summary_metrics if "acc" in m[0] or "score" in m[0]]
    err_metrics = [m for m in summary_metrics if "error" in m[0]]
    heatmap_groups = (
        (acc_metrics, ".2f", "viridis"),
        (err_metrics, ".3f", "magma"),
    )

    unique_steps = sorted(df_summary["Step"].unique())
    for step in unique_steps:
        subset_step = df_summary[df_summary["Step"] == step]

        # Per step: all accuracy heatmaps first, then all error heatmaps
        for group_metrics, fmt, cmap in heatmap_groups:
            for metric, title, _value_kind in group_metrics:
                if metric not in subset_step.columns:
                    continue
                _plot_summary_heatmap(subset_step, metric, title, step, fmt, cmap)

## 9. Focused Comparisons

### 9a. Baseline Performance across Datasets

In [None]:
if not df_summary.empty:
    # Keep only the summary rows tagged with Model Type "Baseline"
    baselines = df_summary.loc[df_summary["Model Type"] == "Baseline"]
    acc_metric = "mabs_loc_acc@0.25m_2deg"
    if not baselines.empty and acc_metric in baselines.columns:
        plot_metric_per_step(
            df=baselines,
            metric=acc_metric,
            title_prefix=f"Baseline Accuracy ({acc_metric})",
            ylabel="Accuracy",
            kind="bar",
        )

### 9b. Finetuned Performance across Datasets

In [None]:
if not df_summary.empty:
    # Keep only the summary rows tagged with Model Type "Finetuned"
    finetuned = df_summary.loc[df_summary["Model Type"] == "Finetuned"]
    acc_metric = "mabs_loc_acc@0.25m_2deg"
    if not finetuned.empty and acc_metric in finetuned.columns:
        plot_metric_per_step(
            df=finetuned,
            metric=acc_metric,
            title_prefix=f"Finetuned Accuracy ({acc_metric})",
            ylabel="Accuracy",
            kind="bar",
        )

## 10. Performance Over Time

In [None]:
# Relative rotation error against frame index: one figure per (dataset, step),
# one line per label, line style by model type, log-scaled y axis.
# The column guard mirrors the other per-pair cells: without it seaborn raises
# when the metric was not present in the loaded results.
if not df_per_pair.empty and "rel_pose_r_error" in df_per_pair.columns:
    # Sorted so the figures come out in a stable order
    datasets = sorted(df_per_pair["Dataset"].unique())
    steps = sorted(df_per_pair["Step"].unique())

    for dataset in datasets:
        for step in steps:
            subset = df_per_pair[
                (df_per_pair["Dataset"] == dataset) & (df_per_pair["Step"] == step)
            ]
            if subset.empty:
                continue

            fig, ax = plt.subplots(figsize=(14, 6))
            sns.lineplot(
                data=subset,
                x="Frame",
                y="rel_pose_r_error",
                hue="Labels",
                style="Model Type",
                alpha=0.8,
                ax=ax,
            )
            # "Relative" distinguishes this figure from the absolute rotation
            # error trajectories plotted further down.
            ax.set_title(
                f"Relative Rotation Error Trajectory - {dataset} (Step {step})"
            )
            ax.set_ylabel("Relative Rotation Error (deg)")
            ax.set_yscale("log")
            ax.grid(True, alpha=0.3)
            ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left")
            fig.tight_layout()
            plt.show()
            plt.close(fig)

In [None]:
# Inspect which per-pair columns were loaded (handy for checking which metric
# names are available to the plotting cells).
df_per_pair.columns

In [None]:
def _plot_abs_trajectory(subset, metric, title, ylabel):
    """Draw one log-scaled line plot of ``metric`` against ``Frame``.

    One line per ``Labels`` value, with the line style chosen by ``Model Type``.
    """
    fig, ax = plt.subplots(figsize=(14, 6))
    sns.lineplot(
        data=subset,
        x="Frame",
        y=metric,
        hue="Labels",
        style="Model Type",
        alpha=0.8,
        ax=ax,
    )
    ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.set_yscale("log")
    ax.grid(True, alpha=0.3)
    ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left")
    fig.tight_layout()
    plt.show()
    plt.close(fig)


# Absolute localization errors against frame index: for every (dataset, step)
# a rotation figure followed by a translation figure.
if not df_per_pair.empty:
    # (column, title stem, y-axis label); "Absolute" keeps these figures
    # distinguishable from the relative-pose trajectory plots.
    abs_trajectories = [
        (
            "abs_loc_r_error",
            "Absolute Rotation Error Trajectory",
            "Absolute Rotation Error (deg)",
        ),
        (
            "abs_loc_t_error",
            "Absolute Translation Error Trajectory",
            "Absolute Translation Error (m)",
        ),
    ]
    # Sorted so the figures come out in a stable order
    datasets = sorted(df_per_pair["Dataset"].unique())
    steps = sorted(df_per_pair["Step"].unique())

    for dataset in datasets:
        for step in steps:
            subset = df_per_pair[
                (df_per_pair["Dataset"] == dataset) & (df_per_pair["Step"] == step)
            ]
            if subset.empty:
                continue

            for metric, title_stem, ylabel in abs_trajectories:
                # Skip metrics missing from the loaded results instead of
                # letting seaborn raise on an unknown column.
                if metric not in subset.columns:
                    continue
                _plot_abs_trajectory(
                    subset,
                    metric,
                    f"{title_stem} - {dataset} (Step {step})",
                    ylabel,
                )

## 11. Per-Dataset Detailed Comparisons

In [None]:
if not df_summary.empty:
    metrics_to_show = ["mabs_loc_acc@0.25m_2deg", "mrel_pose_r_error"]
    datasets = sorted(df_summary["Dataset"].unique())

    for dataset in datasets:
        subset = df_summary.loc[df_summary["Dataset"] == dataset]
        if subset.empty:
            continue

        # One figure per dataset with a side-by-side panel for each metric;
        # a panel stays blank when its metric column is missing.
        fig, axes = plt.subplots(1, 2, figsize=(15, 6))
        fig.suptitle(f"Performance on {dataset} Dataset")

        for panel, metric in zip(axes, metrics_to_show):
            if metric not in subset.columns:
                continue
            sns.barplot(
                data=subset,
                x="Step",
                y=metric,
                hue="Labels",
                errorbar=None,
                ax=panel,
            )
            panel.set_title(metric)
            panel.legend(bbox_to_anchor=(1.05, 1), loc="upper left")

        plt.tight_layout()
        plt.show()

## 12. Speed vs Accuracy Tradeoff

In [None]:
# Scatter of mean matching time against localization accuracy, one marker per
# summary row (colour = label, marker shape = dataset). Drawn only when both
# columns exist in the summary table.
required_cols = ("mabs_loc_acc@0.25m_2deg", "mmatching_time")
if not df_summary.empty and all(c in df_summary.columns for c in required_cols):
    fig, ax = plt.subplots(figsize=(10, 8))
    sns.scatterplot(
        data=df_summary,
        x="mmatching_time",
        y="mabs_loc_acc@0.25m_2deg",
        hue="Labels",
        style="Dataset",
        s=150,
        alpha=0.8,
        ax=ax,
    )
    ax.set_title("Speed vs Accuracy (Matching Time vs Loc Acc @ 0.25m)")
    ax.set_xlabel("Mean Matching Time (s)")
    ax.set_ylabel("Localization Accuracy (%)")
    ax.legend(bbox_to_anchor=(1.05, 1), loc="upper left")
    ax.grid(True)
    plt.tight_layout()
    plt.show()

## 13. Evaluation Completeness (Recap)

In [None]:
print("Completeness Table (Recap):")
# The completeness table is only built when summary records were loaded, so
# check for that first instead of failing on an undefined name.
if df_summary.empty:
    print("No summary records were loaded; there is no completeness table to show.")
else:
    display(completeness)