# Indexing Method Comparison

This notebook loads all method result files from `results/`, builds a ranking table, and shows category-level summaries.


In [None]:
from pathlib import Path
import json
import sys
import pandas as pd

# Use the current working directory as the repository root when it contains
# the `indexing_experiments` package; otherwise fall back to its parent.
working_dir = Path.cwd()
repo_root = (
    working_dir
    if (working_dir / "indexing_experiments").exists()
    else working_dir.parent
)

# Put the repository root at the front of the import path (once) so the
# package import below resolves.
root_entry = str(repo_root)
if root_entry not in sys.path:
    sys.path.insert(0, root_entry)

from indexing_experiments import METHOD_SPECS


In [None]:
# Build one table row per JSON result file found in results/.
results_dir = repo_root / "results"

# Declaring the columns up front keeps the table schema stable even when no
# result files exist; without it an empty DataFrame has no columns and the
# sort at the bottom of this cell raises KeyError.
score_columns = [
    "method_id",
    "method_name",
    "category",
    "precision_at_k",
    "recall_at_k",
    "mrr",
    "ndcg_at_k",
    "chunks",
    "result_file",
]

rows = []
for path in sorted(results_dir.glob("*.json")):
    payload = json.loads(path.read_text(encoding="utf-8"))
    # A missing or null "metrics" entry is treated as "no metrics recorded",
    # so every metric falls back to its 0.0 default.
    metrics = payload.get("metrics") or {}
    rows.append(
        {
            "method_id": payload.get("method_id"),
            "method_name": payload.get("method_name"),
            "category": payload.get("category"),
            "precision_at_k": metrics.get("precision_at_k", 0.0),
            "recall_at_k": metrics.get("recall_at_k", 0.0),
            "mrr": metrics.get("mrr", 0.0),
            "ndcg_at_k": metrics.get("ndcg_at_k", 0.0),
            "chunks": payload.get("chunks", 0),
            "result_file": path.name,
        }
    )

score_df = pd.DataFrame(rows, columns=score_columns)

# Ranking table shown as the cell output: highest MRR first, ties broken by
# nDCG. The sorted copy is only displayed; `score_df` itself keeps the
# file-name order in which the rows were loaded.
score_df.sort_values(["mrr", "ndcg_at_k"], ascending=False).reset_index(drop=True)


In [None]:
# Average each retrieval metric per method category and show the categories
# ordered by mean MRR, best first. Skipped with a hint when no results loaded.
if len(score_df) > 0:
    metric_columns = ["precision_at_k", "recall_at_k", "mrr", "ndcg_at_k"]
    per_category = score_df.groupby("category", as_index=False)[metric_columns]
    category_summary = per_category.mean().sort_values("mrr", ascending=False)
    display(category_summary)
else:
    print("No results found in results/. Run method notebooks first.")


In [None]:
# Compare the method ids declared in METHOD_SPECS with the ids present in the
# loaded results, and list the declared ids that have no result yet.
expected = set()
for spec in METHOD_SPECS:
    expected.add(spec.method_id)

if len(score_df):
    available = set(score_df["method_id"].tolist())
else:
    # No rows loaded, so no method id is available.
    available = set()

missing = sorted(expected.difference(available))

print(f"Expected methods: {len(expected)}")
print(f"Available result files: {len(available)}")
print("Missing methods:")
# Left as the final expression so the notebook renders the list as cell output.
missing
