# In [None]:
import pandas as pd
from pathlib import Path
from collections import defaultdict
from tqdm import tqdm

from retrieval.search import search

# =========================================================
# CONFIG
# =========================================================
# Directory two levels up from this file's resolved location (parent of the
# folder that contains this script); all data paths hang off it.
BASE_DIR = Path(__file__).resolve().parents[1]

# Labelled workbook and the worksheet inside it that is read as training data.
TRAIN_SHEET = "train-set"
EXCEL_FILE = BASE_DIR.joinpath(
    "data", "train_test_data", "Gen_AI Dataset (1).xlsx"
)

# Destination of the per-query evaluation table.
OUTPUT_CSV = BASE_DIR.joinpath("data", "retrieval_eval.csv")

# Cut-offs k at which the recall@k columns are computed.
K_VALUES = [10, 20, 30, 50]


# =========================================================
# LOAD TRAIN DATA
# =========================================================
# Load the labelled sheet and build `queries`: a list of
# (query text, set of relevant assessment URLs) pairs, one per unique query.
print("🔹 Loading training dataset...")

df = pd.read_excel(EXCEL_FILE, sheet_name=TRAIN_SHEET)

# Expecting columns: query, assessment_url
# Normalise headers so the lookups below ignore case and stray whitespace;
# str() guards against non-string headers (e.g. numeric column names).
df.columns = [str(c).strip().lower() for c in df.columns]

# Validate with a real exception: `assert` is stripped under `python -O`.
missing_columns = [
    c for c in ("query", "assessment_url") if c not in df.columns
]
if missing_columns:
    raise ValueError(
        f"Sheet {TRAIN_SHEET!r} of {EXCEL_FILE} is missing required "
        f"column(s) {missing_columns}; found {list(df.columns)}"
    )

# Blank cells are read as NaN (a float), which has no .strip(); skip rows
# lacking either value instead of crashing halfway through the sheet.
labelled = df.dropna(subset=["query", "assessment_url"])
skipped_rows = len(df) - len(labelled)
if skipped_rows:
    print(f"🔹 Skipped {skipped_rows} row(s) with a missing query or assessment_url")

# Group URLs per query (whitespace-trimmed on both sides).
query_to_urls = defaultdict(set)
for query_text, url in zip(labelled["query"], labelled["assessment_url"]):
    query_to_urls[str(query_text).strip()].add(str(url).strip())

queries = list(query_to_urls.items())

print(f"🔹 Unique queries: {len(queries)}")


# =========================================================
# EVALUATION
# =========================================================
# One dict per query: the query text, how many relevant URLs it has, and a
# 0/1 "recall@k" flag for every cut-off in K_VALUES.
results = []

print("🔹 Running Phase-2 recall evaluation...")

for query, true_urls in tqdm(queries):
    retrieved = search(query)

    # NOTE(review): the labels are `assessment_url` values while the search
    # results expose `assessment_id` — confirm both use the same identifier
    # format, otherwise no hit can ever be recorded.
    # NOTE(review): slicing [:k] below presumes `search` returns results in
    # rank order and at least max(K_VALUES) of them — verify its defaults.
    retrieved_urls = [r["assessment_id"] for r in retrieved]

    row = {"query": query, "num_relevant": len(true_urls)}

    for k in K_VALUES:
        # Binary hit: 1 if at least one relevant URL appears in the top k.
        # `true_urls` is a set, so isdisjoint() does hashed lookups instead
        # of scanning the top-k list once per relevant URL.
        # NOTE(review): this is a hit indicator, not fractional recall
        # (|relevant ∩ top-k| / |relevant|) — confirm which is intended.
        row[f"recall@{k}"] = int(not true_urls.isdisjoint(retrieved_urls[:k]))

    results.append(row)


# =========================================================
# AGGREGATE METRICS
# =========================================================
# Per-query table, then the mean of each recall@k column across queries.
eval_df = pd.DataFrame(results)

# With no evaluated queries the frame has no columns at all; report NaN for
# each metric rather than failing with a KeyError.
summary = {}
for k in K_VALUES:
    metric = f"recall@{k}"
    summary[metric] = (
        eval_df[metric].mean() if metric in eval_df.columns else float("nan")
    )

print("\n🔹 Phase-2 Recall Summary")
for metric, value in summary.items():
    print(f"{metric}: {value:.3f}")


# =========================================================
# SAVE RESULTS
# =========================================================
# Write the per-query table to disk. Create the destination folder first so
# to_csv cannot fail on a missing directory.
OUTPUT_CSV.parent.mkdir(parents=True, exist_ok=True)
eval_df.to_csv(OUTPUT_CSV, index=False)
print(f"\n✅ Saved evaluation results to: {OUTPUT_CSV}")
