In [None]:
import json
import random
import time
from pathlib import Path

# 🔧 Configuration
# Free-text queries that are sent through the search pipeline, one call each.
QUERY_SET = [
    "space survival after disaster",
    "future war with robots",
    "romantic comedy set in New York",
    "a detective solving mysterious murders",
    "animated adventure through the jungle",
    "vampires hunting humans at night",
    "time travel gone wrong",
    "fighting for freedom in medieval times",
    "AI becomes sentient and dangerous",
    "epic historical battle scenes"
]

USER_IDS = list(range(1, 11))  # candidate user ids: 1..10
TOP_K = 20
RERANK_TOP_N = 10

# Seed for a local RNG so the query -> user assignment is the same on every
# run without touching the global `random` state used by other cells.
USER_SAMPLING_SEED = 42
user_rng = random.Random(USER_SAMPLING_SEED)

# 📁 Output log
# Raw string: "\q" is not a valid escape sequence and triggers a warning on
# recent Python versions; the resulting path value is unchanged.
# NOTE(review): pathlib does not expand "~", so this path is resolved relative
# to the current working directory. If the home directory was intended, use
# .expanduser() here AND in the cell that reads the log back.
log_path = Path(r"~\query_logs.jsonl")

# 🚀 Evaluation loop
# Mode "w" truncates the file, i.e. clears the logs of any previous run.
with log_path.open("w", encoding="utf-8") as log_file:
    for i, query in enumerate(QUERY_SET):
        user_id = user_rng.choice(USER_IDS)

        # perf_counter() is monotonic and high-resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        start = time.perf_counter()
        results = hybrid_reranked_search(query, user_id, top_k=TOP_K, rerank_top_n=RERANK_TOP_N)
        elapsed = time.perf_counter() - start

        # `results` is consumed as an iterable of (hit, score) pairs.
        top_titles = [hit.payload["title"] for hit, _ in results]
        # float() keeps the record JSON-serialisable if the scores turn out to
        # be numpy scalars (assumption - depends on the re-ranker; harmless for
        # plain Python floats).
        top_scores = [float(score) for _, score in results]

        record = {
            "query": query,
            "user_id": user_id,
            "latency_sec": round(elapsed, 3),
            "top_titles": top_titles,
            "rerank_scores": [round(s, 4) for s in top_scores],
            "timestamp": time.strftime("%Y-%m-%d %H:%M:%S")
        }

        # Print + append to log
        # A query may legitimately return no hits; do not crash on top_titles[0].
        top_title = top_titles[0] if top_titles else "<no results>"
        print(f"✅ [{i+1}/{len(QUERY_SET)}] \"{query}\" | Top: {top_title} | ⏱️ {elapsed:.2f}s")

        # One JSON object per line (JSON Lines). This must be a real newline:
        # the reader splits the file with splitlines().
        log_file.write(json.dumps(record) + "\n")
        # Flush per record so finished queries are on disk even if a later
        # search call fails or the kernel dies.
        log_file.flush()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import json
from pathlib import Path

# 📂 Load logs
# Raw string: "\q" is not a valid escape sequence and triggers a warning on
# recent Python versions; the resulting path value is unchanged.
# NOTE(review): pathlib does not expand "~"; this must stay in sync with the
# path used by the cell that writes the log.
log_path = Path(r"~\query_logs.jsonl")

# The log is JSON Lines: one JSON object per line. Blank lines (e.g. a trailing
# empty line) are skipped instead of being handed to json.loads.
records = [
    json.loads(line)
    for line in log_path.read_text(encoding="utf-8").splitlines()
    if line.strip()
]
if not records:
    # Fail with a clear message instead of a KeyError on the first column access.
    raise ValueError(f"No records found in {log_path}; run the evaluation loop first.")
df = pd.DataFrame(records)

# 🎯 1. Query Latency
fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(df["latency_sec"], bins=10, edgecolor="black")
ax.set_title("⏱️ Query Latency Distribution")
ax.set_xlabel("Latency (seconds)")
ax.set_ylabel("Frequency")
ax.grid(True)
plt.show()

# 🎯 2. Top-1 Titles Frequency
# Records whose query returned no hits have an empty title list; they map to
# None and are dropped by value_counts() instead of raising IndexError.
top1_counts = (
    df["top_titles"]
    .map(lambda titles: titles[0] if titles else None)
    .value_counts()
)

fig, ax = plt.subplots(figsize=(10, 4))
top1_counts.plot(kind="bar", color="skyblue", ax=ax)
ax.set_title("🏆 Top-1 Titles Returned")
ax.set_xlabel("Title")
ax.set_ylabel("Frequency")
# plt.xticks acts on the current axes, which is the `ax` created just above.
plt.xticks(rotation=45, ha="right")
fig.tight_layout()
plt.show()

# 🎯 3. Re-ranker Scores Distribution
# Flatten the per-query score lists into one list covering all queries.
all_scores = [score for scores in df["rerank_scores"] for score in scores]

fig, ax = plt.subplots(figsize=(8, 4))
ax.hist(all_scores, bins=20, edgecolor="black", color="orange")
ax.set_title("📈 Re-ranker Score Distribution")
ax.set_xlabel("Score")
ax.set_ylabel("Frequency")
ax.grid(True)
plt.show()
