# Load index, imports, embedding functions

In [17]:
# evaluation.ipynb (demo code)

import sys
sys.path.append('../')  # Adjust to your project root so that the src package can be imported

from src.embedding import embed_text
from src.search import search_faiss, search_keyword, hybrid_search
import numpy as np

# Test queries and their corresponding "relevant document IDs"
# (adjust to match your actual data).
# Maps each query string to the list of ids counted as relevant for it.
# NOTE(review): ids are presumably positions/ids in the indexed corpus — confirm.
test_queries = {
    "machine learning": [1, 5],
    "natural language processing": [2, 7],
    "deep neural networks": [0, 3],
    "information retrieval": [4, 6],
    "transformer models": [8],
    "convolutional networks": [1, 9],
    "unsupervised learning": [2, 5],
    "reinforcement learning": [0, 7],
    "transfer learning": [3, 8],
    "language models": [4, 9],
}

def recall_at_k(results, relevant_ids, k=3):
    """Return 1 if any of the first *k* results is relevant, otherwise 0.

    This is a binary hit indicator per query (not the fraction of relevant
    documents retrieved); averaging it over queries gives a hit rate.

    Args:
        results: Ranked search results. Each entry is either a document id
            or a list/tuple whose first element is taken as the document id.
        relevant_ids: Container of ids considered relevant for the query.
        k: Number of top-ranked results to inspect.

    Returns:
        The integer 1 when at least one inspected id is in ``relevant_ids``,
        else 0.
    """
    top_ids = (
        entry[0] if isinstance(entry, (list, tuple)) else entry
        for entry in results[:k]
    )
    # any() stops at the first match, just like an explicit break would.
    return int(any(doc_id in relevant_ids for doc_id in top_ids))

# Per-method score lists; each query contributes one recall_at_k value
# to every list.
recalls = {'vector': [], 'keyword': [], 'hybrid': []}

for query, relevant in test_queries.items():
    emb = embed_text(query)
    # Dict values are evaluated top to bottom, so the searches run in the
    # order vector -> keyword -> hybrid.
    ranked_by_method = {
        'vector': search_faiss(emb, top_k=3),
        'keyword': search_keyword(query, top_k=3),
        'hybrid': hybrid_search(emb, search_faiss, search_keyword, query, top_k=3),
    }
    for method, ranked in ranked_by_method.items():
        recalls[method].append(recall_at_k(ranked, relevant))

# Report the mean score over all test queries for each retrieval method.
print(f"Recall@3 Vector: {np.mean(recalls['vector']):.3f}")
print(f"Recall@3 Keyword: {np.mean(recalls['keyword']):.3f}")
print(f"Recall@3 Hybrid: {np.mean(recalls['hybrid']):.3f}")


Recall@3 Vector: 0.700
Recall@3 Keyword: 0.600
Recall@3 Hybrid: 0.300
