In [1]:
from datasets import load_dataset, Dataset
from ragas import evaluate
from ragas.metrics import context_recall, context_precision, answer_correctness
from src.retriever import get_relevant_chunks
import os

  _embeddings = HuggingFaceEmbeddings(model_name=settings.EMBEDDING_MODEL)


In [2]:
def _has_span_answer(example):
    """Return True when the example's answer is not a bare "yes" or "no"."""
    normalized = example["answer"].strip().lower()
    return normalized not in ["yes", "no"]


# First 700 validation examples of the HotpotQA "distractor" configuration,
# with the yes/no-answer questions filtered out.
hotpot = load_dataset(
    "hotpot_qa", "distractor", split="validation[:700]"
).filter(_has_span_answer)

In [3]:
# Number of examples currently held in `hotpot`.
len(hotpot)

658

In [4]:
# Column views used by the rest of the notebook:
#   questions      - the question text
#   ground_truths  - the reference (correct) answers
#   contexts_block - per-example dict with "title" and "sentences" entries
questions, ground_truths, contexts_block = (
    hotpot[column] for column in ("question", "answer", "context")
)

In [6]:
# Join each (title, sentences) entry into one paragraph and write the
# de-duplicated paragraphs to documents/hotpot/*.txt.
out_dir = "documents/hotpot"
os.makedirs(out_dir, exist_ok=True)

# Flatten every example's context into (title, paragraph_str) pairs.
paras = [
    (title, " ".join(sents).strip())
    for block in contexts_block
    for title, sents in zip(block["title"], block["sentences"])
]

# De-duplicate on paragraph text; the first title seen for a paragraph wins.
seen = set()
uniq = []
for entry in paras:
    text = entry[1]
    if text in seen:
        continue
    seen.add(text)
    uniq.append(entry)

for i, (title, para) in enumerate(uniq):
    # File-name stem: first 30 characters of the title, with every
    # non-alphanumeric character replaced by "_". The zero-padded index
    # prefix keeps names unique even when stems collide.
    safe = "".join(ch if ch.isalnum() else "_" for ch in title[:30])
    fname = "{:04d}_{}.txt".format(i, safe)
    with open(f"{out_dir}/{fname}", "w", encoding="utf-8") as f:
        f.write(para)

print(f"Wrote {len(uniq)} paragraphs to {out_dir}")

Wrote 6480 paragraphs to documents/hotpot


In [7]:
# Hand-rolled top-k retrieval check: a question counts as a hit when its
# ground-truth answer string appears verbatim inside any retrieved chunk.
TOP_K = 7  # value of k passed to get_relevant_chunks

contexts_list = []
manual_hits = []

for idx, (question, gold) in enumerate(zip(questions, ground_truths)):
    retrieved = get_relevant_chunks(question, k=TOP_K)
    contexts_list.append(retrieved)

    # Stop at the first chunk that contains the ground-truth string.
    found = False
    for chunk in retrieved:
        if gold in chunk:
            found = True
            break

    manual_hits.append(found)
    print(f"[{idx:02d}] hit={found}  retrieved={len(retrieved)}")

# Fraction of questions with at least one hit.
manual_recall = sum(manual_hits) / len(manual_hits)
print(f"\nManual Recall: {manual_recall:.3f}")

[Retriever] Intersection (交集) count: 8
[00] hit=False  retrieved=7
[Retriever] Intersection (交集) count: 32
[01] hit=True  retrieved=7
[Retriever] Intersection (交集) count: 10
[02] hit=False  retrieved=7
[Retriever] Intersection (交集) count: 40
[03] hit=True  retrieved=7
[Retriever] Intersection (交集) count: 11
[04] hit=True  retrieved=7
[Retriever] Intersection (交集) count: 22
[05] hit=True  retrieved=7
[Retriever] Intersection (交集) count: 16
[06] hit=True  retrieved=7
[Retriever] Intersection (交集) count: 34
[07] hit=True  retrieved=7
[Retriever] Intersection (交集) count: 19
[08] hit=True  retrieved=7
[Retriever] Intersection (交集) count: 23
[09] hit=True  retrieved=7
[Retriever] Intersection (交集) count: 29
[10] hit=True  retrieved=7
[Retriever] Intersection (交集) count: 18
[11] hit=True  retrieved=7
[Retriever] Intersection (交集) count: 36
[12] hit=False  retrieved=7
[Retriever] Intersection (交集) count: 32
[13] hit=False  retrieved=7
[Retriever] Intersection (交集) count: 23
[14] hit=True  retr

In [9]:
# Placeholder answers: each one is the question's retrieved chunks joined by
# single spaces. Replace with real LLM-generated answers when available.
answers = list(map(" ".join, contexts_list))

In [None]:
from ragas.llms import LangchainLLMWrapper
from langchain_openai import ChatOpenAI
from config import settings
# Assemble the RAGAS evaluation dataset and score the retrieved contexts.
# No "answer" column is passed: only question, contexts and ground_truth
# are included in the dataset.
data = dict(
    question=questions,
    contexts=contexts_list,
    ground_truth=ground_truths,
)
data_lengths = {column: len(values) for column, values in data.items()}
print("Data lengths:", data_lengths)

eval_ds = Dataset.from_dict(data)
evaluator_llm = LangchainLLMWrapper(ChatOpenAI(model=settings.CHAT_MODEL))
scores = evaluate(
    dataset=eval_ds,
    metrics=[context_recall, context_precision],
    llm=evaluator_llm,
)
df = scores.to_pandas()

# Per-sample results.
print("\n=== RAGAS 评估结果 ===")
print(df)

# Column-wise mean of the two metrics.
metric_columns = ["context_recall", "context_precision"]
avg = df[metric_columns].mean()
print("\n=== 平均指标 ===")
print(avg.to_string())

Data lengths: {'question': 658, 'contexts': 658, 'ground_truth': 658}


NameError: name 'ChatOpenAI' is not defined

In [10]:
# Inspect the n-th sample: question, ground truth, retrieved contexts and answer.
n = 0  # index of the sample to display
question_n, truth_n, chunks_n = questions[n], ground_truths[n], contexts_list[n]

print(f"--- Sample {n} ---")
print("Q :", question_n)
print("GT:", truth_n)
print("Ctxs:")
for rank, chunk in enumerate(chunks_n):
    # Show at most the first 1000 characters of each retrieved chunk.
    preview = chunk[:1000]
    print(f"  ({rank})", preview, "…")
print("\nAns:", answers[n])

--- Sample 0 ---
Q : What government position was held by the woman who portrayed Corliss Archer in the film Kiss and Tell?
GT: Chief of Protocol
Ctxs:
  (0) Kiss and Tell is a 1945 American comedy film starring then 17-year-old Shirley Temple as Corliss Archer.  In the film, two teenage girls cause their respective parents much concern when they start to …
  (1) role as well as her final film appearance.  It is a sequel to the 1945 film "Kiss and Tell".  "A Kiss for Corliss" was retitled "Almost a Bride" before release and this title appears in the title …
  (2) A Kiss for Corliss is a 1949 American comedy film directed by Richard Wallace and written by Howard Dimsdale.  It stars Shirley Temple in her final starring role as well as her final film appearance. …
  (3) Nancy in "Shazzan", Penelope Pitstop, and Josie in "Josie and the Pussycats", and on radio as the title character in "Meet Corliss Archer". …
  (4) Meet Corliss Archer is an American television sitcom that aired on CBS (Ju

sample0是一个典型的 2-Hop 问答。
我们检索到了多条与 “Kiss and Tell / Meet Corliss Archer” 电影和角色有关的信息，
但缺少了 Shirley Temple 的政府职务。
要答对，必须先命中 Shirley Temple 的人物条目，再从条目里捞到她的政府履历。检索必须跨文档联结电影 → 人物 → 职位。

解决：
1.改写query，多路召回，比如LangChain MultiQueryRetriever
2.Agentic RAG，让 Agent 边检索边思考：Step-1 找演员 → Step-2 找职位 → 汇总
3.Graph，把文档主动转成“实体-关系图”再检索，适用于多跳。目前只有一跳，不一定要用，且查询成本很大。
