In [3]:
# 单元格 1：环境配置

In [29]:
# 环境配置：.env 中存放 GOOGLE_API_KEY / NEO4J_URI / NEO4J_USER / NEO4J_PASSWORD
from dotenv import load_dotenv
import os, warnings
warnings.filterwarnings("ignore")

load_dotenv()
API_KEY       = os.getenv("GOOGLE_API_KEY")
NEO4J_URI     = os.getenv("NEO4J_URI")
NEO4J_USER    = os.getenv("NEO4J_USER")
NEO4J_PASSWORD= os.getenv("NEO4J_PASSWORD")
assert API_KEY and NEO4J_URI, "请检查 .env 配置！"

#import google.generativeai as genai
#genai.configure(api_key=API_KEY)

from google import genai
# Load your Gemini API key from the environment
client = genai.Client(api_key=API_KEY)

In [31]:
# 单元格 2：统一导入

In [33]:
import sys
sys.path.append("scripts")  # 加入 scripts 資料夾到 Python 路徑

# 从五个模块里导入所有函数/类
from data_extraction       import fetch_act_urls, fetch_page_text_with_headings
from knowledge_graph       import generate_triples_batched, visualize_graph, export_graphml, load_graphml, query_graph
from rag_retriever         import GeminiEmbeddings, build_vector_store, rag_similarity_search
from bm25_retriever        import split_text_into_chunks, build_bm25_retriever
from qa_pipeline           import (
    rewrite_query_for_search,
    classify_intent_llm,
    decompose_query,
    expand_query_with_llm,
    pick_diverse_queries,
    generate_answer_from_sections
)

# 其他全局依赖
import networkx as nx


In [11]:
#单元格 3：数据抓取 & 文档准备

In [15]:
# 1) 抓取所有 Archon Quest 页面 URL
urls = fetch_act_urls()
print(f"找到 {len(urls)} 个任务页面.")

# 2) 从每个页面提取带节标题的段落，组装成 Document 列表
docs = []
for u in urls:
    docs.extend(fetch_page_text_with_headings(u))
print(f"一共 {len(docs)} 个段落块.")


找到 42 个任务页面.
一共 330 个段落块.


In [16]:
#单元格 4：构建 & 可视化知识图谱

In [17]:
# 把所有文本拼一起，生成三元组
full_text = "\n\n".join(d.page_content for d in docs)
triples   = generate_triples_batched(full_text)

# NetworkX 可视化
visualize_graph(triples)

# 导出/加载示例
export_graphml(nx.DiGraph(), "genshin_graph.graphml")     # 或者直接用 generate_triples_batched 的结果构造 graph 再导出
G = load_graphml("genshin_graph.graphml")

# 查询示例
print(query_graph(G, "Paimon"))


Splitting into 34 chunk(s)...
Processing chunk 1/34
Error in chunk 1: name 'client' is not defined
Processing chunk 2/34
Error in chunk 2: name 'client' is not defined
Processing chunk 3/34
Error in chunk 3: name 'client' is not defined
Processing chunk 4/34
Error in chunk 4: name 'client' is not defined
Processing chunk 5/34
Error in chunk 5: name 'client' is not defined
Processing chunk 6/34
Error in chunk 6: name 'client' is not defined
Processing chunk 7/34
Error in chunk 7: name 'client' is not defined
Processing chunk 8/34
Error in chunk 8: name 'client' is not defined
Processing chunk 9/34
Error in chunk 9: name 'client' is not defined
Processing chunk 10/34
Error in chunk 10: name 'client' is not defined
Processing chunk 11/34
Error in chunk 11: name 'client' is not defined
Processing chunk 12/34
Error in chunk 12: name 'client' is not defined
Processing chunk 13/34
Error in chunk 13: name 'client' is not defined
Processing chunk 14/34
Error in chunk 14: name 'client' is not de

NameError: name 'clean_triples' is not defined

In [19]:
#单元格 5：构建 & 测试 RAG 向量库

In [35]:
# 1) 用五个模块输出的 docs 来 build vector store
vectorstore = build_vector_store(docs, persist_directory="./genshin_chroma")

# 2) 测试一次相似度检索
results = rag_similarity_search(vectorstore, "Paimon 和 Zhongli 的关系", k=3)
for doc in results:
    print(doc.metadata["source"], "→", doc.page_content[:200])


NameError: name 'client' is not defined

In [23]:
#单元格 6：构建 & 测试 BM25 检索器

In [25]:
# 把所有段落拆成更小的 chunk（可选）
chunks = split_text_into_chunks("\n\n".join(d.page_content for d in docs))
bm25  = build_bm25_retriever(chunks, k=3)

# 测试关键词检索
for doc in bm25.get_relevant_documents("Paimon 是谁"):
    print(doc.metadata["source"], "→", doc.page_content[:200])


chunk_264 → something had attracted the monsters. Paimon is worried, but Dainsleif states that the hut is the main investigation and thus the Traveler and Paimon set up a campfire for the night. Paimon asks about
chunk_241 → The Traveler regains consciousness atGandharva Ville, with Paimon waking them up.
chunk_273 → taking care ofLynette. He then introduces himself asLyneyand Paimon explains that Lynette was talking to them. This surprises Lyney, who explains that she usually keeps to herself. Paimon asks about L


  for doc in bm25.get_relevant_documents("Paimon 是谁"):


In [37]:
# 单元格 7：运行 Hybrid QA Pipeline

In [39]:
# 示例：回答用户问题
query     = "what's paimon's deal with zhongli?"
entity_id = "Paimon"

# 1. 重写、2. 意图分类、3. 拆解、4. 扩展、5. 选多样化
rq  = rewrite_query_for_search(query)
intent = classify_intent_llm(rq)
subs   = decompose_query(rq)
expanded = sum((expand_query_with_llm(s) for s in subs), [])
top_qs   = pick_diverse_queries(expanded, embedding_model=GeminiEmbeddings(), top_k=3)

# 6. 多源检索
kg_sects   = []
for q in top_qs:
    out = query_graph(G, entity_id)  # 或者直接用你的 GraphCypherQAChain
    kg_sects.append(str(out))
rag_sects  = "\n".join(d.page_content for q in expanded 
                      for d in rag_similarity_search(vectorstore, q, k=2))
bm25_sects = "\n".join(d.page_content for q in expanded 
                      for d in bm25.get_relevant_documents(q))

# 7. 最终答案
answer = generate_answer_from_sections(
    question=query,
    kg_section="\n".join(kg_sects),
    rag_section=rag_sects,
    bm25_section=bm25_sects
)
print(answer)


NameError: name 'client' is not defined

In [11]:
import sys
sys.path.append("src")  # 加入 scripts 資料夾到 Python 路徑

# ────────────────────────────────────────────────────────────
# 0. 環境設定
from dotenv import load_dotenv
import os, warnings
warnings.filterwarnings("ignore")
load_dotenv()

API_KEY      = os.getenv("GOOGLE_API_KEY")
NEO4J_URI    = os.getenv("NEO4J_URI")
NEO4J_USER   = os.getenv("NEO4J_USER")
NEO4J_PASS   = os.getenv("NEO4J_PASSWORD")
assert API_KEY and NEO4J_URI

# 1. 匯入模組
from data_extraction    import fetch_wiki_text, fetch_multiple_wiki_texts, fetch_act_urls
from triple_extraction  import generate_triples_batched, Triple
from kg_builder         import build_graph, export_graphml, triples_to_csv
from rag_builder        import build_rag_vectorstore
from bm25_builder       import build_bm25_retriever
from qa_pipeline        import init_gemini_llm, build_kg_qa_chain
from utils              import rewrite_query_for_search, decompose_query, expand_query_with_llm, pick_diverse_queries

import google.generativeai as genai
# 初始化 Gemini client
genai.configure(api_key=API_KEY)

# 2. 資料擷取
urls = fetch_act_urls()
raw_text = "\n\n".join(fetch_wiki_text(u) for u in urls)

# 3. 三元組擷取
from google import genai as client  # or client = genai.Client(api_key=API_KEY)
full_text = raw_text  # or fetch_multiple_wiki_texts(urls)
triples = generate_triples_batched(full_text, client, model="models/gemini-1.5-pro")

# 4. 建圖與匯出
G = build_graph(triples)
export_graphml(G, "genshin_story_kg.graphml")
triples_to_csv(triples, "all_nodes.csv", "all_edges.csv")

# 5. 建立 RAG 與 BM25
vectorstore = build_rag_vectorstore(urls, client, "./genshin_chroma")
bm25 = build_bm25_retriever(raw_text)

# 6. 問答 Pipeline
GeminiLLM = init_gemini_llm(API_KEY)
qa_chain = build_kg_qa_chain(NEO4J_URI, NEO4J_USER, NEO4J_PASS, GeminiLLM, top_k=5)

# 7. 執行範例查詢
query = "what's paimon's deal with zhongli?"
q0 = rewrite_query_for_search(query)
subs = decompose_query(q0)
exps = sum([expand_query_with_llm(s) for s in subs], [])
topq = pick_diverse_queries(exps, vectorstore.embedding, top_k=3)

# KG 回答
kg_answers = [ qa_chain.invoke({"query": q, "id":"Paimon"}) for q in topq ]

# RAG & BM25 檢索
rag_docs = sum([ vectorstore.similarity_search(q, k=3) for q in exps ], [])
bm25_docs = sum([ bm25.get_relevant_documents(q) for q in exps ], [])

# 8. 結果整合、生成最終回答…（可再呼叫 utils.generate_answer_from_sections）


AttributeError: module 'google.genai.models' has no attribute 'generate_content'