# Graph RAG
使用知识图谱来增强检索

In [5]:
import os
from dotenv import load_dotenv

# Load environment variables from the .env file
load_dotenv(override=True)  # override=True makes sure the latest .env values are loaded

True

In [6]:
from langchain_openai import ChatOpenAI

# Chat model whose name, endpoint and API key are all read from environment
# variables; temperature is fixed at 0.
llm = ChatOpenAI(
    temperature=0,
    api_key=os.environ.get("OPENAPI_API_KEY"),
    base_url=os.environ.get("OPENAPI_API_BASE"),
    model=os.environ.get("OPENAPI_MODEL"),
)

In [7]:
from langchain_openai import OpenAIEmbeddings

# Embedding model served through an OpenAI-compatible endpoint.
embeddings_model = OpenAIEmbeddings(
    model="BAAI/bge-m3",
    # https://api.siliconflow.cn/v1/embeddings
    base_url=os.environ.get("SILICONFLOW_API_BASE"),
    api_key=os.environ.get("SILICONFLOW_API_KEY"),
    # By default OpenAIEmbeddings tokenizes the input with tiktoken and sends
    # token ids instead of text. bge-m3 is not an OpenAI model, so send the raw
    # strings instead.
    check_embedding_ctx_length=False,
)


In [4]:
# 数据准备
! pip install graph_rag_example_helpers
! pip install "langchain-graph-retriever[chroma]"

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting langchain-graph-retriever[chroma]
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/5c/b9/347f2bb6d50f7733c7b78b84ce872c3cc4b64bbfa479bc7f367a80b48957/langchain_graph_retriever-0.8.0-py3-none-any.whl (33 kB)
Collecting networkx>=3.4.2 (from langchain-graph-retriever[chroma])
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/eb/8d/776adee7bbf76365fdd7f2552710282c79a4ead5d2a46408c9043a2b70ba/networkx-3.5-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m944.6 kB/s[0m  [33m0:00:02[0m eta [36m0:00:01[0m
Installing collected packages: networkx, langchain-graph-retriever
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2/2[0m [langchain-graph-retriever]
[1A[2KSuccessfully installed langchain-graph-retriever-0.8.0 networkx-3.5


In [None]:
# Download the sample documents
# NOTE(review): this call needs network access (the saved output below is a
# ConnectionError); every later cell depends on `animals` being defined.
from graph_rag_example_helpers.datasets.animals import fetch_documents

animals = fetch_documents()

ConnectionError: ('Connection aborted.', ConnectionResetError(104, 'Connection reset by peer'))

简单处理一下数据：删除元数据中值为字典（dict）的字段

In [None]:
# Remove every metadata entry whose value is a dict, in place, for each document.
# NOTE(review): presumably needed because the vector store cannot hold nested
# metadata values - confirm.
for doc in animals:
    # Collect the keys first: deleting while iterating over .items() would raise.
    nested_keys = [
        key for key, value in doc.metadata.items() if isinstance(value, dict)
    ]
    for key in nested_keys:
        del doc.metadata[key]

print(animals)

NameError: name 'animals' is not defined

创建一个向量数据库测试

In [None]:
from langchain_chroma.vectorstores import Chroma
from langchain_graph_retriever.transformers import ShreddingTransformer

# Pass the documents through ShreddingTransformer, then index the result in a
# Chroma collection named "animals" using the embeddings model.
shredded_animals = list(ShreddingTransformer().transform_documents(animals))
vector_store = Chroma.from_documents(
    collection_name="animals",
    embedding=embeddings_model,
    documents=shredded_animals,
)

创建一个基于知识图谱的检索器：检索器先从与查询最匹配的单个动物开始，然后遍历到具有相同栖息地（habitat）或来源（origin）的动物。

In [None]:
from graph_retriever.strategies import Eager
from langchain_graph_retriever import GraphRetriever

# Graph retriever on top of the vector store; edges are declared between
# documents' "habitat" fields and between their "origin" fields.
traversal_retriever = GraphRetriever(
    store=vector_store,
    edges=[("habitat", "habitat"), ("origin", "origin")],
    strategy=Eager(k=5, start_k=1, max_depth=2),  # traversal strategy: return 5 results, maximum depth of 2 levels
)

# Run a query and print each retrieved document
results = traversal_retriever.invoke("what animals could be found near a capybara?")
for doc in results:
    print(f"{doc.id}: {doc.page_content}")

整合到链中

In [None]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

# Prompt that restricts the answer to the supplied context and asks for the
# final answer in Chinese. It expects the variables "context" and "question".
prompt = ChatPromptTemplate.from_template(
    """
    Answer the question based only on the context provided.
    
    Context:{context}
    
    Question: {question}
    The final answer should be in Chinese.
    """
)


def format_docs(docs):
    """Render documents as a single context string.

    Each document is written as ``text: <page_content> metadata: <metadata>``;
    consecutive entries are separated by a blank line. An empty input yields
    an empty string.
    """
    rendered = []
    for doc in docs:
        rendered.append(f"text: {doc.page_content} metadata: {doc.metadata}")
    return "\n\n".join(rendered)


# Pipeline: the input is passed through unchanged as "question" and is also
# given to the retriever, whose documents format_docs renders into "context";
# the mapping then flows through the prompt, the LLM and StrOutputParser.
chain = (
    {"context": traversal_retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# Run the full chain on a sample question
chain.invoke("what animals could be found near a capybara?")