In [4]:
from dotenv import load_dotenv, find_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings

# Locate the nearest .env file by searching the directory tree, then load the
# variables it defines into the process environment.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Embedding model and chat model shared by every chain built below.
embeddings = OpenAIEmbeddings()
llm = ChatOpenAI(temperature=0.3, model_name="gpt-3.5-turbo")

In [2]:
"""
在文档中聊天，带有聊天记录
本教程演示了如何使用ConversationalRetrievalChain设置聊天过程中带有聊天历史的链。与RetrievalQAChain唯一的区别是，这个链允许传入聊天历史，以便进行后续提问。
"""
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain


In [3]:
"""
加载文档。您可以将其替换为您想要的任何类型的数据加载程序
"""
from langchain.document_loaders import TextLoader

# Load the speech transcript as LangChain documents. The encoding is pinned to
# UTF-8 because the text contains non-ASCII punctuation (curly apostrophes, em
# dashes) and the platform default codec is not UTF-8 everywhere.
loader = TextLoader("files/state_of_the_union.txt", encoding="utf-8")
documents = loader.load()


In [None]:
"""
如果您有多个加载程序需要组合，可以执行以下操作：
"""
# loaders = [....]
# docs = []
# for loader in loaders:
#     docs.extend(loader.load())


In [5]:
"""
现在我们将文档拆分、创建嵌入并将它们放入向量存储中。这使我们可以在它们之间进行语义搜索。
"""
# Splitter configured with chunk_size=1000 and no overlap between chunks.
text_splitter = CharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
# NOTE: `documents` is rebound here from the loaded documents to their chunks.
documents = text_splitter.split_documents(documents)

# Embed every chunk and index it in a Chroma vector store for semantic search.
vectorstore = Chroma.from_documents(documents=documents, embedding=embeddings)


In [6]:
"""
现在我们可以创建一个内存对象，用于跟踪输入/输出并进行对话。
"""
from langchain.memory import ConversationBufferMemory

# Conversation memory that keeps the running dialogue under the
# "chat_history" key; return_messages=True requests message objects.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)


In [7]:
"""
现在我们初始化ConversationalRetrievalChain
"""
# Build the conversational retrieval chain. The memory object supplies the
# chat history, so the calls below pass only the question.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    memory=memory,
)


In [8]:
# First question; no chat history is passed because the chain has a memory object.
query = "What did the president say about Ketanji Brown Jackson"
result = qa.invoke({"question": query})


In [9]:
# Display the "answer" entry of the latest chain result.
result["answer"]


"The President said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson to serve on the United States Supreme Court. He described her as one of the nation's top legal minds and mentioned that she has received broad support from various groups, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans."

In [None]:
""" output
"The President said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson to serve on the United States Supreme Court. He described her as one of the nation's top legal minds and mentioned that she has received broad support from various groups, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans."
"""

In [10]:
# Follow-up question; it relies on the earlier turn held in the chain's memory.
query = "Did he mention who she suceeded"
result = qa.invoke({"question": query})


In [11]:
# Display the "answer" entry of the follow-up result.
result["answer"]


"The user's question is unclear. If they are referring to President Biden's mention of a successor to Justice Breyer on the Supreme Court, he did not mention a specific individual as a successor in the provided context. However, he did mention his recent nomination of Judge Ketanji Brown Jackson to the Circuit Court of Appeals, highlighting her qualifications and broad support."

In [None]:
""" output
"The user's question is unclear. If they are referring to President Biden's mention of a successor to Justice Breyer on the Supreme Court, he did not mention a specific individual as a successor in the provided context. However, he did mention his recent nomination of Judge Ketanji Brown Jackson to the Circuit Court of Appeals, highlighting her qualifications and broad support."
"""

In [None]:
"""
传入聊天历史

在上面的示例中，我们使用Memory对象来跟踪聊天历史。我们也可以直接传递它。为此，我们需要初始化一个没有任何内存对象的链。

"""
# Rebuild the chain without a memory object so that chat history can be
# passed explicitly on each call.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
)


In [12]:
"""
这里是一个没有聊天记录的提问示例
"""
# Empty history: this is the first turn of the conversation.
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = qa.invoke(
    {
        "question": query,
        "chat_history": chat_history,
    }
)


In [13]:
# Display the "answer" entry of the latest chain result.
result["answer"]


'The president mentioned Circuit Court of Appeals Judge Ketanji Brown Jackson as a successor to Justice Breyer on the Supreme Court.'

In [16]:
"""
这里是一个有一些聊天记录的提问示例

"""
# Carry the previous (question, answer) turn forward as history. This must be
# built before `query` is overwritten with the follow-up question.
previous_turn = (query, result["answer"])
chat_history = [previous_turn]
query = "Did he mention who she suceeded"
result = qa.invoke({"question": query, "chat_history": chat_history})


In [17]:
# Display the "answer" entry of the follow-up result.
result['answer']


'The president mentioned Circuit Court of Appeals Judge Ketanji Brown Jackson as the successor to Justice Breyer on the Supreme Court.'

In [None]:
""" output
'The president mentioned Circuit Court of Appeals Judge Ketanji Brown Jackson as the successor to Justice Breyer on the Supreme Court.'
"""

In [18]:
""" 
返回源文件

您还可以轻松地从ConversationalRetrievalChain返回源文档。这对于您想要检查哪些文档被返回时非常有用。

"""
# return_source_documents=True asks the chain to include the retrieved
# documents in its result (read later via result['source_documents']).
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)


In [20]:
# Ask the first question again with an empty history on the source-returning chain.
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = qa.invoke(
    {
        "question": query,
        "chat_history": chat_history,
    }
)


In [21]:
# Display the first entry of the "source_documents" list in the chain result.
result['source_documents'][0]


Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': 'files/state_of_the_union.txt'})

In [None]:
""" output
Document(page_content='Tonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.', metadata={'source': 'files/state_of_the_union.txt'})
"""

In [22]:
"""
带有search_distance的ConversationalRetrievalChain

如果您正在使用支持按搜索距离过滤的向量存储，则可以添加阈值参数。


"""
# Threshold intended for vector stores that support filtering by search distance.
# NOTE(review): this chain is built from a retriever; confirm that
# ConversationalRetrievalChain actually reads the "vectordbkwargs" input. If it
# does not, the threshold has no effect and belongs on the retriever instead.
vectordbkwargs = {"search_distance": 0.9}

qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = qa.invoke(
    {
        "question": query,
        "chat_history": chat_history,
        "vectordbkwargs": vectordbkwargs,
    }
)


In [23]:
# Display the "answer" entry of the latest chain result.
result['answer']

"The President said that he nominated Ketanji Brown Jackson for the United States Supreme Court. He described her as one of the nation's top legal minds and mentioned her background as a former top litigator in private practice and a former federal public defender. He also highlighted her support from various groups, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans."

In [None]:
""" output
"The President said that he nominated Ketanji Brown Jackson for the United States Supreme Court. He described her as one of the nation's top legal minds and mentioned her background as a former top litigator in private practice and a former federal public defender. He also highlighted her support from various groups, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans."
"""

In [24]:
"""
带有map_reduce的ConversationalRetrievalChain

我们还可以使用不同类型的组合文档链与ConversationalRetrievalChain链。

"""
from langchain.chains import LLMChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT


In [25]:
# Combine-documents chain that answers over the retrieved documents using the
# "map_reduce" chain type.
doc_chain = load_qa_chain(llm, chain_type="map_reduce")
# LLM chain driven by CONDENSE_QUESTION_PROMPT; presumably it rewrites the
# follow-up question into a standalone one (TODO confirm against the prompt).
question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=llm)

# Assemble the conversational chain from its parts instead of using from_llm.
chain = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=vectorstore.as_retriever(),
)


In [26]:
# First-turn question (empty history) against the map_reduce chain.
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = chain.invoke({"question": query, "chat_history": chat_history})


In [27]:
# Display the "answer" entry of the latest chain result.
result['answer']


'The president did not mention Ketanji Brown Jackson in the provided text.'

In [None]:
""" output
'The president did not mention Ketanji Brown Jackson in the provided text.'
"""

In [28]:
"""
带有源问题的ConversationalRetrievalChain问答

你也可以将这个链与带有来源的问答链一起使用。

"""
from langchain.chains.qa_with_sources import load_qa_with_sources_chain


In [29]:
# Combine-documents chain of the question-answering-with-sources kind, using
# the "map_reduce" chain type.
doc_chain = load_qa_with_sources_chain(llm, chain_type="map_reduce")
# LLM chain driven by CONDENSE_QUESTION_PROMPT; presumably it rewrites the
# follow-up question into a standalone one (TODO confirm against the prompt).
question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=llm)

# Assemble the conversational chain from its parts instead of using from_llm.
chain = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=vectorstore.as_retriever(),
)


In [30]:
# First-turn question (empty history) against the with-sources chain.
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = chain.invoke({"question": query, "chat_history": chat_history})


In [31]:
# Display the "answer" entry of the latest chain result.
result['answer']


"The president mentioned Ketanji Brown Jackson and nominated her as a Circuit Court of Appeals Judge. He described her as one of our nation's top legal minds who will continue Justice Breyer's legacy of excellence.\nSOURCES: files/state_of_the_union.txt"

In [None]:
""" output
"The president mentioned Ketanji Brown Jackson and nominated her as a Circuit Court of Appeals Judge. He described her as one of our nation's top legal minds who will continue Justice Breyer's legacy of excellence.\nSOURCES: files/state_of_the_union.txt"
"""

In [32]:
"""
ConversationalRetrievalChain 与流式输出到 stdout

在这个例子中，链的输出会被逐个 token 地流式输出到 stdout。

"""
from langchain.chains.llm import LLMChain
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
from langchain.chains.question_answering import load_qa_chain

# Two models are involved: a streaming model, whose callback handler writes
# output to stdout, for combining documents, and the existing non-streaming
# `llm` for question generation.
stdout_handler = StreamingStdOutCallbackHandler()
streaming_llm = ChatOpenAI(temperature=0, streaming=True, callbacks=[stdout_handler])

doc_chain = load_qa_chain(streaming_llm, chain_type="stuff", prompt=QA_PROMPT)
question_generator = LLMChain(prompt=CONDENSE_QUESTION_PROMPT, llm=llm)

qa = ConversationalRetrievalChain(
    combine_docs_chain=doc_chain,
    question_generator=question_generator,
    retriever=vectorstore.as_retriever(),
)


In [34]:
# First-turn question (empty history); the answer is also streamed to stdout.
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = qa.invoke({"question": query, "chat_history": chat_history})


The president said that Ketanji Brown Jackson is a former top litigator in private practice, a former federal public defender, and comes from a family of public school educators and police officers. They also mentioned that she is a consensus builder and has received support from various groups, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans.

In [None]:
""" output
The president said that Ketanji Brown Jackson is a former top litigator in private practice, a former federal public defender, and comes from a family of public school educators and police officers. They also mentioned that she is a consensus builder and has received support from various groups, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans.
"""

In [35]:
# Carry the previous (question, answer) turn forward as history. This must be
# built before `query` is overwritten with the follow-up question.
previous_turn = (query, result["answer"])
chat_history = [previous_turn]
query = "Did he mention who she suceeded"
result = qa.invoke({"question": query, "chat_history": chat_history})


The president did not mention her successor in the given context.

In [None]:
""" output
The president did not mention her successor in the given context.
"""

In [36]:
"""
get_chat_history 函数
你也可以指定一个 get_chat_history 函数，用于格式化聊天历史字符串。

"""


def get_chat_history(inputs) -> str:
    """Format chat history as plain text, one "Human:"/"AI:" pair per turn.

    Args:
        inputs: Iterable of (human_message, ai_message) pairs.

    Returns:
        The formatted turns joined by newlines; an empty string when there
        are no turns.
    """
    return "\n".join(
        f"Human:{question}\nAI:{answer}" for question, answer in inputs
    )


# Build the chain with the custom get_chat_history function defined above, so
# the chain uses it to format the chat history.
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vectorstore.as_retriever(),
    get_chat_history=get_chat_history,
)


In [37]:
# First-turn question (empty history) against the chain with the custom formatter.
chat_history = []
query = "What did the president say about Ketanji Brown Jackson"
result = qa.invoke({"question": query, "chat_history": chat_history})
 

In [38]:
# Display the "answer" entry of the latest chain result.
result['answer']


"The president said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson to serve on the United States Supreme Court. He described her as one of the nation's top legal minds and mentioned that she has received broad support from various groups, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans."

In [None]:
""" output
"The president said that he nominated Circuit Court of Appeals Judge Ketanji Brown Jackson to serve on the United States Supreme Court. He described her as one of the nation's top legal minds and mentioned that she has received broad support from various groups, including the Fraternal Order of Police and former judges appointed by Democrats and Republicans."
"""