In [None]:
import os
import openai
from dotenv import load_dotenv, find_dotenv
# Read the local .env file so its entries are available through os.environ.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Both variables are required; a missing one raises KeyError right here.
openai.api_key = os.environ["OPENAI_API_KEY"]
file = os.environ["FILE_PATH"]  # path prefix used later to locate sample.txt

In [None]:
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

In [None]:
# Load the sample text. os.path.join works whether or not FILE_PATH ends with
# a path separator, unlike plain string concatenation.
loader = TextLoader(os.path.join(file, "sample.txt"))
documents = loader.load()

# Split the loaded documents into chunks (chunk_size=1000, no overlap).
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_documents(documents)

# Embed the chunks and index them in a Chroma vector store.
embeddings = OpenAIEmbeddings()
# The store must be named `docsearch`: every later cell refers to it by that
# name, so the previous misspelling made a fresh Run All fail with NameError.
docsearch = Chroma.from_documents(texts, embeddings)

# Question-answering chain backed by the vector store retriever.
qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever())

In [None]:
# Question to ask; the last expression displays the chain's answer as the cell output.
query = "What did the president say about Ketanji Brown Jackson"
qa.run(query)

### 换用map_reduce 类型的chain:

In [None]:
query = "What did the president say about Ketanji Brown Jackson"

# Rebuild the chain with chain_type="map_reduce"; the LLM and retriever are
# constructed the same way as for the "stuff" chain.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="map_reduce",
    retriever=docsearch.as_retriever(),
)
qa.run(query)

### 也可以使用下面这种更模块化的方法：

In [None]:
from langchain.chains.question_answering import load_qa_chain
# Build the document-combining chain explicitly (temperature=0 on the LLM),
# then hand it to RetrievalQA together with the vector store retriever.
qa_chain = load_qa_chain(OpenAI(temperature=0), chain_type="stuff")
qa = RetrievalQA(
    retriever=docsearch.as_retriever(),
    combine_documents_chain=qa_chain,
)

In [None]:
# Ask the same question through the explicitly assembled chain; the answer is the cell output.
query = "What did the president say about Ketanji Brown Jackson"
qa.run(query)

### 还可以将chain中默认的prompt做修改：

In [None]:
from langchain.prompts import PromptTemplate
# Custom prompt text: {context} and {question} are the two placeholders that
# get filled in, and the final line asks for the answer in Italian.
prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.

{context}

Question: {question}
Answer in Italian:"""

# Wrap the text in a PromptTemplate that declares both placeholders.
PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

### 用chain_type_kwargs来替换原来的prompt:

In [None]:
# Pass the custom prompt to the chain through chain_type_kwargs.
chain_type_kwargs = {"prompt": PROMPT}
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    chain_type_kwargs=chain_type_kwargs,
)

In [None]:
# Ask the same question with the custom prompt in place; the answer is the cell output.
query = "What did the president say about Ketanji Brown Jackson"
qa.run(query)

### Return Source Documents: 通过在构造链时指定一个可选参数，我们可以返回用于回答问题的源文档。

In [None]:
# Same "stuff" chain as before, now built with return_source_documents=True.
qa = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
    return_source_documents=True,
)

In [None]:
# Call the chain directly with a dict input (instead of .run) and keep the
# returned mapping in `result` for inspection in the next cell.
query = "What did the president say about Ketanji Brown Jackson"
result = qa({"query": query})

In [None]:
# Display the value stored under the "result" key of the chain output.
result["result"]

#### 或者，如果我们的文档有一个“source”元数据键，我们可以使用 `RetrievalQAWithSourcesChain` 来引用我们的源:

In [None]:
# `texts` holds Document objects (it is the output of split_documents), while
# Chroma.from_texts takes plain strings, so pass each chunk's page_content.
chunk_texts = [doc.page_content for doc in texts]

# Rebuild the vector store with one "source" metadata entry per chunk
# ("0-pl", "1-pl", ...). This rebinds `docsearch` for the cells that follow.
docsearch = Chroma.from_texts(
    chunk_texts,
    embeddings,
    metadatas=[{"source": f"{i}-pl"} for i in range(len(chunk_texts))],
)

In [None]:
from langchain.chains import RetrievalQAWithSourcesChain
from langchain import OpenAI

# Sources-aware QA chain over the vector store retriever, using an LLM with temperature=0.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    OpenAI(temperature=0),
    chain_type="stuff",
    retriever=docsearch.as_retriever(),
)

In [None]:
# This chain takes its input under the "question" key; return_only_outputs=True
# is passed so the displayed mapping contains only the chain's outputs.
chain(
    {"question": "What did the president say about Justice Breyer"},
    return_only_outputs=True,
)