# Create a Question Answering Chat with a PDF

This is a quick template for creating a question answering chat with ChatGPT and 🦜🔗 LangChain using a PDF.

We load two example papers (PDFs) and create an index over them using OpenAI text embeddings. Then, we can chat about the contents of these documents.

In [None]:
%setup langchain openai chromadb pypdf

In [None]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chat_models import ChatOpenAI
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader
from langchain.memory import ConversationBufferMemory
import urllib.request

# Source PDFs, keyed by the local filename each one is saved under.
# Keeping URL and filename side by side means the download step and the
# loading step below cannot drift apart.
pdf_sources = {
    # "Attention Is All You Need"
    "attention.pdf": "https://arxiv.org/pdf/1706.03762",
    # "Language Models are Few-Shot Learners"
    "gpt3.pdf": "https://arxiv.org/pdf/2005.14165v4",
}

# Download every paper into the working directory (requires network access).
for filename, url in pdf_sources.items():
    urllib.request.urlretrieve(url, filename)

pdfs = list(pdf_sources)

# Load each PDF and pool everything the loaders return into a single list.
documents = []
for pdf in pdfs:
    loader = PyPDFLoader(pdf)
    docs = loader.load()
    documents.extend(docs)

# CharacterTextSplitter splits on blank lines ("\n\n") by default. Text
# extracted from PDFs is mostly separated by single newlines, so with the
# default separator a piece can be far longer than chunk_size and is then
# emitted as one oversized chunk. Splitting on "\n" lets chunk_size take effect.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=0,
)
documents = text_splitter.split_documents(documents)

# Embed the chunks and index them in a Chroma vector store for retrieval.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)

In [None]:
# Conversation memory: stored under "chat_history", recording the value of
# the "question" input on each turn.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    input_key="question",
)

# Chat model with temperature 0.
# To use GPT-4 instead:
# llm = ChatOpenAI(temperature=0, model_name="gpt-4")
llm = ChatOpenAI(temperature=0)


def _passthrough_chat_history(inputs):
    """Return the chat history exactly as the memory supplied it."""
    return inputs


# Retrieval-backed conversational chain over the indexed PDF chunks.
retriever = vectorstore.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
    get_chat_history=_passthrough_chat_history,
)