# Document Question Answering

An example of using Chroma DB and LangChain to do question answering over documents.

In [2]:
!pip install langchain
!pip install -U langchain-openai
!pip install chromadb
!pip install llama-index

Collecting langchain
  Downloading langchain-0.1.20-py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m17.1 MB/s[0m eta [36m0:00:00[0m
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain)
  Downloading dataclasses_json-0.6.6-py3-none-any.whl (28 kB)
Collecting langchain-community<0.1,>=0.0.38 (from langchain)
  Downloading langchain_community-0.0.38-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m50.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-core<0.2.0,>=0.1.52 (from langchain)
  Downloading langchain_core-0.1.52-py3-none-any.whl (302 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m302.9/302.9 kB[0m [31m27.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting langchain-text-splitters<0.1,>=0.0.1 (from langchain)
  Downloading langchain_text_splitters-0.0.1-py3-none-any.whl (21 kB)
Collecting langsmith<0.2.0,>=0.1.17 (from langc

In [3]:
import os

# Read credentials from the environment so real keys never have to be saved
# inside the notebook. The original placeholder string is kept as the fallback,
# so a kernel without these variables behaves exactly as before.
OPEN_AI_API_KEY = os.environ.get("OPENAI_API_KEY", "API_KEY_HERE")
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY", "API_KEY_HERE")

In [4]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import VectorDBQA
from langchain.document_loaders import TextLoader

## Load documents

Load the documents to run question answering over. To use your own documents, replace this section.

In [5]:
# Read 'input.txt' through TextLoader; `documents` holds whatever `load()`
# returns and is the input to the splitting step.
loader = TextLoader(file_path='input.txt')
documents = loader.load()

## Split documents

Split documents into small chunks. This is so we can find the most relevant chunks for a query and pass only those into the LLM.

In [6]:
# Chunking configuration: chunk_size=1000 with no overlap between chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=0,
    chunk_size=1000,
)
# `texts` is the list of chunks that gets embedded and stored afterwards.
texts = text_splitter.split_documents(documents)

## Initialize ChromaDB

Create an embedding for each chunk and insert them into the Chroma vector database.

In [9]:
import os

# Turn on LangSmith tracing for the LangChain calls made from this kernel.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

# LANGCHAIN_ENDPOINT must be the API base URL; the previous value carried a
# trailing "/info" route, which is not a base URL. `url` is still bound
# because the original cell also defined it.
url = "https://api.smith.langchain.com"
os.environ["LANGCHAIN_ENDPOINT"] = url

# The OpenAI client reads OPENAI_API_KEY (no underscore between OPEN and AI);
# the previous variable name was never consulted by anything.
os.environ["OPENAI_API_KEY"] = OPEN_AI_API_KEY
os.environ["LANGCHAIN_API_KEY"] = LANGCHAIN_API_KEY

# Embed every chunk and store the vectors in a Chroma collection.
embeddings = OpenAIEmbeddings(api_key=OPEN_AI_API_KEY)
vectordb = Chroma.from_documents(documents=texts, embedding=embeddings)

  warn_deprecated(


## Create the chain

Initialize the chain we will use for question answering.

In [12]:
# Build the completion model first, then wire it to the vector store.
# chain_type="stuff" is passed through unchanged to the chain factory.
llm = OpenAI(api_key=OPEN_AI_API_KEY)
qa = VectorDBQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    vectorstore=vectordb,
)



## Ask questions!

Now we can use the chain to ask questions!

In [13]:
# Run the chain on a single question; the bare name on the last line is what
# the notebook displays as the cell result.
query = "What did the president say about Ketanji Brown Jackson"
answer = qa.run(query)
answer

  warn_deprecated(


' The president mentioned that he nominated Ketanji Brown Jackson for the United States Supreme Court and praised her qualifications and support from both Democrats and Republicans.'

In [14]:
# Same chain, broader question. Adjacent string literals are concatenated by
# Python, so the query text is unchanged; it is only wrapped for readability.
query = (
    "How are democracies responding to the current global challenges, "
    "and what does this suggest about the prevailing global sentiment "
    "towards peace and security?"
)
qa.run(query)

' Democracies are rising to the occasion and showing unity in the face of global challenges, such as the conflict in Ukraine. This suggests that there is a prevailing sentiment towards peace and security, with people around the world, even in Russia, coming together to support Ukraine and reject aggression from autocratic leaders like Putin. This also highlights the importance of alliances, like NATO, in promoting and defending peace and stability in the world.'