# LangChain: Q&A over Documents

An example might be a tool that would allow you to query a product catalog for items of interest.

In [None]:
#pip install --upgrade langchain

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
# Locate the .env file with find_dotenv() and load it; the return value is
# bound to `_` so the cell does not echo it as output.
_ = load_dotenv(find_dotenv()) # read local .env file

Note: LLMs do not always produce the same results. When executing the code in your notebook, you may get slightly different answers than those in the video.

In [2]:
# Choose the chat model by date, to account for deprecation of the older LLM snapshot.
import datetime

# Cut-over date: after this day the model should be set to "gpt-3.5-turbo".
target_date = datetime.date(2024, 6, 12)
current_date = datetime.datetime.now().date()

# Strictly after the cut-over -> "gpt-3.5-turbo"; otherwise keep the pinned snapshot.
llm_model = "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"

In [12]:
from langchain.chains import RetrievalQA #For Retrieval over documents
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader #For loading data to combine with the model 
from langchain.vectorstores import DocArrayInMemorySearch #For storing the documents in memory
from IPython.display import display, Markdown
from langchain.llms import OpenAI
from langchain_openai import OpenAIEmbeddings


In [8]:
# Catalog CSV, given as a bare filename (resolved relative to the working directory).
file = 'OutdoorClothingCatalog_1000.csv'
# Loader bound to that CSV path; later cells pass it to the index creator
# and call loader.load() on it.
loader = CSVLoader(file_path=file)

In [9]:
from langchain.indexes import VectorstoreIndexCreator

In [10]:
#pip install docarray

In [13]:
# Embedding model used to vectorize the catalog documents.
embeddings = OpenAIEmbeddings()

# Build a queryable index from the CSV loader, backed by the
# DocArrayInMemorySearch vector store.
index = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
).from_loaders([loader])

  warn_deprecated(


In [None]:
# Natural-language request for the index; it asks for the answer as a markdown table.
query = (
    "Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one."
)

**Note**:
- The notebook uses `langchain==0.0.179` and `openai==0.27.7`
- For these library versions, `VectorstoreIndexCreator` uses `text-davinci-003` as the base model, which has been deprecated since 1 January 2024.
- The replacement model, `gpt-3.5-turbo-instruct`, will be used instead for the `query`.
- The `response` format might differ from the one in the video because of this replacement model.

In [None]:
# Completion model passed explicitly to the index query, replacing the
# deprecated default base model.
llm_replacement_model = OpenAI(model='gpt-3.5-turbo-instruct', temperature=0)

# Answer the query against the index using the replacement model.
response = index.query(query, llm=llm_replacement_model)

In [None]:
# Render the response as Markdown (the query asked for a markdown table).
display(Markdown(response))

## Step By Step

In [None]:
# Re-import and re-create the CSV loader, presumably so the step-by-step
# section is readable on its own.
from langchain.document_loaders import CSVLoader
# `file` must already hold the CSV path assigned in an earlier cell.
loader = CSVLoader(file_path=file)

In [None]:
# Load the CSV through the loader; `docs` is indexed and passed to the vector
# store in later cells (presumably one document per CSV row - confirm against CSVLoader docs).
docs = loader.load()

In [None]:
# Show the first loaded document to inspect its structure.
docs[0]

In [None]:
# Import OpenAIEmbeddings from langchain_openai, matching the notebook's
# top-level import. Importing it from langchain.embeddings here would rebind
# the name to the deprecated class for every later cell.
from langchain_openai import OpenAIEmbeddings
# Embedding client used for the sample query below and for building the vector store.
embeddings = OpenAIEmbeddings()

In [None]:
# Embed a sample sentence so the resulting vector can be inspected.
embed = embeddings.embed_query("Hi my name is Harrison")

In [None]:
# Length of the embedding returned for the sample sentence.
print(len(embed))

In [None]:
# Peek at the first five entries of the embedding.
print(embed[:5])

In [14]:
# Create embeddings for all pieces of text in the documents and store them
# in an in-memory vector store.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

NameError: name 'docs' is not defined

In [None]:
# Short query used to try out the vector store's similarity search.
query = "Please suggest a shirt with sunblocking"

In [None]:
# Use the vector store to find documents similar to the query.
# NOTE: this rebinds `docs` from the loaded documents to the search results, so
# re-running the cell that builds `db` from `docs` afterwards would index only these hits.
docs = db.similarity_search(query)

In [None]:
# Number of documents returned by the similarity search.
len(docs)

In [None]:
# Show the first returned search result.
docs[0]

In [None]:
# Retriever interface over the vector store: takes a query and returns the
# most similar documents. It is handed to the RetrievalQA chain in a later cell.
retriever = db.as_retriever()

In [None]:
# Chat model for the remaining cells: temperature 0.0 for the least random
# sampling; the model name comes from the date check earlier in the notebook.
llm = ChatOpenAI(temperature = 0.0, model=llm_model)

In [None]:
# Combine the retrieved documents into a single piece of text
# (page contents are concatenated with no separator).
qdocs = "".join(doc.page_content for doc in docs)


In [None]:
# Ask the chat model directly, prefixing the question with the concatenated documents.
response = llm.call_as_llm(
    f"{qdocs} Question: Please list all your "
    "shirts with sun protection in a table in markdown and summarize each one."
)


In [None]:
# Render the model's answer as Markdown (the prompt asked for a markdown table).
display(Markdown(response))

In [15]:
## All the above steps can be combined into a single step as shown below
# RetrievalQA chain: performs retrieval based on the query, then does Q&A
# over the retrieved documents. verbose=True is passed to the chain factory.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

NameError: name 'llm' is not defined

In [16]:
# Question for the RetrievalQA chain; it asks for the answer as a markdown table.
query = (
    "Please list all your shirts with sun protection in a table "
    "in markdown and summarize each one."
)

In [None]:
# Run the RetrievalQA chain on the query (the chain was created with verbose=True).
response = qa_stuff.run(query)

In [None]:
# Render the chain's answer as Markdown (the query asked for a markdown table).
display(Markdown(response))

In [None]:
# Answer the same query through the index wrapper, passing the chat model explicitly.
response = index.query(query, llm=llm)

In [None]:
# Rebuilds `index` with the same vector store class and embeddings as the
# earlier index-creation cell.
# NOTE(review): this cell comes after the cell that queries `index`; presumably it is
# shown to illustrate how the index is configured - confirm the intended order.
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])

Reminder: Download your notebook to your local computer to save your work.