# LangChain: Q&A over Documents

In [2]:
import os

from dotenv import load_dotenv, find_dotenv
# Find and read the local .env file; the call's return value is discarded via `_`.
_ = load_dotenv(find_dotenv()) # read local .env file

In [3]:
# Choose the chat model name by date, to cope with the pinned snapshot
# being deprecated.
import datetime

# Cut-over day: strictly after it, the unpinned "gpt-3.5-turbo" name is used.
target_date = datetime.date(2024, 6, 12)

# Today's date according to the local clock.
current_date = datetime.date.today()

# On or before the cut-over day the pinned "-0301" snapshot is kept.
llm_model = (
    "gpt-3.5-turbo" if current_date > target_date else "gpt-3.5-turbo-0301"
)

## Vector Store

In [4]:
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from langchain.indexes import VectorstoreIndexCreator

from IPython.display import display, Markdown

In [5]:
# Relative path to the product catalog CSV used throughout the notebook.
fpath = '../data/OutdoorClothingCatalog_1000.csv'
# Loader bound to that CSV file.
loader = CSVLoader(file_path=fpath)

Default embedding = `OpenAIEmbeddings`

In [45]:
# Configure the index creator to use the in-memory DocArray vector store,
# then build the index from the CSV loader.
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)
index = index_creator.from_loaders([loader])

In [47]:
# Inspect the embedding object attached to the index's vector store.
index.vectorstore.embedding

OpenAIEmbeddings(client=<class 'openai.api_resources.embedding.Embedding'>, model='text-embedding-ada-002', deployment='text-embedding-ada-002', openai_api_version=None, openai_api_base=None, openai_api_type=None, embedding_ctx_length=8191, openai_api_key=None, openai_organization=None, allowed_special=set(), disallowed_special='all', chunk_size=1000, max_retries=6, request_timeout=None, headers=None)

In [7]:
# Question for the index; the adjacent literals are joined into one string.
query = (
    "Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one."
)

Default LLM for `index.query` = `OpenAI` (the completion-model wrapper, used when no `llm` argument is passed)

In [8]:
# Ask the index the question; no `llm` is passed, so the library default is used.
response = index.query(query)

In [9]:
# Render the answer as Markdown so the table is displayed formatted.
display(Markdown(response))



| Name | Description |
| --- | --- |
| Men's Tropical Plaid Short-Sleeve Shirt | UPF 50+ rated, 100% polyester, wrinkle-resistant, front and back cape venting, two front bellows pockets |
| Men's Plaid Tropic Shirt, Short-Sleeve | UPF 50+ rated, 52% polyester and 48% nylon, machine washable and dryable, front and back cape venting, two front bellows pockets |
| Men's TropicVibe Shirt, Short-Sleeve | UPF 50+ rated, 71% Nylon, 29% Polyester, 100% Polyester knit mesh, wrinkle resistant, front and back cape venting, two front bellows pockets |
| Sun Shield Shirt by | UPF 50+ rated, 78% nylon, 22% Lycra Xtra Life fiber, wicks moisture, fits comfortably over swimsuit, abrasion resistant |

All four shirts provide UPF 50+ sun protection, blocking 98% of the sun's harmful rays. The Men's Tropical Plaid Short-Sleeve Shirt is made of 100% polyester and is wrinkle-resistant. The Men's Plaid Trop

## Step by Step

### Load the documents

In [10]:
from langchain.document_loaders import CSVLoader

In [11]:
# Same CSV path and loader as built earlier in the notebook, repeated here
# (presumably so the step-by-step section can be read on its own).
fpath = '../data/OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(fpath)

In [12]:
# Load the CSV through the loader; the next cells show that the result is
# a list of langchain Document objects.
docs = loader.load()

In [13]:
# Check the container type returned by the loader.
type(docs)

list

In [14]:
# Check the type of a single loaded element.
type(docs[0])

langchain.schema.Document

### Embed documents

In [15]:
from langchain.embeddings import OpenAIEmbeddings

In [16]:
# Embeddings client constructed with no arguments, i.e. library defaults.
embeddings = OpenAIEmbeddings()

In [17]:
# Embed a sample sentence so the resulting vector can be inspected below.
embed = embeddings.embed_query("Hi my name is Abhishek!")

In [18]:
# Number of components in the embedding vector.
print(len(embed))

1536


In [19]:
# Peek at the first five components instead of dumping the whole vector.
embed[:5]

[-0.010168172419071198,
 -0.007905704900622368,
 -0.012340139597654343,
 -0.025197414681315422,
 -0.0313383974134922]

### Create a Vector DB

In [25]:
from langchain.vectorstores import DocArrayInMemorySearch

In [20]:
# Build the in-memory vector store from the loaded documents, embedding
# them with the `embeddings` client.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

### Find Similar documents to query

In [21]:
# Short free-text query for the similarity search below.
# Note: this rebinds `query`, replacing the longer question assigned earlier.
query = "Please suggest shirts with sunblocking"

In [22]:
# Retrieve the documents most similar to the query from the vector store.
# NOTE(review): this rebinds `docs`, which until now held the full list from
# loader.load(). Re-running the cell that builds `db` after this one would
# index only these search hits; a distinct name would avoid that.
docs = db.similarity_search(query)

In [24]:
# How many documents the similarity search returned.
len(docs)

4

### Pass All documents to LLM

In [27]:
from langchain.chat_models import ChatOpenAI

In [28]:
# Chat model using the date-selected model name, with temperature set to 0.
llm = ChatOpenAI(model=llm_model, temperature=0)

In [29]:
# Combine the text of every retrieved document into one context string.
# A blank line separates documents; joining with "" would glue the last
# field of one record directly onto the first field of the next.
qdocs = "\n\n".join(doc.page_content for doc in docs)

In [30]:
# Assemble the prompt: the retrieved text first, then the question.
# The pieces are adjacent literals, so the result is a single string that
# starts and ends with a newline.
prompt = (
    "\n"
    "From the following info:\n"
    "\n"
    f"{qdocs}\n"
    "\n"
    "Question: Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one.\n"
)

In [33]:
# Send the hand-built prompt to the chat model; the result is rendered as
# Markdown in the next cell.
response = llm.call_as_llm(prompt)

In [34]:
# Render the model's answer as Markdown.
display(Markdown(response))

| Shirt Name | Description |
| --- | --- |
| Sun Shield Shirt | High-performance sun shirt with UPF 50+ sun protection, made of 78% nylon and 22% Lycra Xtra Life fiber. Wicks moisture for quick-drying comfort and fits comfortably over swimsuits. Abrasion-resistant and recommended by The Skin Cancer Foundation. |
| Men's Plaid Tropic Shirt | Ultracomfortable shirt with UPF 50+ sun protection, made of 52% polyester and 48% nylon. Wrinkle-free and quickly evaporates perspiration. Features front and back cape venting and two front bellows pockets. |
| Men's TropicVibe Shirt | Men's sun-protection shirt with built-in UPF 50+ made of 71% Nylon and 29% Polyester. Wrinkle-resistant with front and back cape venting and two front bellows pockets. |
| Men's Tropical Plaid Short-Sleeve Shirt | Lightest hot-weather shirt with UPF 50+ sun protection, made of 100% polyester. Traditional fit that is relaxed through the chest, sleeve, and waist. Features front and back cape venting and two front bellows pockets. |

The Sun Shield Shirt is a high-performance sun shirt made of nylon and Lycra Xtra Life fiber with UPF 50+ sun protection. The Men's Plaid Tropic Shirt is ultracomfortable with UPF 50+ sun protection and features front and back cape venting and two front bellows pockets. The Men's TropicVibe Shirt is a sun-protection shirt with built-in UPF 50+ and front and back cape venting. The Men's Tropical Plaid Short-Sleeve Shirt is the lightest hot-weather shirt with UPF 50+ sun protection and features front and back cape venting and two front bellows pockets.

## RetrievalQA

In [36]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

In [37]:
# Chat model for the RetrievalQA chain, built with the same model name and
# a temperature of 0.0; this rebinds `llm` from the earlier section.
llm = ChatOpenAI(model=llm_model, temperature=0.0)

In [38]:
# Expose the vector store as a retriever; no arguments, so defaults apply.
retriever = db.as_retriever()

In [39]:
# RetrievalQA chain of type "stuff" over the retriever. verbose=True makes
# the chain log when it is entered and finished.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, retriever=retriever, chain_type="stuff", verbose=True
)

In [40]:
# Question passed to the RetrievalQA chain; the adjacent literals are joined
# into one single-line string.
query = (
    "Question: Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one."
)


In [41]:
# Run the chain on the question; the answer is rendered as Markdown below.
response = qa_stuff.run(query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [43]:
# Render the chain's answer as Markdown.
display(Markdown(response))

| Shirt Name | Description |
| --- | --- |
| Men's Tropical Plaid Short-Sleeve Shirt | Rated UPF 50+ for superior protection from the sun's UV rays. Made of 100% polyester and is wrinkle-resistant. With front and back cape venting that lets in cool breezes and two front bellows pockets. Provides the highest rated sun protection possible. |
| Men's Plaid Tropic Shirt, Short-Sleeve | Rated to UPF 50+, helping you stay cool and dry. Made with 52% polyester and 48% nylon, this shirt is machine washable and dryable. Additional features include front and back cape venting, two front bellows pockets and an imported design. With UPF 50+ coverage, you can limit sun exposure and feel secure with the highest rated sun protection available. |
| Sun Shield Shirt by | High-performance sun shirt is guaranteed to protect from harmful UV rays. Made of 78% nylon, 22% Lycra Xtra Life fiber. UPF 50+ rated – the highest rated sun protection possible. Wicks moisture for quick-drying comfort. Fits comfortably over your favorite swimsuit. Abrasion-resistant for season after season of wear. |
| Men's TropicVibe Shirt, Short-Sleeve | Sun-protection shirt with built-in UPF 50+. Made of 71% Nylon, 29% Polyester. Lining: 100% Polyester knit mesh. Machine wash and dry. Additional features include wrinkle resistance, front and back cape venting, and two front bellows pockets. Provides the highest rated sun protection possible. |

All of the shirts listed provide UPF 50+ sun protection, blocking 98% of the sun's harmful rays. The Men's Tropical Plaid Short-Sleeve Shirt and Men's Plaid Tropic Shirt, Short-Sleeve are both made of wrinkle-resistant fabric and have front and back cape venting and two front bellows pockets. The Sun Shield Shirt by is made of nylon and Lycra Xtra Life fiber and is abrasion-resistant. The Men's TropicVibe Shirt, Short-Sleeve is made of nylon and polyester and is wrinkle-resistant with front and back cape venting and two front bellows pockets.