# LangChain: Q&A over Documents

In [1]:
import os

from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

In [2]:
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch  # Deprecated
# from langchain_openai import DocArrayInMemorySearch
from IPython.display import display, Markdown
from langchain.llms import OpenAI

In [3]:
file = 'l4_outdoor_clothing_catalog_1000.csv'
loader = CSVLoader(file_path=file)  # Initialize a csv loader

In [4]:
# Install sqlalchemy: required in order to import VectorstoreIndexCreator
# %pip install --upgrade --force-reinstall sqlalchemy

In [5]:
# Import an index
from langchain.indexes import VectorstoreIndexCreator

In [6]:
# Install DocArray
# %pip install docarray

In [7]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])


  warn_deprecated(


In [8]:
query = "Please list all your shirt with sun protection in a table in markdown and summarize each one"

In [9]:
response = index.query(query)

In [10]:
display(Markdown(response))



| Name | Description | Sun Protection Rating |
| --- | --- | --- |
| Men's Tropical Plaid Short-Sleeve Shirt | Made of 100% polyester, UPF 50+ rating, wrinkle-resistant, front and back cape venting, two front bellows pockets | SPF 50+ |
| Men's Plaid Tropic Shirt, Short-Sleeve | Made of 52% polyester and 48% nylon, UPF 50+ rating, SunSmart technology, wrinkle-free, front and back cape venting, two front bellows pockets | SPF 50+ |
| Sun Shield Shirt | Made of 78% nylon and 22% Lycra Xtra Life fiber, UPF 50+ rating, moisture-wicking, abrasion-resistant, fits over swimsuit | SPF 50+ |
| Men's TropicVibe Shirt, Short-Sleeve | Made of 71% nylon and 29% polyester, UPF 50+ rating, wrinkle-resistant, front and back cape venting, two front bellows pockets | SPF 50+ |

Each of these shirts offers UPF 50+ sun protection, blocking 98% of the sun's harmful rays. They are all made of

## Step By Step

In [11]:
from langchain.document_loaders import CSVLoader

# Create Document loader
loader = CSVLoader(file_path=file)

In [12]:
# Load document
docs = loader.load()

In [13]:
docs[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'l4_outdoor_clothing_catalog_1000.csv', 'row': 0})

In [14]:
# Create embeddings using OpenAI's embedding class
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings()

In [15]:
embed = embeddings.embed_query("Hi my name is Hassane")

In [16]:
print(len(embed))

1536


In [17]:
print(embed[:5])

[-0.019437601216748666, 0.01022392294959846, -0.018631964372620626, -0.023453000241461477, -0.005351734655516281]


In [18]:
len(docs)

1000

In [20]:
docs_750 = docs[:750]

[Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'l4_outdoor_clothing_catalog_1000.csv', 'row': 0}),
 Document(page_content=': 1\nname: Recycled Waterhog Dog Mat, Chevron Weave\ndescription: Protect your floors from spil

In [None]:
len(docs_750)

In [21]:
# Create embeddings for all pieces of text just loaded and store them in a vectore store
# using the `from_documents` method on the vector stor
db = DocArrayInMemorySearch.from_documents(
    docs_750,  # docs has 1000 element which require more than 150000 TPM (tohen per minute)
    # In free trial mode, the token limits for `text-embedding-3-small` model is 150000 TPM
    embeddings
)

In [22]:
query = "Please suggest a shirt with sunblocking"

In [23]:
docs_750 = db.similarity_search(query)

In [24]:
len(docs_750)

4

In [25]:
docs_750[0]

Document(page_content=': 255\nname: Sun Shield Shirt by\ndescription: "Block the sun, not the fun – our high-performance sun shirt is guaranteed to protect from harmful UV rays. \n\nSize & Fit: Slightly Fitted: Softly shapes the body. Falls at hip.\n\nFabric & Care: 78% nylon, 22% Lycra Xtra Life fiber. UPF 50+ rated – the highest rated sun protection possible. Handwash, line dry.\n\nAdditional Features: Wicks moisture for quick-drying comfort. Fits comfortably over your favorite swimsuit. Abrasion resistant for season after season of wear. Imported.\n\nSun Protection That Won\'t Wear Off\nOur high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun\'s harmful rays. This fabric is recommended by The Skin Cancer Foundation as an effective UV protectant.', metadata={'source': 'l4_outdoor_clothing_catalog_1000.csv', 'row': 255})

In [26]:
# Create a retriever
retriever = db.as_retriever()

In [27]:
# LLM model
llm = ChatOpenAI(temperature=0.0)

In [28]:
qdocs = "".join([docs[i].page_content for i in range(len(docs_750))])

In [29]:
response = llm.call_as_llm(f"{qdocs} Question: Please list all your shirts with sun \
protection in a table in markdown and summarize each one.") 

# call_as_llm is deprecated, use invoke
# response = llm.invoke(f"{qdocs} Question: Please list all your shirts with sun \
# protection in a table in markdown and summarize each one.") 

  warn_deprecated(


In [30]:
display(Markdown(response))

| Name                                    | Summary                                                                                                                                                                                                                   |
|-----------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| Refresh Swimwear, V-Neck Tankini Contrasts | This tankini top is designed for watersports, made with recycled nylon and Lycra spandex for stretch. It offers UPF 50+ sun protection and features racerback straps for easy wear. |
| Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece | A toddler's two-piece swimsuit with bright colors, ruffles, and exclusive prints. Made with four-way-stretch, chlorine-resistant fabric and UPF 50+ sun protection. |
| Women's Campside Oxfords                | Comfortable lace-to-toe Oxford shoes made of soft canvas with cushioning and support. Features antimicrobial odor control and a vintage motif on the innersole.          |
| Recycled Waterhog Dog Mat, Chevron Weave | An ultradurable dog mat made from recycled materials, designed to keep dirt and water off floors. Features thick and thin fibers for dirt absorption and water retention.   |

In [31]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

In [32]:
query =  "Please list all your shirts with sun protection in a table in markdown and summarize each one."

In [33]:
response = qa_stuff.run(query)
# response = qa_stuff.invoke(query)

  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m



[1m> Finished chain.[0m


In [None]:
len(response)

In [34]:
display(Markdown(response))

| Shirt Name                                | Description                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                

In [35]:
response = index.query(query, llm=llm)

In [36]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])

RateLimitError: Error code: 429 - {'error': {'message': 'Rate limit reached for text-embedding-ada-002 in organization org-OQAm0D5NGQg76QasCmf9Cfrn on requests per min (RPM): Limit 3, Used 3, Requested 1. Please try again in 20s. Visit https://platform.openai.com/account/rate-limits to learn more. You can increase your rate limit by adding a payment method to your account at https://platform.openai.com/account/billing.', 'type': 'requests', 'param': None, 'code': 'rate_limit_exceeded'}}