# LangChain: Q&A over Documents
This notebook shows how to use LangChain to answer natural-language questions over your own documents. An example might be a tool that lets you query a product catalog for items of interest.

In [1]:
# Optional setup: uncomment to install or upgrade LangChain
# (prefer `%pip` so the package lands in the running kernel's environment).
# %pip install --upgrade langchain

In [2]:
import os
import openai
from dotenv import load_dotenv, find_dotenv
# Load the local .env file so the OPENAI_* settings read via os.getenv in the
# next cell are available; the return value is not needed, hence `_`.
_ = load_dotenv(find_dotenv()) # read local .env file

In [3]:
# Configure the OpenAI client from environment variables (populated from the
# .env file loaded earlier). A variable that is not set yields None.
openai.api_type = os.environ.get("OPENAI_API_TYPE")
openai.api_base = os.environ.get("OPENAI_API_BASE")
openai.api_version = os.environ.get("OPENAI_API_VERSION")
# The explicit API-key assignment is left disabled:
# openai.api_key = os.environ.get("OPENAI_API_KEY")

In [4]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

In [5]:
# CSV file holding the product catalog that the rest of the notebook queries.
file = "OutdoorClothingCatalog_1000.csv"

# Loader that reads the catalog file; nothing is read until it is used.
loader = CSVLoader(file_path=file)

In [6]:
from langchain.indexes import VectorstoreIndexCreator

In [9]:
# Optional setup: uncomment to install docarray (presumably required by
# DocArrayInMemorySearch -- confirm). Prefer `%pip` so the package lands in
# the running kernel's environment.
# %pip install docarray

Collecting docarray
  Downloading docarray-0.34.0-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.1/226.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting orjson>=3.8.2 (from docarray)
  Downloading orjson-3.9.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (136 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m137.0/137.0 kB[0m [31m6.1 MB/s[0m eta [36m0:00:00[0m
Collecting rich>=13.1.0 (from docarray)
  Downloading rich-13.4.2-py3-none-any.whl (239 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m239.4/239.4 kB[0m [31m11.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting types-requests>=2.28.11.6 (from docarray)
  Downloading types_requests-2.31.0.1-py3-none-any.whl (14 kB)
Collecting markdown-it-py>=2.2.0 (from rich>=13.1.0->docarray)
  Downloading markdown_it_py-3.0.0-py3-none-any.whl (87 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.5/8

In [12]:
# Build the index in one step: load the CSV through `loader`, embed the
# documents, and keep the vectors in an in-memory DocArray store.
#
# The embedding model is passed explicitly rather than relying on the
# creator's default, so the index uses the same `OpenAIEmbeddings(chunk_size=1)`
# configuration as the rest of this notebook.
# NOTE(review): chunk_size=1 presumably works around a per-request input limit
# on the Azure embeddings endpoint -- confirm against your deployment.
from langchain.embeddings import OpenAIEmbeddings  # imported here because the embeddings cell comes later

index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=OpenAIEmbeddings(chunk_size=1),
).from_loaders([loader])

TypeError: VectorstoreIndexCreator.from_loaders() got an unexpected keyword argument 'chunk_size'

In [9]:
# Question put to the index; adjacent string literals are joined into a
# single line of text.
query = (
    "Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one."
)

In [None]:
# Ask the index the question; the result is rendered as Markdown in the next cell.
# NOTE(review): no `llm=` is passed, so the index's default model is used.
response = index.query(query)

In [None]:
# Render the answer as Markdown so any table in it displays properly.
display(Markdown(response))

In [11]:
# Step-by-step version of what the index creator does: start again from a CSV
# loader (same arguments as the loader created earlier).
loader = CSVLoader(file_path=file)

In [12]:
# Load the catalog into Document objects (the first one is inspected below).
docs = loader.load()

In [13]:
# Inspect the first loaded document: its page_content and metadata.
docs[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \r\n\r\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \r\n\r\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \r\n\r\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \r\n\r\nQuestions? Please contact us for any inquiries.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})

In [21]:
from langchain.embeddings import OpenAIEmbeddings
# Embedding model shared by the vector store and the index below.
# NOTE(review): chunk_size=1 presumably limits each embedding request to a
# single text to satisfy the Azure endpoint -- confirm for your deployment.
embeddings = OpenAIEmbeddings(chunk_size=1)

In [22]:
# Embed a sample sentence to see what the embedding model returns.
embed = embeddings.embed_query("Hi my name is Harrison")

In [23]:
# Number of components in the embedding vector.
print(len(embed))

1536


In [24]:
# Peek at the first five components of the vector.
print(embed[:5])

[-0.02186359278857708, 0.006734037306159735, -0.01820078119635582, -0.03919587284326553, -0.014047075994312763]


In [25]:
# Embed every loaded document with `embeddings` and store the vectors in an
# in-memory DocArray store. This issues embedding requests and can be slow
# when the service rate-limits them.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. Operation under Azure OpenAI API version 2023-05-15 have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 3 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to further increase the default rate limit..
Retrying langchain.embeddings.openai.embed_with_retry.<locals>._embed_with_retry in 4.0 seconds as it raised RateLimitError: Requests to the Get a vector representation of a given input that can be easily consumed by machine learning models and algorithms. Operation under Azure OpenAI API version 2023-05-15 have exceeded call rate limit of your current OpenAI S0 pricing tier. Please retry after 3 seconds. Please go here: https://aka.ms/oai/quotaincrease if you would like to 

In [26]:
# Query used for the manual similarity-search walkthrough below.
query = "Please suggest a shirt with sunblocking"

In [27]:
# Retrieve the stored documents most similar to the query.
# Note: this rebinds `docs`, replacing the documents loaded from the CSV with
# the search results; later cells that read `docs` see only these matches.
docs = db.similarity_search(query)

In [28]:
# How many documents the similarity search returned.
len(docs)

4

In [29]:
# Inspect the first document returned by the search.
docs[0]

Document(page_content=': 255\nname: Sun Shield Shirt by\ndescription: "Block the sun, not the fun – our high-performance sun shirt is guaranteed to protect from harmful UV rays. \r\n\r\nSize & Fit: Slightly Fitted: Softly shapes the body. Falls at hip.\r\n\r\nFabric & Care: 78% nylon, 22% Lycra Xtra Life fiber. UPF 50+ rated – the highest rated sun protection possible. Handwash, line dry.\r\n\r\nAdditional Features: Wicks moisture for quick-drying comfort. Fits comfortably over your favorite swimsuit. Abrasion resistant for season after season of wear. Imported.\r\n\r\nSun Protection That Won\'t Wear Off\r\nOur high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun\'s harmful rays. This fabric is recommended by The Skin Cancer Foundation as an effective UV protectant.', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 255})

In [30]:
# Expose the vector store through the retriever interface so it can be handed
# to the RetrievalQA chain built further down.
retriever = db.as_retriever()

In [31]:
# Chat model used to generate the answers below; temperature=0.0 minimises
# sampling randomness.
#
# `engine` is not a declared ChatOpenAI field: passed as a plain keyword,
# LangChain moves it into `model_kwargs` and emits a warning asking to confirm
# the intent. Supplying it through `model_kwargs` directly states that intent
# and sends the same request parameter without the warning.
llm = ChatOpenAI(model_kwargs={"engine": "gpt-4"}, temperature=0.0)

                    engine was transferred to model_kwargs.
                    Please confirm that engine is what you intended.


In [32]:
# Concatenate the text of every retrieved document into one context string
# (documents are joined back-to-back, with no separator between them).
qdocs = "".join(doc.page_content for doc in docs)

In [33]:
# Manual "retrieval + prompt": prepend the retrieved text to the question and
# send the combined prompt to the chat model in a single call.
response = llm.call_as_llm(
    f"{qdocs} Question: Please list all your "
    "shirts with sun protection in a table in markdown and summarize each one."
)

In [34]:
# Render the model's answer as Markdown so the table displays properly.
display(Markdown(response))

| Name | Description |
|------|-------------|
| Sun Shield Shirt | This high-performance sun shirt is made of 78% nylon and 22% Lycra Xtra Life fiber, providing UPF 50+ sun protection. It is slightly fitted, falls at the hip, and is abrasion-resistant for long-lasting wear. |
| Men's Plaid Tropic Shirt, Short-Sleeve | Designed for hot weather and offering UPF 50+ sun protection, this shirt is made of 52% polyester and 48% nylon. It features front and back cape venting, two front bellows pockets, and is wrinkle-free. |
| Men's TropicVibe Shirt, Short-Sleeve | This lightweight shirt provides UPF 50+ sun protection and is made of 71% nylon and 29% polyester. It has a traditional fit, front and back cape venting, and two front bellows pockets. |
| Men's Tropical Plaid Short-Sleeve Shirt | With a UPF 50+ rating, this 100% polyester shirt is designed for hot weather. It features a traditional fit, front and back cape venting, and two front bellows pockets. |

All four shirts provide UPF 50+ sun protection, blocking 98% of the sun's harmful rays. They are designed for comfort and breathability in hot weather, with features such as cape venting and moisture-wicking fabric.

In [35]:
# Same flow as the manual cells above, packaged as a chain: the retriever
# fetches documents for a query and the chat model answers from them.
# NOTE(review): chain_type="stuff" presumably places all retrieved documents
# into a single prompt -- confirm against the LangChain documentation.
qa_stuff = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    chain_type="stuff",
    verbose=True,  # log when the chain starts and finishes
)

In [36]:
# Question for the RetrievalQA chain; adjacent string literals are joined into
# a single line of text.
query = (
    "Please list all your shirts with sun protection in a table "
    "in markdown and summarize each one."
)

In [37]:
# Run the retrieval QA chain on the query; because the chain was built with
# verbose=True it logs when it starts and finishes.
response = qa_stuff.run(query)



[1m> Entering new  chain...[0m

[1m> Finished chain.[0m


In [38]:
# Render the chain's answer as Markdown so the table displays properly.
display(Markdown(response))

| Product ID | Name                                  | Fabric Composition       | Sun Protection | Fit                   | Special Features                                  |
|------------|---------------------------------------|--------------------------|----------------|-----------------------|---------------------------------------------------|
| 618        | Men's Tropical Plaid Short-Sleeve Shirt | 100% Polyester           | UPF 50+        | Traditional Fit       | Wrinkle-resistant, front and back cape venting, two front bellows pockets |
| 374        | Men's Plaid Tropic Shirt, Short-Sleeve  | 52% Polyester, 48% Nylon | UPF 50+        | Ultracomfortable      | Wrinkle-free, quick-drying, front and back cape venting, two front bellows pockets |
| 255        | Sun Shield Shirt                       | 78% Nylon, 22% Lycra     | UPF 50+        | Slightly Fitted       | Moisture-wicking, quick-drying, abrasion-resistant |
| 535        | Men's TropicVibe Shirt, Short-Sleeve   | 71% Nylon, 29% Polyester | UPF 50+        | Traditional Fit       | Wrinkle-resistant, front and back cape venting, two front bellows pockets |

Summary:

1. Men's Tropical Plaid Short-Sleeve Shirt (ID: 618): Made of 100% polyester, this shirt offers UPF 50+ sun protection and features a traditional fit, wrinkle resistance, and front and back cape venting.

2. Men's Plaid Tropic Shirt, Short-Sleeve (ID: 374): This ultracomfortable shirt is made of 52% polyester and 48% nylon, providing UPF 50+ sun protection. It is wrinkle-free, quick-drying, and features front and back cape venting.

3. Sun Shield Shirt (ID: 255): Made of 78% nylon and 22% Lycra, this slightly fitted shirt offers UPF 50+ sun protection. It is moisture-wicking, quick-drying, and abrasion-resistant.

4. Men's TropicVibe Shirt, Short-Sleeve (ID: 535): This traditional fit shirt is made of 71% nylon and 29% polyester, providing UPF 50+ sun protection. It is wrinkle-resistant and features front and back cape venting.

In [40]:
# Disabled alternative: query the index directly while supplying the chat
# model explicitly. Requires `index` to exist (it is built in the next cell).
# response = index.query(query, llm=llm)

In [None]:
# One-step index construction: load through `loader`, embed with the shared
# `embeddings` model, and store the vectors in an in-memory DocArray store.
index = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
).from_loaders([loader])