# LangChain: Q&A Over Documents

In [1]:
import os
from openai import OpenAI

from dotenv import load_dotenv, find_dotenv
# Name of the chat model used by the cells further down.
MODEL = "gpt-4o-mini"

# Read the local .env file so the API key is available via os.environ.
_ = load_dotenv(find_dotenv())

# Indexing os.environ raises KeyError when the key is not configured.
openai_api_key = os.environ["OPENAI_API_KEY"]
client = OpenAI(api_key=openai_api_key)

In [13]:
# LangChain / IPython names used throughout the notebook.
#
# `OpenAI` is intentionally not imported from langchain_openai: that would
# shadow the `openai.OpenAI` SDK client class imported in the first cell, so
# re-running that cell would construct a different object.
# `OpenAIEmbeddings` is imported exactly once (from langchain_openai) instead
# of being rebound afterwards by a second import from `langchain.embeddings`.
from IPython.display import display, Markdown
from langchain.chains import RetrievalQA
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from langchain_openai import ChatOpenAI, OpenAIEmbeddings


In [8]:
# Path to the product catalog CSV and the loader that will read it.
file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=file)

In [9]:
# Index-builder helper, plus the embeddings model handed to it in the next cell.
from langchain.indexes import VectorstoreIndexCreator
embeddings = OpenAIEmbeddings()

In [10]:
# Configure the index builder: in-memory vector store + the embeddings model.
index_creator = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
)

# Build the queryable index from the CSV loader.
index = index_creator.from_loaders([loader])



In [11]:
# Question asked of the catalog index in the next cell.
query = (
    "Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one."
)

In [17]:
# Chat model (temperature 0) that answers the index query.
llm_replacement_model = ChatOpenAI(model="gpt-4o", temperature=0)

# Query the index, answering with the model defined above.
response = index.query(query, llm=llm_replacement_model)

In [18]:
# Render the query response as Markdown in the cell output.
display(Markdown(response))

Here is a table summarizing the shirts with sun protection:

| Name                                      | Summary                                                                                           |
|-------------------------------------------|---------------------------------------------------------------------------------------------------|
| Men's Tropical Plaid Short-Sleeve Shirt   | A lightweight, wrinkle-resistant shirt with UPF 50+ sun protection, made of 100% polyester. Features front and back cape venting and two front bellows pockets. Imported. |
| Men's Plaid Tropic Shirt, Short-Sleeve    | Designed for fishing, this shirt offers UPF 50+ coverage, is wrinkle-free, and quickly evaporates perspiration. Made of 52% polyester and 48% nylon, it includes front and back cape venting and two front bellows pockets. Imported. |
| Men's TropicVibe Shirt, Short-Sleeve      | A lightweight shirt with UPF 50+ sun protection, made of 71% nylon and 29% polyester. Features front and back cape venting and two front bellows pockets. Imported. |
| Sun Shield Shirt                          | A high-performance sun shirt with UPF 50+ protection, made of 78% nylon and 22% Lycra Xtra Life fiber. It wicks moisture, is abrasion-resistant, and fits comfortably over swimsuits. Handwash and line dry. Imported. |

Each shirt provides the highest rated sun protection possible, blocking 98% of the sun's harmful rays.

### Step by Step

In [19]:
# Fresh loader for the step-by-step walkthrough. CSVLoader is already imported
# in the notebook's import cell, so it is not re-imported here.
loader = CSVLoader(file_path=file)

In [20]:
# Load the catalog into Document objects (the sample shown below carries a
# 'row' index in its metadata — presumably one Document per CSV row).
docs = loader.load()

In [21]:
# Inspect the first loaded document.
docs[0]

Document(metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0}, page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.")

In [22]:
# Embeddings model (langchain_openai implementation) for the manual walkthrough.
from langchain_openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [23]:
# Embed a sample sentence to see what an embedding vector looks like.
embed = embeddings.embed_query("Hi my name is Shankar")

In [24]:
# Number of components in the embedding vector.
print(len(embed))

1536


In [25]:
# Peek at the first ten components only, rather than the whole vector.
print(embed[:10])

[-0.001580761861987412, -0.012852772139012814, -0.005415603052824736, -0.03133751451969147, -0.018613914027810097, 0.015630004927515984, -0.023083321750164032, -0.016960492357611656, -0.009093821048736572, -0.005095898173749447]


In [26]:
# Build an in-memory vector store from the loaded documents and the embeddings model.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

In [27]:
# Look up the catalog entries most similar to the query in the vector store.
query = "Please suggest a shirt with sunblocking"
# NOTE(review): this rebinds `docs` — until now the full list from
# loader.load() — to the search hits only (4 in the recorded output).
# Later cells rely on that, but re-running the cell that builds `db` after
# this one would index just these hits instead of the whole catalog.
docs = db.similarity_search(query)
len(docs)

4

In [28]:
# Inspect the first search hit (at this point `docs` holds the similarity-search results).
docs[0]

Document(metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 255}, page_content=': 255\nname: Sun Shield Shirt by\ndescription: "Block the sun, not the fun – our high-performance sun shirt is guaranteed to protect from harmful UV rays. \n\nSize & Fit: Slightly Fitted: Softly shapes the body. Falls at hip.\n\nFabric & Care: 78% nylon, 22% Lycra Xtra Life fiber. UPF 50+ rated – the highest rated sun protection possible. Handwash, line dry.\n\nAdditional Features: Wicks moisture for quick-drying comfort. Fits comfortably over your favorite swimsuit. Abrasion resistant for season after season of wear. Imported.\n\nSun Protection That Won\'t Wear Off\nOur high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun\'s harmful rays. This fabric is recommended by The Skin Cancer Foundation as an effective UV protectant.')

In [29]:
# Expose the vector store through a retriever, which the RetrievalQA chain below takes as input.
retriever = db.as_retriever()

In [31]:
# Chat model for the manual and chain-based QA steps (temperature 0.0, model from MODEL).
llm = ChatOpenAI(model=MODEL, temperature=0.0)

In [32]:
# Combine the text of every retrieved document into one context string.
# A blank line between documents keeps the end of one record from running
# straight into the ": <row>" header that starts the next one.
qdocs = "\n\n".join(doc.page_content for doc in docs)


In [34]:
# Ask the chat model directly: retrieved catalog text first, then the question.
response = llm.invoke(
    f"{qdocs} Question: Please list all your "
    "shirts with sun protection in a table in markdown and summarize each one."
)


In [36]:
# The chat model's reply text lives on `.content`; render it as Markdown.
display(Markdown(response.content))

Here’s a summary of the shirts with sun protection in a markdown table format:

| Name                                   | Description Summary                                                                                                                                                                                                                     |
|----------------------------------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| **Sun Shield Shirt**                  | High-performance sun shirt with UPF 50+ protection, blocking 98% of UV rays. Slightly fitted, made of 78% nylon and 22% Lycra Xtra Life fiber. Features moisture-wicking, abrasion resistance, and is comfortable over swimsuits.               |
| **Men's Plaid Tropic Shirt, Short-Sleeve** | Lightweight, UPF 50+ rated shirt originally designed for fishing. Made of 52% polyester and 48% nylon, it is wrinkle-free and evaporates perspiration quickly. Features cape venting and two front bellows pockets for added comfort.         |
| **Men's TropicVibe Shirt, Short-Sleeve** | Lightweight sun-protection shirt with UPF 50+ rating. Traditional fit, made of 71% nylon and 29% polyester. Features include wrinkle resistance, cape venting, and two front bellows pockets. Machine washable and dryable.                     |
| **Men's Tropical Plaid Short-Sleeve Shirt** | Lightest hot-weather shirt with UPF 50+ protection, made of 100% polyester. Traditional fit with front and back cape venting and two front bellows pockets. Provides high-performance sun protection, blocking 98% of harmful rays.               |

This table summarizes the key features and benefits of each shirt, highlighting their sun protection capabilities.

In [37]:
# Retrieval QA chain using the "stuff" chain type, wired to the retriever and
# chat model defined above; verbose=True logs chain entry/exit.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    verbose=True,
)

In [38]:
# Question posed to the RetrievalQA chain.
query = (
    "Please list all your shirts with sun protection in a table "
    "in markdown and summarize each one."
)

In [40]:
# Run the chain on the query; the answer text is read from response["result"] when displayed.
response = qa_stuff.invoke(query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [46]:
# Render the chain's answer (stored under the "result" key) as Markdown.
display(Markdown(response["result"]))

Here is a table listing all the shirts with sun protection along with a summary of each:

| Name                                      | Description Summary                                                                                          |
|-------------------------------------------|--------------------------------------------------------------------------------------------------------------|
| Men's Tropical Plaid Short-Sleeve Shirt   | Lightweight, UPF 50+ rated for sun protection, made of 100% polyester, wrinkle-resistant, with cape venting and two front pockets. |
| Men's Plaid Tropic Shirt, Short-Sleeve    | Designed for fishing, UPF 50+ coverage, made of 52% polyester and 48% nylon, wrinkle-free, evaporates perspiration, with cape venting and two front pockets. |
| Men's TropicVibe Shirt, Short-Sleeve      | Lightweight, UPF 50+ rated, made of 71% nylon and 29% polyester, wrinkle-resistant, with cape venting and two front pockets. |
| Sun Shield Shirt                          | Slightly fitted, UPF 50+ rated, made of 78% nylon and 22% Lycra Xtra Life fiber, moisture-wicking, abrasion resistant, recommended by The Skin Cancer Foundation. |

In [47]:
# Same question through the index's query helper, this time answered by the MODEL-backed `llm`.
response = index.query(query, llm=llm)

In [48]:
# Rebuild the index with the same vector store class, using the current
# `embeddings` and `loader` objects.
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embeddings,
).from_loaders([loader])