# LangChain: Q&A over Documents

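This notebook demonstrates question answering over your own documents with LangChain, first over a PDF and then over a CSV product catalog. An example application might be a tool that lets you query a product catalog for items of interest.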

In [1]:
#pip install --upgrade langchain

In [51]:
import os
import langchain

from dotenv import load_dotenv, find_dotenv
# Load environment variables from a .env file; the return value is discarded.
# NOTE(review): presumably this supplies the OpenAI API key used below — confirm.
_ = load_dotenv(find_dotenv()) # read local .env file

In [53]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

In [61]:
from langchain.document_loaders import PyPDFLoader

In [64]:
# PDF to question: a property valuation letter stored next to the notebook.
file = '5FerndaleValution.pdf'

# Keep the loader in its own variable; the index-building cell reuses it.
loader = PyPDFLoader(file_path=file)

# Read the PDF and split it into Document chunks.
pages = loader.load_and_split()

In [65]:
# Show the first chunk produced from the PDF.
# NOTE(review): the recorded output contains a real name and postal address;
# clear or redact it before sharing this notebook.
pages[0]

Document(page_content='Mr\xa0Dan\xa0Cobley\n5,\xa0Ferndale\xa0Close\nTilehurst\nReading\nRG31\xa06UZ\n\xa0\n28th\xa0July\xa02023\nDear\xa0Mr\xa0Cobley,\nRe:\xa05,\xa0Ferndale\xa0Close,\xa0Tilehurst,\xa0Reading,\xa0RG31\xa06UZ\nThank\xa0you\xa0for\xa0meeting\xa0with\xa0me\xa0and\xa0giving\xa0Sansome\xa0and\xa0George\xa0the\xa0opportunity\xa0to\xa0value\xa0your\xa0home.\xa0 In\xa0an\nassessment\xa0of\xa0this\xa0nature\xa0it\xa0is\xa0impossible\xa0to\xa0be\xa0precise,\xa0however\xa0our\xa0advice\xa0is\xa0based\xa0on\xa0research\xa0into\xa0the\xa0current\nmarket\xa0and\xa0extensive\xa0local\xa0knowledge.\nIn\xa0my\xa0opinion\xa0and\xa0with\xa0comparable\xa0evidence\xa0of\xa0properties\xa0sold\xa0and\xa0currently\xa0marketed\xa0in\xa0the\xa0local\xa0area,\xa0I\xa0suggest\nintroducing\xa0your\xa0home\xa0to\xa0the\xa0open\xa0market\xa0at\xa0£1,250,000.\xa0Due\xa0to\xa0brackets\xa0on\xa0rightmove\xa0as\xa0discussed.\xa0\nMyself\xa0and\xa0Paul\xa0(the\xa0MD)\xa0have\xa0discussed\xa0your\xa0home

In [66]:
from langchain.indexes import VectorstoreIndexCreator

In [67]:
#pip install docarray

In [68]:
# Configure the index builder to keep vectors in an in-memory DocArray store.
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)

# Build the index from whatever `loader` currently points at (the PDF loader here).
index = index_creator.from_loaders([loader])

In [72]:
# Natural-language question to ask of the indexed valuation letter.
query = "How much is the house worth?"

In [73]:
# Ask the question against the index; no `llm` is passed, so the index's default is used.
response = index.query(query)

In [74]:
# Render the answer text as Markdown in the cell output.
display(Markdown(response))

 The house is worth £1,250,000 according to the assistant branch manager of Sansome & George.

In [75]:
# Switch to the product catalog used by the rest of the notebook.
# The previous version of this cell called
# `PyPDFLoader(file_path=file, chunk_size=10000)`, which fails with
# "TypeError: PyPDFLoader.__init__() got an unexpected keyword argument 'chunk_size'",
# leaving `loader` pointing at the PDF. The cells below expect rows from the
# catalog CSV (their outputs carry source 'OutdoorClothingCatalog_1000.csv').
catalog_file = 'OutdoorClothingCatalog_1000.csv'
loader = CSVLoader(file_path=catalog_file)

In [13]:
# Load every document from the current `loader`.
# NOTE(review): the recorded output shows rows from OutdoorClothingCatalog_1000.csv,
# so this cell expects `loader` to be the catalog CSV loader, not the PDF loader
# created earlier.
docs = loader.load()

In [14]:
# Inspect the first loaded document (catalog row 0 in the recorded run).
docs[0]

Document(page_content=": 0\nname: Women's Campside Oxfords\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \n\nQuestions? Please contact us for any inquiries.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})

In [15]:
from langchain.embeddings import OpenAIEmbeddings
# Embedding client shared by the manual embedding demo, the vector store and the
# rebuilt index below; constructed with default settings.
embeddings = OpenAIEmbeddings()

In [23]:
# Sample product description used to demonstrate what a single embedding looks like.
sample_text = "This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \n\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \n\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \n\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported."

# Embed the text; the result is an indexable sequence of numbers.
embed = embeddings.embed_query(sample_text)

In [24]:
# Number of components in the embedding vector (1536 in the recorded run).
print(len(embed))

1536


In [27]:
# Spot-check a single component of the embedding; index 1500 is an arbitrary pick.
print(embed[1500])

-0.011957752518355846


In [28]:
# Embed every loaded document and keep the vectors in an in-memory store.
db = DocArrayInMemorySearch.from_documents(documents=docs, embedding=embeddings)

In [29]:
# Free-text request used for the similarity search over the catalog.
query = 'Please suggest a shirt with sunblocking'

In [30]:
# Retrieve the documents most similar to the query (4 in the recorded run).
# NOTE(review): this rebinds `docs`, replacing the full list of loaded documents
# with the search results; re-running the `db = ...from_documents(docs, ...)`
# cell afterwards would index only these results.
docs = db.similarity_search(query)

In [31]:
# How many documents the similarity search returned.
len(docs)

4

In [34]:
# Inspect the fourth search result (assumes at least four results were returned).
docs[3]

Document(page_content=": 618\nname: Men's Tropical Plaid Short-Sleeve Shirt\ndescription: Our lightest hot-weather shirt is rated UPF 50+ for superior protection from the sun's UV rays. With a traditional fit that is relaxed through the chest, sleeve, and waist, this fabric is made of 100% polyester and is wrinkle-resistant. With front and back cape venting that lets in cool breezes and two front bellows pockets, this shirt is imported and provides the highest rated sun protection possible. \n\nSun Protection That Won't Wear Off. Our high-performance fabric provides SPF 50+ sun protection, blocking 98% of the sun's harmful rays.", metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 618})

In [35]:
# Expose the vector store through the retriever interface used by the QA chain below.
retriever = db.as_retriever()

In [36]:
# Chat model used for answer generation; temperature 0.0 asks for the least
# random sampling.
llm = ChatOpenAI(temperature=0.0)


In [37]:
# Concatenate the text of every retrieved document into one context string
# (no separator, as before). Iterating the documents directly replaces the
# index-based range(len(docs)) loop.
qdocs = "".join(doc.page_content for doc in docs)


In [38]:
# Build the prompt by hand: the retrieved catalog text followed by the question.
prompt = (
    f"{qdocs} Question: Please list all your "
    "shirts with sun protection in a table in markdown and summarize each one."
)

# Send the prompt straight to the chat model, without any retrieval chain.
response = llm.call_as_llm(prompt)


In [27]:
# Render the model's Markdown table and summary.
display(Markdown(response))

| Name | Description |
| --- | --- |
| Sun Shield Shirt | High-performance sun shirt with UPF 50+ sun protection, moisture-wicking, and abrasion-resistant fabric. Recommended by The Skin Cancer Foundation. |
| Men's Plaid Tropic Shirt | Ultracomfortable shirt with UPF 50+ sun protection, wrinkle-free fabric, and front/back cape venting. Made with 52% polyester and 48% nylon. |
| Men's TropicVibe Shirt | Men's sun-protection shirt with built-in UPF 50+ and front/back cape venting. Made with 71% nylon and 29% polyester. |
| Men's Tropical Plaid Short-Sleeve Shirt | Lightest hot-weather shirt with UPF 50+ sun protection, front/back cape venting, and two front bellows pockets. Made with 100% polyester. |

All of these shirts provide UPF 50+ sun protection, blocking 98% of the sun's harmful rays. They are made with high-performance fabrics that are moisture-wicking, wrinkle-resistant, and abrasion-resistant. The Men's Plaid Tropic Shirt and Men's Tropical Plaid Short-Sleeve Shirt both have front/back cape venting for added breathability. The Sun Shield Shirt is recommended by The Skin Cancer Foundation.

In [39]:
# Retrieval-backed QA chain: `retriever` fetches documents and `llm` writes the answer.
# chain_type="stuff" is LangChain's option that places the retrieved documents
# into a single prompt; verbose=True prints the chain start/finish messages.
qa_stuff = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    verbose=True,
)

In [40]:
# Question for the retrieval QA chain, split across two literals for readability.
query = (
    "Please list all your shirts with sun protection in a table "
    "in markdown and summarize each one."
)

In [41]:
# Run the retrieval QA chain on the question and keep the answer text.
response = qa_stuff.run(query)



> Entering new RetrievalQA chain...

> Finished chain.


In [31]:
# Render the chain's Markdown answer.
display(Markdown(response))

| Shirt Number | Name | Description |
| --- | --- | --- |
| 618 | Men's Tropical Plaid Short-Sleeve Shirt | This shirt is made of 100% polyester and is wrinkle-resistant. It has front and back cape venting that lets in cool breezes and two front bellows pockets. It is rated UPF 50+ for superior protection from the sun's UV rays. |
| 374 | Men's Plaid Tropic Shirt, Short-Sleeve | This shirt is made with 52% polyester and 48% nylon. It is machine washable and dryable. It has front and back cape venting, two front bellows pockets, and is rated to UPF 50+. |
| 535 | Men's TropicVibe Shirt, Short-Sleeve | This shirt is made of 71% Nylon and 29% Polyester. It has front and back cape venting that lets in cool breezes and two front bellows pockets. It is rated UPF 50+ for superior protection from the sun's UV rays. |
| 255 | Sun Shield Shirt | This shirt is made of 78% nylon and 22% Lycra Xtra Life fiber. It is handwashable and line dry. It is rated UPF 50+ for superior protection from the sun's UV rays. It is abrasion-resistant and wicks moisture for quick-drying comfort. |

The Men's Tropical Plaid Short-Sleeve Shirt is made of 100% polyester and is wrinkle-resistant. It has front and back cape venting that lets in cool breezes and two front bellows pockets. It is rated UPF 50+ for superior protection from the sun's UV rays.

The Men's Plaid Tropic Shirt, Short-Sleeve is made with 52% polyester and 48% nylon. It has front and back cape venting, two front bellows pockets, and is rated to UPF 50+.

The Men's TropicVibe Shirt, Short-Sleeve is made of 71% Nylon and 29% Polyester. It has front and back cape venting that lets in cool breezes and two front bellows pockets. It is rated UPF 50+ for superior protection from the sun's UV rays.

The Sun Shield Shirt is made of 78% nylon and 22% Lycra Xtra Life fiber. It is abrasion-resistant and wicks moisture for quick-drying comfort. It is rated UPF 50+ for superior protection from the sun's UV rays. It is handwashable and line dry.

In [42]:
# Same question, answered through the index wrapper with an explicit `llm`.
# NOTE(review): at this point `index` is still the one built earlier from the
# PDF loader; the catalog-backed index is only created in the following cell,
# so run that cell first if the shirt question should hit the catalog.
response = index.query(query, llm=llm)

In [44]:
# Rebuild the index, this time passing the embedding client explicitly.
index_creator = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
)

# Index whatever `loader` currently refers to.
index = index_creator.from_loaders([loader])