## Setup document QA agent to interact with csv document

In [1]:
from langchain.chains import QAGenerationChain
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.llms import OpenAI
from langchain_community.document_loaders import CSVLoader
from langchain_community.vectorstores.docarray import DocArrayInMemorySearch
from langchain.indexes import VectorstoreIndexCreator
from langchain.chains import LLMChain

#### Working with documents is at the heart of most LLM based applications

In [2]:
import os

os.path.exists("outdoor.csv")

True

In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
mistral_7b = ChatOpenAI(model="mistralai/Mistral-7B-Instruct-v0.2", temperature=0.0)
mistral_7b._default_params

{'model': 'mistralai/Mistral-7B-Instruct-v0.2',
 'stream': False,
 'n': 1,
 'temperature': 0.0}

In [5]:
from langchain_core.prompts import ChatPromptTemplate

In [6]:
p_template = """[INST] Generate a single name that describes a company that makes {product}.\n Give just the name and no other suggestions: [\INST]"""
prompt_template = ChatPromptTemplate.from_template(p_template)

In [7]:
chain = LLMChain(llm=mistral_7b, prompt=prompt_template)
product = "shoes"

In [8]:
chain.predict(product="bags")

'"BagCraft"\n\nThis name suggests the creation and crafting of high-quality bags. It\'s simple, memorable, and easy to pronounce in various languages.'

### Step by Step Guide on how to build a document retrieval chain

In [9]:
from langchain_community.document_loaders import CSVLoader

In [10]:
loader = CSVLoader(file_path="outdoor.csv")

In [11]:
docs = loader.load() # the csv document loader splits the document per row of the scv file

In [12]:
from pprint import pprint
print(docs[2].page_content)

: 2
name: Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece
description: She'll love the bright colors, ruffles and exclusive whimsical prints of this toddler's two-piece swimsuit! Our four-way-stretch and chlorine-resistant fabric keeps its shape and resists snags. The UPF 50+ rated fabric provides the highest rated sun protection possible, blocking 98% of the sun's harmful rays. The crossover no-slip straps and fully lined bottom ensure a secure fit and maximum coverage. Machine wash and line dry for best results. Imported.


In [13]:
docs[0].__fields__

{'page_content': ModelField(name='page_content', type=str, required=True),
 'metadata': ModelField(name='metadata', type=dict, required=False, default_factory='<function dict>'),
 'type': ModelField(name='type', type=Literal['Document'], required=False, default='Document')}

In [14]:
from langchain_community.embeddings import OpenAIEmbeddings
embedding = OpenAIEmbeddings(model="togethercomputer/m2-bert-80M-2k-retrieval")

  warn_deprecated(


In [15]:
embedding.embed_query("Hello world")



[-0.04118252770980938,
 -0.0013809511128117077,
 0.05303581361635882,
 -0.03313520694506061,
 0.024666046986177245,
 0.014827754219576394,
 0.019901404483791976,
 0.0162069448432265,
 0.025874716266457137,
 0.03134016981322425,
 -0.020058578667455116,
 -0.05020967234832017,
 -0.03800079513837035,
 -0.0240587746544471,
 -0.04504083088791846,
 -0.05917427258523314,
 0.0324704357674717,
 0.05628844264892108,
 -0.005543967772577993,
 -0.038885849644895575,
 0.045601246142366374,
 -0.014783769267271716,
 -0.0019559361393969472,
 -0.0081818690885366,
 -0.005064388724059238,
 -0.019538883865136754,
 0.04713616566688595,
 0.04790265729713082,
 -0.03916182361545118,
 0.008804407683991725,
 -0.03184568071316552,
 -0.032240276467505026,
 0.04649634565884519,
 -0.003291621445881083,
 -0.007525974247254717,
 0.04278122100991395,
 0.0528312970088081,
 0.00997534030525678,
 0.053158868830083986,
 0.07050522309658759,
 -0.0017323097840220693,
 0.010429533037059837,
 0.013279250368618059,
 0.0196762203

In [16]:
db = DocArrayInMemorySearch.from_documents(
    docs, 
    embedding
)



In [30]:
query = "Please suggest a shirt with sunblocking"

In [31]:
docs = db.similarity_search(query, k=2)



In [32]:
docs

[Document(page_content=": 374\nname: Men's Plaid Tropic Shirt, Short-Sleeve\ndescription: Our Ultracomfortable sun protection is rated to UPF 50+, helping you stay cool and dry. Originally designed for fishing, this lightest hot-weather shirt offers UPF 50+ coverage and is great for extended travel. SunSmart technology blocks 98% of the sun's harmful UV rays, while the high-performance fabric is wrinkle-free and quickly evaporates perspiration. Made with 52% polyester and 48% nylon, this shirt is machine washable and dryable. Additional features include front and back cape venting, two front bellows pockets and an imported design. With UPF 50+ coverage, you can limit sun exposure and feel secure with the highest rated sun protection available.", metadata={'source': 'outdoor.csv', 'row': 374}),
 Document(page_content=": 952\nname: Women's Rangeley Performance Flannel Shirt, Striped\ndescription: This flannel striped shirt jac is rugged and packed with performance features. It will keep 

In [33]:
retriever = db.as_retriever()

In [34]:
retriever

VectorStoreRetriever(tags=['DocArrayInMemorySearch'], vectorstore=<langchain_community.vectorstores.docarray.in_memory.DocArrayInMemorySearch object at 0x16b1b7ad0>)

In [35]:
from langchain_openai.chat_models import ChatOpenAI

In [36]:
llm = ChatOpenAI(temperature = 0.0)

In [37]:
llm._default_params

{'model': 'gpt-3.5-turbo', 'stream': False, 'n': 1, 'temperature': 0.0}

In [38]:
len(docs)

2

In [39]:
qdocs = "".join([docs[i].page_content for i in range(len(docs))])


In [41]:
len(qdocs)

1471

In [42]:
from langchain_core.prompts import ChatPromptTemplate
prompt = ChatPromptTemplate.from_template("[INST]You are a question ansering bot for an online store. \nYou will be provided context of products and a question, you are expected to give an answer only based on the context.\nContext:{qdocs} \nQuestion: Please list all your shirts with sun protection in a table in markdown and summarize each one.\n Answer: [/INST]").format_messages(qdocs=qdocs)

In [43]:
response = mistral_7b(prompt)


In [45]:
print(response.content)

| Product Name | Description and Sun Protection Features |
| --- | --- |
| Men's Plaid Tropic Shirt | Ultracomfortable sun protection rated UPF 50+. Lightweight and great for hot weather. Blocks 98% of UV rays, wrinkle-free, and quickly evaporates perspiration. Made of 52% polyester and 48% nylon. Machine washable and dryable. Features front and back cape venting, two front bellows pockets, and an imported design. |
| Women's Rangeley Performance Flannel Shirt | Rugged flannel shirt with UPF 50+ sun protection. Blend of 70% cotton and 30% polyester, high-performance hollow-core polyester yarns, and abrasion-resistant construction. Reinforced elbows, side-seam pockets, and machine wash and dry convenience. |


In [46]:
docs

[Document(page_content=": 374\nname: Men's Plaid Tropic Shirt, Short-Sleeve\ndescription: Our Ultracomfortable sun protection is rated to UPF 50+, helping you stay cool and dry. Originally designed for fishing, this lightest hot-weather shirt offers UPF 50+ coverage and is great for extended travel. SunSmart technology blocks 98% of the sun's harmful UV rays, while the high-performance fabric is wrinkle-free and quickly evaporates perspiration. Made with 52% polyester and 48% nylon, this shirt is machine washable and dryable. Additional features include front and back cape venting, two front bellows pockets and an imported design. With UPF 50+ coverage, you can limit sun exposure and feel secure with the highest rated sun protection available.", metadata={'source': 'outdoor.csv', 'row': 374}),
 Document(page_content=": 952\nname: Women's Rangeley Performance Flannel Shirt, Striped\ndescription: This flannel striped shirt jac is rugged and packed with performance features. It will keep 

In [48]:
from IPython.display import display, Markdown

In [50]:
display(Markdown(response.content))

| Product Name | Description and Sun Protection Features |
| --- | --- |
| Men's Plaid Tropic Shirt | Ultracomfortable sun protection rated UPF 50+. Lightweight and great for hot weather. Blocks 98% of UV rays, wrinkle-free, and quickly evaporates perspiration. Made of 52% polyester and 48% nylon. Machine washable and dryable. Features front and back cape venting, two front bellows pockets, and an imported design. |
| Women's Rangeley Performance Flannel Shirt | Rugged flannel shirt with UPF 50+ sun protection. Blend of 70% cotton and 30% polyester, high-performance hollow-core polyester yarns, and abrasion-resistant construction. Reinforced elbows, side-seam pockets, and machine wash and dry convenience. |

In [51]:
from langchain.chains import RetrievalQA

In [55]:
qa_stuff = RetrievalQA.from_chain_type(
    llm=mistral_7b, 
    chain_type="stuff", 
    retriever=retriever, 
    verbose=True
)

In [56]:
query =  "[INST]Please list all your shirts with sun protection in a table \
in markdown and summarize each one.[\INST]"

In [57]:
response = qa_stuff.run(query)

  warn_deprecated(




[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [60]:
display(Markdown(response))

| Name | Description | Size & Fit | Fabric & Care | Additional Features | Sun Protection |
| --- | --- | --- | --- | --- | --- |
| Sun Shield Shirt by [Sun Brand] | Block the sun, not the fun – high-performance sun shirt with UPF 50+ rating, SPF 50+ sun protection, wicks moisture, abrasion resistant, handwash, line dry | Slightly Fitted | 78% nylon, 22% Lycra Xtra Life fiber | Wicks moisture for quick-drying comfort, fits comfortably over swimsuits | UPF 50+, SPF 50+ |
| Men's Plaid Tropic Shirt (Short-Sleeve) | Ultracomfortable sun protection shirt with UPF 50+ rating, wrinkle-free, quick-evaporating fabric, machine washable and dryable | N/A | 52% polyester, 48% nylon | Front and back cape venting, two front bellows pockets | UPF 50+, blocks 98% of harmful UV rays |
| Women's Rangeley Performance Flannel Shirt (Striped) | Rugged flannel shirt with UPF 50+ rating, wicks moisture, abrasion-resistant construction, reinforced elbows, side-seam pockets, machine wash and dry | Slightly Fitted | 70% cotton, 30% polyester, high-performance hollow-core polyester yarns | N/A | UPF 50+ |

1. Sun Shield Shirt by [Sun Brand]: A high-performance sun shirt with UPF 50+ and SPF 50+ sun protection, wicking moisture, and abrasion resistance. It is made of 78% nylon and 22% Lycra Xtra Life fiber, and is handwash and line dry.
2. Men's Plaid Tropic Shirt (Short-Sleeve): An ultracomfortable sun protection shirt with UPF 50+ rating and wrinkle-free, quick-evaporating fabric. It is machine washable and dryable, and features front and back cape venting and two front bellows pockets.
3. Women's Rangeley Performance Flannel Shirt (Striped): A rugged flannel shirt with UPF 50+ rating, moisture-wicking properties, and abrasion-resistant construction. It is made of a blend of 70% cotton and 30% polyester, and has reinforced elbows, side-seam pockets, and is machine wash and dry.

In [61]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embedding=embedding,
).from_loaders([loader]) # you can create an index from multiple loaders, each loader is linked to a particular document or document source

