In [1]:
import os

# Never commit a real key in this cell. `setdefault` keeps a key that is already
# exported in the environment instead of overwriting it; the empty placeholder is
# only used when the variable is not set at all, in which case OpenAI calls will
# presumably fail until a real key is provided.
os.environ.setdefault('OPENAI_API_KEY', '')

In [2]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown

In [3]:
from langchain.indexes import VectorstoreIndexCreator

In [55]:
# Sample catalog CSV; it was created by hand and could be improved.
file = 'OutdoorClothingCatalog_1000.csv'
# Loader for the CSV at `file`; the rows are turned into documents later via `loader.load()`.
loader = CSVLoader(file_path=file)

In [31]:
# Configure the index creator with the in-memory vector store class,
# then build the index from the CSV loader.
index_creator = VectorstoreIndexCreator(vectorstore_cls=DocArrayInMemorySearch)
index = index_creator.from_loaders([loader])

In [32]:
# The question asks for a Markdown table so the answer can be rendered directly.
query = (
    "Please list all your shirts with sun protection "
    "in a table in markdown and summarize each one."
)

In [33]:
# Ask the index to answer the question; the returned text is rendered in the next cell.
response = index.query(query)

In [34]:
# Render the answer as formatted Markdown (the query asked for a Markdown table).
display(Markdown(response))



| Name | Description |
|------|-------------|
| Tshirt | Very comfy and protects against the sun |
| Blouse | Blue stuff for women |

In [35]:
# Same loader construction as earlier in the notebook (same `file`),
# repeated here before the rows are loaded and processed step by step.
loader = CSVLoader(file_path=file)

In [36]:
# Load the CSV into a sequence of Document objects (the first one is inspected in the next cell).
docs = loader.load()

In [37]:
# Inspect the first document: `page_content` holds the row text,
# `metadata` holds the source file name and the row index.
docs[0]

Document(page_content='Name: Tshirt\nDescription: very comfy protects against the sun', metadata={'source': 'OutdoorClothingCatalog_1000.csv', 'row': 0})

In [38]:
from langchain.embeddings import OpenAIEmbeddings
# Embedding client reused below: for a sample query embedding and for building the vector store.
embeddings = OpenAIEmbeddings()

In [39]:
# Embed a sample sentence to look at what an embedding is (its length is printed in the next cell).
embed = embeddings.embed_query("Hi my name is Harrison")

In [40]:
# Number of components in the embedding returned for the sample sentence.
print(len(embed))

1536


In [41]:
# Build the in-memory vector store from the loaded documents, using the embedding client.
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

In [42]:
# Free-text request used for the similarity search against the vector store.
query = "Please suggest a shirt with sunblocking"

In [43]:
# Retrieve the documents matching the query from the vector store.
# NOTE(review): this rebinds `docs` from the full set of loaded rows to the search hits,
# so re-running the `from_documents(docs, ...)` cell afterwards would index only these hits.
docs = db.similarity_search(query)

In [44]:
# Expose the vector store as a retriever; it is handed to RetrievalQA further down.
retriever = db.as_retriever()

In [45]:
# Chat model shared by the manual prompt and the RetrievalQA chain below.
# temperature is set to 0.0 — presumably to keep the answers as deterministic as possible.
llm = ChatOpenAI(temperature = 0.0)


In [46]:
# Concatenate the text of every retrieved document into one context string.
# A blank line separates the documents so that consecutive rows do not run
# together (e.g. "...the sunName: ...") when the context is pasted into the prompt.
qdocs = "\n\n".join(doc.page_content for doc in docs)


In [47]:
# Manual question answering: the retrieved context is placed in front of the question
# and the whole prompt is sent to the chat model in a single call.
response = llm.call_as_llm(
    f"{qdocs} Question: Please list all your "
    "shirts with sun protection in a table in markdown and summarize each one."
)


In [48]:
# Show the chat model's answer to the manually built prompt as rendered Markdown.
display(Markdown(response))

| Shirt Name | Description |
| --- | --- |
| Tshirt | A very comfortable shirt that provides protection against the sun. |
| Sunblock Shirt | A lightweight shirt made with UPF 50+ fabric that blocks harmful UV rays. |
| Rash Guard | A tight-fitting shirt typically worn for water activities that provides sun protection and helps prevent rashes. |
| Fishing Shirt | A long-sleeved shirt made with breathable fabric and UPF protection, designed for fishing and other outdoor activities. |

Overall, each of these shirts provides protection against the sun, but they each have unique features such as lightweight fabric, tight-fitting design, or specific use for fishing.

In [49]:
# Question-answering chain that combines the retriever with the chat model.
# chain_type="stuff" selects how the retrieved documents are handed to the model
# (see the LangChain RetrievalQA documentation for the available chain types).
qa_stuff = RetrievalQA.from_chain_type(
    chain_type="stuff",
    retriever=retriever,
    llm=llm,
    verbose=True,
)

In [50]:
# Question passed to the RetrievalQA chain; it asks for a Markdown table plus summaries.
query = (
    "Please list all your shirts with sun protection in a table "
    "in markdown and summarize each one."
)

In [51]:
# Run the RetrievalQA chain on the question; the answer text is rendered in the next cell.
response = qa_stuff.run(query)



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


In [52]:
# Show the RetrievalQA chain's answer as rendered Markdown.
display(Markdown(response))

| Name | Description |
|------|-------------|
| Tshirt | Very comfy protects against the sun |

The Tshirt is a comfortable shirt that provides protection against the sun.

In [53]:
# Query the index again, this time passing the explicitly configured chat model.
# NOTE(review): the result is assigned but never displayed in the cells visible here.
response = index.query(query, llm=llm)

In [54]:
# Rebuild the index, this time passing the embedding client explicitly
# alongside the in-memory vector store class.
index_creator = VectorstoreIndexCreator(
    embedding=embeddings,
    vectorstore_cls=DocArrayInMemorySearch,
)
index = index_creator.from_loaders([loader])

In [56]:
# What we did by hand with embeddings, similarity search, summarization and similarity-based ranking happens here in a few lines of code.

In [57]:
'''
Additional methods
1. Map_reduce: I have a query and get an answer from each of several different chunks; then I put all the answers together and let another LLM summarize them into the final answer.
    It works on a large number of documents, but requires many calls.
2. Refine: likewise works on the chunks, but each step builds upon the previous answer, so the documents are not treated independently. Good for combining answers.
3. Map_rerank: one call per document, each returning a score; then pick the best scores. All calls are independent.

'''

'\nAdditional methods\n1. Map_reduce: I have a query and get an answer from each of several different chunks; then I put all the answers together and let another LLM summarize them into the final answer.\n    It works on a large number of documents, but requires many calls.\n2. Refine: likewise works on the chunks, but each step builds upon the previous answer, so the documents are not treated independently. Good for combining answers.\n3. Map_rerank: one call per document, each returning a score; then pick the best scores. All calls are independent.\n\n'