In [None]:
import os
from openai import OpenAI
from dotenv import load_dotenv
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import CSVLoader
from langchain.vectorstores import DocArrayInMemorySearch
from IPython.display import display, Markdown
from langchain.indexes import VectorstoreIndexCreator
from langchain.embeddings import OpenAIEmbeddings




load_dotenv()
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')

In [None]:
file = 'Ski_Protection_Products.csv'
loader = CSVLoader(file_path=file)

In [None]:
#Create the index
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch
).from_loaders([loader])

In [None]:
query = "Please list all the products name and description related to helmets"

In [None]:
response = index.query(query)


In [None]:
display(Markdown(response))

In [None]:
# do it step by step
docs = loader.load()
docs[0]

In [13]:
embeddings = OpenAIEmbeddings()

In [15]:
embed = embeddings.embed_query("Hi my name is Alexandru")
print(len(embed))
print(embed[:5])

1536
[-0.01127497860432955, 0.008511940082149339, -0.02167138846327104, -0.03111919598132533, -0.012815659031988084]


In [16]:
#store into the vector store - takes in a list of documents, an embedding object and then create an vector store
db = DocArrayInMemorySearch.from_documents(docs, embeddings)

In [18]:
#return from docs
query = "Please suggest me a good helmet"
docs = db.similarity_search(query)
list(docs)
docs[0]

Document(page_content='Product Name: Advanced Thermal Ski Helmet\nDescription: This advanced thermal ski helmet offers maximum protection with a comfortable, lightweight design, perfect for long skiing sessions.', metadata={'source': 'Ski_Protection_Products.csv', 'row': 0})

In [21]:
#create a retrieval from the vectore store -  takes a query and retrieve documents

retriever = db.as_retriever()
llm = ChatOpenAI(temperature = 0.0)
qdocs = "".join([docs[i].page_content for i in range(len(docs))])
response = llm.call_as_llm(f"{qdocs} Question: Please list all your \
helmets.") 
display(Markdown(response))

1. Advanced Thermal Ski Helmet
2. Pro Grade Skiing Helmet

In [22]:
qa_stuff = RetrievalQA.from_chain_type(
    llm = llm,
    chain_type = "stuff",
    retriever=retriever,
    verbose=True
)

In [25]:
query = "Please list all the helmets"
response = qa_stuff.run(query)
display(Markdown(response))



[1m> Entering new RetrievalQA chain...[0m

[1m> Finished chain.[0m


The helmets mentioned in the context are:

1. Pro Grade Skiing Helmet
2. Advanced Thermal Ski Helmet

In [27]:
response = index.query(query, llm=llm)

In [29]:
index = VectorstoreIndexCreator(
    vectorstore_cls=DocArrayInMemorySearch,
    embeddings = embeddings,
). from_loaders([loader])

ValidationError: 1 validation error for VectorstoreIndexCreator
embeddings
  extra fields not permitted (type=value_error.extra)