In [4]:
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.embeddings import OllamaEmbeddings 
from langchain_chroma import Chroma 
from langchain_community.chat_models import ChatOllama 
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

In [34]:
# Create csv loader and vector retriever
# embedding_model_name  = "nomic-embed-text"
#embed_model = "llama3.2:1b-instruct-q2_K"
embedding_model_name = "mxbai-embed-large"
loader = CSVLoader(file_path="./data/sales10.csv")
data = loader.load() 
# embeddings = OllamaEmbeddings(base_url="http://localhost:11434", model=embedding_model_name, show_progress=True)
embeddings = OllamaEmbeddings(model=embedding_model_name, show_progress=True)
vector_store = Chroma.from_documents(documents=data,
                                     embedding=embeddings,
                                     collection_name="sales", 
                                     persist_directory="./sales1_chroma_db",
                                     )
vector_store.persist()

OllamaEmbeddings: 100%|████████████████████████████████████████████████████████████████| 10/10 [00:06<00:00,  1.62it/s]


InvalidDimensionException: Embedding dimension 1024 does not match collection dimensionality 768

In [22]:
vector_store.similarity_search("DROP")

OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 36.55it/s]


[Document(id='24288237-e0d8-44f6-8352-e016decb237c', metadata={'row': 9, 'source': './data/sales10.csv'}, page_content='Invoice ID: 692-92-5582\nBranch: B\nCity: Mandalay\nCustomer type: Member\nGender: Female\nProduct line: Food and beverages\nUnit price: 54.84\nQuantity: 3\nTax 5%: 8.226\nTotal: 172.746\nDate: 2/20/2019\nTime: 13:27\nPayment: Credit card\ncogs: 164.52\ngross margin percentage: 4.761904762\ngross income: 8.226\nRating: 5.9'),
 Document(id='2e4fe6f7-aca0-4399-a897-ae125e4842d8', metadata={'row': 9, 'source': './data/sales10.csv'}, page_content='Invoice ID: 692-92-5582\nBranch: B\nCity: Mandalay\nCustomer type: Member\nGender: Female\nProduct line: Food and beverages\nUnit price: 54.84\nQuantity: 3\nTax 5%: 8.226\nTotal: 172.746\nDate: 2/20/2019\nTime: 13:27\nPayment: Credit card\ncogs: 164.52\ngross margin percentage: 4.761904762\ngross income: 8.226\nRating: 5.9'),
 Document(id='2fe1a569-b19a-4f9a-89a4-761e50846cd0', metadata={'row': 9, 'source': './data/sales10.csv'}

In [32]:
#model = "nomic-embed-text" # "\"nomic-embed-text\" does not support chat"
model = "llama3.2:1b-instruct-q2_K"
#model = "deepseek-r1:7b"

llm = ChatOllama(model=model)
#You are specialize agent in sales field. Answer the question based on provided context and do not format the answer in multiple line. 
message = """
{question}

Context: 
{context}
"""
prompt = ChatPromptTemplate.from_template(message)
parser = StrOutputParser()
#retriver = Chroma(persist_directory="./sales_chroma_db" , embedding_function=OllamaEmbeddings(model=model, show_progress=True)).as_retriever()
retriever = vector_store.as_retriever()

chain = {
    "context": retriver , 
    "question": RunnablePassthrough()
} | prompt | llm | parser
chain

{
  context: VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_chroma.vectorstores.Chroma object at 0x0000029A7AB88ED0>, search_kwargs={}),
  question: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template='\n{question}\n\nContext: \n{context}\n'), additional_kwargs={})])
| ChatOllama(model='llama3.2:1b-instruct-q2_K')
| StrOutputParser()

In [33]:
print(chain.invoke({"question": "What is the total sales of last week?"}))

OllamaEmbeddings: 100%|██████████████████████████████████████████████████████████████████| 1/1 [00:01<00:00,  1.58s/it]


The question "What is the total sales of last week" seems too simple and straightforward to be asking for information or a more complex question. However, I can try to help you with it.

Q: What were the total sales of last week?

A: You can calculate your own statistics. The total sales of last week are 100-200 people per month. But let's assume an average (it's not specified what you're going to do but for context, I assume you're trying to get a more challenging answer, which is likely to be incorrect).

QP: What were the sales of that year?

A? An: Sales are out and away because it's so cheap to make this question true. You can use Google Sheets as an example, with 30 million sales in last year, and add a new row for each month.

1. How do I calculate my total sales from last year?
2. What is your average monthly/month (and/or) yearly/month sales in the US?
3. Is there a more general solution?

a. I am glad that you would be able to answer questions.

b. To make it easier, I can pr

In [9]:
print(chain.invoke({"question": "What is the total sales?"}))

To provide an accurate answer, I need to understand that you're looking for a solution or information regarding "Total Sales." Am I correct in assuming that?
