In [None]:
import pandas as pd
import os
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_experimental.text_splitter import SemanticChunker
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.chat_models import ChatOpenAI
from langchain.schema import AIMessage, HumanMessage, SystemMessage
from langchain.chains import RetrievalQA
from langchain.chains.llm import LLMChain
from langchain.chains.combine_documents.stuff import StuffDocumentsChain
from langchain.prompts import PromptTemplate

In [6]:
# --- 1. Configuration ---
# Location of the sales report CSV that the loading step reads.
csv_file_path = "sales_report.csv"

In [None]:
# --- 2. Setup the LLM ---
# Chat client pointed at a local LLM server (e.g., LM Studio, Ollama).
# Endpoint, key and model name can be overridden through the LOCAL_LLM_*
# environment variables; the defaults are the original hard-coded settings,
# so the notebook behaves the same when none of them are set.
llm = ChatOpenAI(
    openai_api_base=os.environ.get("LOCAL_LLM_API_BASE", "http://localhost:1234/v1"),
    # Placeholder key by default; presumably the local server does not
    # validate it - TODO confirm for the server you run.
    openai_api_key=os.environ.get("LOCAL_LLM_API_KEY", "lm_studio"),
    model=os.environ.get("LOCAL_LLM_MODEL", "local-model"),
)


In [None]:
# --- 3. Load and Process the CSV Data ---
print("\nLoading CSV data...")
loader = CSVLoader(file_path=csv_file_path)
docs = loader.load()

# Fail fast with a clear message: an empty document list would otherwise only
# surface later, as an obscure error while the vector store is being built.
if not docs:
    raise ValueError(f"No documents were loaded from {csv_file_path!r}; nothing to index.")

# Tip: inspect docs[0].page_content to verify what was loaded.

print("\nSplitting documents into semantic chunks...")
# The chunker is driven by a local HuggingFace embedding model.
text_splitter = SemanticChunker(HuggingFaceEmbeddings())
documents = text_splitter.split_documents(docs)
print(f"Created {len(documents)} chunks.")

In [None]:
# --- 4. Create Vector Store ---
print("\nCreating vector store with embeddings...")

# Embedding model used to vectorise the chunks for the FAISS index.
embedder = HuggingFaceEmbeddings()

# Build the index from the chunks, then expose it as a similarity retriever
# that returns the 3 closest chunks for each query.
vector = FAISS.from_documents(documents=documents, embedding=embedder)
retriever = vector.as_retriever(
    search_kwargs={"k": 3},
    search_type="similarity",
)
print("Vector store created successfully.")

In [None]:
# --- 5. Define the RAG Chain ---
print("\nSetting up the RAG chain...")

# Instruction prompt: the model plays a sales analyst, answers only from the
# supplied context, and says so when the context does not contain the answer.
prompt_template = """
1. You are a helpful sales analyst.
2. Use the following pieces of context from the sales report to answer the question at the end.
3. If you don't know the answer from the context, just say that you don't know. Don't try to make up an answer.
4. Provide a concise answer based only on the provided sales data.

Context:
{context}

Question: {question}

Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(prompt_template)

# Formatting applied to each retrieved document before it goes into {context}.
row_prompt = PromptTemplate(
    template="Row:\n{page_content}",
    input_variables=["page_content"],
)

# LLM + QA prompt (switch verbose to True for more detailed logs).
llm_chain = LLMChain(prompt=QA_CHAIN_PROMPT, llm=llm, verbose=False)

# Stuffs the formatted documents into the prompt's "context" variable.
combine_documents_chain = StuffDocumentsChain(
    document_prompt=row_prompt,
    document_variable_name="context",
    llm_chain=llm_chain,
)

# Final chain: retrieve, then answer; source documents are returned as well.
qa_chain = RetrievalQA(
    retriever=retriever,
    combine_documents_chain=combine_documents_chain,
    return_source_documents=True,
    verbose=False,
)
print("RAG chain is ready.")

In [None]:
# --- 6. Query the Sales Data ---
# NOTE(review): the retriever is configured with k=3, so only three chunks
# reach the model; an aggregate question like this one may be answered from
# partial data - verify the answer against the CSV.
query1 = "What was the total sale amount for the North region?"
print(f"\nQuestion: {query1}")
# Use .invoke(): calling the chain object directly is deprecated in LangChain.
result1 = qa_chain.invoke({"query": query1})
print("Answer:", result1["result"].strip())



In [None]:
# Second example question, run through the same RAG chain.
query2 = "how many laptops sold in south region"
print(f"\nQuestion: {query2}")
# Use .invoke(): calling the chain object directly is deprecated in LangChain.
result2 = qa_chain.invoke({"query": query2})
print("Answer:", result2["result"].strip())