In [None]:
import os
os.environ["GROQ_API_KEY"] = "api key"

In [2]:
from langchain.document_loaders import CSVLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import ConversationalRetrievalChain
from transformers import pipeline
from langchain.llms import HuggingFacePipeline

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# --- Step 1. Load your CSV data ---
# Ensure your CSV (e.g., "data.csv") is in the same folder or provide the full path.
loader = CSVLoader(file_path='data.csv')
documents = loader.load()

print(documents)

[Document(metadata={'source': 'data.csv', 'row': 0}, page_content='Region: Australia and Oceania\nCountry: Tuvalu\nItem Type: Baby Food\nSales Channel: Offline\nOrder Priority: H\nOrder Date: 5/28/2010\nOrder ID: 669165933\nShip Date: 6/27/2010\nUnits Sold: 9925\nUnit Price: 255.28\nUnit Cost: 159.42\nTotal Revenue: 2533654\nTotal Cost: 1582243.5\nTotal Profit: 951410.5'), Document(metadata={'source': 'data.csv', 'row': 1}, page_content='Region: Central America and the Caribbean\nCountry: Grenada\nItem Type: Cereal\nSales Channel: Online\nOrder Priority: C\nOrder Date: 8/22/2012\nOrder ID: 963881480\nShip Date: 9/15/2012\nUnits Sold: 2804\nUnit Price: 205.7\nUnit Cost: 117.11\nTotal Revenue: 576782.8\nTotal Cost: 328376.44\nTotal Profit: 248406.36'), Document(metadata={'source': 'data.csv', 'row': 2}, page_content='Region: Europe\nCountry: Russia\nItem Type: Office Supplies\nSales Channel: Offline\nOrder Priority: L\nOrder Date: 05-02-2014\nOrder ID: 341417157\nShip Date: 05-08-2014\nU

In [4]:
# --- Step 2. (Optional) Split documents ---
# This helps if your CSV rows are long. Adjust chunk size and overlap as needed.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0)
docs = text_splitter.split_documents(documents)


In [5]:
# --- Step 3. Create embeddings ---
# We use a free model from Sentence Transformers.
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [6]:
# --- Step 4. Build the vector store ---
# We create a FAISS index from our documents.
vectorstore = FAISS.from_documents(docs, embedding_model)

In [None]:
from langchain_groq import ChatGroq

# Initialize the Groq model
llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0)    # to try different models try models on groq


In [11]:
from langchain.chains import ConversationalRetrievalChain

# Create the retriever
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3})

# Set up the conversational retrieval chain
chatbot = ConversationalRetrievalChain.from_llm(llm, retriever)


In [13]:
chat_history = []
print("Chatbot is ready! Type 'exit' to end the conversation.\n")

while True:
    query = input("User: ")
    print()
    if query.lower() == "exit":
        print("Goodbye!")
        break
    result = chatbot({"question": query, "chat_history": chat_history})
    answer = result["answer"]
    chat_history.append((query, answer))
    print("Chatbot:", answer)


Chatbot is ready! Type 'exit' to end the conversation.

Chatbot: To calculate the total data for Australia, we need to consider the two orders from Australia:

1. Order ID: 158535134 (Office Supplies)
2. Order ID: 450563752 (Cereal)

Here's the total data for Australia:

- Units Sold: 2924 + 682 = 3606
- Total Revenue: $1904138.04 + $140287.4 = $2044425.44
- Total Cost: $1534983.04 + $79869.02 = $1619952.06
- Total Profit: $2044425.44 - $1619952.06 = $426473.38
Chatbot: To find the total data for Australia, we need to combine the data from the two orders related to Australia.

Order 1: 
- Item Type: Office Supplies
- Sales Channel: Online
- Order Priority: C
- Order Date: 10/27/2015
- Order ID: 158535134
- Ship Date: 11/25/2015
- Units Sold: 2924
- Unit Price: 651.21
- Unit Cost: 524.96
- Total Revenue: 1904138.04
- Total Cost: 1534983.04
- Total Profit: 369155

Order 2: 
- Item Type: Cereal
- Sales Channel: Offline
- Order Priority: H
- Order Date: 06-09-2013
- Order ID: 450563752
- S