In [None]:
import os
os.environ["GROQ_API_KEY"] = "groq_api_key"

In [2]:
import pandas as pd
from langchain.schema import Document

df = pd.read_csv('data.csv')


docs = []
for _, row in df.iterrows():
    metadata = {
        "Region": row["Region"],
        "Country": row["Country"],
        "Item Type": row["Item Type"],
        "Sales Channel": row["Sales Channel"],
        "Order Priority": row["Order Priority"],
        "Order Date": row["Order Date"],
        "Order ID": row["Order ID"],
        "Ship Date": row["Ship Date"],
        "Units Sold": row["Units Sold"],
        "Unit Price": row["Unit Price"],
        "Unit Cost": row["Unit Cost"],
        "Total Revenue": row["Total Revenue"],
        "Total Cost": row["Total Cost"],
        "Total Profit": row["Total Profit"]
    }
    
    # Convert row into a document
    doc_text = f"Order {row['Order ID']} in {row['Country']} ({row['Region']}) sold {row['Units Sold']} units of {row['Item Type']} at a price of {row['Unit Price']} per unit. Total revenue was {row['Total Revenue']} with a profit of {row['Total Profit']}."
    
    docs.append(Document(page_content=doc_text, metadata=metadata))

print(docs)


[Document(metadata={'Region': 'Australia and Oceania', 'Country': 'Tuvalu', 'Item Type': 'Baby Food', 'Sales Channel': 'Offline', 'Order Priority': 'H', 'Order Date': '5/28/2010', 'Order ID': 669165933, 'Ship Date': '6/27/2010', 'Units Sold': 9925, 'Unit Price': 255.28, 'Unit Cost': 159.42, 'Total Revenue': 2533654.0, 'Total Cost': 1582243.5, 'Total Profit': 951410.5}, page_content='Order 669165933 in Tuvalu (Australia and Oceania) sold 9925 units of Baby Food at a price of 255.28 per unit. Total revenue was 2533654.0 with a profit of 951410.5.'), Document(metadata={'Region': 'Central America and the Caribbean', 'Country': 'Grenada', 'Item Type': 'Cereal', 'Sales Channel': 'Online', 'Order Priority': 'C', 'Order Date': '8/22/2012', 'Order ID': 963881480, 'Ship Date': '9/15/2012', 'Units Sold': 2804, 'Unit Price': 205.7, 'Unit Cost': 117.11, 'Total Revenue': 576782.8, 'Total Cost': 328376.44, 'Total Profit': 248406.36}, page_content='Order 963881480 in Grenada (Central America and the C

In [3]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

# Initialize the embedding model
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


  from .autonotebook import tqdm as notebook_tqdm


In [25]:
from langchain.vectorstores import FAISS

# Embed the documents and create the vector store
vector_store = FAISS.from_documents(docs, embeddings)


In [26]:
from langchain_groq import ChatGroq

# Initialize the language model
llm = ChatGroq(model="gemma2-9b-it")


In [27]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Custom system instructions
custom_prompt = PromptTemplate.from_template(
    """You are a financial AI assistant that answers questions based on the provided data.
    - Always return numerical values in a structured format.
    - If the data is unavailable, say 'Data not found' instead of guessing.
    - Keep responses concise.

    Context:
    {context}

    Question:
    {question}
    
    Answer:
    """
)

# Use it in the chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm, 
    retriever=vector_store.as_retriever(), 
    chain_type_kwargs={"prompt": custom_prompt}
)


In [28]:
# Example query
query = "what was the profit of Australia and Oceania"
response = qa_chain.run(query)
print(response)


```json
{
  "profit": 626480.12
}
``` 

