In [None]:
import os
import sys 
from dotenv import load_dotenv
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_community.vectorstores import Chroma
from langchain_huggingface.embeddings import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

sys.path.append(os.path.abspath("../"))
from src.prompt_templates import RAG_PROMPT_TEMPLATE

In [None]:
# Pull settings from a local .env file (when one is found) into the process
# environment before reading them below.
load_dotenv()

# API credentials; either value is None when the variable is not set.
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

# Location of the persisted Chroma store. An unset or empty variable falls
# back to the data directory one level above the current working directory.
CHROMA_DB_PATH = os.environ.get("CHROMA_DB_PATH")
if not CHROMA_DB_PATH:
    CHROMA_DB_PATH = os.path.abspath("../data/chroma_db")

In [None]:

# Embedding model used to vectorise queries.
# NOTE(review): presumably this must match the model that was used when the
# Chroma store was populated — confirm against the indexing code.
model_name = "intfloat/multilingual-e5-large-instruct"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Chat model that generates the answers, authenticated with the OpenAI key
# read from the environment.
llm = ChatOpenAI(model_name="gpt-4o", api_key=OPENAI_API_KEY)

## Considerations

There are several known approaches to building a RAG system for CSV files:
 
- Loading CSV into documents.  

In [None]:
# Open the Chroma store persisted under CHROMA_DB_PATH, using the embedding
# model configured above for query vectors.
vectorstore = Chroma(embedding_function=embeddings, persist_directory=CHROMA_DB_PATH)

# Retriever over the store with the library's default search settings.
retriever = vectorstore.as_retriever()

In [None]:
rag_prompt_template = RAG_PROMPT_TEMPLATE
# Conversation turns accumulated so far; passed as `chat_history` when the
# chain is invoked.
chat_history = []

# Prompt used only to rewrite a follow-up question into a standalone search
# query. It must not reuse the answering prompt: the rewrite step is invoked
# with just `input` and `chat_history`, so any other placeholder (such as the
# `{context}` the answering prompt needs) would be left unfilled there.
contextualize_q_system_prompt = (
    "Given a chat history and the latest user question which might reference "
    "context in the chat history, formulate a standalone question which can "
    "be understood without the chat history. Do NOT answer the question, just "
    "reformulate it if needed and otherwise return it as is."
)
contextualize_q_prompt = ChatPromptTemplate.from_messages([
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("user", "{input}"),
])

# Answering prompt: system instructions, prior turns, then the new question.
# NOTE(review): assumes RAG_PROMPT_TEMPLATE contains a `{context}` placeholder,
# which create_stuff_documents_chain requires — confirm in src/prompt_templates.
chat_prompt_template = ChatPromptTemplate.from_messages([
    ("system", rag_prompt_template),
    MessagesPlaceholder("chat_history"),
    ("user", "{input}"),
])

# Retriever that first condenses (history + question) into a standalone query
# whenever chat history is present, then searches the vector store with it.
history_aware_retriever = create_history_aware_retriever(
    llm, retriever, contextualize_q_prompt
)

# The original passed an undefined name `prompt` here; the answering prompt
# built above is the one intended for stuffing retrieved documents.
question_answer_chain = create_stuff_documents_chain(llm, chat_prompt_template)

# Use the history-aware retriever (not the plain one) so that follow-up
# questions are resolved against the conversation before retrieval.
rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)