# In [None]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_community.document_loaders import CSVLoader
from langchain_community.embeddings.fastembed import FastEmbedEmbeddings
from langchain_community.vectorstores import Chroma

class ChatCSVDoc:
    """Answer questions about a CSV file with a retrieval-augmented chain.

    Rows are loaded with ``CSVLoader``, embedded with FastEmbed, stored in a
    Chroma collection persisted under ``PERSIST_DIRECTORY`` and queried
    through a ``ChatOllama`` model.

    Typical use: call :meth:`ingest` once for a file, then :meth:`ask` as
    often as needed. :meth:`ask` falls back to :meth:`load` when no chain has
    been built yet in this process.
    """

    # Directory used both when writing (ingest) and reopening (load) the store.
    PERSIST_DIRECTORY = "./chroma_db"

    # Filled in by ingest() / load(); None means "not built yet".
    vector_store = None
    retriever = None
    chain = None

    def __init__(self):
        """Create the chat model, embedder, splitter and prompt template."""
        self.model = ChatOllama(model="llama3:8b")
        # "llama3" is an Ollama model name, not a FastEmbed one; FastEmbed
        # only serves models from its own catalogue, so use its default.
        self.embedding = FastEmbedEmbeddings(model_name="BAAI/bge-small-en-v1.5")
        self.text_splitter = CharacterTextSplitter(chunk_size=1024, chunk_overlap=100)
        self.prompt = ChatPromptTemplate.from_messages([
            ("system", """You are an assistant for question-answering tasks. Use only the following context to answer the question. If you don't know the answer, just say that you don't know.
             CONTEXT:{context}"""),
            ("human", "{input}"),
        ])

    def _build_chain(self) -> None:
        """Wire ``self.vector_store`` into the retriever and the QA chain."""
        self.retriever = self.vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={
                "k": 3,
                "score_threshold": 0.5,
            },
        )

        document_chain = create_stuff_documents_chain(self.model, self.prompt)
        self.chain = create_retrieval_chain(self.retriever, document_chain)

    def ingest(self, file_path: str) -> None:
        """Load a CSV file, embed its chunks and persist them in Chroma.

        The chain is (re)built on the new store so that a chain created
        earlier never keeps pointing at a stale store object.

        Args:
            file_path: Path of the CSV file to index.
        """
        docs = CSVLoader(file_path).load()
        chunks = self.text_splitter.split_documents(docs)

        self.vector_store = Chroma.from_documents(
            documents=chunks,
            embedding=self.embedding,
            persist_directory=self.PERSIST_DIRECTORY,
        )
        self._build_chain()

    def load(self) -> None:
        """Reopen the persisted Chroma store and build the chain on it."""
        self.vector_store = Chroma(
            persist_directory=self.PERSIST_DIRECTORY,
            embedding_function=self.embedding,
        )
        self._build_chain()

    def ask(self, query: str) -> str:
        """Run a question through the chain and print the answer and sources.

        Args:
            query: The question to answer.

        Returns:
            The answer text, which is also printed.
        """
        if self.chain is None:
            self.load()

        result = self.chain.invoke({"input": query})
        answer = result["answer"]

        print(answer)
        for doc in result["context"]:
            # Tolerate documents stored without a "source" entry.
            print("Source: ", doc.metadata.get("source", "unknown"))

        return answer

def build(file_path="./transcript/fine_food_reviews.csv"):
    """Index a CSV file and answer questions about it interactively.

    Blank input is ignored. The loop ends on ``exit`` / ``quit`` or when
    input is closed or interrupted (Ctrl-D / Ctrl-C at the prompt).

    NOTE(review): ingestion runs on every call; if the vector store is
    persisted between runs this presumably adds the same rows again - confirm.

    Args:
        file_path: CSV file to index before the question loop starts.
    """
    w = ChatCSVDoc()
    w.ingest(file_path)

    while True:
        try:
            query = input("Ask a question: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Finish the prompt line so the shell/cell output stays tidy.
            print()
            break

        if not query:
            continue
        if query.lower() in {"exit", "quit"}:
            break

        w.ask(query)

# Start the interactive session as soon as this cell/script is executed.
build()