https://medium.com/ai-agent-insider/developing-rag-systems-with-deepseek-r1-ollama-66a520bf0b88

https://medium.com/@himeltasrif/run-deepseek-r1-locally-build-a-custom-vector-database-ai-chatbot-with-ollama-faiss-291ec9fe6ecf

In [1]:
from typing import List
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_ollama.llms import OllamaLLM
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain.chains import RetrievalQA
import logging
import psutil
import os

In [2]:
class RAGPipeline:
    def __init__(self, model_name: str = "llama2:7b-chat-q4", max_memory_gb: float = 3.0):
        self.setup_logging()
        self.check_system_memory(max_memory_gb)
        
        # Load the language model (LLM)
        self.llm = OllamaLLM(model=model_name,
                            temperature=0.3,  # Lower creativity for concise responses
                            top_p=0.85,       # Adjust diversity slightly
                            max_tokens=150    # Limit response length
                            )  
        
        # Initialize embeddings using a lightweight model
        self.embeddings = HuggingFaceEmbeddings(
            model_name="sentence-transformers/all-mpnet-base-v2",
            model_kwargs={'device': 'cpu'}  # Use CPU for efficiency
        )
        
        # Define the prompt template
        self.prompt = ChatPromptTemplate.from_template("""
        Answer the question based on the following context. Be concise.
        Reason about the context to provide a well-thought-out answer.
        If you cannot find the answer in the context, use your general knowledge to provide an answer.
        
        I'm giving you a document that contains the information about my spendings in the different months: in particular the 
        amount of money spent at the supermarket and the money for electricity.
        
        Context: {context}
        Question: {question}
        Answer: """)
    
    def setup_logging(self):
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

    def check_system_memory(self, max_memory_gb: float):
        available_memory = psutil.virtual_memory().available / (1024 ** 3)
        self.logger.info(f"Available system memory: {available_memory:.1f} GB")
        if available_memory < max_memory_gb:
            self.logger.warning("Memory is below recommended threshold.")
            
    def load_and_split_documents(self, file_path: str) -> List[Document]:
        # Check if the file is a CSV or a TXT file and load accordingly
        if file_path.endswith('.csv'):
            loader = CSVLoader(file_path=file_path, csv_args={'delimiter': ';'})
        elif file_path.endswith('.txt'):
            loader = TextLoader(file_path=file_path)
        else:
            raise ValueError("Unsupported file type. Please provide a .csv or .txt file.")
        documents = loader.load()
        print(documents)
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=50,
            length_function=len,
            add_start_index=True,
        )
        splits = text_splitter.split_documents(documents)
        self.logger.info(f"Created {len(splits)} document chunks")
        return splits
    
    def create_vectorstore(self, documents: List[Document]) -> FAISS:
        batch_size = 32
        vectorstore = FAISS.from_documents(documents[:batch_size], self.embeddings)
        
        for i in range(batch_size, len(documents), batch_size):
            batch = documents[i:i + batch_size]
            vectorstore.add_documents(batch)
            self.logger.info(f"Processed batch {i//batch_size + 1}")
        
        # save locally the db 
        DB_FAISS_PATH = 'vectorstore/db_faiss'
        vectorstore.save_local(DB_FAISS_PATH)
        
        return vectorstore
    
    def setup_rag_chain(self, vectorstore: FAISS):
        retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 3, "fetch_k": 1})
        #retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"fetch_k": 1})
        
        def format_docs(docs):
            return "\n\n".join(doc.page_content for doc in docs)
        
        rag_chain = (
            {"context": retriever | format_docs, "question": RunnablePassthrough()}
            | self.prompt
            | self.llm
            | StrOutputParser()
        )
        return rag_chain  
    
    def query(self, chain, question: str) -> str:
        memory_usage = psutil.Process(os.getpid()).memory_info().rss / 1024 / 1024
        self.logger.info(f"Memory usage: {memory_usage:.1f} MB")
        return chain.invoke(question) 
    
    def retrieval_qa_chain(self, db):
        qa_chain = RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type="stuff",
            retriever=db.as_retriever(search_type="similarity", search_kwargs={'k': 1}),  # Retrieve the most relevant document
            return_source_documents=True,
            chain_type_kwargs={'prompt': self.prompt}
        )
        return qa_chain 

In [3]:
def main():
    rag = RAGPipeline(model_name="deepseek-r1:1.5b", max_memory_gb=3.0)
    
    #documents = rag.load_and_split_documents("trial.txt")
    documents = rag.load_and_split_documents("trial.csv")
    vectorstore = rag.create_vectorstore(documents)
    chain = rag.setup_rag_chain(vectorstore)
    
    #question = "What do you think about finance?"
    question = "In which month did I spend the most money?"
    response = rag.query(chain, question)
    print(f"Question: {question}\nAnswer: {response}")

In [4]:
if __name__ == "__main__":
    main()

INFO:__main__:Available system memory: 12.1 GB
  from .autonotebook import tqdm as notebook_tqdm
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
INFO:__main__:Created 12 document chunks


[Document(metadata={'source': 'trial.csv', 'row': 0}, page_content='\ufeffMESE: Gennaio\nTOT SPESE SUPERMERCATO: 120\nSPESA UTENZE DOMESTICHE: 50'), Document(metadata={'source': 'trial.csv', 'row': 1}, page_content='\ufeffMESE: Febbraio\nTOT SPESE SUPERMERCATO: 200\nSPESA UTENZE DOMESTICHE: 200'), Document(metadata={'source': 'trial.csv', 'row': 2}, page_content='\ufeffMESE: Marzo\nTOT SPESE SUPERMERCATO: 300\nSPESA UTENZE DOMESTICHE: 122'), Document(metadata={'source': 'trial.csv', 'row': 3}, page_content='\ufeffMESE: Aprile\nTOT SPESE SUPERMERCATO: 345\nSPESA UTENZE DOMESTICHE: 34'), Document(metadata={'source': 'trial.csv', 'row': 4}, page_content='\ufeffMESE: Maggio\nTOT SPESE SUPERMERCATO: 50\nSPESA UTENZE DOMESTICHE: 56'), Document(metadata={'source': 'trial.csv', 'row': 5}, page_content='\ufeffMESE: Giugno\nTOT SPESE SUPERMERCATO: 234\nSPESA UTENZE DOMESTICHE: 23'), Document(metadata={'source': 'trial.csv', 'row': 6}, page_content='\ufeffMESE: Luglio\nTOT SPESE SUPERMERCATO: 133

INFO:faiss.loader:Loading faiss with AVX2 support.
INFO:faiss.loader:Successfully loaded faiss with AVX2 support.
INFO:__main__:Memory usage: 1082.6 MB
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/generate "HTTP/1.1 200 OK"


Question: In which month did I spend the most money?
Answer: <think>
Okay, so I need to figure out in which month I spent the most money based on the given context. Let me start by reading through the information carefully.

First, there's a table with months and two columns: "MESE" (which translates to "Month") and "TOT SPESE SUPERMERCATO." The third column seems to be "SPESA UTENZE DOMESTICHE," which I think stands for "Domestic Expenses."

Looking at each month:

- In April, TOT SPESE is 345 and Domestic Expenses are 34.
- In November, TOT SPESE is 132 and Domestic Expenses are 342.
- In February, both TOT SPESE and Domestic Expenses are 200.

Now, I need to compare the total expenses for each month. April has a higher total of 345 compared to November's 132 and February's 200. So, April must be where the most money was spent.
</think>

April


In [5]:
def qa_bot():
    rag = RAGPipeline(model_name="deepseek-r1:1.5b", max_memory_gb=3.0)
    
    #documents = rag.load_and_split_documents("trial.txt")
    documents = rag.load_and_split_documents("trial.csv")
    vectorstore = rag.create_vectorstore(documents)
    qa = rag.retrieval_qa_chain(vectorstore)
    
    return qa

In [6]:
import chainlit as cl

@cl.on_chat_start
async def start():
    chain = qa_bot()
    msg = cl.Message(content="Starting the bot...")
    await msg.send()
    msg.content = "Hi, Welcome to the DeepSeek ChatBot. How can I assist you today?"
    await msg.update()
    cl.user_session.set("chain", chain)

In [7]:
@cl.on_message
async def main(message: cl.Message):
    chain = cl.user_session.get("chain")
    if chain is None:
        await cl.Message(content="Error: Chain not initialized.").send()
        return

    cb = cl.AsyncLangchainCallbackHandler(
        stream_final_answer=True,
        answer_prefix_tokens=["FINAL", "ANSWER"]
    )

    response = await chain.acall({'query': message.content}, callbacks=[cb])
    answer = response["result"]
    sources = response.get("source_documents", [])

    if sources:
        answer += "\nSources:" + "\n".join([str(doc.metadata['source']) for doc in sources])
    else:
        answer += "\nNo sources found"

    await cl.Message(content=answer).send()