In [1]:
"""
-- Created by: Ashok Kumar Pant
-- Email: asokpant@gmail.com
-- Created on: 18/05/2025
"""
import os

import sys

sys.path.append('../..')

from langchain.chains import RetrievalQA
from langchain.docstore.document import Document
from langchain_anthropic import ChatAnthropic
from langchain_community.llms import LlamaCpp, HuggingFaceHub
from langchain_community.vectorstores import FAISS
from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_openai import ChatOpenAI
from aicl434llms.utils import hashutil

from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnableLambda, RunnablePassthrough


# Dependencies:
# pip install langchain faiss-cpu llama-cpp-python
# pip install -U langchain-community langchain_google_genai
# pip install -U langchain_huggingface langchain-openai langchain-anthropic


class LLMFactory:
    """Factory that builds a LangChain LLM / chat-model client for a named backend."""

    @staticmethod
    def load_llm(llm_type: str = "llamacpp", model_name: str = None, api_key: str = None, ):
        """Instantiate the client selected by ``llm_type``.

        Args:
            llm_type: One of "localapi", "openai", "anthropic", "gemini",
                "huggingfacehub" or "llamacpp".
            model_name: Model identifier handed to the backend: a model name for
                the hosted APIs, a repo id for "huggingfacehub", a model file
                path for "llamacpp". Not used by "localapi", which always sends
                a fixed placeholder name, so it may be omitted for that backend.
            api_key: Credential forwarded to the hosted backends; not used by
                "localapi" or "llamacpp".

        Returns:
            The constructed client object for the chosen backend.

        Raises:
            ValueError: If ``model_name`` is missing for a backend that needs
                it, or if ``llm_type`` is not one of the supported values.
        """
        # "localapi" never reads model_name, so only the other backends require it.
        if model_name is None and llm_type != "localapi":
            raise ValueError("Model name must be provided.")
        if llm_type == "localapi":
            return ChatOpenAI(
                model_name="local-model",  # name doesn't matter
                openai_api_base="http://localhost:1234/v1",  # LM Studio endpoint
                openai_api_key="lm-studio",  # dummy key
            )
        elif llm_type == "openai":
            return ChatOpenAI(model=model_name, api_key=api_key)
        elif llm_type == "anthropic":
            return ChatAnthropic(model=model_name, api_key=api_key)
        elif llm_type == "gemini":
            return ChatGoogleGenerativeAI(model=model_name, google_api_key=api_key)
        elif llm_type == "huggingfacehub":
            return HuggingFaceHub(repo_id=model_name, huggingfacehub_api_token=api_key)
        elif llm_type == "llamacpp":
            return LlamaCpp(
                model_path=model_name,
                n_gpu_layers=-1,
                max_tokens=500,
                n_ctx=2048,
                seed=42,
                verbose=False
            )
        else:
            raise ValueError(f"Unsupported LLM type: {llm_type}")


class RAGIndexer:
    """Owns the embedding model and the on-disk FAISS index used for retrieval."""

    def __init__(self, embedding_model_name: str = "thenlper/gte-small", index_path: str = "kullm.faiss.db"):
        """Load the embedding model, then load the index at ``index_path`` or create an empty one.

        Args:
            embedding_model_name: Model name passed to ``HuggingFaceEmbeddings``.
            index_path: Location the FAISS index is loaded from and saved to.
        """
        self.embedding_model_name = embedding_model_name
        self.index_path = index_path
        self.embedding_model = self._load_embedding_model()
        self.vector_db = self._load_or_create_index()

    def _load_embedding_model(self):
        """Build and return the ``HuggingFaceEmbeddings`` instance, logging before and after."""
        print("[Indexer] Loading embedding model...")
        embedding_model = HuggingFaceEmbeddings(model_name=self.embedding_model_name)
        # Log completion before returning; a print placed after the return never runs.
        print("[Indexer] Loaded embedding model.")
        return embedding_model

    def _load_or_create_index(self):
        """Return the FAISS store saved at ``index_path``, or a fresh empty one if the path is absent."""
        if os.path.exists(self.index_path):
            print(f"[Indexer] Loading FAISS index from '{self.index_path}'")
            # NOTE(review): allow_dangerous_deserialization=True opts in to pickle-based
            # loading; only point index_path at index files you created yourself.
            return FAISS.load_local(self.index_path, self.embedding_model, allow_dangerous_deserialization=True)
        else:
            print("[Indexer] Creating a new FAISS index...")
            # Bootstrap the store from a placeholder text, then delete it so the index starts empty.
            db = FAISS.from_texts(["__dummy__"], self.embedding_model, ids=["1"], )
            db.delete(["1"])
            return db

    def add_documents(self, texts: list):
        """Index the texts that are not present yet and persist the index.

        Each text's id is its ``hashutil.generate_hash`` value. A text is skipped
        when that id is already in the index or appeared earlier in the same
        ``texts`` batch. Nothing is written to disk when every text is skipped.

        Args:
            texts: Raw document strings to index.
        """
        print(f"[Indexer] Adding {len(texts)} document(s) to the index...")
        known_ids = set(self.vector_db.index_to_docstore_id.values())
        docs = []
        ids = []
        for text in texts:
            doc_id = hashutil.generate_hash(text)
            if doc_id in known_ids:
                print(f"[Indexer] Document with ID '{doc_id}' already exists. Skipping...")
                continue
            # Remember ids queued in this batch too, so a repeated text is not submitted twice.
            known_ids.add(doc_id)
            ids.append(doc_id)
            docs.append(Document(page_content=text, metadata={"id": doc_id}))
        if not docs:
            print("[Indexer] No new documents to add.")
            return
        self.vector_db.add_documents(documents=docs, ids=ids)
        self.vector_db.save_local(self.index_path)

    def get_retriever(self):
        """Return a retriever view over the current vector store."""
        return self.vector_db.as_retriever()


class RAGModel:
    """Retrieval-augmented question answering: an LLM from ``LLMFactory`` plus a ``RAGIndexer`` retriever."""

    def __init__(self, llm_type: str, llm_model_name: str, embedding_model_name: str,
                 index_path: str = "kullm.faiss.db", api_key: str = None):
        """Load the LLM, the index and assemble the RetrievalQA chain.

        Args:
            llm_type: Backend name forwarded to ``LLMFactory.load_llm``.
            llm_model_name: Model name forwarded to ``LLMFactory.load_llm``.
            embedding_model_name: Embedding model name forwarded to ``RAGIndexer``.
            index_path: FAISS index location forwarded to ``RAGIndexer``.
            api_key: Credential forwarded to ``LLMFactory.load_llm``.
        """
        self.llm_model_name = llm_model_name
        self.embedding_model_name = embedding_model_name
        self.index_path = index_path

        self.llm = LLMFactory.load_llm(llm_type, llm_model_name, api_key)
        self.prompt_template = self._default_prompt_template()
        self.indexer = RAGIndexer(self.embedding_model_name, self.index_path)
        self.rag_chain = self._build_rag_chain()
        print("[RAGModel] Model loaded.")

    def _default_prompt_template(self):
        """Return the prompt that injects ``{context}`` and ``{question}`` for the LLM."""
        prompt_template = PromptTemplate(
            input_variables=["context", "question"],
            template="""
Use the following context to answer the question.
    
Context:
{context}
Provide a concise answer to the following question using the relevant information provided above:

Question:
{question}
"""
        )
        return prompt_template

    def _build_rag_chain(self):
        """Create the 'stuff' RetrievalQA chain over the indexer's retriever and the default prompt."""
        print("[RAGModel] Building RAG chain...")
        return RetrievalQA.from_chain_type(
            llm=self.llm,
            chain_type='stuff',
            retriever=self.indexer.get_retriever(),
            chain_type_kwargs={"prompt": self.prompt_template},
            verbose=False
        )

    def add_documents(self, texts: list):
        """Index ``texts`` and point the QA chain at a retriever built after the update."""
        self.indexer.add_documents(texts)
        self.rag_chain.retriever = self.indexer.get_retriever()

    def ask(self, query: str, debug=False) -> str:
        """Answer ``query`` from the indexed documents.

        Args:
            query: The user question.
            debug: When true, print the retrieved documents and the final
                prompt, and run an explicit retriever -> prompt -> LLM pipeline
                instead of the prebuilt RetrievalQA chain.

        Returns:
            The answer text. When the chain returns a mapping, its "result"
            entry is used, falling back to "[No answer generated]".
        """
        print(f"[RAGModel] Query: {query}")

        if debug:
            response = self._ask_with_debug(query)
        else:
            response = self.rag_chain.invoke(query)
        return response if isinstance(response, str) else response.get("result", "[No answer generated]")

    def _ask_with_debug(self, query: str) -> str:
        """Run the query through a verbose pipeline that prints retrieved docs and the final prompt."""
        retriever = self.indexer.get_retriever()

        # Preview what the retriever finds. `invoke` replaces the deprecated
        # `get_relevant_documents`; the extra `k` keyword is forwarded unchanged.
        retrieved_docs = retriever.invoke(query, k=3)
        print("Retrieved documents:")
        for i, doc in enumerate(retrieved_docs):
            print(f"Doc {i + 1} content:\n{doc.page_content}\n---")

        def format_docs(docs):
            # Collapse the retrieved documents into the single context string for the prompt.
            print("[Formatter] Raw docs passed to formatter:")
            print(docs)
            return "\n\n".join(doc.page_content for doc in docs)

        def debug_prompt_input(input_dict):
            # Pass-through step: print the fully rendered prompt, then hand the inputs on unchanged.
            context = input_dict["context"]
            question = input_dict["question"]
            final_input = self.prompt_template.format(context=context, question=question)
            print("\n[Final prompt to LLM]:\n", final_input)
            return {"context": context, "question": question}

        # NOTE: the chain performs its own retrieval with the retriever's default
        # settings, separate from the preview lookup above.
        context_chain = retriever | format_docs
        qa_chain = (
                {
                    "context": context_chain,
                    "question": RunnablePassthrough(),
                }
                | RunnableLambda(debug_prompt_input)
                | self.prompt_template
                | self.llm
                | StrOutputParser()
        )
        return qa_chain.invoke(query)

In [2]:
# Load RAG model
# Supported backends: openai, anthropic, gemini, huggingfacehub, llamacpp, localapi
llm_type = "localapi"
# The "localapi" backend sends its own fixed model name, so this value is only a
# placeholder there. For other backends set LLM_MODEL_NAME (a model name, or a
# GGUF file path for llamacpp) rather than hard-coding a machine-specific path.
llm_model_name = os.environ.get("LLM_MODEL_NAME", "local-model")
embedding_model_name = "thenlper/gte-small"

rag_system = RAGModel(llm_type=llm_type, llm_model_name=llm_model_name, embedding_model_name=embedding_model_name)

[Indexer] Loading embedding model...
[Indexer] Loading FAISS index from 'kullm.faiss.db'
[RAGModel] Building RAG chain...
[RAGModel] Model loaded.


In [3]:
# Facts that make up the demo knowledge base
knowledge_base_texts = [
    "The Income generated by the film in 2014 was over $677 million worldwide.",
    "This made it the tenth-highest grossing film of that year.",
    "With subsequent releases, total earnings reached approximately $773 million.",
    "The release format transitioned from film stock to digital projectors in the US.",
]

# Index the facts, then query the pipeline
rag_system.add_documents(knowledge_base_texts)

question = "Income generated"
answer = rag_system.ask(question)
print("\n[ANSWER] " + str(answer))

[Indexer] Adding 4 document(s) to the index...
[Indexer] Document with ID 'dadb231909eb817a6494188260f986a413edb4a8e1a1346a071ee0f22a775ea3' already exists. Skipping...
[Indexer] Document with ID '0b7699a32251e9efc144375390cc1733b31c8149127de3b4e1f000c4bc5ab929' already exists. Skipping...
[Indexer] Document with ID 'fd126039ff097c6af3a98509da1eb10f52394dd21f2492f60e85ab46fc8b116e' already exists. Skipping...
[Indexer] Document with ID '118088041bc3f81844dcf05c0a0b7d4be92fe8d40f5923ce694076a9c8fd9487' already exists. Skipping...
[Indexer] No new documents to add.
[RAGModel] Query: Income generated

[ANSWER] Response:
The Income generated by the 2014 release was over $677 million worldwide, with subsequent releases increasing earnings to approximately $773 million. The film was released in the US, resulting in a total income of $773 million for the year.


In [4]:
# Add new documents to the index
new_documents = [
    "The film was released in 2014.",
    "It was a box office success."
]
rag_system.add_documents(new_documents)
# add_documents() skips texts that are already indexed and logs what it did, so
# this message must not claim that anything was actually added.
print("\n[INFO] Index update finished (already-indexed documents are skipped).")

# Ask another question
question = "What year was the film released?"
answer = rag_system.ask(question)
print("\n[ANSWER]", answer)

[Indexer] Adding 2 document(s) to the index...
[Indexer] Document with ID '53ca7ebc4f75d078ecc5aaece056d3ccd4d5fd325ecd08f992062e6f62984da8' already exists. Skipping...
[Indexer] Document with ID 'f30aa22959631fb30ff5f94397b767cb87dc3d2436e95d5c5b0dd05b9d791c7c' already exists. Skipping...
[Indexer] No new documents to add.

[INFO] New documents added to the index.
[RAGModel] Query: What year was the film released?

[ANSWER] Answer: The film was released in 2014, which made it the fourth-highest box office gross of that year.


In [5]:
# Off-topic query: shows how the pipeline responds when the indexed context is unrelated
question = "tell me a joke"
answer = rag_system.ask(question)
print("\n[ANSWER] " + str(answer))

[RAGModel] Query: tell me a joke

[ANSWER] Telling a joke is easy! A joke is an entertaining story told for fun and amusement. There are many jokes that are known to all, but they may not be among the best. Some of these jokes may be difficult or complex, while others could be considered childish. In this context, it can be described as a lighthearted or humorous event where laughter is exhilarating and contagious. It's a fun way to break the ice and keep things moving along! I hope this helps clarify the situation that has been presented above. Please let me know if you have any further questions or concerns. Thank you for your prompt response. If there are any other contexts that require an instruction, I can provide more details about the film and its release format. I'd be happy to give further insights on that specific matter.
