RAG Fusion with Local LLM.


Install the Python packages required for LangChain.

In [None]:
# Notebook shell command: installs the third-party packages listed below into the active environment.
! pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain unstructured sentence-transformers pytesseract unstructured_pytesseract tesseract

Set up the OS environment variables.
These enable tracing of LangChain invocations so that they are visible on smith.langchain.com.

In [None]:
import os

# LangSmith tracing configuration.
# The endpoint is not a secret, so it can live in the notebook.
os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'

# SECURITY: the API key is a credential and must never be hard-coded in a
# notebook or committed to version control. Export LANGCHAIN_API_KEY in the
# shell (or load it from a secrets manager) before starting the kernel.
# Any key that was previously committed in this file should be revoked.
if os.environ.get('LANGCHAIN_API_KEY'):
    os.environ['LANGCHAIN_TRACING_V2'] = 'true'
else:
    # Without a key, tracing requests cannot authenticate, so switch tracing
    # off instead of sending unauthenticated calls on every invocation.
    os.environ['LANGCHAIN_TRACING_V2'] = 'false'
    print("LANGCHAIN_API_KEY is not set; LangSmith tracing is disabled.")

Set up indexing for the vector store db

In [None]:
from langchain_community.document_loaders import DirectoryLoader, UnstructuredPDFLoader
from langchain_community.document_loaders import UnstructuredFileLoader
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings.sentence_transformer import (SentenceTransformerEmbeddings,)
# Splitters are configured up front: 500/100 for markdown, 600/50 for PDFs.
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=100)
pdf_splitter = CharacterTextSplitter(chunk_size=600, chunk_overlap=50)

# Load every markdown file found under ./documents/markdown.
loader = DirectoryLoader(
    "./documents/markdown",
    glob="**/*.md",
    show_progress=True,
    loader_cls=UnstructuredFileLoader,
)
documents = loader.load()

# Load every PDF found under ./documents/pdf (multithreaded loading enabled).
pdf_loader = DirectoryLoader(
    './documents/pdf',
    glob="**/*.pdf",
    show_progress=True,
    loader_cls=UnstructuredPDFLoader,
    use_multithreading=True,
)
pdf_docs = pdf_loader.load()

# Chunk each corpus with its own splitter; markdown chunks come first.
markdown_chunks = text_splitter.split_documents(documents)
pdf_chunks = pdf_splitter.split_documents(pdf_docs)
split_docs = markdown_chunks + pdf_chunks

# Embed the chunks and store them in a Chroma collection persisted in ./db.
# NOTE(review): the same persist directory is reused on every run; re-running
# this cell may add the same chunks again -- confirm and clear ./db if needed.
embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = Chroma.from_documents(
    documents=split_docs,
    embedding=embedding_function,
    persist_directory="./db",
)
retriever = vectorstore.as_retriever()

Query the vector store to retrieve the documents most similar to the query.

In [None]:
# Sanity check: run one similarity search directly against the vector store
# and print the metadata and content of every hit.
query = "What is the code A_100?"
print(vectorstore)

separator = "--------------------------------------------"
docs = vectorstore.similarity_search(query)
for hit in docs:
    print(f"Document source: {hit.metadata}")
    print(f"Document page_content: {hit.page_content}\n")
    print(separator)

Generate multiple search queries based on the user's input question.

In [None]:
from langchain_community.chat_models import ChatOllama
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
# The user question that the rest of the notebook answers.
question = "What are other custom claims to enforce additional access control rules in JWT?"

# Prompt asking the model to produce three related search queries, one per line.
template = (
    "You are a helpful assistant that generates multiple sub-questions related to an input question.\n"
    "The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation.\n"
    "Generate multiple search queries related to: {question}\n"
    "The three queries are (3 queries):"
)
prompt_rag_fusion = ChatPromptTemplate.from_template(template)
from langchain_core.output_parsers import StrOutputParser

def print_ouput(output):
    """Print *output* and return it unchanged.

    Lets a value be inspected while still being passed along to whatever
    consumes the return value.
    """
    print(output)
    return output

def _split_lines(llm_response):
    """Split the raw model text into one candidate query per line."""
    return llm_response.split("\n")


def _drop_blank_lines(queries):
    """Keep only the lines that contain non-whitespace characters."""
    return [item for item in queries if item.strip()]


# Chain: prompt -> local Ollama chat model -> plain string -> list of
# non-blank lines (each line is treated as one generated search query).
generate_queries = (
    prompt_rag_fusion
    | ChatOllama(model="ghyghoo8/minicpm-llama3-2_5:8b")
    | StrOutputParser()
    | _split_lines
    | _drop_blank_lines
)

# The prompt asks for exactly three queries, but the model's output is not
# guaranteed to contain exactly three non-blank lines. Retry a bounded number
# of times instead of looping forever when the model never complies.
MAX_QUERY_ATTEMPTS = 5

queries_output = []
for _attempt in range(MAX_QUERY_ATTEMPTS):
    queries_output = generate_queries.invoke({"question": question})
    if len(queries_output) == 3:
        break
else:
    # Only reached when no attempt produced exactly three queries.
    raise RuntimeError(
        f"The model did not return exactly 3 queries after {MAX_QUERY_ATTEMPTS} attempts; "
        f"last output: {queries_output!r}"
    )

print(queries_output)


Retrieve the documents related to the three queries provided by the LLM.
Then rank the retrieved documents.

In [None]:
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists into one list ordered by fused score.

    Each document contributes ``1 / (rank + k)`` for every list it appears in,
    where ``rank`` is its zero-based position in that list. Documents are
    identified by their serialized form (``dumps(doc)``), so the same document
    returned for several queries accumulates a single combined score.

    Args:
        results: One ranked list of documents per query.
        k: Smoothing constant added to the rank; larger values flatten the
            difference between high and low positions.

    Returns:
        A list of ``(document, score)`` tuples sorted by descending score.
        Documents with equal scores keep the order in which they were first
        seen.
    """
    fused_scores = {}
    # Remember the first object seen for each serialized key so the result can
    # be returned without deserializing every document again.
    first_seen = {}
    for docs in results:
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            first_seen.setdefault(doc_str, doc)
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

    ranked = sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    return [(first_seen[doc_str], score) for doc_str, score in ranked]

# Generate the queries, run the retriever once per query, then fuse the
# per-query result lists into a single scored ranking.
retrieval_chain = generate_queries | retriever.map() | reciprocal_rank_fusion
docs = retrieval_chain.invoke({"question": question})

# Each entry is a (document, score) pair.
print(f"Number of docs retrieved: {len(docs)}")
for document, score in docs:
    print(f"Document score: {score}")
    print(f"Document source: {document.metadata}")
    print("Document page content:")
    print(f"\t{document.page_content}")
    print("--------------------------------------------\n")

In [None]:
from operator import itemgetter

# RAG
# Final answer prompt: the retrieved context followed by the user question.
template = (
    "Answer the following question based on this context:\n"
    "\n"
    "{context}\n"
    "\n"
    "Question: {question}\n"
)

prompt = ChatPromptTemplate.from_template(template)

# Inputs for the answer prompt: "context" is whatever the retrieval chain
# returns for the question, "question" is passed through from the input dict.
rag_inputs = {
    "context": retrieval_chain,
    "question": itemgetter("question"),
}
answer_llm = ChatOllama(model="ghyghoo8/minicpm-llama3-2_5:8b")

# Full pipeline: gather inputs -> fill prompt -> local chat model -> string.
final_rag_chain = rag_inputs | prompt | answer_llm | StrOutputParser()
# The model can return an empty string. Retry a bounded number of times
# rather than looping forever if it never produces an answer.
MAX_ANSWER_ATTEMPTS = 5

response = ''
for _attempt in range(MAX_ANSWER_ATTEMPTS):
    response = final_rag_chain.invoke({"question": question})
    if response:
        break
else:
    # Only reached when every attempt came back empty.
    raise RuntimeError(
        f"The model returned an empty answer after {MAX_ANSWER_ATTEMPTS} attempts."
    )
print(response)