In [2]:
import os
import bs4
from langchain_community.document_loaders import WebBaseLoader
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain import hub
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from dotenv import load_dotenv

# Database part

Load the document

In [9]:
loader = WebBaseLoader(
    web_paths=("https://medium.com/@kbdhunga/advanced-rag-multi-query-retriever-approach-ad8cd0ea0f5b",),
#     bs_kwargs=dict(
#         parse_only=bs4.SoupStrainer(
#             class_=("post-content", "post-title", "post-header")
#         )
#    ),
)
blog_docs = loader.load()

Index and store in vector DB

In [13]:
# Spliting 
from langchain.text_splitter import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=50,
    chunk_overlap=10)

# Make splits
splits = text_splitter.split_documents(blog_docs)

# Index
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
from langchain_community.vectorstores import Chroma
vectorstore = Chroma.from_documents(documents=splits,
                                    embedding=embeddings)

  from tqdm.autonotebook import tqdm, trange


Create a retriever

In [15]:
retriever = vectorstore.as_retriever()

# Decomposition

Decomposition prompting

In [18]:
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
Generate multiple search queries related to: {question} \n
Output (4 queries):"""
prompt_decomposition = ChatPromptTemplate.from_template(template)

from langchain_groq import ChatGroq
llm = ChatGroq(temperature=0,api_key=os.getenv('groq_api_key'))

# Chain
generate_queries_decomposition = ( prompt_decomposition | llm | StrOutputParser() | (lambda x: x.split("\n")))

# Run - Main Question
question = "How does the Multi Query method enhance the basic RAG technique?"

generate_queries_decomposition.invoke({"question":question})

['1. "What is the Multi Query method in the context of RAG technique?"',
 '2. "How does the Multi Query method differ from the basic RAG technique?"',
 '3. "What are the benefits of using the Multi Query method to enhance the RAG technique?"',
 '4. "What are some examples of how the Multi Query method has been used to improve the RAG technique?"']

In [19]:
prompt_rag = hub.pull("rlm/rag-prompt")

sub_questions = generate_queries_decomposition.invoke({"question":question})
rag_results = []
for sub_question in sub_questions:
  retrieved_docs = retriever.get_relevant_documents(sub_question)
  answer = (prompt_rag | llm | StrOutputParser()).invoke({"context": retrieved_docs,
                                                                "question": sub_question})
  rag_results.append(answer)

  retrieved_docs = retriever.get_relevant_documents(sub_question)


In [23]:
def format_qa_pairs(questions, answers):
    """Format Q and A pairs"""

    formatted_string = ""
    for i, (question, answer) in enumerate(zip(questions, answers), start=1):
        formatted_string += f"Question {i}: {question}\nAnswer {i}: {answer}\n\n"
    return formatted_string.strip()

context = format_qa_pairs(sub_questions, rag_results)
print(context)

Question 1: 1. "What is the Multi Query method in the context of RAG technique?"
Answer 1: The Multi Query method in the context of the RAG (Retrieval-Augmented Generation) technique refers to a process where multiple sets of documents are retrieved based on varied interpretations of the original query. This approach is particularly beneficial for vague queries, as it harnesses the power of diversity in the retrieved documents to produce the final output.

Question 2: 2. "How does the Multi Query method differ from the basic RAG technique?"
Answer 2: The Multi Query method in RAG (Retrieval-Augmented Generation) differs from the basic RAG technique in that it retrieves multiple sets of documents based on varied interpretations of the original query, as opposed to the basic method which retrieves a singular set of documents for an initial query. This makes the Multi Query method more suitable for vague or imprecisely formulated queries.

Question 3: 3. "What are the benefits of using th

In [24]:
template = """Here is a set of Q+A pairs:

{context}

Use these to synthesize an answer to the question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

final_rag_chain.invoke({"context":context,"question":question})

'The Multi Query method enhances the basic RAG (Retrieval-Augmented Generation) technique by introducing a process that retrieves multiple sets of documents based on varied interpretations of the original query. This approach differs from the basic RAG technique, which retrieves a singular set of documents for an initial query. The Multi Query method is particularly beneficial for handling vague or imprecise queries, as it increases the likelihood of finding the most relevant and accurate answers by reducing the dependence on a singular set of documents and mitigating the impact of query phrasing variations on the final outcome. This method has been used to improve the RAG technique by addressing vague or imprecise queries through casting a wider net with multiple queries, thereby increasing the likelihood of pinpointing the most relevant and accurate answers from a vast amount of documents.'