#### Query Translation in Retrieval-Augmented Generation (RAG) pipeline refers to the process of transforming the initial query into various forms or sub-queries that can enhance the retrieval process.

In [None]:
# Query Decomposition: Multi-query, Step-back, RAG-Fusion
# Query Decomposition involves breaking down a complex query into simpler sub-queries. This can help in cases where a single query might be too broad or too complex for effective retrieval.

## Multi-Query

In [1]:
#import Packages
import os
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings.sentence_transformer import SentenceTransformerEmbeddings
from langchain_openai import ChatOpenAI
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
load_dotenv()
import warnings
warnings.filterwarnings('ignore')
import logging
import getpass

logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)


## initiate Vectordb here i have used persist vectordb which means i am loading the locally stored vectordb.
vectordb_path = "Vector_db"

if not os.environ.get("OPENAI_API_KEY"):
  os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter API key for OpenAI: ")


embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
vectordb = Chroma(persist_directory=vectordb_path,
                                       embedding_function=embeddings)
vectorstore_retriever = vectordb.as_retriever(
                search_kwargs={
                    "k": 1
                }
            )
llm = ChatOpenAI(temperature=0)

## Here we use MultiQueryRetriever from langcahin which will generate multiple queriesand retriever data accordingly.
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=vectordb.as_retriever(), llm=llm
)

question = "How to store the data into VectorDB?"

unique_docs = retriever_from_llm.invoke(question)
len(unique_docs)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. What are the steps involved in saving data to VectorDB?', '2. Can you explain the process of storing information in VectorDB?', '3. How can I input data into VectorDB for storage?']


0

### 🔍 Query Generation, Retrieval & Fusion Process

- **Query Generation:**  
  Generate multiple sub-queries from the user’s input to capture diverse perspectives and fully understand the user’s intent.

- **Sub-query Retrieval:**  
  Retrieve relevant information for each sub-query from large datasets and repositories, ensuring comprehensive and in-depth search results.

- **Reciprocal Rank Fusion (RRF):**  
  Merge the retrieved documents using *Reciprocal Rank Fusion (RRF)* to combine their ranks, prioritizing the most relevant and comprehensive results.


In [None]:
from langchain.load import dumps, loads
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI
from operator import itemgetter
from langchain_community.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings



# -----------------------------------------
# 🔹 STEP 1: Setup RAG-Fusion Query Generator
# -----------------------------------------
template = """You are a helpful assistant that generates multiple search queries based on a single input query.
Generate 4 different search queries related to: {question}.
Output only the queries, one per line:"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

llm_query_gen = ChatOpenAI(temperature=0)

generate_queries = (
    prompt_rag_fusion 
    | llm_query_gen
    | StrOutputParser()
    | (lambda x: [q.strip() for q in x.split("\n") if q.strip()])
)



# -----------------------------------------
# 🔹 STEP 2: Load Local Chroma Vector DB
# -----------------------------------------
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
persist_directory = "local_vector_db"
vectordb = Chroma(persist_directory=persist_directory, embedding_function=embeddings)
vectorstore_retriever = vectordb.as_retriever(search_kwargs={"k": 2})



# -----------------------------------------
# 🔹 STEP 3: Define Reciprocal Rank Fusion (RRF)
# -----------------------------------------
def reciprocal_rank_fusion(results: list[list], k=60):
    fused_scores = {}
    for docs in results:
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)
    reranked_results = [
        (loads(doc), score)
        for doc, score in sorted(fused_scores.items(), key=lambda x: x[1], reverse=True)
    ]
    return reranked_results



# -----------------------------------------
# 🔹 STEP 4: Build Retrieval Chain
# -----------------------------------------
retrieval_chain_rag_fusion = generate_queries | vectorstore_retriever.map() | reciprocal_rank_fusion



# -----------------------------------------
# 🔹 STEP 5: Run Query through RAG-Fusion
# -----------------------------------------
question = "What is LLM?"



# Step 5a: Generate multiple sub-queries
sub_queries = generate_queries.invoke({"question": question})
print("\n🧠 Generated Sub-Queries:")
for i, q in enumerate(sub_queries, 1):
    print(f"{i}. {q}")



# Step 5b: Retrieve documents for each query
retrieved_docs = []
for q in sub_queries:
    docs = vectorstore_retriever.invoke(q)
    retrieved_docs.append(docs)
    print(f"\n📘 Retrieved documents for query '{q}':")
    for d in docs:
        print("-", d.page_content[:200].replace("\n", " "), "...")  # show snippet



# Step 5c: Apply Reciprocal Rank Fusion
reranked_docs = reciprocal_rank_fusion(retrieved_docs)
print(f"\n🔁 Top {len(reranked_docs)} fused documents after RRF:")
for i, (doc, score) in enumerate(reranked_docs[:3], 1):  # limit to top 3
    print(f"{i}. Score={score:.4f} | Snippet: {doc.page_content[:150]}...")



# -----------------------------------------
# 🔹 STEP 6: Final Answer Generation
# -----------------------------------------
from langchain_core.output_parsers import StrOutputParser

llm_answer = ChatOpenAI(temperature=0)
template = """Answer the following question based on this context:

{context}

Question: {question}"""
prompt = ChatPromptTemplate.from_template(template)

final_rag_chain = (
    {"context": lambda _: " ".join([doc.page_content for doc, _ in reranked_docs[:3]]),
     "question": itemgetter("question")}
    | prompt
    | llm_answer
    | StrOutputParser()
)

final_answer = final_rag_chain.invoke({"question": question})
print("\n💬 Final Answer:\n", final_answer)



🧠 Generated Sub-Queries:
1. 1. What does LLM stand for?
2. 2. Benefits of pursuing an LLM degree
3. 3. Top universities offering LLM programs
4. 4. Career opportunities for LLM graduates

📘 Retrieved documents for query '1. What does LLM stand for?':
- 3. Applications  LLMs: Used for a wide range of NLP tasks, from text generation and summarization to translation and sentiment analysis.  Transformers: Employed not just in NLP but also in other areas ...
- 3. Applications  LLMs: Used for a wide range of NLP tasks, from text generation and summarization to translation and sentiment analysis.  Transformers: Employed not just in NLP but also in other areas ...

📘 Retrieved documents for query '2. Benefits of pursuing an LLM degree':
- 3. Applications  LLMs: Used for a wide range of NLP tasks, from text generation and summarization to translation and sentiment analysis.  Transformers: Employed not just in NLP but also in other areas ...
- 3. Applications  LLMs: Used for a wide range of NLP