# WS24 - Intelligente Informationssysteme

## Block 3: Retrieval Augmented Generation

**Part 4: Advanced Retrieval - Question Translation**

1. Multi Query
2. RAG-Fusion
3. Decomposition

In [None]:
## FIRST: Initialize the VectorDB and LLM
from langchain_ollama import OllamaEmbeddings
from langchain_ollama import ChatOllama
from langchain_chroma import Chroma
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Embedding model (served through Ollama) that Chroma uses to embed queries.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
# Open the "lils_blogs" collection persisted in the local "vector_store" directory.
# NOTE(review): presumably this collection was filled in an earlier part with the
# same embedding model - confirm before running.
vectorstore = Chroma(persist_directory="vector_store", collection_name="lils_blogs", embedding_function=embeddings)
# Similarity-search retriever over the collection; the commented-out tail shows
# how search_kwargs could be passed (e.g. k=2).
retriever = vectorstore.as_retriever(search_type="similarity") #, search_kwargs={"k": 2})

# Chat model (llama3.2 via Ollama) configured with temperature 0.
llm = ChatOllama(model="llama3.2:latest", temperature=0)

## 1. Question Translation: Multi Query

Multi Query: Different perspectives on the same input question

![Multi Query](./media/LangChain_Multi_Query.png "Multi Query")


see: 
- https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb
- https://python.langchain.com/docs/how_to/MultiQueryRetriever/

In [None]:
### Prompt

# Instruction that asks the model for five rephrasings of the user question,
# separated by newlines; {question} is the only template variable.
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newline. Original question: {question}"""

# Wrap the raw string in a chat prompt so it can be piped into a chain.
prompt_template = ChatPromptTemplate.from_template(template)


In [None]:
# LineListOutputParser splits the LLM result into a list of queries (one per line)

from typing import List
from langchain_core.output_parsers import BaseOutputParser

class LineListOutputParser(BaseOutputParser[List[str]]):
    """Output parser that turns raw LLM text into a list of non-blank lines."""

    def parse(self, text: str) -> List[str]:
        """Split *text* into lines and drop the blank ones.

        Args:
            text: Raw model output, expected to hold one query per line.

        Returns:
            The lines of ``text`` with surrounding whitespace removed; lines
            that are empty or contain only whitespace are dropped.
        """
        # Strip every line so whitespace-only lines (and a stray "\r" left by
        # "\r\n" line endings) do not end up as bogus queries.
        stripped = (line.strip() for line in text.strip().split("\n"))
        return [line for line in stripped if line]

output_parser = LineListOutputParser()

In [None]:
# Query-generation chain: fill the prompt, call the chat model, then split the
# reply into a list of lines with the custom parser.
generate_queries = (
    prompt_template 
    | llm
    | output_parser
)

In [None]:
question = "What is Task Decomposition?"

# The question string is passed directly as the chain input.
multi_queries = generate_queries.invoke(question)
# Bare expression: shows the generated queries as the notebook cell output.
multi_queries

In [None]:
######### Retrieve doc for each query and aggregate 
import langchain
from langchain.load import dumps, loads

def get_unique_union(documents: list[list]):
    """Return the de-duplicated union of several retrieval results.

    Args:
        documents: One list of retrieved documents per query.

    Returns:
        A flat list containing every distinct document exactly once, ordered
        by first appearance across the input lists.
    """
    # Serialize each document so it can serve as a hashable identity key.
    serialized = (dumps(doc) for sublist in documents for doc in sublist)
    # dict.fromkeys de-duplicates while keeping first-seen order; a plain
    # set() would hand the documents back in an arbitrary order.
    return [loads(doc) for doc in dict.fromkeys(serialized)]

# One retrieval per generated query; the result is a list of document lists.
multi_docs = [retriever.invoke(query) for query in multi_queries]
print(len(multi_docs))

# Collapse the per-query results into one list without duplicates.
new_docs = get_unique_union(multi_docs)
print(len(new_docs))

In [None]:
########## Final RAG Generation ##########
from operator import itemgetter
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

# Prompt for the answer step: the retrieved context followed by the question.
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# Concatenate the text of all documents in new_docs, separated by blank lines.
context = "\n\n".join([doc.page_content for doc in new_docs])

# NOTE(review): `runnable` is not referenced anywhere else in the visible code -
# confirm whether it is still needed.
runnable = RunnablePassthrough(lambda x: str(x))

# Both keys are taken unchanged from the input dict, then prompt -> LLM -> plain string.
final_rag_chain = (
    {"context": itemgetter("context"), 
     "question": itemgetter("question")} 
    | prompt
    | llm
    | StrOutputParser()
)

# Stream the answer and print each chunk as it arrives, without extra newlines.
for answer in  final_rag_chain.stream({"context":context, "question":question}):
    print(answer, end="")

In [None]:
###### Do the same with MultiQueryRetriever #######
from typing import List

from langchain_core.output_parsers import BaseOutputParser
from langchain_core.prompts import PromptTemplate

from langchain.retrievers.multi_query import MultiQueryRetriever

class LineListOutputParser(BaseOutputParser[List[str]]):
    """Output parser that turns raw LLM text into a list of non-blank lines."""

    def parse(self, text: str) -> List[str]:
        """Split *text* into lines and drop the blank ones.

        Args:
            text: Raw model output, expected to hold one query per line.

        Returns:
            The lines of ``text`` with surrounding whitespace removed; lines
            that are empty or contain only whitespace are dropped.
        """
        # Strip every line so whitespace-only lines (and a stray "\r" left by
        # "\r\n" line endings) do not end up as bogus queries.
        stripped = (line.strip() for line in text.strip().split("\n"))
        return [line for line in stripped if line]

output_parser = LineListOutputParser()

# Multi-query instruction as a plain PromptTemplate with the single input
# variable "question".
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five 
    different versions of the given user question to retrieve relevant documents from a vector 
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search. 
    Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

llm = ChatOllama(model="llama3.2:latest", temperature=0)

# Chain: prompt -> LLM -> list of lines
llm_chain = QUERY_PROMPT | llm | output_parser

# The retriever is given the query-generation chain and a base retriever.
# NOTE(review): presumably it runs each generated query and returns the
# de-duplicated union of the hits - confirm in the LangChain documentation.
multi_query_retriever = MultiQueryRetriever(
    retriever=vectorstore.as_retriever(), llm_chain=llm_chain, parser_key="lines"
)  # "lines" is the key (attribute name) of the parsed output

question = "What is Task Decomposition?"

# Results

new_docs = multi_query_retriever.invoke(question)
# Bare expression: shows the number of retrieved documents as the cell output.
len(new_docs)

# Use the new docs and concatenate them to one context for final RAG Generation or use RAG Fusion

## 2. RAG Fusion

RAG-Fusion bridges the gap between what users explicitly ask and what they intend to ask.

![RAG Fusion](./media/LangChain_RAG_Fusion.png "RAG Fusion")


see: 
- https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb
- https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1

In [None]:
##### Reciprocal Rank Fusion (RRF) for RAG-Fusion
from langchain.load import dumps, loads

def reciprocal_rank_fusion(results: list[list], k=60):
    """Fuse several ranked document lists with Reciprocal Rank Fusion (RRF).

    Every occurrence of a document at zero-based position ``rank`` in one of
    the lists adds ``1 / (rank + k)`` to that document's fused score.

    Args:
        results: Ranked document lists, one list per query.
        k: Constant added to the rank in the RRF formula.

    Returns:
        A list of ``(document, fused_score)`` tuples sorted by fused score,
        highest first.
    """
    # Fused score per unique document, keyed by the document's serialized
    # form (assumes documents can be serialized by dumps).
    fused_scores = {}

    for docs in results:
        # The position inside each list is the document's rank for that list.
        for rank, doc in enumerate(docs):
            doc_str = dumps(doc)
            fused_scores[doc_str] = fused_scores.get(doc_str, 0) + 1 / (rank + k)

    # Highest fused score first; sorted() is stable, so documents with equal
    # scores keep their first-seen order.
    ranked = sorted(fused_scores.items(), key=lambda item: item[1], reverse=True)
    return [(loads(doc_str), score) for doc_str, score in ranked]
print(len(new_docs))
# NOTE(review): only one ranked list is passed here, so no fusion across
# several rankings takes place; to fuse per-query rankings, pass one ranked
# list per query - confirm the intent.
rrf_docs = reciprocal_rank_fusion([new_docs]) # expects a list of lists
print(len(rrf_docs))

In [None]:
# Keep only the documents of the n best-scored (document, score) pairs.
n = 5
new_docs = [doc for doc, _score in rrf_docs[:n]]
# Use the new docs and concatenate them to one context for final RAG Generation!

## 3. Decomposition

Decompose the question into subqueries and answer recursively or individually:

![Decomposition: Answer recursively](./media/LangChain_Decomposition1.png "Decomposition")
![Decomposition: Answer individually](./media/LangChain_Decomposition2.png "Decomposition")

see: 
- https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_5_to_9.ipynb

In [None]:
# First decompose the question using the llm.
    
from langchain.prompts import ChatPromptTemplate

# Decomposition Prompt
#template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
#The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n
#Generate multiple search queries related to: {question}.\n
#Output (3 queries):"""

# Decomposition prompt: asks the model for three sub-questions that can each
# be answered on their own; {question} is the only template variable.
template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n
The goal is to break down the input into a set of sub-problems / sub-questions that can be answered in isolation. \n
Generate multiple search queries related to: {question} Return only the list of search queries.\n
Output (3 queries):"""

prompt_decomposition = ChatPromptTemplate.from_template(template)

In [None]:
from langchain_ollama import ChatOllama
from langchain_core.output_parsers import StrOutputParser

# LLM
llm = ChatOllama(model="llama3.2:latest", temperature=0)

# Chain: prompt -> LLM -> plain string -> list of sub-questions.
# Blank lines in the model reply are dropped so that no empty sub-question is
# later sent to the retriever or the LLM.
generate_queries_decomposition = (
    prompt_decomposition
    | llm
    | StrOutputParser()
    | (lambda x: [line.strip() for line in x.split("\n") if line.strip()])
)

# Run
question = "What are the main components of an LLM-powered autonomous agent system?"
questions = generate_queries_decomposition.invoke({"question":question})

In [None]:
# Bare expression: shows the generated sub-questions as the cell output.
questions

#### Answer Recursively

In [None]:
# Final RAG Prompt
# Template variables: {question} (used twice), {q_a_pairs} and {context}.
# The "\n" sequences are ordinary escapes in this non-raw string, i.e. they
# add further newlines around the "---" separators.
template = """Here is the question you need to answer:

\n --- \n {question} \n --- \n

Here is any available background question + answer pairs:

\n --- \n {q_a_pairs} \n --- \n

Here is additional context relevant to the question: 

\n --- \n {context} \n --- \n

Use the above context and any background question + answer pairs to answer the question: \n {question}
"""

# Chat prompt used when answering the sub-questions one after another.
decomposition_prompt = ChatPromptTemplate.from_template(template)

In [None]:
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser

def format_qa_pair(question, answer):
    """Render one question/answer pair as a two-line text block.

    The result is "Question: <question>", a newline, then "Answer: <answer>",
    with leading and trailing whitespace of the combined text removed.
    """
    pair_text = "\n".join((f"Question: {question}", f"Answer: {answer}"))
    return pair_text.strip()

# llm
# num_predict caps the number of generated tokens; ChatOllama has no
# max_length option.
llm = ChatOllama(model="llama3.2:latest", temperature=0, num_predict=400)

# The chain does not depend on the loop variable, so it is built once:
# retrieve context for the sub-question, pass the question and the
# accumulated Q/A pairs through, then prompt -> LLM -> plain string.
rag_chain = (
    {"context": itemgetter("question") | retriever,
     "question": itemgetter("question"),
     "q_a_pairs": itemgetter("q_a_pairs")}
    | decomposition_prompt
    | llm
    | StrOutputParser())

# Answer the sub-questions in order; every answer is appended to q_a_pairs so
# later sub-questions receive the earlier question/answer pairs as background.
q_a_pairs = ""
for q in questions:
    # Skip blank entries (e.g. empty lines in the decomposition output) so
    # that no empty question is retrieved for or answered.
    if not q.strip():
        continue

    answer = rag_chain.invoke({"question":q,"q_a_pairs":q_a_pairs})
    q_a_pair = format_qa_pair(q,answer)
    print(q_a_pair)
    print("="*80)
    q_a_pairs = q_a_pairs + "\n---\n"+  q_a_pair

In [None]:
# Prints an empty line.
print()

In [None]:
##### Final Answer
# `answer` holds the value assigned in the last iteration of the loop over
# `questions`, i.e. the answer to the last processed sub-question.
print(answer)

#### Answer Individually 

In [None]:
# Answer each sub-question individually 
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_core.output_parsers import StrOutputParser
from langchain_ollama import ChatOllama

# RAG prompt: answer {question} from {context} in at most three sentences.
prompt = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:"""

prompt_template = ChatPromptTemplate.from_template(prompt)

# llm
# num_predict caps the number of generated tokens; ChatOllama has no
# max_length option.
llm = ChatOllama(model="llama3.2:latest", temperature=0, num_predict=400)

def retrieve_and_rag(question,prompt_rag,sub_question_generator_chain):
    """Decompose a question and answer every sub-question with its own RAG call.

    Uses the module-level ``retriever`` and ``llm``.

    Args:
        question: The original user question.
        prompt_rag: Prompt with the inputs ``context`` and ``question``.
        sub_question_generator_chain: Runnable that is invoked with
            ``{"question": question}`` and yields the sub-question strings.

    Returns:
        A tuple ``(rag_results, sub_questions)`` of two equally long lists:
        the answers and the sub-questions they belong to. Blank
        sub-questions are skipped and appear in neither list.
    """
    # Generate the sub-questions with the decomposition chain.
    generated = sub_question_generator_chain.invoke({"question":question})
    # Drop blank entries so nothing empty is retrieved for or answered; the
    # filtered list is returned so it stays aligned with the answers.
    sub_questions = [sub_q for sub_q in generated if sub_q.strip()]

    # The answer chain is the same for every sub-question, so build it once.
    answer_chain = prompt_rag | llm | StrOutputParser()

    rag_results = []
    for sub_question in sub_questions:
        # Retrieve documents for this sub-question only.
        retrieved_docs = retriever.invoke(sub_question)
        # Answer the sub-question from its own retrieved documents.
        rag_results.append(
            answer_chain.invoke({"context": retrieved_docs,
                                 "question": sub_question})
        )

    return rag_results,sub_questions

# Run decomposition and per-sub-question RAG with a direct function call;
# this rebinds `questions` to the sub-questions returned by the call.
answers, questions = retrieve_and_rag(question, prompt_template, generate_queries_decomposition)

In [None]:
# Print every sub-question next to its answer.
# Note: the loop rebinds the notebook-level names `question` and `answer`.
for question, answer in zip(questions, answers):
    print(f"Question\n {question}\n")
    print(f"Answer\n {answer}\n")