# Process for RAG applications

* Query transformation
* Routing
* Query construction
* Multimodal Index
* Postprocessing

## Query transformation


In [19]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.document_loaders import TextLoader
from langchain_ollama import OllamaEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import OpenSearchVectorSearch
import dotenv
import os

dotenv.load_dotenv()
os_key = os.getenv("OPENSEARCH_INITIAL_ADMIN_PASSWORD")

raw_documents = TextLoader('../data/philotest.txt', encoding='utf-8').load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, 
    chunk_overlap=200)
documents = text_splitter.split_documents(raw_documents)

# embed each chunk and insert it into the vector store
model = OllamaEmbeddings(
    base_url='http://127.0.0.1:11434',
    model="deepseek-r1:1.5b"
)
docsearch = OpenSearchVectorSearch.from_documents(
    documents,
    model,
    opensearch_url="https://localhost:9200",
    http_auth=("admin", os_key),
    use_ssl=False,
    verify_certs=False,
    ssl_assert_hostname=False,
    ssl_show_warn=False,
    engine="faiss",
)

In [None]:
retriever = docsearch.as_retriever()

# fetch the 4 most relevant documents
docs = retriever.invoke("""Who are the key figures in the ancient greek history 
    of philosophy?""")

In [21]:
from langchain_ollama import ChatOllama
from langchain_core.prompts import ChatPromptTemplate


prompt = ChatPromptTemplate.from_template("""Answer the question based only on 
    the following context:
{context}

Question: {question}
""")

llm = ChatOllama(
    base_url='http://127.0.0.1:11434',
    model="deepseek-r1:1.5b",
    temperature=0,
)

In [22]:
from langchain_core.runnables import chain

@chain
def qa(input):
    # fetch relevant documents 
    docs = retriever.get_relevant_documents(input)
    # format prompt
    formatted = prompt.invoke({"context": docs, "question": input})
    # generate answer
    answer = llm.invoke(formatted)
    return answer

In [23]:
answer = qa.invoke("""Today I woke up and brushed my teeth, then I sat down to read the 
    news. But then I forgot the food on the cooker. Who are some key figures in 
    the ancient greek history of philosophy?""")

answer.content

"<think>\nOkay, so I need to figure out who some key figures are in the ancient Greek history of philosophy based on the given context. Let me go through each document one by one and see what information is provided.\n\nFirst, there's Document id 'a863251f-1060-41b0-ba99-108f2e757856'. It mentions that it has long been established that ancient Greek philosophy begins in the Greek colonies of Ionia along the coast of Asia Minor. The first three Pre-Socratic philosophers all came from Ionian Miletus and the Milesian School is the first Greek philosophical school of thought. So, the key figures here are Thales, Anaximander, and Anaxagoras.\n\nNext, Document id '1568973b-41cf-42e3-ac46-647b40a4c3aa' talks about the Seven Sages of Ancient Greece. It mentions Socrates, Plato, and Xenophon as the main figures. So, these are Socrates, Plato, and Xenophon.\n\nThen, Document id 'fdae5ff6-4d99-45b8-9ec1-edfbc158919e' discusses Socrates' focus on individual character development and his teachings 

### Rewrite-Retrieve-Read (Rewrite the question)

In [24]:
rewrite_prompt = ChatPromptTemplate.from_template("""Provide a better search 
    query for web search engine to answer the given question, end the queries 
    with ’**’. Question: {x} Answer:""")

def parse_rewriter_output(message):
    return message.content.strip('"').strip("**")

rewriter = rewrite_prompt | llm | parse_rewriter_output

In [25]:
change_query = rewrite_prompt.invoke("""Today I woke up and brushed my teeth, then I sat down to read 
    the news. But then I forgot the food on the cooker. Who are some key 
    figures in the ancient greek history of philosophy?""")

In [26]:
change_query

ChatPromptValue(messages=[HumanMessage(content='Provide a better search \n    query for web search engine to answer the given question, end the queries \n    with ’**’. Question: Today I woke up and brushed my teeth, then I sat down to read \n    the news. But then I forgot the food on the cooker. Who are some key \n    figures in the ancient greek history of philosophy? Answer:', additional_kwargs={}, response_metadata={})])

In [27]:
llm.invoke(change_query)

AIMessage(content='<think>\nOkay, so I need to find a better search query for this question about key figures in ancient Greek philosophy. The original query is "Who are some key figures in the ancient greek history of philosophy?" and it should end with **.\n\nFirst, I\'ll break down the components of the question. It\'s asking about key figures in the ancient Greek history of philosophy. So, the main elements are:\n\n1. Ancient Greece\n2. History of philosophy\n3. Key figures\n\nI want to make sure my search query is comprehensive and covers all these aspects without being too broad or narrow.\n\nLet me think about how to phrase this. Maybe I can combine some parts for better clarity. For example, "ancient Greek history of philosophy" could be a good start. Then, adding "key figures" would directly address the user\'s question.\n\nI should also consider if there are any specific terms that might help narrow it down further. Words like "philosophers," "scholarship," or "contribution" 

In [28]:
rewrite_prompt = ChatPromptTemplate.from_template("""Provide a better search 
    query for web search engine to answer the given question, end the queries 
    with ’**’. Question: {x} Answer:""")

def parse_rewriter_output(message):
    return message.content.strip('"').strip("**")

rewriter = rewrite_prompt | llm | parse_rewriter_output

@chain
def qa_rrr(input):
    # rewrite the query
    new_query = rewriter.invoke(input)
    # fetch relevant documents 
    docs = retriever.get_relevant_documents(new_query)
    # format prompt
    formatted = prompt.invoke({"context": docs, "question": input})
    # generate answer
    answer = llm.invoke(formatted)
    return answer

# run
qa_rrr.invoke("""Today I woke up and brushed my teeth, then I sat down to read 
    the news. But then I forgot the food on the cooker. Who are some key 
    figures in the ancient greek history of philosophy?""")

AIMessage(content="<think>\nOkay, so I need to figure out who some key figures are in the ancient Greek history of philosophy based on the provided documents. Let me go through each document one by one and see what information is relevant.\n\nFirst, Document 1: It's about Mary Harrsch's work on Thales of Miletus. The content talks about Thales seeking his own cause in a reasoned inquiry into nature, which is interesting because it shows that Thales was a key figure in ancient Greek philosophy. So, Thales should be included.\n\nDocument 2: This document discusses the Seven Sages of Ancient Greece, specifically mentioning Socrates, Plato, and Xenophon. These are all major figures in ancient Greek philosophy, so they should definitely be on the list.\n\nDocument 3: It talks about Amplitude Studios' explanation of ancient Greek philosophy, focusing on Ionian Miletus and the Milesian School. This reinforces that Thales was a key figure, as it's part of the early Ionian philosophical traditi

In [34]:
def remove_think(answer):
    if answer.content.find("</think>\n\n") > 0:
        return answer.content.split("</think>\n\n")[1]
    else:
        return answer.content


In [35]:
@chain
def qa_rrr_nothink(input):
    # rewrite the query
    new_query = rewriter.invoke(input)
    # fetch relevant documents 
    docs = retriever.get_relevant_documents(new_query)
    # format prompt
    formatted = prompt.invoke({"context": docs, "question": input})
    # generate answer
    answer = llm.invoke(formatted)
    # Remove think tags if exist
    formatted_answer = remove_think(answer)
    
    return formatted_answer 

In [36]:
qa_rrr_nothink.invoke("""Today I woke up and brushed my teeth, then I sat down to read 
    the news. But then I forgot the food on the cooker. Who are some key 
    figures in the ancient greek history of philosophy?""")

'The key figures in the ancient Greek history of philosophy include:\n\n1. **Thales** - A pre-Socratic philosopher from Ionian Miletus.\n2. **Anaximander** - One of the first Pre-Socratic philosophers, known for his belief in the primordial nature of the universe.\n3. **Anaxagoras** - Another early Pre-Socratic philosopher who taught that all things are derived from water.\n4. **Socrates** - A central figure whose teachings influenced many later philosophers and thinkers.\n5. **Plato** - A key figure known for his idealist views on reality and education.\n6. **Xenophon** - One of the Seven Sages, a prominent figure in ancient Greek philosophy.\n7. **Antisthenes** - Another significant figure in the Socratic school, known for his role in spreading Platonic ideas.\n8. **Plotinus** - A key figure associated with Neo-Platonism, who emphasized the divine nature of existence.\n\nThese figures collectively shaped the foundations of Western philosophical thought.'

### Multiquery Retrieval (Many queries)

In [40]:
from langchain.prompts import ChatPromptTemplate

perspectives_prompt = ChatPromptTemplate.from_template("""You are an AI language 
    model assistant. Your task is to generate five different versions of the 
    given user question to retrieve relevant documents from a vector database. 
    By generating multiple perspectives on the user question, your goal is to 
    help the user overcome some of the limitations of the distance-based 
    similarity search. Provide these alternative questions separated by 
    newlines. Original question: {question}""")

def parse_queries_output(message):
    return message.split('\n')

query_gen = perspectives_prompt | llm | remove_think | parse_queries_output

In [41]:
def get_unique_union(document_lists):
    # Flatten list of lists, and dedupe them
    deduped_docs = {
        doc.page_content: doc
        for sublist in document_lists for doc in sublist
    }
    # return a flat list of unique docs
    return list(deduped_docs.values())

retrieval_chain = query_gen | retriever.batch | get_unique_union

In [42]:
prompt = ChatPromptTemplate.from_template("""Answer the following question based 
    on this context:

{context}

Question: {question}
""")

@chain
def multi_query_qa(input):
    # fetch relevant documents 
    print(query_gen.invoke(input))

    docs = retrieval_chain.invoke(input)
    # format prompt
    formatted = prompt.invoke({"context": docs, "question": input})
    # generate answer
    answer = llm.invoke(formatted)
    # Remove think tags if exist
    formatted_answer = remove_think(answer)
    
    return formatted_answer

# run
multi_query_qa.invoke("""Who are some key figures in the ancient greek history 
    of philosophy?""")

['1. Who were some key figures in ancient Greek philosophy based on their historical timeline?', '2. What were the major contributions of specific key figures to ancient Greek philosophy?', '3. How did both classical and modern philosophers from ancient Greece shape later philosophical thought?', '4. Which key figures in ancient Greek history explored major themes or concepts in philosophy?', '5. How have the philosophies of key figures in ancient Greece influenced subsequent cultures?']


'The key figures in the ancient Greek history of philosophy include:\n\n1. **Socrates**: Often referred to as a "sophist," he taught without expecting reward and is central to Western philosophy through his dialogues like The Republic and Phaedo.\n\n2. **Plato**: A student of Socrates, he wrote extensively on ethics, metaphysics, and the theory of forms, influencing many later thinkers.\n\n3. **Aristotle**: A student of Plato, he explored various topics including biology, politics, and ethics, with his work on the four elements (earth, water, air, fire) being particularly influential.\n\n4. **Timaeus**: A student of Plato, he discussed the nature of the universe and the idea that everything is made of four elements.\n\n5. **Plato\'s Academy**: The intellectual center in Athens where he taught students like Timaeus and others, fostering a culture of critical thinking and inquiry.\n\nThese figures collectively contributed to the development of Western philosophy through their dialogues, 

### RAG Fusion (Reciprocal Rank Fusion)

In [52]:
from langchain.prompts import ChatPromptTemplate
from langchain_ollama import ChatOllama

prompt_rag_fusion = ChatPromptTemplate.from_template("""You are a helpful 
    assistant that generates multiple search queries based on a single input 
    query. \n
    Generate multiple search queries related to: {question} \n
    Output (Only 4 queries enumerate):""")

def parse_queries_output(message):
    return message.content.split('\n')

llm = ChatOllama(model="qwen:0.5b",temperature=0)

query_gen = prompt_rag_fusion | llm | parse_queries_output

query_gen.invoke("""Who are some key figures in the ancient greek history 
    of philosophy?""")

['Sure, here are 4 possible search queries:',
 '',
 '1. Who were some key figures in the ancient Greek history?',
 '2. What philosophical ideas did the ancient Greeks develop and apply to their daily lives?',
 '3. How did the ancient Greeks view and understand the concept of democracy? ',
 '4. What role did the ancient Greeks play in shaping the modern world, both in terms of philosophy and science?']

In [54]:
def reciprocal_rank_fusion(results: list[list], k=10):
    """reciprocal rank fusion on multiple lists of ranked documents 
       and an optional parameter k used in the RRF formula
    """
    
    # Initialize a dictionary to hold fused scores for each document
    # Documents will be keyed by their contents to ensure uniqueness
    fused_scores = {}
    documents = {}

    # Iterate through each list of ranked documents
    for docs in results:
        # Iterate through each document in the list,
        # with its rank (position in the list)
        for rank, doc in enumerate(docs):
            # Use the document contents as the key for uniqueness
            doc_str = doc.page_content
            # If the document hasn't been seen yet,
            # - initialize score to 0
            # - save it for later
            if doc_str not in fused_scores:
                fused_scores[doc_str] = 0
                documents[doc_str] = doc
            # Update the score of the document using the RRF formula:
            # 1 / (rank + k)
            fused_scores[doc_str] += 1 / (rank + k)

    # Sort the documents based on their fused scores in descending order 
    # to get the final reranked results
    reranked_doc_strs = sorted(
        fused_scores, key=lambda d: fused_scores[d], reverse=True
    )
    # retrieve the corresponding doc for each doc_str
    return [
        documents[doc_str]
        for doc_str in reranked_doc_strs
    ]

retrieval_chain = query_gen | retriever.batch | reciprocal_rank_fusion

In [55]:
prompt = ChatPromptTemplate.from_template("""Answer the following question based 
    on this context:

{context}

Question: {question}
""")


@chain
def multi_query_qa_fusion(input):
    # fetch relevant documents 
    docs = retrieval_chain.invoke(input)
    # format prompt
    formatted = prompt.invoke({"context": docs, "question": input})
    # generate answer
    answer = llm.invoke(formatted)
    return answer

multi_query_qa_fusion.invoke("""Who are some key figures in the ancient greek history 
    of philosophy?""")

AIMessage(content='Some key figures in the ancient Greek history of philosophy include Plato, Aristotle, and Socrates. These philosophers were known for their philosophical ideas, such as the concept of knowledge, which is based on the ability to understand and apply knowledge. These philosophers also had a significant impact on the development of Western philosophy, particularly the works of Aristotle and Socrates.', additional_kwargs={}, response_metadata={'model': 'qwen:0.5b', 'created_at': '2025-08-03T01:42:34.0028704Z', 'done': True, 'done_reason': 'stop', 'total_duration': 20660343900, 'load_duration': 41499600, 'prompt_eval_count': 1686, 'prompt_eval_duration': 18041691800, 'eval_count': 71, 'eval_duration': 2574593200, 'model_name': 'qwen:0.5b'}, id='run--193272c2-91b0-4f25-8c8f-3499e7db72bd-0', usage_metadata={'input_tokens': 1686, 'output_tokens': 71, 'total_tokens': 1757})

### Hypothetical Document Embeddings

In [57]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

prompt_hyde = ChatPromptTemplate.from_template("""Please write a passage to 
   answer the question.\n Question: {question} \n Passage:""")

generate_doc = (
    prompt_hyde | llm | StrOutputParser() 
)

In [58]:
retrieval_chain = generate_doc | retriever 

In [59]:
prompt = ChatPromptTemplate.from_template("""Answer the following question based 
    on this context:

{context}

Question: {question}
""")


@chain
def qa(input):
  # fetch relevant documents from the hyde retrieval chain defined earlier
  docs = retrieval_chain.invoke(input)
  # format prompt
  formatted = prompt.invoke({"context": docs, "question": input})
  # generate answer
  answer = llm.invoke(formatted)
  return answer

qa.invoke("""Who are some key figures in the ancient greek history of 
    philosophy?""")

AIMessage(content='Some key figures in the ancient Greek history of philosophy include Plato, Aristotle, and Socrates. These philosophers developed a wide range of philosophical theories and ideas that continue to be influential in modern-day philosophy.', additional_kwargs={}, response_metadata={'model': 'qwen:0.5b', 'created_at': '2025-08-03T01:47:14.5902099Z', 'done': True, 'done_reason': 'stop', 'total_duration': 11722857200, 'load_duration': 35895500, 'prompt_eval_count': 923, 'prompt_eval_duration': 10291991700, 'eval_count': 41, 'eval_duration': 1394400600, 'model_name': 'qwen:0.5b'}, id='run--085bcfb1-0187-453e-8e7c-d33de45fef85-0', usage_metadata={'input_tokens': 923, 'output_tokens': 41, 'total_tokens': 964})

In [60]:
generate_doc.invoke("""Who are some key figures in the ancient greek history of 
    philosophy?""")

'The ancient Greek history of philosophy is filled with many key figures. Some of the most important philosophers of ancient Greece include Plato, Aristotle, and Confucius. These philosophers have had a significant impact on Western culture and society.'