## 1. Self Querying retriever
- https://github.com/samwit/langchain-tutorials/blob/main/RAG/YT_LangChain_RAG_tips_and_Tricks_01_Self_Query.ipynb

In [2]:
import os,sys,getpass
import pprint
# Prompt for the key only when it is not already configured, so that
# "Restart Kernel -> Run All" also works non-interactively and an existing
# key is never overwritten by an accidental (possibly empty) input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass(prompt='OpenAI API Token:')

In [3]:
from langchain.schema import Document
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# OpenAI embedding client created with library defaults; it is used below to
# embed the sample wine documents for the Chroma vector store.
# TODO: confirm which embedding model/endpoint the installed version defaults to.
embeddings = OpenAIEmbeddings()

  warn_deprecated(


- #### Create some sample documents to work with 

In [4]:
# Sample corpus: each Document pairs a short tasting note (page_content, used
# for semantic matching) with structured metadata (used for filtering by the
# self-query retriever).
docs = [
    Document(
        page_content="Complex, layered, rich red with dark fruit flavors",
        metadata={"name":"Opus One", "year": 2018, "rating": 96, "grape": "Cabernet Sauvignon", "color":"red", "country":"USA"},
    ),
    Document(
        page_content="Luxurious, sweet wine with flavors of honey, apricot, and peach",
        metadata={"name":"Château d'Yquem", "year": 2015, "rating": 98, "grape": "Sémillon", "color":"white", "country":"France"},
    ),
    Document(
        page_content="Full-bodied red with notes of black fruit and spice",
        metadata={"name":"Penfolds Grange", "year": 2017, "rating": 97, "grape": "Shiraz", "color":"red", "country":"Australia"},
    ),
    Document(
        page_content="Elegant, balanced red with herbal and berry nuances",
        metadata={"name":"Sassicaia", "year": 2016, "rating": 95, "grape": "Cabernet Franc", "color":"red", "country":"Italy"},
    ),
    Document(
        page_content="Highly sought-after Pinot Noir with red fruit and earthy notes",
        metadata={"name":"Domaine de la Romanée-Conti", "year": 2018, "rating": 100, "grape": "Pinot Noir", "color":"red", "country":"France"},
    ),
    Document(
        page_content="Crisp white with tropical fruit and citrus flavors",
        metadata={"name":"Cloudy Bay", "year": 2021, "rating": 92, "grape": "Sauvignon Blanc", "color":"white", "country":"New Zealand"},
    ),
    Document(
        page_content="Rich, complex Champagne with notes of brioche and citrus",
        # Fixed: this entry was tagged "New Zealand" (copied from the Cloudy Bay
        # entry above); the wine is described as a Champagne, i.e. from France.
        metadata={"name":"Krug Grande Cuvée", "year": 2010, "rating": 93, "grape": "Chardonnay blend", "color":"sparkling", "country":"France"},
    ),
    Document(
        page_content="Intense, dark fruit flavors with hints of chocolate",
        metadata={"name":"Caymus Special Selection", "year": 2018, "rating": 96, "grape": "Cabernet Sauvignon", "color":"red", "country":"USA"},
    ),
    Document(
        page_content="Exotic, aromatic white with stone fruit and floral notes",
        metadata={"name":"Jermann Vintage Tunina", "year": 2020, "rating": 91, "grape": "Sauvignon Blanc blend", "color":"white", "country":"Italy"},
    ),
]
# Embed every document and index it in a Chroma vector store.
vectorstore = Chroma.from_documents(docs, embeddings)

In [5]:
from langchain.llms import OpenAI
from langchain.retrievers.self_query.base import SelfQueryRetriever
from langchain.chains.query_constructor.base import AttributeInfo

In [6]:
# Schema handed to the self-query retriever: one AttributeInfo per metadata key
# stored on the sample documents, so the LLM knows which fields it can filter on.
metadata_field_info = [
    AttributeInfo(
        name="grape",
        description="The grape used to make the wine",
        type="string or list[string]",
    ),
    AttributeInfo(
        name="name",
        description="The name of the wine",
        type="string or list[string]",
    ),
    AttributeInfo(
        name="color",
        description="The color of the wine",
        type="string or list[string]",
    ),
    AttributeInfo(
        name="year",
        description="The year the wine was released",
        type="integer",
    ),
    AttributeInfo(
        name="country",
        description="The name of the country the wine comes from",
        type="string",
    ),
    AttributeInfo(
        name="rating", description="The Robert Parker rating for the wine 0-100", type="integer"  # NOTE(review): "float" appears to have been considered as an alternative type
    ),
]
# Describes what page_content holds; passed to the retriever alongside the schema.
document_content_description = "Brief description of the wine"

- Set up the retriever

In [7]:

# Completion model that turns the natural-language question into a structured
# query; temperature=0 keeps the generated query as stable as possible.
llm = OpenAI(temperature=0)

# Same construction as the positional form, spelled out with keyword arguments
# so each input's role is visible at the call site.
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectorstore,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    verbose=True,
)
     

  warn_deprecated(


- The retriever should generate both a query for semantic matching and a filter for metadata filtering.
- see [link](https://python.langchain.com/docs/modules/data_connection/retrievers/self_query/#constructing-from-scratch-with-lcel) to see how to further customize

In [17]:
# Question without any metadata constraint, so no metadata filter is expected here.
# NOTE(review): "nodes" is presumably a typo for "notes"; the query string is left
# unchanged so that it still matches the recorded output.
# Note that this rebinds `docs` from the sample corpus to the retrieval result.
docs = retriever.invoke("I want a wine that has fruity nodes")
print(docs)

[Document(page_content='Crisp white with tropical fruit and citrus flavors', metadata={'color': 'white', 'country': 'New Zealand', 'grape': 'Sauvignon Blanc', 'name': 'Cloudy Bay', 'rating': 92, 'year': 2021}), Document(page_content='Intense, dark fruit flavors with hints of chocolate', metadata={'color': 'red', 'country': 'USA', 'grape': 'Cabernet Sauvignon', 'name': 'Caymus Special Selection', 'rating': 96, 'year': 2018}), Document(page_content='Luxurious, sweet wine with flavors of honey, apricot, and peach', metadata={'color': 'white', 'country': 'France', 'grape': 'Sémillon', 'name': "Château d'Yquem", 'rating': 98, 'year': 2015}), Document(page_content='Complex, layered, rich red with dark fruit flavors', metadata={'color': 'red', 'country': 'USA', 'grape': 'Cabernet Sauvignon', 'name': 'Opus One', 'rating': 96, 'year': 2018})]


In [18]:
# This example specifies both a semantic query (fruity wine) and a metadata
# filter (rating above 97).
retriever.invoke("I want a wine that has fruity nodes and has a rating above 97")

[Document(page_content='Luxurious, sweet wine with flavors of honey, apricot, and peach', metadata={'color': 'white', 'country': 'France', 'grape': 'Sémillon', 'name': "Château d'Yquem", 'rating': 98, 'year': 2015}),
 Document(page_content='Highly sought-after Pinot Noir with red fruit and earthy notes', metadata={'color': 'red', 'country': 'France', 'grape': 'Pinot Noir', 'name': 'Domaine de la Romanée-Conti', 'rating': 100, 'year': 2018})]

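- Limit the number of results (top k): with `enable_limit=True`, a count mentioned in the question (e.g. "two") can be turned into a limit on the number of returned documents.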

In [19]:
# Rebuild the self-query retriever, this time allowing the question itself to
# set how many documents come back.
retriever = SelfQueryRetriever.from_llm(
    llm=llm,
    vectorstore=vectorstore,
    document_contents=document_content_description,
    metadata_field_info=metadata_field_info,
    enable_limit=True,  # let the generated query carry a result limit
    verbose=True,
)

In [20]:
# "two" is expected to become the result limit and "rating above 97" the
# metadata filter.
# `invoke` replaces the deprecated `get_relevant_documents` and matches the
# earlier self-query calls in this notebook.
retriever.invoke("what are two that have a rating above 97")

[Document(page_content='Luxurious, sweet wine with flavors of honey, apricot, and peach', metadata={'color': 'white', 'country': 'France', 'grape': 'Sémillon', 'name': "Château d'Yquem", 'rating': 98, 'year': 2015}),
 Document(page_content='Highly sought-after Pinot Noir with red fruit and earthy notes', metadata={'color': 'red', 'country': 'France', 'grape': 'Pinot Noir', 'name': 'Domaine de la Romanée-Conti', 'rating': 100, 'year': 2018})]

## 2.  Parent Document Retriever

- You may want to have small documents, so that their embeddings can most accurately reflect their meaning. If too long, then the embeddings can lose meaning.
- You want to have long enough documents that the context of each chunk is retained.

In [23]:
from langchain.retrievers import ParentDocumentRetriever
from langchain.storage import InMemoryStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader,DirectoryLoader
from langchain_community.vectorstores import Chroma
from langchain.embeddings.openai import OpenAIEmbeddings

In [31]:
## reference : https://python.langchain.com/docs/modules/data_connection/document_loaders/file_directory
# Folder holding the sample .txt files. It can be overridden through the
# ADV_RAG_DATA_DIR environment variable instead of editing the notebook; the
# default is the original hard-coded location.
data_folder = os.environ.get('ADV_RAG_DATA_DIR', '/root/workspace/data/Adv_RAG_temp_data')
# Load every *.txt file in the folder as one Document per file.
loader = DirectoryLoader(data_folder, glob="*.txt",loader_cls=TextLoader,show_progress=True,use_multithreading=True)
docs = loader.load()

100%|██████████| 2/2 [00:00<00:00, 275.25it/s]


- Retrieving full documents : In this mode, we want to retrieve the full documents. Therefore, we only specify a child splitter.

In [32]:
# Storage layer that keeps the parent (here: full) documents.
store = InMemoryStore()
# Splitter producing the small child chunks that get embedded and searched.
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
# Vector index holding the child chunks.
vectorstore = Chroma(
    embedding_function=OpenAIEmbeddings(),
    collection_name="full_documents",
)
# No parent splitter is given, so a match on a child chunk maps back to the
# whole source document.
retriever = ParentDocumentRetriever(
    docstore=store,
    vectorstore=vectorstore,
    child_splitter=child_splitter,
)
# Index the loaded documents; ids are left to the retriever to generate.
retriever.add_documents(docs)

In [35]:
## The docstore holds one entry per parent document: two files were loaded,
## so two store ids are expected.
list(store.yield_keys())

['34c0989f-e952-41cc-aded-17edfac1f3bf',
 '277aa046-c08c-4563-a1c9-3b873b3f6ba3']

In [42]:
## Query the vector store directly: it should return small chunks, because only
## the child chunks are stored there.
sub_docs = vectorstore.similarity_search("Chinese history")
print('retrieve chunks in docs that is related ')
pprint.pprint(sub_docs)

retrieve chunks in docs that is related 
[Document(page_content='Chinese dynasty, which was at once involved in the usual internal and\nexternal struggles. For the moment, however, the southern region was\nrelatively at peace, and was accordingly attracting settlers.', metadata={'doc_id': '277aa046-c08c-4563-a1c9-3b873b3f6ba3', 'source': '/root/workspace/data/Adv_RAG_temp_data/chinahistory.txt'}),
 Document(page_content='North China before its defeat, and resumed these from 932 on; there were\neven relations with one of the South Chinese states; in the same way,\nKao-li continuously played one state against the other (M. Rogers _et\nal_.).', metadata={'doc_id': '277aa046-c08c-4563-a1c9-3b873b3f6ba3', 'source': '/root/workspace/data/Adv_RAG_temp_data/chinahistory.txt'}),
 Document(page_content='inside China we followed, had for the first time to defend itself\nagainst views and systems entirely opposed to it; for the Turkish and\nMongol peoples who ruled northern China brought with them

In [44]:
# Show the best-matching child chunk and its length in characters, to confirm
# that it stays within the child chunk_size of 400.
print(sub_docs[0].page_content)
print(len(sub_docs[0].page_content))

Chinese dynasty, which was at once involved in the usual internal and
external struggles. For the moment, however, the southern region was
relatively at peace, and was accordingly attracting settlers.
200


- Now we can retrieve the parent document using the parent document retriever

In [46]:
## e.g. for news articles: match on small chunks, but return the entire document
# `invoke` replaces the deprecated `get_relevant_documents` and matches the
# calls used with the self-query retriever earlier in this notebook.
retrieved_docs = retriever.invoke("Chinese history")
# Length of the returned parent document, for comparison with the child chunk.
len(retrieved_docs[0].page_content)

959799

- ##### A second way to use it is to return a larger chunk rather than the entire document 

In [47]:
# This text splitter is used to create the parent documents (larger chunks
# that are returned to the caller instead of the whole file)
parent_splitter = RecursiveCharacterTextSplitter(chunk_size=2000)
# This text splitter is used to create the child documents
# It should create documents smaller than the parent
child_splitter = RecursiveCharacterTextSplitter(chunk_size=400)
# The vectorstore to use to index the child chunks; a separate collection name
# keeps these chunks apart from the "full_documents" collection used above
vectorstore = Chroma(
    collection_name="split_parents", embedding_function=OpenAIEmbeddings()
)
# The storage layer for the parent documents (fresh, empty store)
store = InMemoryStore()

In [49]:
# With both splitters set, documents are first cut into parent chunks and each
# parent chunk is then cut into the child chunks that get indexed.
retriever = ParentDocumentRetriever(
    vectorstore=vectorstore,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)

# Index the same loaded documents with the two-level splitting.
retriever.add_documents(docs)

In [50]:
## There are now many more than two entries in the docstore - these are the
## larger parent chunks rather than whole documents.
len(list(store.yield_keys()))

1004

In [51]:
## Make sure the underlying vector store still retrieves the small child chunks.
sub_docs = vectorstore.similarity_search("Chinese history")

In [52]:
# Best-matching child chunk, to compare with the parent chunk retrieved next.
print(sub_docs[0].page_content)

[Illustration: Map 3. China in the struggle with the Huns or Hsiung Nu
(_roughly 128-100 B.C._)]


In [56]:
## The parent document retriever returns the larger parent chunk that contains
## the matching child chunk.
# `invoke` replaces the deprecated `get_relevant_documents` and matches the
# calls used with the self-query retriever earlier in this notebook.
retrieved_docs = retriever.invoke("Chinese history")
print(retrieved_docs[0].page_content)

[Illustration: Map 3. China in the struggle with the Huns or Hsiung Nu
(_roughly 128-100 B.C._)]

The first active step taken was to try, in 133 B.C., to capture the
head of the Hsiung-nu state, who was called a _shan-yü_ but the
_shan-yü_ saw through the plan and escaped. There followed a period of
continuous fighting until 119 B.C. The Chinese made countless attacks,
without lasting success. But the Hsiung-nu were weakened, one sign of
this being that there were dissensions after the death of the _shan-yü_
Chün-ch'en, and in 127 B.C. his son went over to the Chinese. Finally
the Chinese altered their tactics, advancing in 119 B.C. with a strong
army of cavalry, which suffered enormous losses but inflicted serious
loss on the Hsiung-nu. After that the Hsiung-nu withdrew farther to the
north, and the Chinese settled peasants in the important region of
Kansu.


## 3.  Hybrid Search BM25 & Ensemble Retriever

In [60]:
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain_community.vectorstores import FAISS, Chroma
from langchain.embeddings.openai import OpenAIEmbeddings
# Embedding client for the dense (FAISS) half of the hybrid search.
embedding = OpenAIEmbeddings()

In [61]:
# Tiny toy corpus used to compare keyword (BM25) and embedding-based retrieval.
doc_list = [
    "I like apples",
    "I like oranges",
    "Apples and oranges are fruits",
]

In [64]:
# initialize the bm25 retriever and faiss retriever
# Sparse keyword retriever over the toy corpus, returning its top 2 matches.
bm25_retriever = BM25Retriever.from_texts(doc_list)
bm25_retriever.k = 2

# Dense retriever over the same texts, also limited to 2 results.
# NOTE(review): `embedding` is created again here although the import cell
# already defines it; the second instantiation looks redundant.
embedding = OpenAIEmbeddings()
faiss_vectorstore = FAISS.from_texts(doc_list, embedding)
faiss_retriever = faiss_vectorstore.as_retriever(search_kwargs={"k": 2})

- initialize the ensemble retriever : and provide a weighting system 
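- Hybrid search is often useful when users know the exact terms they want to search for: BM25 rewards exact keyword matches, while the vector retriever covers semantic similarity.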

In [65]:
# Combine both retrievers; the weights give BM25 and FAISS equal influence on
# the merged ranking.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5]
)

In [66]:
# `invoke` replaces the deprecated `get_relevant_documents` and matches the
# calls used with the self-query retriever earlier in this notebook.
docs = ensemble_retriever.invoke("apples")
docs

[Document(page_content='I like apples'),
 Document(page_content='Apples and oranges are fruits')]

## 4. Contextual Compressors & Filters 

In [3]:
from langchain.embeddings import HuggingFaceBgeEmbeddings

import torch

model_name = "BAAI/bge-small-en-v1.5"
encode_kwargs = {'normalize_embeddings': True} # set True to compute cosine similarity

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without CUDA (previously hard-coded to 'cuda').
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Local Hugging Face BGE embedding model used by the FAISS retriever below.
bge_embeddings = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs={'device': device},
    encode_kwargs=encode_kwargs
)

In [12]:
import os
import pprint
import getpass
from langchain.vectorstores import FAISS
from langchain.schema import Document
from langchain.vectorstores import Chroma
## Text Splitting & Docloader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.storage import InMemoryStore
from langchain_community.document_loaders import TextLoader,DirectoryLoader
from langchain.embeddings import OpenAIEmbeddings


In [9]:
# Prompt for the key only when it is not already configured, so that
# "Restart Kernel -> Run All" also works non-interactively and an existing
# key is never overwritten by an accidental (possibly empty) input.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass(prompt='OpenAI API Token:')

- Load raw data 

In [13]:
# Folder holding the sample .txt files. It can be overridden through the
# ADV_RAG_DATA_DIR environment variable instead of editing the notebook; the
# default is the original hard-coded location.
data_folder = os.environ.get('ADV_RAG_DATA_DIR', '/root/workspace/data/Adv_RAG_temp_data')
# Load every *.txt file in the folder as one Document per file.
loader = DirectoryLoader(data_folder, glob="*.txt",loader_cls=TextLoader,show_progress=True,use_multithreading=True)
docs = loader.load()

100%|██████████| 2/2 [00:00<00:00, 263.96it/s]


- basic recursive chunking of text 

In [15]:
# Split the loaded documents into chunks of at most 1000 with an overlap of 200
# between neighbouring chunks; `texts` is the list of resulting chunk Documents.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
texts = text_splitter.split_documents(docs)

In [16]:
def pretty_print_docs(docs):
    """Print the text of each document under a numbered heading.

    Every entry is rendered as "Document <n>:" (numbered from 1), a blank
    line, and the document's ``page_content``. Consecutive entries are
    separated by a line of 100 dashes.

    Args:
        docs: iterable of objects exposing a string ``page_content`` attribute.
    """
    divider = f"\n{'-' * 100}\n"
    sections = []
    for position, doc in enumerate(docs, start=1):
        sections.append(f"Document {position}:\n\n" + doc.page_content)
    print(divider.join(sections))

- basic retriever 

In [17]:
# Index the chunks in FAISS using the local BGE embeddings and expose the index
# as a retriever (OpenAIEmbeddings() could be passed instead of bge_embeddings).
retriever = FAISS.from_documents(texts, bge_embeddings).as_retriever()

# `invoke` replaces the deprecated `get_relevant_documents` and matches the
# calls used with the self-query retriever earlier in this notebook.
docs = retriever.invoke("What is Chinese History?")
# Look at the uncompressed chunks, as a baseline for the compressors below.
pretty_print_docs(docs)

Document 1:

Until recently we were dependent for the beginnings of Chinese history
on the written Chinese tradition. According to these sources China's
history began either about 4000 B.C. or about 2700 B.C. with a
succession of wise emperors who "invented" the elements of a
civilization, such as clothing, the preparation of food, marriage, and a
state system; they instructed their people in these things, and so
brought China, as early as in the third millennium B.C., to an
astonishingly high cultural level. However, all we know of the origin of
civilizations makes this of itself entirely improbable; no other
civilization in the world originated in any such way. As time went on,
Chinese historians found more and more to say about primeval times. All
these narratives were collected in the great imperial history that
appeared at the beginning of the Manchu epoch. That book was translated
into French, and all the works written in Western languages until recent
---------------------------

- ### Adding contextual compression with an LLMChainExtractor
- For each retrieved chunk, the LLM extracts (verbatim) only the parts that are relevant to the question; chunks with nothing relevant are dropped
- then you can pass them into llm to generate answer 

In [18]:
from langchain.llms import OpenAI
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

In [19]:
# making the compressor
llm = OpenAI(temperature=0)
compressor = LLMChainExtractor.from_llm(llm)

  warn_deprecated(


In [20]:
# it needs a base retriever (we're using FAISS Retriever) and a compressor (Made above)
compression_retriever = ContextualCompressionRetriever(base_compressor=compressor,
                                                       base_retriever=retriever)

In [25]:
# compressor prompt
compressor.llm_chain.prompt

PromptTemplate(input_variables=['context', 'question'], output_parser=NoOutputParser(), template='Given the following question and context, extract any part of the context *AS IS* that is relevant to answer the question. If none of the context is relevant return NO_OUTPUT. \n\nRemember, *DO NOT* edit the extracted parts of the context.\n\n> Question: {question}\n> Context:\n>>>\n{context}\n>>>\nExtracted relevant parts:')

In [26]:
# Retrieve with FAISS, then let the LLM extractor trim each chunk.
# `invoke` replaces the deprecated `get_relevant_documents` and matches the
# calls used with the self-query retriever earlier in this notebook.
compressed_docs = compression_retriever.invoke("What is Chinese History?")
pretty_print_docs(compressed_docs)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Document 1:

- Until recently we were dependent for the beginnings of Chinese history
on the written Chinese tradition. According to these sources China's
history began either about 4000 B.C. or about 2700 B.C. with a
succession of wise emperors who "invented" the elements of a
civilization, such as clothing, the preparation of food, marriage, and a
state system; they instructed their people in these things, and so
brought China, as early as in the third millennium B.C., to an
astonishingly high cultural level. However, all we know of the origin of
civilizations makes this of itself entirely improbable; no other
civilization in the world originated in any such way. As time went on,
Chinese historians found more and more to say about primeval times. All
these narratives were collected in the great imperial history that
appeared at the beginning of the Manchu epoch.
----------------------------------------------------------------------------------------------------
Document 2:

- Chapter

- ### More built-in compressors: filters
- LLMChainFilter
    - Uses an LLM chain to decide which retrieved documents are passed on to the final LLM - this step could also be handled by a model fine-tuned for the task
    - "YES" we show it or "NO" we don't show it

In [27]:
from langchain.retrievers.document_compressors import LLMChainFilter

# LLM-backed filter built on the same `llm` as the extractor above.
_filter = LLMChainFilter.from_llm(llm)

- In short, the LLM itself decides whether each retrieved document is relevant to the question.

In [28]:
# Display the YES/NO relevance prompt used by the filter.
_filter.llm_chain.prompt

PromptTemplate(input_variables=['context', 'question'], output_parser=BooleanOutputParser(), template="Given the following question and context, return YES if the context is relevant to the question and NO if it isn't.\n\n> Question: {question}\n> Context:\n>>>\n{context}\n>>>\n> Relevant (YES / NO):")

In [35]:
# Same base retriever, but the post-processing step is now the LLM filter.
compression_retriever = ContextualCompressionRetriever(base_compressor=_filter, base_retriever=retriever)
# `invoke` replaces the deprecated `get_relevant_documents` and matches the
# calls used with the self-query retriever earlier in this notebook.
compressed_docs = compression_retriever.invoke("What is Chinese History?")



In [37]:
# Documents that the LLM filter kept.
compressed_docs

[Document(page_content='Until recently we were dependent for the beginnings of Chinese history\non the written Chinese tradition. According to these sources China\'s\nhistory began either about 4000 B.C. or about 2700 B.C. with a\nsuccession of wise emperors who "invented" the elements of a\ncivilization, such as clothing, the preparation of food, marriage, and a\nstate system; they instructed their people in these things, and so\nbrought China, as early as in the third millennium B.C., to an\nastonishingly high cultural level. However, all we know of the origin of\ncivilizations makes this of itself entirely improbable; no other\ncivilization in the world originated in any such way. As time went on,\nChinese historians found more and more to say about primeval times. All\nthese narratives were collected in the great imperial history that\nappeared at the beginning of the Manchu epoch. That book was translated\ninto French, and all the works written in Western languages until recent', 

- ### EmbeddingsFilter
- Uses an embedding model to keep only the results whose similarity to the query clears a threshold

In [38]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers.document_compressors import EmbeddingsFilter

embeddings = OpenAIEmbeddings()
# Filter retrieved documents by their embedding similarity to the query,
# using 0.76 as the similarity threshold.
embeddings_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)
compression_retriever = ContextualCompressionRetriever(base_compressor=embeddings_filter, base_retriever=retriever)

# `invoke` replaces the deprecated `get_relevant_documents` and matches the
# calls used with the self-query retriever earlier in this notebook.
compressed_docs = compression_retriever.invoke("What is Chinese History?")
compressed_docs ## the returned docs carry a `state` with the embedding / query similarity

[_DocumentWithState(page_content='Until recently we were dependent for the beginnings of Chinese history\non the written Chinese tradition. According to these sources China\'s\nhistory began either about 4000 B.C. or about 2700 B.C. with a\nsuccession of wise emperors who "invented" the elements of a\ncivilization, such as clothing, the preparation of food, marriage, and a\nstate system; they instructed their people in these things, and so\nbrought China, as early as in the third millennium B.C., to an\nastonishingly high cultural level. However, all we know of the origin of\ncivilizations makes this of itself entirely improbable; no other\ncivilization in the world originated in any such way. As time went on,\nChinese historians found more and more to say about primeval times. All\nthese narratives were collected in the great imperial history that\nappeared at the beginning of the Manchu epoch. That book was translated\ninto French, and all the works written in Western languages until

- ## Pipelines
    - ### Stringing compressors and document transformers together
    - DocumentCompressorPipeline allows us to string things together.
    - BaseDocumentTransformers - apply transformations to the docs, e.g. splitting the text into smaller pieces
    - EmbeddingsRedundantFilter - drops documents that are near-duplicates of each other (by embedding similarity) after a split or transformation

In [43]:
from langchain.document_transformers import EmbeddingsRedundantFilter
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.text_splitter import CharacterTextSplitter

# Split each retrieved chunk on ". " into smaller pieces (target size 300, no overlap).
splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=0, separator=". ")

# Both filters reuse the `embeddings` client created for the EmbeddingsFilter example.
redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
relevant_filter = EmbeddingsFilter(embeddings=embeddings, similarity_threshold=0.76)

## making the pipeline
# Steps in list order: split -> LLM extraction -> redundancy filter -> relevance filter.
pipeline_compressor = DocumentCompressorPipeline(
    transformers=[splitter, compressor, redundant_filter, relevant_filter]
)

In [44]:
# Same base retriever, with the whole compressor pipeline as post-processing.
compression_retriever = ContextualCompressionRetriever(base_compressor=pipeline_compressor,
                                                       base_retriever=retriever)
# `invoke` replaces the deprecated `get_relevant_documents` and matches the
# calls used with the self-query retriever earlier in this notebook.
compressed_docs = compression_retriever.invoke("What is Chinese History?")
# Show only the first two results; each one carries its full embedding in `state`.
compressed_docs[:2]

Created a chunk of size 304, which is longer than the specified 300
Created a chunk of size 399, which is longer than the specified 300




[_DocumentWithState(page_content='"China\'s history down into the three large periods--"Antiquity", "The Middle Ages", and "Modern Times""', metadata={'source': '/root/workspace/data/Adv_RAG_temp_data/chinahistory.txt'}, state={'embedded_doc': [0.015720563599641173, 0.004834137000035384, 0.018041462108573336, -0.015682094392487713, -0.01895186896712192, 6.423844686951297e-06, -0.027543041488473273, -0.026235131286090587, -0.023375682503459966, -0.010386342273432044, -0.005994586720162715, 0.03482630567008694, -0.007385843766969564, -0.004943129672121024, -0.001565164061242393, 0.01596419384015143, 0.011001829098792425, 0.006584428183522871, 0.0050777672405456385, 0.0013880513183491651, -0.02736352535145545, 0.019413484551803455, -0.0019586589930397783, -0.022837130367116508, -0.004651414319652693, 0.013656116848066252, 0.019503243551634866, -0.032056612161997726, 0.007296085232799405, -0.010565859341772363, -0.004125686028462472, -0.0001565765142604166, -0.01020041398100698, -0.0321079

### HyDE - Hypothetical Document Embeddings
- https://www.youtube.com/watch?v=v_BnBEubv58&list=PL8motc6AQftn-X1HkaGG9KjmKtWImCKJS&index=11
- https://colab.research.google.com/drive/1ncrpizJUITFaZ2E7P3ZV9mpNOOds7TMU?usp=sharing#scrollTo=Ln5WZyf05DVh

- the idea is to generate some answers given a question
- then use the answer to retrieve 

I am not sure this is going to be reliable for most situations 

## RAG Fusion
- https://towardsdatascience.com/forget-rag-the-future-is-rag-fusion-1147298d8ad1
- https://colab.research.google.com/drive/1rTO2qtBFw4voPr3yyvs9uNsIaGT7da0y?usp=sharing#scrollTo=K1i89ZetrjxS
- The basic idea: starting from a simple user query, generate several queries that approach it from different angles; how the rewriting is performed can be tuned
- then use RRF (Reciprocal Rank Fusion) to re-rank the combined results
- then generate the answer based on all of that information