# Query Enhancement - Query Expansion Techniques

In a RAG pipeline, the quality of the query sent to the retriever determines how good the retrieved context is, and therefore how accurate the final LLM answer will be.

In [37]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_ollama import ChatOllama
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableMap

In [38]:
## Step 1: Load and Split
# Read the dataset file into LangChain documents.
loader = TextLoader("langchain_crewai_dataset.txt")
text = loader.load()

# Split the loaded documents into overlapping chunks for embedding.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)
chunks = splitter.split_documents(text)

In [39]:
# Step 2: Embedding
# Embedding model used to vectorise the chunks.
embedding = HuggingFaceEmbeddings(model="all-MiniLM-L6-v2")

# Step 3: Vector store
# Build the FAISS store from the chunks using the embedding model above.
vectorstore = FAISS.from_documents(documents=chunks, embedding=embedding)

# Step 4: Retriever
# Expose the store as a retriever using MMR search (k=5, lambda_mult=0.7).
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5, "lambda_mult": 0.7},
)

In [None]:
# Step 5: LLM
# Chat model served through Ollama; shared by the expansion chain and the
# answering chain.
llm = ChatOllama(
    model="qwen3:4b",
    num_ctx=32768,
    reasoning=False
)

# Step 6: Query Expansion
# Prompt that asks the LLM to rewrite the user's query with synonyms,
# technical terms and extra context before it is sent to the retriever.
# The original query is wrapped in a balanced pair of double quotes so the
# model can tell exactly where the user's text starts and ends.
query_expansion_prompt = PromptTemplate.from_template("""
You are a helpful assistant. Expand the following query to improve document retrieval by adding relevant synonyms, technical terms and useful context.


Original query: "{query}"

Expanded query:                                                      
"""
)

# Pipeline: fill the prompt -> call the LLM -> parse the reply to a string.
# NOTE: the name is misspelled ("expension") but is kept unchanged because
# other cells in this notebook reference it.
query_expension_chain = query_expansion_prompt | llm | StrOutputParser()

In [None]:
from IPython.display import Markdown

# Sanity check: expand a sample query and render the LLM's reply as Markdown.
display(
    Markdown(
        query_expension_chain.invoke({"query": "Langchain memory"})
    )
)

In [42]:
# RAG answering prompt
# Template with two placeholders: {context} and {input}. It is handed to
# create_stuff_documents_chain, and the RAG pipeline supplies both keys
# ('context' from the retriever, 'input' from the user's question).
answer_prompt = PromptTemplate.from_template("""
Answer the question based on the context below.

Context:
{context}

Question: {input}                                             
"""
)

In [None]:
# Chains
# Chain that combines the answer prompt with the LLM.
document_chain = create_stuff_documents_chain(llm, answer_prompt)


def _retrieve_with_expansion(x):
    """Expand the question in x['input'] with the LLM, then retrieve documents for it."""
    expanded_query = query_expension_chain.invoke({'query': x['input']})
    return retriever.invoke(expanded_query)


# Use RunnableMap to give multiple inputs to a chain:
# 'input' passes the question through unchanged, 'context' holds the
# documents retrieved for the expanded query.
rag_pipeline = (
    RunnableMap({
        'input': lambda x: x['input'],
        'context': _retrieve_with_expansion,
    })
    | document_chain
)

In [None]:
# Run query
query = {"input": "What type of memory does LangChain support?"}

# Show the expanded query. The expansion prompt expects the question text
# under 'query', so pass the string rather than the whole `query` dict
# (which would be formatted into the prompt as a dict repr).
print(query_expension_chain.invoke({'query': query['input']}))

# Run the full pipeline: expand -> retrieve -> answer.
response = rag_pipeline.invoke(query)

# Markdown takes the text as its first argument; a second positional
# argument is interpreted as a URL, so the heading and the answer are joined
# into one string. The blank line keeps the heading in its own paragraph.
display(Markdown(f"Answer:\n\n{response}"))