In [1]:
### Query Enhancement – Query Expansion Techniques

In [2]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableMap

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
## Step 1: Load the raw text dataset and split it into overlapping chunks
DATASET_PATH = "data/langchain_crewai_dataset.txt"
CHUNK_SIZE = 300     # upper bound on the size of each chunk passed to the splitter
CHUNK_OVERLAP = 50   # amount of text repeated between neighbouring chunks

loader = TextLoader(DATASET_PATH)
raw_docs = loader.load()

splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
chunks = splitter.split_documents(raw_docs)

In [5]:
### Step 2: Vector store
# Embed every chunk with the Hugging Face model named below and index the
# resulting vectors in a FAISS store.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"
embedding_model = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
vectorstore = FAISS.from_documents(chunks, embedding_model)

## Step 3: MMR retriever
# Expose the store as a retriever that uses the "mmr" search type and is
# configured with k=5.
RETRIEVER_SEARCH_KWARGS = {"k": 5}
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs=RETRIEVER_SEARCH_KWARGS,
)

In [6]:
## Step 4: LLM setup
import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if one exists) into the process
# environment so the OpenAI client can read the API key from there.
load_dotenv()

# The previous `os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")`
# did nothing when the key was present and raised an opaque
# "str expected, not NoneType" TypeError when it was missing.
# Fail early with an actionable message instead.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set. Export it in your shell or add it to a "
        ".env file before running this cell."
    )

# Chat model shared by the query-expansion chain and the answering chain.
llm = init_chat_model("openai:o4-mini")

In [7]:
# Query expansion: ask the LLM to rewrite the user's query with synonyms,
# technical terms and extra context before it is sent to the retriever.
QUERY_EXPANSION_TEMPLATE = """
You are a helpful assistant. Expand the following query to improve document retrieval by adding relevant synonyms, technical terms, and useful context.

Original query: "{query}"

Expanded query:
"""

query_expansion_prompt = PromptTemplate.from_template(QUERY_EXPANSION_TEMPLATE)

# prompt -> chat model -> plain string
prompt_to_llm = query_expansion_prompt | llm
query_expansion_chain = prompt_to_llm | StrOutputParser()

# Display the composed chain as the cell output.
query_expansion_chain

PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='\nYou are a helpful assistant. Expand the following query to improve document retrieval by adding relevant synonyms, technical terms, and useful context.\n\nOriginal query: "{query}"\n\nExpanded query:\n')
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000026A2D94CB30>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000026A2F06CE00>, root_client=<openai.OpenAI object at 0x0000026A223795E0>, root_async_client=<openai.AsyncOpenAI object at 0x0000026A2D9674A0>, model_name='o4-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser()

In [8]:
# Try the expansion chain on a short keyword-style query; the prompt's
# {query} slot is filled from the "query" key.
sample_expansion_input = {"query": "Langchain memory"}
query_expansion_chain.invoke(sample_expansion_input)

'LangChain memory OR “Lang Chain memory” OR “LangChain memory modules” OR “LangChain conversational memory” OR “LangChain ChatMemory” OR “LangChain ConversationBufferMemory” OR “LangChain ConversationSummaryMemory” OR “LangChain MemoryChain” OR “LangChain vector‐store memory” OR “LangChain session memory” OR “LangChain persistent memory” OR “LangChain short-term memory” OR “LangChain long-term memory” OR “LangChain context management” OR “LangChain history management” OR “LangChain stateful conversations” OR “LangChain memory strategies” OR “LangChain memory patterns” OR “LangChain memory best practices” OR “LangChain memory storage backends” OR “LangChain Redis memory” OR “LangChain Pinecone memory” OR “LangChain FAISS memory” OR “LangChain Weaviate memory” OR “LangChain embedding‐based retrieval” OR “LangChain LLM context window” OR “LangChain prompt context” OR “LangChain memory API” OR “LangChain Python memory example” OR “LangChain memory tutorial” OR “LangChain memory code sample

In [9]:
# RAG answering prompt: {context} receives the retrieved documents and
# {input} receives the user's original question.
ANSWER_TEMPLATE = """
Answer the question based on the context below.

Context:
{context}

Question: {input}
"""

answer_prompt = PromptTemplate.from_template(ANSWER_TEMPLATE)

# Chain that formats the retrieved documents into the prompt and calls the LLM.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=answer_prompt,
)

In [10]:
# Step 5: Full RAG pipeline with query expansion
def _retrieve_with_expansion(inputs):
    """Expand the user's question with the LLM, then retrieve documents for the expanded text."""
    expanded_query = query_expansion_chain.invoke({"query": inputs["input"]})
    return retriever.invoke(expanded_query)


def _pass_through_question(inputs):
    """Forward the original (unexpanded) question to the answering prompt."""
    return inputs["input"]


# Build the two keys the answering chain expects, then hand them to it.
_pipeline_inputs = RunnableMap({
    "input": _pass_through_question,
    "context": _retrieve_with_expansion,
})
rag_pipeline = _pipeline_inputs | document_chain

In [11]:
# Step 6: Run query
query = {"input": "What types of memory does LangChain support?"}

# Preview the expanded query. The expansion prompt's {query} slot expects the
# question text itself; passing the whole `query` dict would render its repr
# ("{'input': ...}") into the prompt. Note that this preview is a separate LLM
# call, so its wording may differ from the expansion rag_pipeline generates
# internally.
print(query_expansion_chain.invoke({"query": query["input"]}))

# rag_pipeline reads the question from the "input" key.
response = rag_pipeline.invoke(query)
print("✅ Answer:\n", response)

Expanded query:  
“What memory modules, storage backends, and context‐management mechanisms does LangChain provide? For example, what short-term and long-term memory implementations (in-memory buffers, session memory, chat history, conversational context), embedding-based and vector-store memory (FAISS, Pinecone, Weaviate), database-backed memories (SQL, Redis, MongoDB), file-based or JSON/CSV persistence, and other memory classes or interfaces are supported in LangChain for managing and retrieving LLM context?”
✅ Answer:
 LangChain today ships with two primary conversational‐memory modules:  
1. ConversationBufferMemory – keeps a running buffer of past turns in full detail  
2. ConversationSummaryMemory – rolls up older exchanges into a concise summary to stay within LLM token limits


In [12]:
# Step 7: Run a second query
query = {"input": "CrewAI agents?"}

# Preview the expanded query. Pass the question string rather than the whole
# `query` dict, otherwise the dict's repr ("{'input': ...}") is substituted
# into the expansion prompt. This preview is a separate LLM call, so its
# wording may differ from the expansion rag_pipeline generates internally.
print(query_expansion_chain.invoke({"query": query["input"]}))

# rag_pipeline reads the question from the "input" key.
response = rag_pipeline.invoke(query)
print("✅ Answer:\n", response)

Here’s one possible expanded query, adding synonyms, technical terms and application contexts to surface a broad set of relevant documents:

(“CrewAI” OR “Crew AI” OR “Crew Artificial Intelligence”)  
AND (“agent” OR “intelligent agent” OR “autonomous agent” OR “cognitive agent” OR “virtual assistant”)  
AND (“multi-agent system” OR “MAS” OR “AI-driven decision support” OR “automated resource allocation”)  
AND (“crew management” OR “crew scheduling” OR “crew coordination” OR “crew resource management”)  
AND (“flight crew” OR “aviation operations” OR “maritime crew” OR “film production crew” OR “hospitality staffing”)  
AND (“machine learning” OR “reinforcement learning” OR “natural language processing” OR “task automation” OR “predictive analytics”)  

You can also adjust or remove application-specific terms (e.g. “aviation,” “maritime,” “film”) to focus on your domain of interest.
✅ Answer:
 CrewAI agents are the building blocks of a CrewAI “crew.” Each agent is defined by:

• Role 