In [34]:
import os

from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.output_parsers import StrOutputParser
from langchain.schema.runnable import RunnableMap
from dotenv import load_dotenv

import warnings
# Silence library warnings so cell outputs stay readable.
# NOTE(review): this blanket filter also hides deprecation notices — consider narrowing it.
warnings.filterwarnings('ignore')

# Load variables from the project's .env file into the process environment.
load_dotenv()

# Fail fast with an actionable message when the key is missing. The previous
# `os.environ['OPENAI_API_KEY'] = os.getenv('OPENAI_API_KEY')` round-trip raised
# an opaque TypeError ("str expected, not NoneType") in that case and was a
# no-op whenever the key was already present.
if not os.getenv('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )


In [35]:
# Read the raw text corpus from disk.
data_loader = TextLoader("query_enhancement_dataset.txt")
raw_docs = data_loader.load()

# Split it into overlapping chunks (chunk_size=300, chunk_overlap=50) for embedding.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)
chunks = splitter.split_documents(raw_docs)

# Quick sanity check on the split: how many chunks, and what the first one looks like.
print(f'total chunks: {len(chunks)}')
print(f'first chunks: {chunks[0]}')

total chunks: 241
first chunks: page_content='LangChain is an open-source framework designed for developing applications powered by large language models (LLMs). It simplifies the process of building, managing, and scaling complex chains of thought by abstracting prompt management, retrieval, memory, and agent orchestration. Developers can use' metadata={'source': 'query_enhancement_dataset.txt'}


In [36]:
# Embed every chunk with the default OpenAI embedding model and index the
# resulting vectors in a FAISS store.
embedding_model = OpenAIEmbeddings()
vector_store = FAISS.from_documents(
    documents=chunks,
    embedding=embedding_model,
)
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x2bca6ce0890>

In [37]:
# Expose the FAISS store as a retriever that uses the "mmr" search type and
# returns k=5 documents per query.
retriever = vector_store.as_retriever(
    search_type="mmr",
    search_kwargs={"k": 5},
)
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000002BCA6CE0890>, search_type='mmr', search_kwargs={'k': 5})

In [38]:
# Chat model shared by the query-expansion chain and the answer chain.
llm = ChatOpenAI(
    model="o4-mini",
)
llm

ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000002BCE48353D0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000002BCA79025D0>, root_client=<openai.OpenAI object at 0x000002BCAA7D5710>, root_async_client=<openai.AsyncOpenAI object at 0x000002BCE4835490>, model_name='o4-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))

In [55]:
# Prompt that asks the LLM to rewrite a terse user query into a richer one
# before it is handed to the retriever. Expects a single variable: {query}.
query_expansion_prompt = PromptTemplate.from_template(
    """
    You are a helpful assistant. Expand the following query to improve document retrieval by adding relevant technical terms, and useful context.

    Original query: "{query}"

    Expanded query:
    
"""
)

# Build the chain from the prompt defined in this cell. The original referenced
# `prompt`, a name that is never defined in this notebook, so the cell raised
# NameError on a fresh kernel (or silently used a stale template left over from
# an earlier session). StrOutputParser turns the chat message into a plain str.
query_expansion_chain = query_expansion_prompt | llm | StrOutputParser()
query_expansion_chain

PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='\n    You are a helpful assistant. Expand the following query to improve document retrieval by adding relevant technical terms, and useful context.\n\n    Original query: "{query}"\n\n    Expanded query:\n\n')
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000002BCE48353D0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000002BCA79025D0>, root_client=<openai.OpenAI object at 0x000002BCAA7D5710>, root_async_client=<openai.AsyncOpenAI object at 0x000002BCE4835490>, model_name='o4-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser()

In [56]:
# Try the expansion chain on a short keyword-style query; the chain expects
# its input under the "query" key.
query = dict(query="LangChain memory")
response = query_expansion_chain.invoke(query)

response

'Expanded query:  \n“LangChain memory modules, memory management and retention strategies for conversational AI—short-term vs. long-term memory, memory buffer, summary memory, conversational history storage; vector-store integration (Chroma, FAISS, Pinecone, Redis) for persistent memory; retrieval-augmented generation (RAG) with LangChain; Memory classes and API usage; memory middleware patterns; optimizing context windows; stateful agent workflows in LangChain documentation.”'

In [41]:
# Confirm the result type of the chain call (the chain ends in StrOutputParser).
type(response)

str

In [42]:
# Print the expansion so its line breaks are rendered instead of shown as escapes.
print(response)

Expanded query:  
“LangChain memory modules conversation history management”  
“LangChain ConversationBufferMemory”  
“LangChain ConversationSummaryMemory”  
“LangChain vector‐store memory RAG”  
“persistent memory Redis FAISS Pinecone Chroma”  
“LangChain memory classes API reference tutorial”  
“LLM app state management best practices”


In [43]:
# Prompt for the final answering step. It takes two variables:
#   {context} - the text the answer must be grounded in
#   {input}   - the user's question
# NOTE(review): the template lines carry the call-site indentation, so that
# leading whitespace is part of the prompt text sent to the model.
answer_prompt = PromptTemplate.from_template("""
                                             Answer the question based on the context below.

                                             Context:
                                             {context}

                                             Question: {input}
                                             """)
answer_prompt

PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\n                                             Answer the question based on the context below.\n\n                                             Context:\n                                             {context}\n\n                                             Question: {input}\n                                             ')

In [44]:
# Chain that formats the supplied documents into the prompt's {context} slot
# and asks the LLM to answer the {input} question from them.
document_chain = create_stuff_documents_chain(
    llm,
    answer_prompt,
)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\n                                             Answer the question based on the context below.\n\n                                             Context:\n                                             {context}\n\n                                             Question: {input}\n                                             ')
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000002BCE48353D0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000002BCA79025D0>, root_client=<openai.OpenAI object at 0x000002BCAA7D5710>, root_async_client=<openai.AsyncOpenAI object at 0x000002BCE4835490>, model_name='o4-mini', model_kwargs={}, openai_ap

In [47]:
# End-to-end RAG pipeline. Given {"input": <question>} it builds:
#   input   -> the question, passed through unchanged
#   context -> documents retrieved using the LLM-expanded form of the question
# and hands both to `document_chain` to produce the answer.
rag_pipeline = RunnableMap(
    {
        "input": lambda payload: payload["input"],
        "context": lambda payload: retriever.invoke(
            query_expansion_chain.invoke({"query": payload["input"]})
        ),
    }
) | document_chain

rag_pipeline

{
  input: RunnableLambda(...),
  context: RunnableLambda(...)
}
| RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
    context: RunnableLambda(format_docs)
  }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
  | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template='\n                                             Answer the question based on the context below.\n\n                                             Context:\n                                             {context}\n\n                                             Question: {input}\n                                             ')
  | ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x000002BCE48353D0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x000002BCA79025D0>, root_client=<openai.OpenAI object at 0x000002BCAA7D5710>, root_async_client=<openai.AsyncOpenAI obje

In [57]:
# Same question keyed two ways: "query" for the expansion chain,
# "input" for the RAG pipeline.
query1 = dict(query="What type of memory does LangChain support?")
query2 = dict(input="What type of memory does LangChain support?")

# Run the expansion on its own (for inspection), then the full pipeline.
enhanced_query = query_expansion_chain.invoke(query1)
response = rag_pipeline.invoke(query2)

In [59]:
# Show the standalone expansion. `rag_pipeline` calls the expansion chain again
# internally, so the text actually used for retrieval may differ from this one.
print(enhanced_query)

Expanded query:  

“What memory modules and backends does LangChain provide? Specifically, what short-term versus long-term memory types are supported (e.g. in-memory buffers vs. summary memory), which built-in classes and interfaces exist (ConversationBufferMemory, ConversationSummaryMemory, CombinedMemory, ChatMessageHistory, etc.), and what external storage backends can be plugged in (RedisMemory, SQLMemory/SQLite/PostgreSQL, vector-store memory via Chroma/Pinecone/FAISS/Weaviate/Milvus)? How do memory_key, return_messages, k (for k-NN retrieval), and other memory parameters work in the LangChain Python SDK, and how is memory used in RAG pipelines, multi-turn chatbots, and agent applications?”


enhanced_query:
“What memory modules and backends does LangChain provide? Specifically, what short-term versus long-term memory types are supported (e.g. in-memory buffers vs. summary memory), which built-in classes and interfaces exist (ConversationBufferMemory, ConversationSummaryMemory, CombinedMemory, ChatMessageHistory, etc.), and what external storage backends can be plugged in (RedisMemory, SQLMemory/SQLite/PostgreSQL, vector-store memory via Chroma/Pinecone/FAISS/Weaviate/Milvus)? How do memory_key, return_messages, k (for k-NN retrieval), and other memory parameters work in the LangChain Python SDK, and how is memory used in RAG pipelines, multi-turn chatbots, and agent applications?”

In [61]:
# Final answer returned by `rag_pipeline` for the LangChain question.
print(response)

LangChain provides conversational “memory” modules—most notably:  
• ConversationBufferMemory – keeps a running buffer of past turns  
• ConversationSummaryMemory – summarizes long or growing dialogues to stay within token limits


In [62]:
# Same question keyed two ways: "query" for the expansion chain,
# "input" for the RAG pipeline.
query1 = dict(query="What type of memory does CrewAI support?")
query2 = dict(input="What type of memory does CrewAI support?")

# Run the expansion on its own (for inspection), then the full pipeline.
enhanced_query = query_expansion_chain.invoke(query1)
response = rag_pipeline.invoke(query2)

In [63]:
# Show the standalone expansion. `rag_pipeline` calls the expansion chain again
# internally, so the text actually used for retrieval may differ from this one.
print(enhanced_query)

Expanded query:  
“What types of memory mechanisms and stores does the CrewAI conversational platform support—e.g. short-term (session) vs. long-term (persistent) memory, episodic or user-profile memory, knowledge-base memory, vector-embedding stores or semantic RAG memory, file/document memory, cache vs. database backends (e.g. Pinecone, Weaviate, Redis), context-window limits, memory indexing approaches (inverted index, FAISS), memory pruning/garbage-collection strategies, and how these interact with CrewAI’s LLM prompting pipeline?”


In [64]:
# Final answer returned by `rag_pipeline` for the CrewAI question.
print(response)

The provided context does not mention or specify any memory model or memory type that CrewAI supports.
