In [1]:
from dotenv import load_dotenv
from langchain.chat_models import init_chat_model
from langchain.agents import create_agent, AgentState
from langchain_core.messages import HumanMessage, AIMessage, SystemMessage
from langgraph.checkpoint.memory import InMemorySaver
from langchain.agents.middleware import before_agent, after_agent, dynamic_prompt, ModelRequest
from langgraph.runtime import Runtime
from langchain.embeddings import init_embeddings
from langgraph.store.base import BaseStore
from langgraph.store.memory import InMemoryStore
from dataclasses import dataclass
from pydantic import BaseModel, Field
from typing import List
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
import uuid
load_dotenv()

True

In [2]:
@dataclass
class Context:
    """Per-invocation runtime context handed to ``agent.invoke(..., context=...)``.

    Attributes:
        user_name: Name of the current user; the memory hooks in this notebook
            build the store namespace from it.
        store: Store handle the memory hooks search and write to.
        memories: Memory strings retrieved for the current turn. ``None`` until
            the before-agent hook assigns it.
    """

    user_name: str
    store: BaseStore
    memories: list[str] | None = None

# One candidate memory produced by the structured-output memory extractor.
# Documented with comments rather than a class docstring on purpose: pydantic
# copies a model docstring into the JSON schema description, which would change
# what is sent to the model.
class MemoryItem(BaseModel):
    # The memory text itself.
    text: str = Field(
        description="Atomic user memory",
    )
    # Set by the model; store_messages only persists items where this is true.
    is_new: bool = Field(
        description="True if new, false if duplicate",
    )

# Structured verdict returned by memory_decide_llm for one user message.
# Comments are used instead of a docstring so the generated schema is unchanged.
class MemoryDecision(BaseModel):
    # Whether anything from the message should be persisted at all.
    should_write: bool
    # Candidate memories; an empty list when the model supplies none.
    memories: list[MemoryItem] = Field(
        default_factory=list,
    )

# Chat model that drives the agent itself.
llm = init_chat_model("gpt-4o")

# The same model, constrained to reply with a MemoryDecision object.
memory_decide_llm = llm.with_structured_output(MemoryDecision)

# Embedding model backing the store's semantic index.
embedding = init_embeddings("openai:text-embedding-3-small")

# In-process store; the index settings tell it how to embed values for search.
store: BaseStore = InMemoryStore(
    index={
        "embed": embedding,
        # presumably the vector size of text-embedding-3-small — confirm if the model changes
        "dims": 1536,
    },
)


In [3]:
# Base system prompt for the agent. change_prompt appends the user's retrieved
# memories to this text before each model call.
# The apostrophes and list dashes below are real Unicode characters (U+2019,
# U+2013); they had previously been stored as mis-decoded byte sequences that
# were sent verbatim to the model.
SYSTEM_PROMPT_TEMPLATE = """You are a helpful assistant with memory capabilities.
If user-specific memory is available, use it to personalize 
your responses based on what you know about the user.

Your goal is to provide relevant, friendly, and tailored 
assistance that reflects the user’s preferences, context, and past interactions.

If the user’s name or relevant personal context is available, always personalize your responses by:
    – Always Address the user by name (e.g., "Sure, Aayushmaan...") when appropriate
    – Referencing known projects, tools, or preferences (e.g., "your MCP server python based project")
    – Adjusting the tone to feel friendly, natural, and directly aimed at the user

Avoid generic phrasing when personalization is possible.

Use personalization especially in:
    – Greetings and transitions
    – Help or guidance tailored to tools and frameworks the user uses
    – Follow-up messages that continue from past context

Always ensure that personalization is based only on known user details and not assumed.
"""


# System-prompt template for the memory-extraction call made in store_messages.
# It is filled with str.format, so the only placeholders are {user_name} and
# {memories}.
MEMORY_PROMPT = (
    "Analyze if this user message contains new, important memories to store.\n"
    "User: {user_name}\n"
    "Existing memories: {memories}\n"
    "\n"
    "Should you store new memories? Return structured decision."
)

In [4]:
@dynamic_prompt
def change_prompt(request: ModelRequest) -> str:
    """Build the system prompt for one model call.

    Starts from ``SYSTEM_PROMPT_TEMPLATE`` and, when the runtime context carries
    retrieved memories, appends them as a bulleted list.

    Args:
        request: The pending model request; only ``request.runtime.context`` is read.

    Returns:
        The system prompt text to use for this call.
    """
    # load_messages tolerates a missing or invalid context, so this hook must
    # not crash on one either: treat it as "no memories" instead of raising
    # AttributeError on `None.memories`.
    context = getattr(request.runtime, "context", None)
    memories = getattr(context, "memories", None) or []

    # Skip blank entries so the prompt never contains empty "- " bullets.
    bullet_lines = [f"- {memory}" for memory in memories if memory and str(memory).strip()]
    if not bullet_lines:
        return SYSTEM_PROMPT_TEMPLATE

    print("adding memory in system prompt")
    return SYSTEM_PROMPT_TEMPLATE + "\n\nUser's relevant memories:\n" + "\n".join(bullet_lines)

@before_agent
def load_messages(state: AgentState, runtime: Runtime):
    """Retrieve stored memories relevant to the latest message before the agent runs.

    The text of the last message in ``state["messages"]`` is used as a semantic
    search query against the user's namespace in ``ctx.store``; the matching
    memory strings are placed on ``ctx.memories``.

    Args:
        state: Agent state; only ``state["messages"]`` is read.
        runtime: Runtime whose ``context`` is expected to be a ``Context``.

    Returns:
        Always ``None`` — the agent state is not modified; results are handed
        over through ``ctx.memories``.
    """
    print("🔍 Loading memories...")

    ctx = runtime.context
    if not ctx or not isinstance(ctx, Context):
        print("Warning: No valid context")
        return None

    # Reset up front so every early exit below leaves a clean, empty list.
    ctx.memories = []

    messages = state["messages"]
    if not messages:
        # Nothing to build a query from.
        return None

    content = messages[-1].content
    if isinstance(content, str):
        query = content
    else:
        # Content may be a list of content blocks; keep only the textual parts
        # because the store query has to be a plain string.
        query = " ".join(
            block if isinstance(block, str) else str(block.get("text", ""))
            for block in content
            if isinstance(block, (str, dict))
        )
    query = query.strip()
    if not query:
        # Do not send an empty string to the embedding model.
        return None

    # NOTE(review): LangGraph documents store namespaces as tuple[str, ...];
    # this string form must match store_messages and any other reader/writer,
    # so migrate them all together rather than changing it here alone.
    namespace = f"users/{ctx.user_name}/details"

    items = ctx.store.search(namespace, query=query, limit=5)

    # Keep only entries that actually carry text under "data".
    ctx.memories = [text for item in items if (text := item.value.get("data", ""))]
    if ctx.memories:
        print(f"Found {len(ctx.memories)} relevant memories")

    return None  # No state change needed


@after_agent
def store_messages(state: AgentState, runtime: Runtime):
    """Decide whether the latest user message holds new memories and persist them.

    After the agent has answered, the most recent human message is sent to
    ``memory_decide_llm`` together with the user's existing memories. Every
    returned item flagged ``is_new`` with non-blank text is written to the
    user's namespace in ``ctx.store`` under a fresh UUID key.

    Args:
        state: Agent state; only ``state["messages"]`` is read.
        runtime: Runtime whose ``context`` is expected to be a ``Context``.

    Returns:
        Always ``None`` — the agent state is not modified.
    """
    print("💾 Storing memories...")

    ctx = runtime.context
    if not ctx or not isinstance(ctx, Context):
        print("Warning: No valid context")
        return None

    user_name = ctx.user_name
    store = ctx.store

    # NOTE(review): LangGraph documents store namespaces as tuple[str, ...];
    # this string form must match load_messages and any other reader/writer,
    # so migrate them all together rather than changing it here alone.
    namespace = f"users/{user_name}/details"

    # Look for the latest human turn explicitly instead of assuming it sits at
    # index -2; that assumption breaks as soon as the run adds extra messages.
    last_human = next(
        (msg for msg in reversed(state["messages"]) if isinstance(msg, HumanMessage)),
        None,
    )
    content = last_human.content if last_human is not None else ""
    if isinstance(content, str):
        user_text = content
    else:
        # Content may be a list of content blocks; keep only the textual parts.
        user_text = " ".join(
            block if isinstance(block, str) else str(block.get("text", ""))
            for block in content
            if isinstance(block, (str, dict))
        )
    user_text = user_text.strip()
    if not user_text:
        print("Skipping memory storage")
        return None

    # Existing memories are shown to the model so it can flag duplicates.
    existing_items = store.search(namespace, limit=10)
    if existing_items:
        existing = "\n".join(item.value.get("data", "") for item in existing_items)
    else:
        existing = "(no memories)"

    system_text = MEMORY_PROMPT.format(user_name=user_name, memories=existing)

    # Build message objects directly. Routing these strings through
    # ChatPromptTemplate would parse them as f-string templates, so any "{" or
    # "}" in the user's message or in a stored memory would raise at format time.
    decision = memory_decide_llm.invoke(
        [
            SystemMessage(content=system_text),
            HumanMessage(content=user_text),
        ]
    )

    if not decision.should_write:
        print("Skipping memory storage")
        return None

    stored_count = 0
    for mem in decision.memories:
        text = mem.text.strip()
        if mem.is_new and text:
            store.put(namespace, str(uuid.uuid4()), {"data": text})
            stored_count += 1
    # Report what was actually written, not merely what was flagged as new.
    print(f"Stored {stored_count} new memories")

    return None  # Don't modify state


   

In [5]:
# Assemble the agent: no tools, per-thread history through an in-memory
# checkpointer, and the three memory hooks registered as middleware.
agent = create_agent(
    model=llm,
    tools=[],
    system_prompt=SYSTEM_PROMPT_TEMPLATE,
    middleware=[load_messages, store_messages, change_prompt],
    context_schema=Context,
    checkpointer=InMemorySaver(),
    store=store,
)


In [6]:
# Every call that reuses this thread_id shares one checkpointed conversation.
config = {"configurable": {"thread_id": "chat-1"}}

name = input("Please enter your name: ")

# Turn 1. The Context travels through the `context=` keyword, not inside `config`.
result1 = agent.invoke(
    {"messages": [HumanMessage(content="Hi, my name is Aayushmaan")]},
    config,
    context=Context(user_name=name, store=store),
)

print(result1["messages"][-1].content)



🔍 Loading memories...
💾 Storing memories...
Stored 1 new memories
Hello, Aayushmaan! How can I assist you today? Whether it's about a project you're working on or any other queries you have, feel free to let me know!


In [7]:

# Turn 2 on the same thread: state a preference for the agent to remember.
result2 = agent.invoke(
    {"messages": [HumanMessage(content="i like f1 a lot")]},
    config,
    context=Context(user_name=name, store=store),
)

print(result2["messages"][-1].content)


🔍 Loading memories...
Found 1 relevant memories
adding memory in system prompt
💾 Storing memories...
Stored 1 new memories
That's great to hear, Aayushmaan! Formula 1 is such an exciting sport with its high-speed drama and intricate strategies. Do you have a favorite team or driver you’re rooting for this season?


In [None]:
# Turn 3 on the same thread: ask the agent to recall the stated preference.
# Rebinds `result2`, exactly as the cell did before.
result2 = agent.invoke(
    {"messages": [HumanMessage(content="what do i like the most?")]},
    config,
    context=Context(user_name=name, store=store),
)

print(result2["messages"][-1].content)

In [8]:
def _run_scripted_turn(title: str, message: str, user_name: str, run_config: dict):
    """Send one scripted message to the agent and print it in the test layout.

    Args:
        title: Heading printed between the two separator rules.
        message: Text sent to the agent as a single human message.
        user_name: Name placed in the ``Context`` for this call.
        run_config: Invocation config passed to ``agent.invoke``; its
            ``thread_id`` selects the checkpointed conversation.

    Returns:
        The value returned by ``agent.invoke`` for this turn.
    """
    print("=" * 60)
    print(title)
    print("=" * 60)
    result = agent.invoke(
        {"messages": [HumanMessage(content=message)]},
        run_config,
        context=Context(user_name=user_name, store=store),
    )

    print("\nAgent response:")
    print(result["messages"][-1].content)
    print("\n")
    return result


# Test config - same thread_id persists history
config = {"configurable": {"thread_id": "chat-1"}}

name = "Aayushmaan"

# Each turn keeps its own result name so the individual states remain
# available for inspection after the cell has run.
result1 = _run_scripted_turn(
    "TEST 1: First message - No memories yet",
    "Hi, my name is Aayushmaan. I'm working on a RAG evaluation system using RAGAS.",
    name,
    config,
)

result2 = _run_scripted_turn(
    "TEST 2: Second message - Should store memory from first message",
    "What frameworks should I use for my project?",
    name,
    config,
)

result3 = _run_scripted_turn(
    "TEST 3: Related question - Should retrieve RAG memory",
    "How do I evaluate RAG systems?",
    name,
    config,
)

result4 = _run_scripted_turn(
    "TEST 4: New context - Add more memories",
    "I also use LangGraph for multi-agent systems and I'm preparing for AI engineering interviews.",
    name,
    config,
)

result5 = _run_scripted_turn(
    "TEST 5: Related to new memory - Should use LangGraph memory",
    "What are best practices for LangGraph?",
    name,
    config,
)

result6 = _run_scripted_turn(
    "TEST 6: Unrelated question - Minimal memory retrieval",
    "What's the weather like?",
    name,
    config,
)

result7 = _run_scripted_turn(
    "TEST 7: Interview prep question - Should use interview memory",
    "How should I prepare for AI engineering interviews?",
    name,
    config,
)

# A second user on a separate thread: neither history nor memories are shared.
other_name = "John"
result8 = _run_scripted_turn(
    "TEST 8: Different user - No shared memories",
    "Hi, I'm working on web development",
    other_name,
    {"configurable": {"thread_id": "chat-2"}},
)

result9 = _run_scripted_turn(
    "TEST 9: Back to original user - Should still have old memories",
    "Remind me what I'm working on",
    name,
    config,
)

TEST 1: First message - No memories yet
🔍 Loading memories...
💾 Storing memories...
Stored 2 new memories

Agent response:
Hi Aayushmaan! That sounds like a fascinating project. Developing a RAG evaluation system using RAGAS must involve some intricate work with retrieval-augmented generation. How's the project coming along? If you need any specific help or information, feel free to ask!


TEST 2: Second message - Should store memory from first message
🔍 Loading memories...
Found 2 relevant memories
adding memory in system prompt
💾 Storing memories...
Skipping memory storage

Agent response:
For your RAG evaluation system using RAGAS, there are several frameworks that can be quite beneficial:

1. **PyTorch or TensorFlow**: Both are well-suited for machine learning and deep learning tasks. You can use them to build or refine models if needed.

2. **Hugging Face Transformers**: This library is perfect for working with pre-trained models and could be helpful if you're utilizin