<a href="https://colab.research.google.com/github/chayan141/Langgraph-Projects/blob/main/Langgraph_Long_Memory.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Long Term Memory General

Semantic Memory : Facts about the users.

Episodic Memory : Involves recalling Past Experience or events or acions. It is sometimes implemented through the fewshot prompting.

Procedural Memory : Both humans and AI agents remembers set of rules to perform any tasks.

In [1]:
%%capture --no-stderr
%pip install -U langchain_openai langgraph trustcall langchain_core langchain-google-genai

In [2]:
from google.colab import userdata
gemini = userdata.get('gemini_api_key')
import os
os.environ['gemini_api_key'] = gemini

In [3]:
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_google_genai import ChatGoogleGenerativeAI

In [4]:
model = ChatGoogleGenerativeAI(model='gemini-2.0-flash',api_key=gemini)

In [5]:
from langchain_core.runnables import RunnableConfig
from langgraph.config import get_store
from langgraph.prebuilt import create_react_agent
from langgraph.store.memory import InMemoryStore

In [6]:
store = InMemoryStore()

In [7]:
# TypedDict instance
user_profile = {
    "user_name": "Lance",
    "interests": ["biking", "technology", "coffee"]
}
user_profile

{'user_name': 'Lance', 'interests': ['biking', 'technology', 'coffee']}

In [8]:
namespace_for_memory = ("1", "memory")

key = "user_profile"
value = user_profile

In [9]:
store.put(namespace_for_memory, key, value)

In [10]:
for m in store.search(namespace_for_memory):
    print(m.dict())

{'namespace': ['1', 'memory'], 'key': 'user_profile', 'value': {'user_name': 'Lance', 'interests': ['biking', 'technology', 'coffee']}, 'created_at': '2025-07-05T07:50:47.495076+00:00', 'updated_at': '2025-07-05T07:50:47.495082+00:00', 'score': None}


In [11]:
type(m)

In [12]:
profile = store.get(namespace_for_memory, key)
profile.value

{'user_name': 'Lance', 'interests': ['biking', 'technology', 'coffee']}

# Read Long Term Memory

In [13]:
def get_user_info(config: RunnableConfig):
  """Look up user info."""
    # Same as that provided to `create_react_agent`
  store = get_store()
  user_id = config['configurable'].get("user_id")
  user_info = store.get(("1",  "memory"), user_id)
  return user_info.value if user_info else "Unknown Users"

In [14]:
agent = create_react_agent(
    model = model,
    tools = [get_user_info],
    store = store
)

In [15]:
agent.invoke(
    {"messages": [{"role": "user", "content": "look up user information"}]},
    config={"configurable": {"user_id": "1"}}
)

{'messages': [HumanMessage(content='look up user information', additional_kwargs={}, response_metadata={}, id='948ff7f6-0e66-4c48-8805-7b904bdadcd4'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'get_user_info', 'arguments': '{}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run--73e939be-5f0b-4456-8e49-d1198937f9b5-0', tool_calls=[{'name': 'get_user_info', 'args': {}, 'id': '32841e0c-c9a6-43ed-9071-8949bbb4e2c8', 'type': 'tool_call'}], usage_metadata={'input_tokens': 14, 'output_tokens': 5, 'total_tokens': 19, 'input_token_details': {'cache_read': 0}}),
  ToolMessage(content='Unknown Users', name='get_user_info', id='ff7c255d-984c-4368-b66f-02b96a19743c', tool_call_id='32841e0c-c9a6-43ed-9071-8949bbb4e2c8'),
  AIMessage(content='I am unable to retrieve user information at this time. Please try again later.', additional_kwargs={}, response

# Write Long Term Memory

In [16]:
from typing_extensions import TypedDict
from typing import List

from langgraph.config import get_store
from langgraph.prebuilt import create_react_agent
from langgraph.store.memory import InMemoryStore

store = InMemoryStore()

class UserProfile(TypedDict):
    """User profile schema with typed fields"""
    user_name: str  # The user's preferred name
    interests: List[str]  # A list of the user's interests

In [17]:
def save_user_info(user_info: UserProfile, config: RunnableConfig):
    """Save user info."""

    store = get_store()
    user_id = config['configurable'].get("user_id")
    store.put(("2", "memory"), user_id, user_info)
    return "Success"

In [18]:
agent = create_react_agent(
    model = model,
    tools = [save_user_info],
    store = store
)

In [19]:
# Run the agent
agent.invoke(
    {"messages": [{"role": "user", "content": "My name is John Smith, I have interests in bike riding"}]},
    config={"configurable": {"user_id": "user_123"}}
)

{'messages': [HumanMessage(content='My name is John Smith, I have interests in bike riding', additional_kwargs={}, response_metadata={}, id='c1112740-29d5-4ff9-90e3-2b001fdb9252'),
  AIMessage(content='', additional_kwargs={'function_call': {'name': 'save_user_info', 'arguments': '{"user_info": {"interests": ["bike riding"], "user_name": "John Smith"}}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run--4f75ac9b-ea70-45ff-900a-090c52f226cd-0', tool_calls=[{'name': 'save_user_info', 'args': {'user_info': {'interests': ['bike riding'], 'user_name': 'John Smith'}}, 'id': '1ddb933d-5a6b-4c86-bb1f-77e327e60524', 'type': 'tool_call'}], usage_metadata={'input_tokens': 46, 'output_tokens': 16, 'total_tokens': 62, 'input_token_details': {'cache_read': 0}}),
  ToolMessage(content='Success', name='save_user_info', id='92d198a8-42ce-4f25-a560-e84eb084d465', tool_call_id='1ddb

In [20]:
namespace_for_memory = ("2", "memory")
for m in store.search(namespace_for_memory):
    print(m.dict())

{'namespace': ['2', 'memory'], 'key': 'user_123', 'value': {'user_name': 'John Smith', 'interests': ['bike riding']}, 'created_at': '2025-07-05T07:50:49.270852+00:00', 'updated_at': '2025-07-05T07:50:49.270859+00:00', 'score': None}


# Complex Schema Processing Using TrustCall

Complex schemas can be difficult to extract.

In addition, updating even simple schemas can pose challenges.

Consider our above chatbot.

We regenerated the profile schema from scratch each time we chose to save a new memory.

This is inefficient, potentially wasting model tokens if the schema contains a lot of information to re-generate each time.

Worse, we may loose information when regenerating the profile from scratch.

Addressing these problems is the motivation for TrustCall!

In [21]:

from langchain_core.messages import HumanMessage, SystemMessage, AIMessage
# Conversation
conversation = [HumanMessage(content="Hi, I'm Lance."),
                AIMessage(content="Nice to meet you, Lance."),
                HumanMessage(content="I really like biking around San Francisco.")]

In [22]:
from trustcall import create_extractor
from pydantic import BaseModel, Field



# Schema
class UserProfile(BaseModel):
    """User profile schema with typed fields"""
    user_name: str = Field(description="The user's preferred name")
    interests: List[str] = Field(description="A list of the user's interests")

In [23]:
# Create the extractor
trustcall_extractor = create_extractor(
    model,
    tools=[UserProfile],
    tool_choice="UserProfile"
)

In [24]:
# Instruction
system_msg = "Extract the user profile from the following conversation"

# Invoke the extractor
result = trustcall_extractor.invoke({"messages": [SystemMessage(content=system_msg)]+conversation})

In [25]:
for m in result["messages"]:
    m.pretty_print()

Tool Calls:
  UserProfile (d52863cf-0fc4-4184-a85e-24edbe3bcfab)
 Call ID: d52863cf-0fc4-4184-a85e-24edbe3bcfab
  Args:
    interests: ['biking']
    user_name: Lance


In [26]:
schema = result["responses"]
schema

[UserProfile(user_name='Lance', interests=['biking'])]

In [27]:

schema[0].model_dump()

{'user_name': 'Lance', 'interests': ['biking']}

In [28]:
result["response_metadata"]

[{'id': 'd52863cf-0fc4-4184-a85e-24edbe3bcfab'}]

In [29]:
# Update the conversation
updated_conversation = [HumanMessage(content="Hi, I'm Lance."),
                        AIMessage(content="Nice to meet you, Lance."),
                        HumanMessage(content="I really like biking around San Francisco."),
                        AIMessage(content="San Francisco is a great city! Where do you go after biking?"),
                        HumanMessage(content="I really like to go to a bakery after biking."),]

In [30]:
# Update the instruction
system_msg = f"""Update the memory (JSON doc) to incorporate new information from the following conversation"""

In [31]:
# Invoke the extractor with the updated instruction and existing profile with the corresponding tool name (UserProfile)
result = trustcall_extractor.invoke({"messages": [SystemMessage(content=system_msg)]+updated_conversation},
                                    {"existing": {"UserProfile": schema[0].model_dump()}})

for m in result["messages"]:
    m.pretty_print()


Tool Calls:
  UserProfile (c02305eb-f36e-498e-967d-051a48760dca)
 Call ID: c02305eb-f36e-498e-967d-051a48760dca
  Args:
    interests: ['biking', 'bakery']
    user_name: Lance


In [32]:
result["response_metadata"]

[{'id': 'c02305eb-f36e-498e-967d-051a48760dca'}]

In [33]:
updated_schema = result["responses"][0]
updated_schema.model_dump()

{'user_name': 'Lance', 'interests': ['biking', 'bakery']}

# Managing Short Term Memory

With short-term memory enabled, long conversations can exceed the LLM's context window. Common solutions are:

1. Trim Messages : Remove first or last N messages (before calling an LLM).

2. Delete Messages from langgraph state permanantly.

3. Summarize Messages : Summarize earlier messages in the history and replace them with a summary.

4. Manage Checkpoints to store and retrieve message history.

Trim Messages : To trim message history in an agent, use pre_model_hook with the trim_messages function

In [34]:
from langchain_core.messages.utils import trim_messages, count_tokens_approximately
from langgraph.graph import StateGraph, START, MessagesState
from langgraph.checkpoint.memory import InMemorySaver

summarization_model = model.bind(max_tokens=128)

def call_model(state: MessagesState):
  messages = trim_messages(
      state['messages'],
      strategy="last",
      token_counter = count_tokens_approximately,
      max_tokens=128,
      start_on = "human",
      end_on = ("human","tool")
  )

  response = model.invoke(messages)
  return {"messages":[response]}

In [35]:
checkpointer = InMemorySaver()
builder = StateGraph(MessagesState)
builder.add_node("call_model",call_model)
builder.add_edge(START, "call_model")
graph = builder.compile(checkpointer=checkpointer)

In [36]:
config = {"configurable": {"thread_id": "1"}}
graph.invoke({"messages": "hi, my name is bob"}, config)
graph.invoke({"messages": "write a short poem about cats"}, config)
graph.invoke({"messages": "now do the same but for dogs"}, config)
final_response = graph.invoke({"messages": "what's my name?"}, config)

final_response["messages"][-1].pretty_print()


I'm an AI, so I don't know your name. You haven't told me! You can tell me your name if you'd like. 😊


# Summarize Messages

In [37]:
!pip install langmem --quiet

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/66.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.9/66.9 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[?25h[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/292.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m [32m286.7/292.8 kB[0m [31m14.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m292.8/292.8 kB[0m [31m7.9 MB/s[0m eta [36m0:00:00[0m
[?25h

In [38]:
from langmem.short_term import SummarizationNode
from langchain_core.messages.utils import count_tokens_approximately
from langgraph.prebuilt import create_react_agent
from langgraph.prebuilt.chat_agent_executor import AgentState
from langgraph.checkpoint.memory import InMemorySaver
from typing import Any
from langchain_core.messages import AnyMessage

We will keep track of our running summary in the context field (expected by the SummarizationNode).

In [39]:
class State(MessagesState):
    context: dict[str, Any]

Define private state that will be used only for filtering the inputs to call_model node.

In [40]:
class LLMInputState(TypedDict):
    summarized_messages: list[AnyMessage]
    context: dict[str, Any]


In [41]:
summarization_node = SummarizationNode(
    token_counter=count_tokens_approximately,
    model=summarization_model,
    max_tokens=256,
    max_tokens_before_summary=256,
    max_summary_tokens=128,
)

In [42]:
def call_model(state: LLMInputState):
    response = model.invoke(state["summarized_messages"])
    return {"messages": [response]}

In [43]:
checkpointer = InMemorySaver()
builder = StateGraph(State)
builder.add_node(call_model)
builder.add_node("summarize", summarization_node)
builder.add_edge(START, "summarize")
builder.add_edge("summarize", "call_model")
graph = builder.compile(checkpointer=checkpointer)

In [44]:
# Invoke the graph
config = {"configurable": {"thread_id": "1"}}
graph.invoke({"messages": "hi, my name is bob"}, config)
graph.invoke({"messages": "write a short poem about cats"}, config)
graph.invoke({"messages": "now do the same but for dogs"}, config)
final_response = graph.invoke({"messages": "what's my name?"}, config)

In [45]:
final_response["messages"][-1].pretty_print()
final_response


Your name is Bob. You told me at the beginning of our conversation. 😊


{'messages': [HumanMessage(content='hi, my name is bob', additional_kwargs={}, response_metadata={}, id='a8cc54af-b4bc-41b1-bc5f-4b7f2666745c'),
  AIMessage(content="Hi Bob! It's nice to meet you. How can I help you today?", additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-2.0-flash', 'safety_ratings': []}, id='run--d71e7a49-66aa-43d1-a9ef-518caf5d78cc-0', usage_metadata={'input_tokens': 6, 'output_tokens': 19, 'total_tokens': 25, 'input_token_details': {'cache_read': 0}}),
  HumanMessage(content='write a short poem about cats', additional_kwargs={}, response_metadata={}, id='e34e8917-4668-44e2-99d6-0de433bd64be'),
  AIMessage(content="A velvet paw, a silent tread,\nEmerald eyes within its head.\nA purring rumble, soft and low,\nA feline grace in gentle flow.\n\nA hunter's instinct, sharp and keen,\nA sunbeam nap, a sleepy scene.\nFrom whisker twitch to playful leap,\nA cat's a secret, 

In [46]:
for m in final_response["messages"]:
    m.pretty_print()


hi, my name is bob

Hi Bob! It's nice to meet you. How can I help you today?

write a short poem about cats

A velvet paw, a silent tread,
Emerald eyes within its head.
A purring rumble, soft and low,
A feline grace in gentle flow.

A hunter's instinct, sharp and keen,
A sunbeam nap, a sleepy scene.
From whisker twitch to playful leap,
A cat's a secret, buried deep.

now do the same but for dogs

A wagging tail, a happy bark,
A loyal friend, a light in dark.
With boundless joy and muddy paws,
He breaks the rules and knows no flaws.

A wet, cold nose, a loving gaze,
Through fields he runs in sunny haze.
A furry cuddle, warm and true,
A dog's devotion sees you through.

what's my name?

Your name is Bob. You told me at the beginning of our conversation. 😊


# Context

Context Engineering is the practice of building dynamic systems that provide the right information and tools in the right format, so that a language model can plausibly accomplish a task.

config : data passed at the start of a run.

short_term_memory(State)

long_term_memory(Store)

# Human In The Loop

Approve or reject: Pause the graph before a critical step.

Edit Graph State: Pause the graph to review and edit the graph state.

Reviewing tool calls and validate human input.

# Subgraphs

Shared State Schemas : Parent and subgraph have shared state keys in their state schemas.

Different State Schemas: No shared state keys in parent graph and subgraph.

TypedDict is the time hint for dictionaries, no runtime class.

In [52]:
from typing_extensions import TypedDict

# Define subgraph
class SubgraphState(TypedDict):
    foo: str
    bar: str

def subgraph_node_1(state: SubgraphState):
    return {"bar": "bar"}

def subgraph_node_2(state: SubgraphState):
    # note that this node is using a state key ('bar') that is only available in the subgraph
    # and is sending update on the shared state key ('foo')
    return {"foo": state["foo"] + state["bar"]}

In [53]:
subgraph_builder = StateGraph(SubgraphState)
subgraph_builder.add_node(subgraph_node_1)
subgraph_builder.add_node(subgraph_node_2)
subgraph_builder.add_edge(START, "subgraph_node_1")
subgraph_builder.add_edge("subgraph_node_1", "subgraph_node_2")
subgraph = subgraph_builder.compile()

In [54]:
# Define parent graph
class ParentState(TypedDict):
    foo: str

def node_1(state: ParentState):
    return {"foo": "hi! " + state["foo"]}

In [55]:
builder = StateGraph(ParentState)
builder.add_node("node_1", node_1)
builder.add_node("node_2", subgraph)
builder.add_edge(START, "node_1")
builder.add_edge("node_1", "node_2")
graph = builder.compile()

for chunk in graph.stream({"foo": "foo"}):
    print(chunk)

{'node_1': {'foo': 'hi! foo'}}
{'node_2': {'foo': 'hi! foobar'}}


# Subgraph for DIfferent State Schema

In [56]:
from typing_extensions import TypedDict
from langgraph.graph.state import StateGraph, START

# Define subgraph
class SubgraphState(TypedDict):
    # note that none of these keys are shared with the parent graph state
    bar: str
    baz: str

def subgraph_node_1(state: SubgraphState):
    return {"baz": "baz"}

def subgraph_node_2(state: SubgraphState):
    return {"bar": state["bar"] + state["baz"]}

subgraph_builder = StateGraph(SubgraphState)
subgraph_builder.add_node(subgraph_node_1)
subgraph_builder.add_node(subgraph_node_2)
subgraph_builder.add_edge(START, "subgraph_node_1")
subgraph_builder.add_edge("subgraph_node_1", "subgraph_node_2")
subgraph = subgraph_builder.compile()

In [57]:
# Define parent graph
class ParentState(TypedDict):
    foo: str

def node_1(state: ParentState):
    return {"foo": "hi! " + state["foo"]}

def node_2(state: ParentState):
    response = subgraph.invoke({"bar": state["foo"]})
    return {"foo": response["bar"]}


builder = StateGraph(ParentState)
builder.add_node("node_1", node_1)
builder.add_node("node_2", node_2)
builder.add_edge(START, "node_1")
builder.add_edge("node_1", "node_2")
graph = builder.compile()

In [58]:
for chunk in graph.stream({"foo": "foo"}, subgraphs=True):
    print(chunk)

((), {'node_1': {'foo': 'hi! foo'}})
(('node_2:9202bb38-192f-f05a-c1d2-88676d80e119',), {'subgraph_node_1': {'baz': 'baz'}})
(('node_2:9202bb38-192f-f05a-c1d2-88676d80e119',), {'subgraph_node_2': {'bar': 'hi! foobaz'}})
((), {'node_2': {'foo': 'hi! foobaz'}})


# Different Schemas : 2 Level Subgraph

In [59]:
# Grandchild graph
from typing_extensions import TypedDict
from langgraph.graph.state import StateGraph, START, END

class GrandChildState(TypedDict):
    my_grandchild_key: str

def grandchild_1(state: GrandChildState) -> GrandChildState:
    # NOTE: child or parent keys will not be accessible here
    return {"my_grandchild_key": state["my_grandchild_key"] + ", how are you"}

In [61]:
grandchild = StateGraph(GrandChildState)
grandchild.add_node("grandchild_1", grandchild_1)

grandchild.add_edge(START, "grandchild_1")
grandchild.add_edge("grandchild_1", END)

grandchild_graph = grandchild.compile()

In [62]:
# Child graph
class ChildState(TypedDict):
    my_child_key: str

def call_grandchild_graph(state: ChildState) -> ChildState:
    # NOTE: parent or grandchild keys won't be accessible here
    grandchild_graph_input = {"my_grandchild_key": state["my_child_key"]}
    grandchild_graph_output = grandchild_graph.invoke(grandchild_graph_input)
    return {"my_child_key": grandchild_graph_output["my_grandchild_key"] + " today?"}

In [63]:
child = StateGraph(ChildState)
child.add_node("child_1", call_grandchild_graph)
child.add_edge(START, "child_1")
child.add_edge("child_1", END)
child_graph = child.compile()

In [64]:
# Parent graph
class ParentState(TypedDict):
    my_key: str

def parent_1(state: ParentState) -> ParentState:
    # NOTE: child or grandchild keys won't be accessible here
    return {"my_key": "hi " + state["my_key"]}

def parent_2(state: ParentState) -> ParentState:
    return {"my_key": state["my_key"] + " bye!"}

def call_child_graph(state: ParentState) -> ParentState:
    child_graph_input = {"my_child_key": state["my_key"]}
    child_graph_output = child_graph.invoke(child_graph_input)
    return {"my_key": child_graph_output["my_child_key"]}


In [65]:
parent = StateGraph(ParentState)
parent.add_node("parent_1", parent_1)
parent.add_node("child", call_child_graph)
parent.add_node("parent_2", parent_2)

parent.add_edge(START, "parent_1")
parent.add_edge("parent_1", "child")
parent.add_edge("child", "parent_2")
parent.add_edge("parent_2", END)

parent_graph = parent.compile()

for chunk in parent_graph.stream({"my_key": "Bob"}, subgraphs=True):
    print(chunk)

((), {'parent_1': {'my_key': 'hi Bob'}})
(('child:4bac90dd-7659-865a-926d-6311fdd65803', 'child_1:fc7b58c6-5438-845d-43b7-9f019d100918'), {'grandchild_1': {'my_grandchild_key': 'hi Bob, how are you'}})
(('child:4bac90dd-7659-865a-926d-6311fdd65803',), {'child_1': {'my_child_key': 'hi Bob, how are you today?'}})
((), {'child': {'my_key': 'hi Bob, how are you today?'}})
((), {'parent_2': {'my_key': 'hi Bob, how are you today? bye!'}})


# Multiagent Systems

An agent is a system that uses LLM to decide the control flow of an application.

- If agent has too many tools at its disposal and makes poor decisions about which tool to call next.

Single Agent, Network Agent, Supervisor, Hierarchical, Custom.