# Memory Mechanism

[LangChain AI](https://langchain-ai.github.io/langgraph/agents/memory/)

In [None]:
# Memory Mechanism

from langchain.memory import ChatMessageHistory

# Create a chat history object to record messages in.
history = ChatMessageHistory()

# Record one user message followed by one AI message.
history.add_user_message("hi!")
history.add_ai_message("whats up?")

# Bare notebook expression: the stored messages become the cell output.
history.messages
# [HumanMessage(content='hi!'), AIMessage(content='whats up?')]

## ConversationChain -> RunnableWithMessageHistory

from dotenv import load_dotenv
load_dotenv()

# True

from langchain_openai import OpenAI
from langchain.chains import ConversationChain

# Completion-style OpenAI model used by the ConversationChain examples that follow.
llm = OpenAI(
    temperature=0.5,
    model_name='gpt-3.5-turbo-instruct'  # 4096 Token
)

# No explicit memory object is passed to the chain here.
conv_chain = ConversationChain(llm=llm)

# Show the prompt template the chain uses.
print(conv_chain.prompt.template)


In [None]:
## Using ConversationBufferMemory
from langchain.chains.conversation.memory import ConversationBufferMemory

# Chain with an explicit ConversationBufferMemory attached.
conversation = ConversationChain(llm=llm, memory=ConversationBufferMemory())

# round 1
conversation("My sister's birthday is tomorrow, and I need a birthday bouquet.")
# NOTE(review): this prints the chain object itself, not the reply; the call's
# return value above is discarded — confirm that is intended.
print(conversation)

# Show what the memory has buffered after the first round.
print(conversation.memory.buffer)


In [None]:
# round 2: second call on the same chain, then show the memory buffer again.
conversation("She likes pink roses, specifically the color pink.")
print(conversation.memory.buffer)


In [None]:
# round 3: ask the chain to recall the purpose of the visit.
# NOTE(review): this prompt ends with "just?" while later examples in this file
# use "yesterday?" — confirm the intended wording.
conversation("I'm back again. Do you remember why I came to buy flowers just?")
# Show the prompt template used by the chain.
print("template::", conversation.prompt.template)

# Show the memory buffer after the third round.
print("memory::", conversation.memory.buffer)


LangGraph --> workflow

In [None]:
from dotenv import load_dotenv
load_dotenv()

import uuid
from langchain_openai import OpenAI
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Initialize the LLM (completion-style OpenAI model, same settings as earlier in the file)
llm = OpenAI(temperature=0.5, model_name='gpt-3.5-turbo-instruct')

# Define the Workflow Using StateGraph, with MessagesState as the state schema
workflow = StateGraph(state_schema=MessagesState)

def call_model(state: MessagesState):
    """Run the LLM on the accumulated messages and record its reply.

    Args:
        state: Graph state; ``state["messages"]`` is passed to the LLM as-is.

    Returns:
        A state update whose ``"messages"`` entry is a one-element list
        holding the model's reply.
    """
    # Imported locally so this cell does not rely on imports made in later cells.
    from langchain_core.messages import AIMessage

    response = llm.invoke(state["messages"])
    # The completion-style `OpenAI` model returns a plain string. A bare string
    # written to the `messages` channel is coerced into a HumanMessage, which
    # would record the assistant's reply as if the user had said it. Wrap it
    # so the turn is attributed to the assistant.
    if isinstance(response, str):
        response = AIMessage(content=response)
    return {"messages": [response]}

# Single-node graph: START leads to "model". The edge is declared before the
# node it targets is registered on the next line.
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# Add Memory Management: compile the graph with an in-memory checkpointer
memory = MemorySaver()
app = workflow.compile(checkpointer=memory)

# Set thread ID and config; the same config is reused for every turn below.
# NOTE(review): thread_id is a uuid.UUID object rather than a str — confirm that
# is acceptable for the checkpointer in use.
thread_id = uuid.uuid4()
config = {"configurable": {"thread_id": thread_id}}


In [None]:
# Multi-Turn Dialogue
# Turn 1: the system prompt is sent together with the first user message.
query_1 = "My sister's birthday is tomorrow, and I need a birthday bouquet."
input_messages_1 = [
    {"role": "system", "content": "You are a florist. Answer the following questions as best you can."},
    {"role": "user", "content": query_1},
]

# Stream state values and pretty-print the last message of each emitted state.
for event in app.stream({"messages": input_messages_1}, config, stream_mode="values"):
    event["messages"][-1].pretty_print()


In [None]:
# Turn 2: only the new user message is sent, with the same config as turn 1.
query_2 = "She likes pink roses, specifically the color pink."
input_messages_2 = [{"role": "user", "content": query_2}]

# Stream state values and pretty-print the last message of each emitted state.
for event in app.stream({"messages": input_messages_2}, config, stream_mode="values"):
    event["messages"][-1].pretty_print()



In [None]:
# Turn 3: ask the model to recall the purpose of the visit, same config again.
query_3 = "I'm back again. Do you remember why I came to buy flowers just?"
input_messages_3 = [{"role": "user", "content": query_3}]

# Stream state values and pretty-print the last message of each emitted state.
for event in app.stream({"messages": input_messages_3}, config, stream_mode="values"):
    event["messages"][-1].pretty_print()


In [None]:
## Conversation Buffer Window: ConversationBufferWindowMemory
from langchain.chains.conversation.memory import ConversationBufferWindowMemory

# Chain backed by a windowed buffer memory configured with k=1.
conversation = ConversationChain(llm=llm, memory=ConversationBufferWindowMemory(k=1))

# Rounds 1-3: send each customer line in order and print the chain's full output.
for customer_line in (
    "My sister's birthday is tomorrow, and I need a birthday bouquet.",
    "She likes pink roses, specifically the color pink.",
    "I'm back again. Do you remember why I came to buy flowers yesterday?",
):
    result = conversation(customer_line)
    print(result)


In [None]:
# ConversationBufferWindowMemory, ConversationTokenBufferMemory -> trim_messages

# use trim_messages - k, n
from langchain_openai import ChatOpenAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, trim_messages
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Chat model used by trimmed_conversation below (rebinds the module-level `llm`).
llm = ChatOpenAI(model="gpt-4o", temperature=0.7)

# Running transcript, seeded with the system message; trimmed_conversation appends to it.
system_message = SystemMessage("You are a helpful assistant that remembers recent messages.")
messages = [system_message]


In [None]:
# Trimming window size; trimmed_conversation reads this global when it calls trim_messages.
k = 1


In [None]:
def trimmed_conversation(user_input):
    """Send ``user_input`` to the model with a sliding window of recent history.

    The new human message is appended to the module-level ``messages`` list,
    the list is trimmed so the prompt holds the system message, the last ``k``
    human/AI exchanges and the new message, and the model's reply is appended
    back to ``messages`` before being returned.

    Args:
        user_input: Text of the user's next message.

    Returns:
        The model's reply as a string.
    """
    # `messages` is only mutated, never rebound, so no `global` statement is needed.
    messages.append(HumanMessage(content=user_input))

    # With token_counter=len, max_tokens is a budget counted in messages. The
    # system message counts against it (include_system=True) and so does the
    # message just appended, so keeping k prior exchanges needs 1 + 2*k + 1
    # slots. A budget of k + 1 would leave no room for history when k == 1.
    window_size = 2 * k + 2

    trimmed_messages = trim_messages(
        messages=messages,
        token_counter=len,
        max_tokens=window_size,
        strategy="last",
        start_on="human",
        include_system=True,
        allow_partial=False,
    )

    # The trimmed messages are used directly as the prompt; no template variables.
    prompt = ChatPromptTemplate.from_messages(trimmed_messages)
    chain = prompt | llm | StrOutputParser()

    response = chain.invoke({})

    # Store the reply so it is available to later turns.
    messages.append(AIMessage(content=response))
    return response


In [None]:
# Round 1: first user message; prints the reply returned by trimmed_conversation.
result = trimmed_conversation("My sister's birthday is tomorrow, and I need a birthday bouquet.")
print("Assistant:", result)


In [None]:
# Round 2: follow-up message on the same module-level `messages` transcript.
result = trimmed_conversation("She likes pink roses, specifically the color pink.")
print("Assistant:", result)


In [None]:
# Round 3: ask the assistant to recall the purpose of the visit.
result = trimmed_conversation("I'm back again. Do you remember why I came to buy flowers yesterday?")
print("Assistant:", result)


In [None]:
# ConversationTokenBufferMemory -> trimmed_conversation
from langchain_openai import ChatOpenAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, trim_messages
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser

# Chat model; also passed to trim_messages as the token counter in trimmed_conversation below.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.7)

system_message = SystemMessage("You are a helpful assistant that remembers recent messages.")

# Running transcript, reset here so it starts with only the system message.
messages = [system_message]

# Budget passed to trim_messages as max_tokens.
max_tokens = 80

def trimmed_conversation(user_input):
    """Reply to ``user_input`` using a token-budgeted slice of the transcript.

    Appends the user's message to the module-level ``messages`` list, trims
    the list with ``trim_messages`` (``llm`` as the token counter,
    ``max_tokens`` as the budget), invokes the model on the trimmed messages,
    stores the reply in ``messages`` and returns it.

    Args:
        user_input: Text of the user's next message.

    Returns:
        The model's reply as a string.
    """
    messages.append(HumanMessage(content=user_input))

    context = trim_messages(
        messages=messages,
        token_counter=llm,
        max_tokens=max_tokens,
        strategy="last",
        start_on="human",
        include_system=True,
        allow_partial=False,
    )

    # Trimmed messages -> prompt -> model -> plain string.
    pipeline = ChatPromptTemplate.from_messages(context) | llm | StrOutputParser()
    reply = pipeline.invoke({})

    messages.append(AIMessage(content=reply))
    return reply

if __name__ == "__main__":
    # Three-turn demo: each turn goes through trimmed_conversation in order.
    for user_turn in (
        "My sister's birthday is tomorrow, and I need a birthday bouquet.",
        "She likes pink roses, specifically the color pink.",
        "I'm back again. Do you remember why I came to buy flowers yesterday?",
    ):
        print("Assistant:", trimmed_conversation(user_turn))


In [None]:
import uuid
from langchain_openai import ChatOpenAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, trim_messages
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import START, MessagesState, StateGraph

# Chat model; also used as the token counter inside call_model below.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0.7)

# Sent together with the first human message when the demo runs.
system_message = SystemMessage("You are a helpful assistant that remembers recent messages.")

# New graph whose state schema is MessagesState.
workflow = StateGraph(state_schema=MessagesState)

def call_model(state: MessagesState):
    """Generate the next assistant reply from a token-trimmed view of the history.

    Args:
        state: Graph state; ``state["messages"]`` holds the conversation so far.

    Returns:
        A state update whose ``"messages"`` entry contains only the new
        ``AIMessage``.
    """
    trimmed_messages = trim_messages(
        state["messages"],
        token_counter=llm,
        max_tokens=800,
        strategy="last",
        start_on="human",
        include_system=True,
        allow_partial=False,
    )

    # The trimmed messages are used directly as the prompt; no template variables.
    prompt = ChatPromptTemplate.from_messages(trimmed_messages)
    chain = prompt | llm | StrOutputParser()

    response = chain.invoke({})

    # Return only the delta. MessagesState's reducer appends new messages to
    # the stored history, so re-sending every earlier message just makes the
    # reducer re-merge the whole conversation on every turn.
    return {"messages": [AIMessage(content=response)]}

# Single-node graph: START leads to "model", which runs call_model.
workflow.add_edge(START, "model")
workflow.add_node("model", call_model)

# In-memory checkpointer handed to compile() below.
memory = MemorySaver()

app = workflow.compile(checkpointer=memory)

# Fresh thread id; the same config is reused for every turn of the demo below.
thread_id = uuid.uuid4()
config = {"configurable": {"thread_id": thread_id}}

if __name__ == "__main__":
    # Each entry: (messages sent ahead of the human message, human text).
    # Only the first turn carries the system message.
    turns = (
        ([system_message], "My sister's birthday is tomorrow, and I need a birthday bouquet."),
        ([], "She likes pink roses, specifically the color pink."),
        ([], "I'm back again. Do you remember why I came to buy flowers yesterday?"),
    )
    for leading, text in turns:
        input_message = HumanMessage(content=text)
        payload = {"messages": leading + [input_message]}
        # Pretty-print the last message of every state value the graph emits.
        for event in app.stream(payload, config, stream_mode="values"):
            event["messages"][-1].pretty_print()


ConversationBufferMemory    

ConversationBufferWindowMemory    

ConversationTokenBufferMemory    

LangGraph


In [None]:
## ConversationSummaryMemory

from langchain.chains.conversation.memory import ConversationSummaryMemory

# Chain whose memory is a ConversationSummaryMemory driven by the same llm.
conversation = ConversationChain(llm=llm, memory=ConversationSummaryMemory(llm=llm))

# Rounds 1-3: send each customer line in order and print the chain's full output.
for customer_line in (
    "My sister's birthday is tomorrow, and I need a birthday bouquet.",
    "She likes pink roses, specifically the color pink.",
    "I'm back again. Do you remember why I came to buy flowers yesterday?",
):
    result = conversation(customer_line)
    print(result)


In [None]:
# persistence -> Session Summary

from langchain_openai import ChatOpenAI
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage, RemoveMessage
from langgraph.checkpoint.memory import MemorySaver
from langgraph.graph import MessagesState, StateGraph, START, END
from typing import Literal

# In-memory checkpointer; passed to workflow.compile() further down.
memory = MemorySaver()

# Graph state: everything MessagesState provides plus a `summary` field.
class State(MessagesState):
    # Summary text; written by summarize_conversation and read with
    # state.get("summary", "") in the node functions.
    summary: str

# Chat model invoked by both call_model and summarize_conversation.
model = ChatOpenAI(model="gpt-4o-mini", temperature=0.7)

def call_model(state: State):
    """Invoke the chat model, prefixing the stored summary when one exists.

    Args:
        state: Graph state with ``messages`` and an optional ``summary``.

    Returns:
        A state update whose ``"messages"`` entry holds the model's response.
    """
    prior_summary = state.get("summary", "")
    history = state["messages"]
    if prior_summary:
        # Hand the summary to the model as a system message placed ahead of the history.
        preface = SystemMessage(content=f"Summary of conversation earlier: {prior_summary}")
        history = [preface] + history

    return {"messages": [model.invoke(history)]}

def should_continue(state: State) -> Literal["summarize_conversation", END]:
    """Route to the summarizer once the history exceeds six messages, else end.

    Args:
        state: Graph state; only ``state["messages"]`` is inspected.

    Returns:
        ``"summarize_conversation"`` when there are more than 6 messages,
        otherwise ``END``.
    """
    needs_summary = len(state["messages"]) > 6
    return "summarize_conversation" if needs_summary else END

def summarize_conversation(state: State):
    """Create or extend the conversation summary and drop older messages.

    Args:
        state: Graph state with ``messages`` and an optional ``summary``.

    Returns:
        A state update with the new ``summary`` text and ``RemoveMessage``
        entries for every message except the last two.
    """
    history = state["messages"]
    previous = state.get("summary", "")

    if not previous:
        instruction = "Create a summary of the conversation above:"
    else:
        instruction = (
            f"This is summary of the conversation to date: {previous}\n\n"
            "Extend the summary by taking into account the new messages above:"
        )

    # The instruction is appended as a final human message after the history.
    reply = model.invoke(history + [HumanMessage(content=instruction)])

    # Mark everything but the two most recent messages for removal.
    stale = [RemoveMessage(id=msg.id) for msg in history[:-2]]
    return {"summary": reply.content, "messages": stale}


# Graph over the summary-aware State schema.
workflow = StateGraph(State)

# Two nodes: the chat step and the summarization step.
workflow.add_node("conversation", call_model)
workflow.add_node("summarize_conversation", summarize_conversation)

# START -> conversation; should_continue then picks the summarizer or END.
workflow.add_edge(START, "conversation")
workflow.add_conditional_edges("conversation", should_continue)
workflow.add_edge("summarize_conversation", END)

# Compile with the checkpointer created earlier in this cell.
app = workflow.compile(checkpointer=memory)


In [None]:
def print_update(update):
    """Pretty-print one streamed update from the graph.

    Args:
        update: Mapping whose values are per-node state updates. Each value
            may contain a ``"messages"`` list (objects with ``pretty_print()``)
            and/or a ``"summary"`` string.

    Node updates that are empty or ``None`` are skipped, and an update
    without a ``"messages"`` key prints only its summary, if any.
    """
    # Only the values are used; the node names are not printed.
    for node_update in update.values():
        if not node_update:
            continue
        for message in node_update.get("messages", ()):
            message.pretty_print()
        if "summary" in node_update:
            print(node_update["summary"])

In [None]:
from langchain_core.messages import HumanMessage

# All three turns share one thread so the checkpointer carries state across them.
config = {"configurable": {"thread_id": "4"}}

for text in (
    "My sister's birthday is tomorrow, and I need a birthday bouquet.",
    "She likes pink roses, specifically the color pink.",
    "I'm back again. Do you remember why I came to buy flowers yesterday?",
):
    input_message = HumanMessage(content=text)
    # Echo the human message, then print every update the graph streams back.
    input_message.pretty_print()
    for event in app.stream({"messages": [input_message]}, config, stream_mode="updates"):
        print_update(event)

In [None]:
# Fetch the state values stored for this config's thread and show them as cell output.
values = app.get_state(config).values
values


In [None]:
# To see summarization trigger sooner, lower the threshold in should_continue
# (e.g. `if len(messages) > 2:`), rebuild the graph, rerun the conversation,
# and then inspect the stored state again here.
values = app.get_state(config).values
values


In [None]:
## ConversationSummaryBufferMemory

from langchain.chains.conversation.memory import ConversationSummaryBufferMemory

# Chain whose memory is a ConversationSummaryBufferMemory with max_token_limit=100.
conversation = ConversationChain(llm=llm, memory=ConversationSummaryBufferMemory(llm=llm, max_token_limit=100))

# Rounds 1-3: send each customer line in order and print the chain's full output.
for customer_line in (
    "My sister's birthday is tomorrow, and I need a birthday bouquet.",
    "She likes pink roses, specifically the color pink.",
    "I'm back again. Do you remember why I came to buy flowers yesterday?",
):
    result = conversation(customer_line)
    print(result)


![Memory Comparison](buffer-summary.png)


| Memory Type                        | Advantages                                                                                      | Disadvantages                                                                                         |
|-----------------------------------|--------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------|
| ConversationSummaryBufferMemory   | Able to recall early interactions<br>Does not miss recent information<br>High level of flexibility | For short conversations, the summarizer may increase token usage.<br>Storing original interactions may also increase token usage. |
| ConversationSummaryMemory         | Reduces token usage for long conversations.<br>Allows for longer conversation durations.<br>Straightforward and intuitive | Adds token usage for the summarizing LLM.<br>Conversation memory depends on the summarizing LLM's aggregation ability |
| ConversationBufferWindowMemory    | Retains only the most recent interactions, resulting in lower token usage.<br>Adjustable window size for flexibility. | Unable to recall early interactions.<br>Too large a window may result in excessive token usage.     |
| ConversationBufferMemory          | Provides the largest amount of information for LLM.<br>Simple and intuitive                      | Uses more tokens, resulting in increased response time and higher costs.<br>Long conversations may exceed token limits. |

---

**How to test memory behaviors:**
1. Try changing the value of `k` in the `ConversationBufferWindowMemory` and increase the number of conversation rounds to observe the memory effect.
2. Try changing the `max_token_limit` in the `ConversationSummaryBufferMemory` to observe the memory effect.
3. (Updated) Implement these memory techniques using your own business scenarios.


**Summary**

1. Memory Management Mechanisms:
   - Conversation Buffer Memory
   - Window Memory limited to the last `k` interactions
   - Token Buffer Memory
   - LangGraph implementation

2. Memory Types:
   - Short-term memory
   - Long-term memory
   - Summary memory

3. Implementation Approaches:
   - Message handling and truncation
   - Conversation summarization techniques
   - State management and persistence
   - Token limit management
   - Memory checkpoints and namespace handling

4. Technical Considerations:
   - Token usage optimization
   - Context window sizing
   - History message storage
   - Real-time message processing
   - Multi-thread support
