In [1]:
from langgraph.checkpoint.memory import InMemorySaver
from langchain_community.chat_models.tongyi import ChatTongyi
# model = init_chat_model("claude-sonnet-4-5-20250929")
model = ChatTongyi(model="qwen-max", streaming=True)  # type: ignore

## 裁剪对话

用`trim_message`来裁剪上下文

In [2]:
from langchain_core.messages.utils import (
    trim_messages,  
    count_tokens_approximately  
)
from langgraph.graph import StateGraph, START, MessagesState

summarization_model = model.bind(max_tokens=128)

def call_model(state: MessagesState):
    messages = trim_messages(  
        state["messages"],
        strategy="last",
        token_counter=count_tokens_approximately,
        max_tokens=512,  # 128就被裁剪掉第一轮的对话了
        start_on="human",
        end_on=("human", "tool"),
    )
    response = model.invoke(messages)
    return {"messages": [response]}

checkpointer = InMemorySaver()
builder = StateGraph(MessagesState)
builder.add_node(call_model)
builder.add_edge(START, "call_model")
graph = builder.compile(checkpointer=checkpointer)

config = {"configurable": {"thread_id": "1"}}
graph.invoke({"messages": "hi, my name is bob"}, config)
graph.invoke({"messages": "write a short poem about cats"}, config)
graph.invoke({"messages": "now do the same but for dogs"}, config)
final_response = graph.invoke({"messages": "what's my name?"}, config)

final_response["messages"][-1].pretty_print()


Your name is Bob! How can I assist you further, Bob?


# 删除对话

用`RemoveMessage`来删除对话

In [3]:
from langchain.messages import RemoveMessage  

def delete_messages(state):
    messages = state["messages"]
    if len(messages) > 2:
        # remove the earliest two messages
        return {"messages": [RemoveMessage(id=m.id) for m in messages[:2]]}  

def call_model(state: MessagesState):
    response = model.invoke(state["messages"])
    return {"messages": response}

builder = StateGraph(MessagesState)
builder.add_sequence([call_model, delete_messages])
builder.add_edge(START, "call_model")

checkpointer = InMemorySaver()
app = builder.compile(checkpointer=checkpointer)

for event in app.stream(
    {"messages": [{"role": "user", "content": "hi! I'm bob"}]},
    config,
    stream_mode="values"
):
    print([(message.type, message.content) for message in event["messages"]])

for event in app.stream(
    {"messages": [{"role": "user", "content": "what's my name?"}]},
    config,
    stream_mode="values"
):
    print([(message.type, message.content) for message in event["messages"]])

[('human', "hi! I'm bob")]
[('human', "hi! I'm bob"), ('ai', "Hello Bob! It's nice to meet you. How can I assist you today?")]
[('human', "hi! I'm bob"), ('ai', "Hello Bob! It's nice to meet you. How can I assist you today?"), ('human', "what's my name?")]
[('human', "hi! I'm bob"), ('ai', "Hello Bob! It's nice to meet you. How can I assist you today?"), ('human', "what's my name?"), ('ai', "Your name is Bob! Is there anything else you'd like to know or discuss?")]
[('human', "what's my name?"), ('ai', "Your name is Bob! Is there anything else you'd like to know or discuss?")]


# 消息摘要

用`SummarizationNode`来获取消息摘要

```python
summarization_node = SummarizationNode(  
    token_counter=count_tokens_approximately,
    model=summarization_model,
    max_tokens=256,  # 最终给llm的长度
    max_tokens_before_summary=256,  # 触发总结的长度
    max_summary_tokens=128,  # 最大总结长度
)
```

官方教程给的这个设置就不合理，因为>=256才触发总结，但是触发总结时给llm的上限长度也是256，就意味着>的部分肯定被裁剪了，合理的设置区间应该是max_token > max_tokens_before_summary。

上面这个设置就记不住下面的name是bob

In [10]:
from typing import Any, TypedDict

from langchain.messages import AnyMessage
from langchain_core.messages.utils import count_tokens_approximately
from langgraph.graph import StateGraph, START, MessagesState
from langgraph.checkpoint.memory import InMemorySaver
from langmem.short_term import SummarizationNode, RunningSummary  

summarization_model = model.bind(max_tokens=128)

class State(MessagesState):
    context: dict[str, RunningSummary]  

class LLMInputState(TypedDict):  
    summarized_messages: list[AnyMessage]
    context: dict[str, RunningSummary]

summarization_node = SummarizationNode(  
    token_counter=count_tokens_approximately,
    model=summarization_model,
    max_tokens=256,
    max_tokens_before_summary=128,
    max_summary_tokens=64,
)

def call_model(state: LLMInputState):  
    response = model.invoke(state["summarized_messages"])
    return {"messages": [response]}

checkpointer = InMemorySaver()
builder = StateGraph(State)
builder.add_node(call_model)
builder.add_node("summarize", summarization_node)  
builder.add_edge(START, "summarize")
builder.add_edge("summarize", "call_model")
graph = builder.compile(checkpointer=checkpointer)

# Invoke the graph
config = {"configurable": {"thread_id": "1"}}
graph.invoke({"messages": "hi, my name is bob"}, config)
graph.invoke({"messages": "write a short poem about cats"}, config)
graph.invoke({"messages": "now do the same but for dogs"}, config)
final_response = graph.invoke({"messages": "what's my name?"}, config)

final_response["messages"][-1].pretty_print()
print("\nSummary:", final_response["context"]["running_summary"].summary)


You mentioned your name earlier as Bob. Is that correct, or did you have a different name in mind?

Summary: Sure, here's a short poem about dogs:

With tails wagging, joyful and free,
Bounding through fields with glee.
Eyes full of trust, hearts so true,
Loyal friends, steadfast and new.

Barking at life with open cheer,
In every moment, they're always near.
Paws that patter, a warm embrace,
Dogs, the light in our daily race.

### Summary of the Conversation:
- Bob introduced himself.
- He requested a short poem about cats, which was provided.
- He then asked for a similar poem, but this time about dogs, which was also provided.
