# 修剪 messages
大模型的上下文窗口是有限的，这意味着输入的 token 数也是有限的。如果单条消息很长，或者对话历史记录很长，就需要控制输入的长度。
这时就需要裁剪消息。

## 基于token数裁剪

In [8]:
import getpass
import os

# Best-effort: read variables from a local .env file when `python-dotenv`
# is installed; otherwise continue with the environment as it is.
try:
    from dotenv import load_dotenv

    load_dotenv()
except ImportError:
    pass

# LangSmith tracing is switched on unconditionally.
os.environ["LANGSMITH_TRACING"] = "true"

# Prompt only for the settings that are not already in the environment.
if os.environ.get("LANGSMITH_API_KEY") is None:
    os.environ["LANGSMITH_API_KEY"] = getpass.getpass(
        "Enter your LangSmith API key (optional): "
    )

if os.environ.get("LANGSMITH_PROJECT") is None:
    # An empty answer falls back to the project name "default".
    os.environ["LANGSMITH_PROJECT"] = (
        getpass.getpass('Enter your LangSmith Project Name (default = "default"): ')
        or "default"
    )

if os.environ.get("OPENAI_API_KEY") is None:
    os.environ["OPENAI_API_KEY"] = getpass.getpass(
        "Enter your OpenAI API key (required if using OpenAI): "
    )

# Endpoint and model name come from the environment; either may be None.
base_url = os.getenv("BASE_URL")
model_name = os.getenv("MODEL_NAME")

print(base_url, model_name)

https://dashscope.aliyuncs.com/compatible-mode/v1 qwen-plus


In [None]:
from langchain_core.messages import (
    AIMessage,
    HumanMessage,
    SystemMessage,
    ToolMessage,
    trim_messages,
)
from langchain_core.messages.utils import count_tokens_approximately

# Sample chat history shared by the trimming examples below: one system
# prompt followed by alternating human / AI turns, ending on a human turn.
messages = [
    SystemMessage(content="you're a good assistant, you always respond with a joke."),
    HumanMessage(content="i wonder why it's called langchain"),
    AIMessage(
        content=(
            'Well, I guess they thought "WordRope" and "SentenceString" '
            "just didn't have the same ring to it!"
        )
    ),
    HumanMessage(content="and who is harrison chasing anyways"),
    AIMessage(
        content=(
            "Hmmm let me think.\n\n"
            "Why, he's probably chasing after the last cup of coffee in the office!"
        )
    ),
    HumanMessage(content="what do you call a speechless parrot"),
]


trim_messages(
    messages,
    # Keep the last <= max_tokens tokens of the messages.
    strategy="last",
    # Token budget; remember to adjust it to the desired conversation length.
    max_tokens=45,
    # Remember to adjust the counter for your model,
    # or pass in a custom token_counter.
    token_counter=count_tokens_approximately,
    # Usually we want to keep the SystemMessage if it is present in the
    # original history: it holds the special instructions for the model.
    include_system=True,
    # Most chat models expect the chat history to start with either:
    # (1) a HumanMessage, or
    # (2) a SystemMessage followed by a HumanMessage.
    start_on="human",
    # Most chat models expect the chat history to end with either:
    # (1) a HumanMessage, or
    # (2) a ToolMessage.
    end_on=("human", "tool"),
    # Messages are kept or dropped whole; none is split to fit the budget.
    allow_partial=False,
)

[SystemMessage(content="you're a good assistant, you always respond with a joke.", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]

## 基于消息数裁剪
另一种方法是根据消息数量来裁剪聊天记录，只需设置 token_counter=len。此时每条消息将被视为单个 token，而 max_tokens 参数将控制允许保留的最大消息数量。

当基于消息数量使用 trim_messages 功能时，下面示例中的参数组合是推荐的默认配置。请根据实际需求调整 max_tokens 的数值。

In [6]:
trim_messages(
    messages,
    # Trim from the end: the most recent messages are the ones kept.
    strategy="last",
    # With token_counter=len every message counts as exactly one token,
    # so max_tokens is the maximum number of messages to keep.
    # Adjust it for your use case.
    token_counter=len,
    max_tokens=4,
    # The SystemMessage carries special instructions for the model, so it
    # is usually kept when the original history contains one.
    include_system=True,
    # Chat models generally expect the history to begin with a HumanMessage,
    # or with a SystemMessage followed by a HumanMessage.
    start_on="human",
    # Chat models generally expect the history to finish with a HumanMessage
    # or a ToolMessage.
    end_on=("human", "tool"),
)

[SystemMessage(content="you're a good assistant, you always respond with a joke.", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='and who is harrison chasing anyways', additional_kwargs={}, response_metadata={}),
 AIMessage(content="Hmmm let me think.\n\nWhy, he's probably chasing after the last cup of coffee in the office!", additional_kwargs={}, response_metadata={}),
 HumanMessage(content='what do you call a speechless parrot', additional_kwargs={}, response_metadata={})]

## 使用模型统计 token

In [None]:
from langchain_openai import ChatOpenAI

trim_messages(
    messages,
    # Trim from the front: the earliest messages are the ones kept.
    strategy="first",
    # A chat model instance is passed as the token counter;
    # counting this way does not consume any tokens.
    token_counter=ChatOpenAI(model="gpt-4o"),
    max_tokens=45,
)

[SystemMessage(content="you're a good assistant, you always respond with a joke.", additional_kwargs={}, response_metadata={}),
 HumanMessage(content="i wonder why it's called langchain", additional_kwargs={}, response_metadata={})]