# Middleware 中间件

![](https://mintcdn.com/langchain-5e9cc07a/RAP6mjwE5G00xYsA/oss/images/middleware_final.png?w=1100&fit=max&auto=format&n=RAP6mjwE5G00xYsA&q=85&s=ec45e1932d1279b1beee4a4b016b473f)

In [None]:
# Basic usage

from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware, HumanInTheLoopMiddleware

agent = create_agent(
    model="gpt-4o",
    tools=[],
    middleware=[
        # The summarizer takes the model that writes the summaries as a
        # required argument; calling it with no arguments fails.
        SummarizationMiddleware(model="gpt-4o-mini"),
        # The per-tool interrupt policy is also a required argument. An empty
        # mapping configures no tool for review (this agent has no tools).
        HumanInTheLoopMiddleware(interrupt_on={}),
    ],
)

## Built-in middleware 内置中间件

In [None]:
# 1. Summarization middleware

from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware

# Configure the summarizer on its own so the agent definition stays short.
summarizer = SummarizationMiddleware(
    model="gpt-4o-mini",
    # Trigger summarization once the message history exceeds 4000 tokens.
    max_tokens_before_summary=4000,
    # After older content is compressed, keep the 20 most recent messages
    # verbatim (to preserve local context).
    messages_to_keep=20,
    summary_prompt="Custom prompt for summarization...",  # Optional
)

agent = create_agent(
    model="gpt-4o",
    tools=[],
    middleware=[summarizer],
)

In [1]:
# 2. Human-in-the-loop middleware

from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware
from langgraph.checkpoint.memory import InMemorySaver

# Per-tool review policy, keyed by tool name.
review_policy = {
    # Require approval, editing, or rejection for sending emails: each time
    # the agent tries to call this tool, execution pauses and a human is
    # prompted to approve, edit, or reject the action.
    "send_email_tool": {
        "allowed_decisions": ["approve", "edit", "reject"],
    },
    # Auto-approve reading emails: False means no human review is needed,
    # True would require it.
    "read_email_tool": False,
}

agent = create_agent(
    model="gpt-4o",
    tools=[],
    checkpointer=InMemorySaver(),
    middleware=[HumanInTheLoopMiddleware(interrupt_on=review_policy)],
)

In [4]:
# 3. Anthropic prompt caching

from langchain_anthropic import ChatAnthropic
from langchain_anthropic.middleware import AnthropicPromptCachingMiddleware
from langchain.agents import create_agent
from langchain.messages import HumanMessage

LONG_PROMPT = """
Please be a helpful assistant.

<Lots more context ...>
"""

# Build the chat model first, then hand it to the agent together with the
# caching middleware (cache entries use a "5m" ttl).
claude = ChatAnthropic(model="claude-sonnet-4")
agent = create_agent(
    model=claude,
    system_prompt=LONG_PROMPT,
    middleware=[AnthropicPromptCachingMiddleware(ttl="5m")],
)

# First call: cache store.
agent.invoke({"messages": [HumanMessage("Hi, my name is Bob")]})

# Second call: cache hit, the system prompt is cached.
agent.invoke({"messages": [HumanMessage("What's my name?")]})

PermissionDeniedError: Error code: 403 - {'error': {'type': 'forbidden', 'message': 'Request not allowed'}}

In [5]:
# 4. Model call limit

from langchain.agents import create_agent
from langchain.agents.middleware import ModelCallLimitMiddleware

call_limiter = ModelCallLimitMiddleware(
    thread_limit=10,  # At most 10 model calls per thread
    run_limit=5,  # At most 5 model calls per run (a single invoke)
    exit_behavior="end",  # What to do once a limit is reached
)

agent = create_agent(
    model="gpt-4o",
    tools=[],
    middleware=[call_limiter],
)

In [None]:
# 5. Model fallback

from langchain.agents import create_agent
from langchain.agents.middleware import ModelFallbackMiddleware


# Fallback models, listed in the order they are tried after an error.
fallback_chain = ModelFallbackMiddleware(
    "gpt-4o-mini",  # Try first on error
    "claude-3-5-sonnet-20241022",  # Then this
)

agent = create_agent(
    model="gpt-4o",  # Primary model
    tools=[],
    middleware=[fallback_chain],
)

In [None]:
# 6. PII (personally identifiable information) detection

from langchain.agents import create_agent
from langchain.agents.middleware import PIIMiddleware


# One middleware instance per PII type, constructed in list order.
pii_guards = [
    # Redact emails in user input
    PIIMiddleware("email", strategy="redact", apply_to_input=True),
    # Mask credit cards (show last 4 digits)
    PIIMiddleware("credit_card", strategy="mask", apply_to_input=True),
    # Custom PII type with regex
    PIIMiddleware(
        "api_key",
        detector=r"sk-[a-zA-Z0-9]{32}",
        strategy="block",  # Raise error if detected
    ),
]

agent = create_agent(
    model="gpt-4o",
    tools=[],
    middleware=pii_guards,
)

In [12]:
# 7. Planning

from langchain.agents import create_agent
from langchain.agents.middleware import TodoListMiddleware
from langchain.messages import HumanMessage


agent = create_agent(
    model="gpt-5",
    tools=[],
    middleware=[TodoListMiddleware()],
)

# Run one request, then show the "todos" entry of the returned state.
result = agent.invoke(
    {"messages": [HumanMessage("Help me refactor my codebase")]},
)
# Array of todo items with status tracking
print(result["todos"])

[{'content': 'Gather key details about the codebase (language, frameworks, size, repo access, goals, pain points)', 'status': 'in_progress'}, {'content': 'Obtain repository link or representative code samples (or a directory tree) to review', 'status': 'pending'}, {'content': 'Confirm how to build/run/tests locally and CI details', 'status': 'pending'}, {'content': 'Define refactoring objectives and constraints with you (maintainability, performance, testability, API stability, deadlines)', 'status': 'pending'}, {'content': 'Set up or review linting/static analysis tooling appropriate for the stack', 'status': 'pending'}, {'content': 'Run a quick audit to identify hot spots (complexity, duplication, dead code, outdated deps)', 'status': 'pending'}, {'content': 'Draft a prioritized refactoring plan with milestones and small PRs', 'status': 'pending'}, {'content': 'Execute the first refactor on a low-risk area and ensure tests pass', 'status': 'pending'}, {'content': 'Document changes an

In [None]:
# 8. LLM tool selector

from langchain.agents import create_agent
from langchain.agents.middleware import LLMToolSelectorMiddleware


# NOTE(review): tool1..tool5 are not defined in this cell and the trailing
# `...` is a placeholder, so this cell is illustrative; supply real tool
# objects before running it.
agent = create_agent(
    model="gpt-4o",
    tools=[tool1, tool2, tool3, tool4, tool5, ...],  # Many tools
    middleware=[
        LLMToolSelectorMiddleware(
            model="gpt-4o-mini",  # Use cheaper model for selection
            max_tools=3,  # Limit to 3 most relevant tools
            always_include=["search"],  # Always include certain tools
        ),
    ],
)

In [None]:
# 9. Tool retry

from langchain.agents import create_agent
from langchain.agents.middleware import ToolRetryMiddleware


# NOTE(review): search_tool and database_tool are not defined in this cell;
# they must be provided before the cell can run.
agent = create_agent(
    model="gpt-4o",
    tools=[search_tool, database_tool],
    middleware=[
        ToolRetryMiddleware(
            max_retries=3,  # Retry up to 3 times
            backoff_factor=2.0,  # Exponential backoff multiplier
            initial_delay=1.0,  # Start with 1 second delay
            max_delay=60.0,  # Cap delays at 60 seconds
            jitter=True,  # Add random jitter to avoid thundering herd
        ),
    ],
)

In [None]:
# 10. LLM tool emulator

# In many scenarios -- when you only want to test the agent's behavior, or do
# not want to call real APIs (e.g. sending email, touching a database) -- you
# can enable LLMToolEmulator.
# What it does: when the model wants to use a tool, the tool is not actually
# executed; instead a model makes up the tool's return value.

from langchain.agents import create_agent
from langchain.agents.middleware import LLMToolEmulator


# NOTE(review): get_weather, search_database and send_email are not defined
# in this cell; they must be provided before the cell can run.
agent = create_agent(
    model="gpt-4o",
    tools=[get_weather, search_database, send_email],
    middleware=[
        # Emulate all tools by default
        LLMToolEmulator(),

        # Or emulate specific tools
        # LLMToolEmulator(tools=["get_weather", "search_database"]),

        # Or use a custom model for emulation
        # LLMToolEmulator(model="claude-sonnet-4-5-20250929"),
    ],
)


In [None]:
# 11. Context editing

from langchain.agents import create_agent
from langchain.agents.middleware import ContextEditingMiddleware, ClearToolUsesEdit


# NOTE(review): `tools=[...]` is a placeholder; supply real tools before
# running this cell.
agent = create_agent(
    model="gpt-4o",
    tools=[...],
    middleware=[
        # The middleware is triggered before the model runs; its `edits=[...]`
        # argument is a list of edit rules.
        ContextEditingMiddleware(
            edits=[
                # An edit rule that removes or compresses old tool-use records
                # in the context. Before each model call the message history
                # is checked; if earlier tool calls total more than
                # max_tokens=1000, old tool-use content is deleted or
                # truncated and only the most recent part is kept.

                # In effect: forget the details of long-past tool calls and
                # keep only roughly the most recent 1000 tokens.
                # NOTE(review): the released ClearToolUsesEdit appears to name
                # this threshold `trigger` rather than `max_tokens` -- confirm
                # against the installed LangChain version.
                ClearToolUsesEdit(max_tokens=1000),  # Clear old tool uses
            ],
        ),
    ],
)

## 自定义中间件

您可以通过两种方式创建中间件：
- 基于装饰器 - 对于单钩子中间件快速且简单
- 基于类 - 对于具有多个钩子的复杂中间件更强大


### 1.基于装饰器的中间件

In [None]:
# 基于装饰器的中间件

from langchain.agents.middleware import before_model, after_model, wrap_model_call
from langchain.agents.middleware import AgentState, ModelRequest, ModelResponse, dynamic_prompt
from langchain.messages import AIMessage
from langchain.agents import create_agent
from langgraph.runtime import Runtime
from typing import Any, Callable


# Node-style: logging before model calls
@before_model
def log_before_model(state: AgentState, runtime: Runtime) -> dict[str, Any] | None:
    """Print how many messages the state holds; return no state update.

    Args:
        state: Agent state; only its "messages" entry is read.
        runtime: Unused by this hook.

    Returns:
        Always None.
    """
    messages = state["messages"]
    print(f"About to call model with {len(messages)} messages")
    return None

# Node-style: validation after model calls
@after_model(can_jump_to=["end"])
def validate_output(state: AgentState, runtime: Runtime) -> dict[str, Any] | None:
    """Return a refusal and jump to "end" when the last message has "BLOCKED".

    Args:
        state: Agent state; the last item of its "messages" entry is inspected.
        runtime: Unused by this hook.

    Returns:
        None when the last message's content does not contain "BLOCKED";
        otherwise a dict with a refusal AIMessage under "messages" and
        "jump_to" set to "end".
    """
    latest = state["messages"][-1]
    # Guard clause: nothing to do for ordinary output.
    if "BLOCKED" not in latest.content:
        return None
    refusal = AIMessage("I cannot respond to that request.")
    return {"messages": [refusal], "jump_to": "end"}

# Wrap-style: retry logic
@wrap_model_call
def retry_model(
    request: ModelRequest,
    handler: Callable[[ModelRequest], ModelResponse],
) -> ModelResponse:
    """Call ``handler(request)``, retrying on any Exception up to three tries.

    Args:
        request: The model request, passed unchanged to ``handler``.
        handler: Callable that performs the model call.

    Returns:
        Whatever the first successful ``handler(request)`` call returns.

    Raises:
        Exception: The error from the third failed attempt is re-raised.
    """
    attempt = 0
    while True:
        try:
            return handler(request)
        except Exception as e:
            # The third failure (attempt index 2) is final: propagate it.
            if attempt == 2:
                raise
            print(f"Retry {attempt + 1}/3 after error: {e}")
        attempt += 1

# Wrap-style: dynamic prompts
@dynamic_prompt
def personalized_prompt(request: ModelRequest) -> str:
    """Build a prompt string that names the current user.

    Args:
        request: Model request; ``request.runtime.context`` is read.

    Returns:
        The prompt text, using "guest" when the context has no "user_id".
    """
    # NOTE(review): assumes runtime.context is a dict-like object exposing
    # .get(); confirm a context is always supplied when the agent is invoked.
    context = request.runtime.context
    user_id = context.get("user_id", "guest")
    return f"You are a helpful assistant for user {user_id}. Be concise and friendly."

# Use decorators in agent: pass the decorated functions as middleware.
# `tools=[...]` is a placeholder for real tools.
agent = create_agent(
    model="gpt-4o",
    tools=[...],
    middleware=[
        log_before_model,
        validate_output,
        retry_model,
        personalized_prompt,
    ],
)

### 2.基于类的中间件

In [None]:
# 示例：日志中间件
from langchain.agents.middleware import AgentMiddleware, AgentState
from langgraph.runtime import Runtime
from typing import Any

class LoggingMiddleware(AgentMiddleware):
    """Logging hooks for the ``before_model`` and ``after_model`` steps."""

    def before_model(self, state: AgentState, runtime: Runtime) -> dict[str, Any] | None:
        """Print how many messages the state holds; return no state update."""
        messages = state["messages"]
        print(f"About to call model with {len(messages)} messages")
        return None

    def after_model(self, state: AgentState, runtime: Runtime) -> dict[str, Any] | None:
        """Print the content of the last message; return no state update."""
        last_message = state["messages"][-1]
        print(f"Model returned: {last_message.content}")
        return None

In [13]:
# 示例：对话长度限制
from langchain.agents.middleware import AgentMiddleware, AgentState
from langchain.messages import AIMessage
from langgraph.runtime import Runtime
from typing import Any

class MessageLimitMiddleware(AgentMiddleware):
    """Stop the run once the conversation holds ``max_messages`` or more.

    Args:
        max_messages: Message count at or above which the hook returns a
            closing message and requests a jump to "end". Defaults to 50.
    """

    def __init__(self, max_messages: int = 50):
        super().__init__()
        self.max_messages = max_messages

    def before_model(self, state: AgentState, runtime: Runtime) -> dict[str, Any] | None:
        """Return a limit notice and jump to "end" when the limit is reached.

        Args:
            state: Agent state; only the length of "messages" is read.
            runtime: Unused by this hook.

        Returns:
            None while the history is below the limit; otherwise a dict with
            an AIMessage under "messages" and "jump_to" set to "end".
        """
        # Use >= rather than ==: the history may grow by more than one message
        # between model calls, so an exact-match check can be stepped over and
        # never fire again.
        # NOTE(review): the decorator example in this file declares
        # can_jump_to=["end"] for its jump; a class-based hook may need the
        # equivalent declaration (hook_config) -- confirm against the docs.
        if len(state["messages"]) >= self.max_messages:
            return {
                "messages": [AIMessage("Conversation limit reached.")],
                "jump_to": "end",
            }
        return None

### 最佳实践
- 保持中间件专注 - 每个中间件应做好一件事
- 优雅地处理错误 - 不要让中间件错误导致代理崩溃
- 使用适当的钩子类型：
  - 节点风格：用于顺序逻辑（日志记录、验证）
  - 包装风格：用于控制流（重试、后备、缓存）
- 清晰记录任何自定义状态属性
- 在集成之前独立单元测试中间件
- 考虑执行顺序 - 将关键中间件放在列表的最前面
- 尽可能使用内置中间件，不要重复造轮子 :)