### Middleware

Middleware provides a way to more tightly control what happens inside the agent. Middleware is useful for the following:
- Tracking agent behavior with logging, analytics, and debugging.
- Transforming prompts, tool selection, and output formatting.
- Adding retries, fallbacks, and early termination logic.
- Applying rate limits, guardrails, and PII detection.

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['GROQ_API_KEY'] = os.getenv('GROQ_API_KEY')

### Summarization MiddleWare
Automatically summarize conversation history when approaching token limits, preserving recent messages while compressing older context. Summarization is useful for the following:
- Long-running conversations that exceed context windows.
- Multi-turn dialogues with extensive history.
- Applications where preserving full conversation context matters.

In [2]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langchain_core.messages import SystemMessage, AIMessage, HumanMessage
from langgraph.checkpoint.memory import InMemorySaver
from langchain_groq import ChatGroq

## message summarization
llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0
)

agent = create_agent(
    model=llm,
    checkpointer=InMemorySaver(),
    middleware=[
        SummarizationMiddleware(
            model=llm,
            trigger=('messages', 10),
            keep=('messages', 4)
        )
    ]
)

  from pydantic.v1.fields import FieldInfo as FieldInfoV1


In [3]:
## Run with thread id

config = {'configurable': {'thread_id': 'test-1'}}

In [4]:
## alternative test data

questions = [
    'What is 2+2?',
    'What is 100/5?',
    'What is 100*5?',
    'What is 100-5?',
    'What is 5*5?',
    'What is 10^4?',
]

for q in questions:
    response = agent.invoke({'messages': [HumanMessage(content=q)]}, config=config)
    print(f"message: {response}")
    print(f"message length : {len(response['messages'])}" )

message: {'messages': [HumanMessage(content='What is 2+2?', additional_kwargs={}, response_metadata={}, id='a6b9c8c9-1d93-42c2-aaab-f64905fec8f2'), AIMessage(content='2 + 2 = 4.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 9, 'prompt_tokens': 42, 'total_tokens': 51, 'completion_time': 0.006546495, 'completion_tokens_details': None, 'prompt_time': 0.001970839, 'prompt_tokens_details': None, 'queue_time': 0.053195539, 'total_time': 0.008517334}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_f757f4b0bf', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019c1797-ebf1-7c40-8273-f0ffcedc3960-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 42, 'output_tokens': 9, 'total_tokens': 51})]}
message length : 2
message: {'messages': [HumanMessage(content='What is 2+2?', additional_kwargs={}, response_metadata={}, id='a6b9c8c9-1d93-42c2-aaab-f64905fec8f2'), AIMe

##### Token size summerization

In [5]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langgraph.checkpoint.memory import InMemorySaver
from langchain_core.messages import SystemMessage, AIMessage
from langchain_groq import ChatGroq
from langchain_core.tools import tool

@tool
def search_hotels(city: str) -> str:
    """Search hotels - returns long response to use more tokens."""
    return f"""Hotels in {city}:
    1. Grand Hotel - 5 star, $350/night, spa, pool, gym
    2. City Inn - 4 star, $180/night, business center
    3. Budget Stay - 3 star, $75/night, free wifi"""

agent = create_agent(
    model=llm,
    tools= [search_hotels],
    checkpointer=InMemorySaver(),
    middleware=[
        SummarizationMiddleware(
            model=llm,
            trigger=('tokens', 500),
            keep=('tokens', 100)
        )
    ]
)

In [6]:
config = {"configurable": {"thread_id": "test-1"}}


In [7]:
## token counter
def count_tokens(message):
    total_chars = sum(len(str(m.content)) for m in message)
    return total_chars //4

In [8]:
cities = ['New Delhi', 'Mumbai', 'Bangalore', 'Hyderabad', 'Chennai']

for city in cities:
    response = agent.invoke({
        'messages': [HumanMessage(content=f'Find the best hotel in {city}')]
    }, config=config)

    tokens = count_tokens(response['messages'])
    print(f"{city}: ~{tokens} tokens, {len(response['messages'])} messages")
    print(f"{response['messages']}")

New Delhi: ~74 tokens, 4 messages
[HumanMessage(content='Find the best hotel in New Delhi', additional_kwargs={}, response_metadata={}, id='a3c67b6e-474c-4c46-9933-7a9741e88264'), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': '6s4x3hzys', 'function': {'arguments': '{"city":"New Delhi"}', 'name': 'search_hotels'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 229, 'total_tokens': 245, 'completion_time': 0.014360783, 'completion_tokens_details': None, 'prompt_time': 0.012785234, 'prompt_tokens_details': None, 'queue_time': 0.053748356, 'total_time': 0.027146017}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_4387d3edbb', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019c1797-efed-79a1-8027-9a8ed264b613-0', tool_calls=[{'name': 'search_hotels', 'args': {'city': 'New Delhi'}, 'id': '6s4x3hzys', 'type': 'tool_call'}], invalid_tool_ca

#### Based on Fraction

In [9]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage
from langgraph.checkpoint.memory import InMemorySaver
from langchain_groq import ChatGroq

@tool
def search_hotels(city: str) -> str:
    """Search for hotels in a given city."""
    return f"Searching for hotels in {city}: Grand Hotel 2000, Luxury Hotel 1000, normal hotel 500"

llm = ChatGroq(
    model="llama-3.1-8b-instant",
    temperature=0,
)

## low fraction for testing
agent = create_agent(
    model=llm,
    tools=[search_hotels,],
    checkpointer= InMemorySaver(),
    middleware=[
        SummarizationMiddleware(
            model=llm,
            trigger=('fraction', 0.005), ## 0.5% = ~640 tokens
            keep= ('fraction', 0.002)
        )
    ]
)
# config = {"configurable": {"thread_id": "test-1"}}

config = {"configurable": {'thread_id': 'test-1'}}

def count_tokens(messages):
    return sum(len(str(m.content)) for m in messages)

In [11]:
cities = ['Delhi', 'Mumbai', 'Bangalore', 'Hyderabad', 'Patna']

for city in cities:
    response= agent.invoke(
        {'messages': [HumanMessage(content=f'Hotels in {city}')]},
        config = config
    )

    tokens = count_tokens(response['messages'])
    fraction = tokens / 128000
    print(f'{city}: ~{tokens} tokens ({fraction: .4%}), {len(response["messages"])} msgs')
    print(response['messages'])

Delhi: ~427 tokens ( 0.3336%), 7 msgs
[HumanMessage(content='Hotels in Delhi', additional_kwargs={}, response_metadata={}, id='5ce267e4-b9d9-4829-a5a6-aceeea6cf88e'), AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'tjt74dzne', 'function': {'arguments': '{"city":"Delhi"}', 'name': 'search_hotels'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 16, 'prompt_tokens': 220, 'total_tokens': 236, 'completion_time': 0.027641021, 'completion_tokens_details': None, 'prompt_time': 0.013714927, 'prompt_tokens_details': None, 'queue_time': 0.053912406, 'total_time': 0.041355948}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_1151d4f23c', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019c1798-3a23-7323-8f56-75c26a99c073-0', tool_calls=[{'name': 'search_hotels', 'args': {'city': 'Delhi'}, 'id': 'tjt74dzne', 'type': 'tool_call'}], invalid_tool_calls=[], usage_metadat

### Human In the Loop MiddleWare
Pause agent execution for human approval, editing, or rejection of tool calls before they execute. Human-in-the-loop is useful for the following:
- High-stakes operations requiring human approval (e.g. database writes, financial transactions).
- Compliance workflows where human oversight is mandatory.
- Long-running conversations where human feedback guides the agent.

In [26]:
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware
from langgraph.checkpoint.memory import InMemorySaver

@tool
def read_email_send(email_id: str) -> str:
    """Function to read an email with ID"""
    return f"Email content for ID: {email_id}"

@tool
def send_email_tool(recipient: str, subject: str, body:str) -> str:
    """Function to send email with email ID"""
    return f"Email send to {recipient} with subject {subject}."

In [27]:
agent=create_agent(
    model=llm,
    tools=[read_email_tool,send_email_tool],
    checkpointer=InMemorySaver(),
    middleware=[
        HumanInTheLoopMiddleware(
            interrupt_on={
                "send_email_tool":{
                    "allowed_decisions":["approve","edit","reject"]
                },
                "read_email_tool":False,

            }
        )
    ]
)

In [28]:
config = {"configurable": {"thread_id": "test-approve"}}

result = agent.invoke({'messages': [HumanMessage(content='send email to chandan@gmail.com with subject "Hello brother" and body "what are u doing?"')]},
config=config
)
result

{'messages': [HumanMessage(content='send email to chandan@gmail.com with subject "Hello brother" and body "what are u doing?"', additional_kwargs={}, response_metadata={}, id='5d2a0265-44cc-43cb-9867-c822d5c6f7e7'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': '30jnknk05', 'function': {'arguments': '{"body":"what are u doing?","recipient":"chandan@gmail.com","subject":"Hello brother"}', 'name': 'send_email_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 35, 'prompt_tokens': 314, 'total_tokens': 349, 'completion_time': 0.042570706, 'completion_tokens_details': None, 'prompt_time': 0.01934899, 'prompt_tokens_details': None, 'queue_time': 0.048985953, 'total_time': 0.061919696}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_1151d4f23c', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019c17a7-34f5-7a72-a8f2-454345741f6a-0', tool_calls=[{'name': 

In [29]:
from langgraph.types import Command
# Step 2: Approve
if "__interrupt__" in result:
    print("⏸️ Paused! Approving...")
    
    result = agent.invoke(
        Command(
            resume={
                "decisions": [
                    {"type": "approve"}
                ]
            }
        ),
        config=config
    )
    
    print(f"✅ Result: {result['messages'][-1].content}")

⏸️ Paused! Approving...
✅ Result: 


### In that AIMessages content is empty so free api key do not provide that try to use openai api key then its active

In [30]:
result

{'messages': [HumanMessage(content='send email to chandan@gmail.com with subject "Hello brother" and body "what are u doing?"', additional_kwargs={}, response_metadata={}, id='5d2a0265-44cc-43cb-9867-c822d5c6f7e7'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': '30jnknk05', 'function': {'arguments': '{"body":"what are u doing?","recipient":"chandan@gmail.com","subject":"Hello brother"}', 'name': 'send_email_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 35, 'prompt_tokens': 314, 'total_tokens': 349, 'completion_time': 0.042570706, 'completion_tokens_details': None, 'prompt_time': 0.01934899, 'prompt_tokens_details': None, 'queue_time': 0.048985953, 'total_time': 0.061919696}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_1151d4f23c', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019c17a7-34f5-7a72-a8f2-454345741f6a-0', tool_calls=[{'name': 

### Reject

In [31]:
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware
from langgraph.checkpoint.memory import InMemorySaver


def read_email_tool(email_id: str) -> str:
    """Mock function to read an email by its ID."""
    return f"Email content for ID: {email_id}"

def send_email_tool(recipient: str, subject: str, body: str) -> str:
    """Mock function to send an email."""
    return f"Email sent to {recipient} with subject '{subject}'"

agent = create_agent(
    model=llm,
    tools=[read_email_tool,send_email_tool],
    checkpointer=InMemorySaver(),
    middleware=[
        HumanInTheLoopMiddleware(
            interrupt_on={
                "send_email_tool": {
                    "allowed_decisions": ["approve", "edit", "reject"],
                },
                "read_email_tool": False,
            }
        ),
    ],
)

In [32]:
config = {"configurable": {"thread_id": "test-reject"}}
# Step 1: Request
result = agent.invoke(
    {"messages": [HumanMessage(content="Send email to john@test.com with subject 'Hello' and body 'How are you?'")]},
    config=config)

In [33]:
result

{'messages': [HumanMessage(content="Send email to john@test.com with subject 'Hello' and body 'How are you?'", additional_kwargs={}, response_metadata={}, id='2f3eca1e-26cd-415c-9c25-b11f97e4f88a'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'ffa98yr54', 'function': {'arguments': '{"body":"How are you?","recipient":"john@test.com","subject":"Hello"}', 'name': 'send_email_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 32, 'prompt_tokens': 309, 'total_tokens': 341, 'completion_time': 0.028206757, 'completion_tokens_details': None, 'prompt_time': 0.019181513, 'prompt_tokens_details': None, 'queue_time': 0.050278617, 'total_time': 0.04738827}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_1151d4f23c', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019c17a7-d92c-7ec3-9ec4-a9fa6bc28152-0', tool_calls=[{'name': 'send_email_tool', 'args': {'body'

In [35]:
# Step 2: Reject
if "__interrupt__" in result:
    print("⏸️ Paused! Approving...")
    
    result = agent.invoke(
        Command(
            resume={
                "decisions": [
                    {"type": "reject"}
                ]
            }
        ),
        config=config
    )
    
    print(f"✅ Result: {result['messages'][-1].content}")

⏸️ Paused! Approving...
✅ Result: 


### Editing

In [38]:
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware
from langgraph.checkpoint.memory import InMemorySaver


def read_email_tool(email_id: str) -> str:
    """Mock function to read an email by its ID."""
    return f"Email content for ID: {email_id}"

def send_email_tool(recipient: str, subject: str, body: str) -> str:
    """Mock function to send an email."""
    return f"Email sent to {recipient} with subject '{subject}'"

agent = create_agent(
    model=llm,
    tools=[read_email_tool,send_email_tool],
    checkpointer=InMemorySaver(),
    middleware=[
        HumanInTheLoopMiddleware(
            interrupt_on={
                "send_email_tool": {
                    "allowed_decisions": ["approve", "edit", "reject"],
                },
                "read_email_tool": False,
            }
        ),
    ],
)

In [39]:
config = {"configurable": {"thread_id": "test-edit"}}

# Step 1: Request (with wrong info)
result = agent.invoke(
    {"messages": [HumanMessage(content="Send email to wrong@email.com with subject 'Test' and body 'Hello'")]},
    config=config
)

In [40]:
result

{'messages': [HumanMessage(content="Send email to wrong@email.com with subject 'Test' and body 'Hello'", additional_kwargs={}, response_metadata={}, id='a575551d-79d9-4da0-ba65-9a2e5f69a942'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'j7wd5z4tf', 'function': {'arguments': '{"body":"Hello","recipient":"wrong@email.com","subject":"Test"}', 'name': 'send_email_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 307, 'total_tokens': 337, 'completion_time': 0.026531862, 'completion_tokens_details': None, 'prompt_time': 0.019788996, 'prompt_tokens_details': None, 'queue_time': 0.053387664, 'total_time': 0.046320858}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_1151d4f23c', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019c17ad-c3c8-7800-b2e7-fccd49a0c803-0', tool_calls=[{'name': 'send_email_tool', 'args': {'body': 'Hello', 

In [41]:
# Step 2: Edit and approve
if "__interrupt__" in result:
    print("⏸️ Paused! Editing...")
    
    result = agent.invoke(
        Command(
            resume={
                "decisions": [
                    {
                        "type": "edit",
                        "edited_action": {
                            "name": "send_email_tool",      # Tool name
                            "args": {                   # New arguments
                                "recipient": "correct@email.com",
                                "subject": "Corrected Subject",
                                "body": "This was edited by human before sending"
                            }
                        }
                    }
                ]
            }
        ),
        config=config
    )
    
    print(f"✏️ Result: {result['messages'][-1].content}")

⏸️ Paused! Editing...
✏️ Result: 


In [42]:
result

{'messages': [HumanMessage(content="Send email to wrong@email.com with subject 'Test' and body 'Hello'", additional_kwargs={}, response_metadata={}, id='a575551d-79d9-4da0-ba65-9a2e5f69a942'),
  AIMessage(content='', additional_kwargs={'tool_calls': [{'id': 'j7wd5z4tf', 'function': {'arguments': '{"body":"Hello","recipient":"wrong@email.com","subject":"Test"}', 'name': 'send_email_tool'}, 'type': 'function'}]}, response_metadata={'token_usage': {'completion_tokens': 30, 'prompt_tokens': 307, 'total_tokens': 337, 'completion_time': 0.026531862, 'completion_tokens_details': None, 'prompt_time': 0.019788996, 'prompt_tokens_details': None, 'queue_time': 0.053387664, 'total_time': 0.046320858}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_1151d4f23c', 'service_tier': 'on_demand', 'finish_reason': 'tool_calls', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--019c17ad-c3c8-7800-b2e7-fccd49a0c803-0', tool_calls=[{'type': 'tool_call', 'name': 'send_email_tool', 'args