## Middleware

Middleware provides a way to more tightly control what happens inside the agent.
* Tracking agent behaviour with logging, analytics, and debugging.
* Adding retries, fallbacks, and early termination logic.
* Applying rate limits, guardrails, and PII detection.

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

# load_dotenv() is expected to have exported any .env entries into os.environ
# already, so copying the key back onto itself adds nothing. Worse, assigning
# None to os.environ raises an opaque TypeError when the key is missing.
# Fail fast with an actionable message instead.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )

### Summarization Middleware

Summarization middleware is a layer that condenses conversation or document content before sending it to the LLM, helping manage context limits, reduce token cost, and retain only important information.

In [2]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langgraph.checkpoint.memory import MemorySaver

# Message-based summarization: the middleware is configured with a trigger of
# 10 messages (length-based) and told to keep 4 messages.
message_summarizer = SummarizationMiddleware(
    model="gpt-4o-mini",
    trigger=("messages", 10),
    keep=("messages", 4),
)

agent = create_agent(
    model="gpt-4o-mini",
    checkpointer=MemorySaver(),
    middleware=[message_summarizer],
)


In [3]:
# Run with a thread id: every invocation that receives this config uses
# thread "test-3".
config = dict(configurable=dict(thread_id="test-3"))

In [4]:
from langchain.messages import HumanMessage
questions = [
    "What is 2+2?",
    "What is 2*6?",
    "What is 2/4?",
    "What is 3*3?",
    "What is 15-7?",
    "What is 7*8?",
    "What is 8+8?",
]

# Send each question with the same config, then print the returned state and
# how many messages it holds.
for question in questions:
    turn_input = {"messages": [HumanMessage(content=question)]}
    response = agent.invoke(turn_input, config)
    print(response)
    print("len: ", len(response["messages"]))

{'messages': [HumanMessage(content='What is 2+2?', additional_kwargs={}, response_metadata={}, id='1ace6d61-b6b2-4269-8d3a-32fb9903a32e'), AIMessage(content='2 + 2 equals 4.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 8, 'prompt_tokens': 14, 'total_tokens': 22, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f4ae844694', 'id': 'chatcmpl-D8hXwZYkngxQTtJlMxdFnLuT2ZuAv', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None}, id='lc_run--019c55d2-8b0a-7ba1-b0f7-6de86e84ae7d-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 14, 'output_tokens': 8, 'total_tokens': 22, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 

Summarization based on token size

In [5]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage

@tool
def search_hotel(city:str) ->str:
    """Search hotels-returns long response to use more tokens"""
    # NOTE(review): with @tool the docstring above is presumably what the model
    # sees as the tool description, so it is kept verbatim -- confirm before
    # rewording it.
    # The multi-line listing is intentionally long so the conversation
    # accumulates tokens quickly.
    listing = f"""Hotels in: {city}
        1.Grand Hotel-5 star,$350/night,spa,pool,gym
        2. City Inn-4 star,$100/night , business centre
        3. Budget stay - 3 star ,$75/night,free wifi"""
    return listing

# Token-based summarization: trigger configured at 550 tokens, keep at 200.
token_summarizer = SummarizationMiddleware(
    model="gpt-4o-mini",
    trigger=("tokens", 550),
    keep=("tokens", 200),
)

agent = create_agent(
    model="gpt-4o-mini",
    tools=[search_hotel],
    checkpointer=MemorySaver(),
    middleware=[token_summarizer],
)

# Thread id passed along with every invocation of this agent.
config = {"configurable": {"thread_id": "test-1"}}

#Token Counter
def count_tokens(messages, chars_per_token=4):
    """Roughly estimate how many tokens ``messages`` contain from text length.

    This is a character-count heuristic, not a real tokenizer.

    Args:
        messages: Iterable of objects exposing a ``content`` attribute. Each
            ``content`` is converted with ``str()`` before it is measured.
        chars_per_token: Number of characters assumed to make up one token.
            Defaults to 4.

    Returns:
        The total character count floor-divided by ``chars_per_token``.

    Raises:
        ValueError: If ``chars_per_token`` is not positive.
    """
    if chars_per_token <= 0:
        raise ValueError("chars_per_token must be a positive number")
    total_chars = sum(len(str(message.content)) for message in messages)
    return total_chars // chars_per_token

In [6]:
# Run the test: one hotel request per city with the same config, printing the
# estimated token count, the message count and the messages themselves.
cities = ["Paris", "London", "Tokyo", "New York", "Dubai"]

for city in cities:
    request = {"messages": [HumanMessage(content=f"Find Hotels in {city}")]}
    response = agent.invoke(request, config=config)
    tokens = count_tokens(response["messages"])
    message_total = len(response["messages"])
    print(f"{city}: ~{tokens} tokens, {message_total} messages")
    print(f"{response['messages']}")

Paris: ~136 tokens, 4 messages
[HumanMessage(content='Find Hotels in Paris', additional_kwargs={}, response_metadata={}, id='4a63d475-f48c-4b12-9f56-a318e1f6b77d'), AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 53, 'total_tokens': 68, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f4ae844694', 'id': 'chatcmpl-D8hYFzg3cP6mFQ4QZrAFXJDonLYTp', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019c55d2-d8cf-7e53-8330-4c9f333c0f0f-0', tool_calls=[{'name': 'search_hotel', 'args': {'city': 'Paris'}, 'id': 'call_KrWfX7xBzuH9SCceKfj6NZPj', 'type': 'tool_call'}], invalid_tool_calls=[], usage_metadata={'input_tokens': 53,

### Fraction

In [7]:
from langchain.agents import create_agent
from langchain.agents.middleware import SummarizationMiddleware
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage

@tool
def search_hotel(city:str) ->str:
    """Search hotels-returns long response to use more tokens"""
    # NOTE(review): with @tool the docstring above is presumably what the model
    # sees as the tool description, so it is kept verbatim -- confirm before
    # rewording it.
    # The multi-line listing is intentionally long so the conversation
    # accumulates tokens quickly.
    listing = f"""Hotels in: {city}
        1.Grand Hotel-5 star,$350/night,spa,pool,gym
        2. City Inn-4 star,$100/night , business centre
        3. Budget stay - 3 star ,$75/night,free wifi"""
    return listing

# Deliberately low fractions for testing. The token figures in the comments
# below correspond to a 128,000-token context window (0.005 * 128000 = 640,
# 0.002 * 128000 = 256) -- TODO confirm against the model actually used.
fraction_summarizer = SummarizationMiddleware(
    model="gpt-4o-mini",
    trigger=("fraction", 0.005),  # 0.5% = 640 tokens
    keep=("fraction", 0.002),     # 0.2% = 256 tokens
)

agent = create_agent(
    model="gpt-4o-mini",
    tools=[search_hotel],
    checkpointer=MemorySaver(),
    middleware=[fraction_summarizer],
)

# Thread id passed along with every invocation of this agent.
config = {"configurable": {"thread_id": "test-1"}}

#Token Counter
def count_tokens(messages, chars_per_token=4):
    """Roughly estimate how many tokens ``messages`` contain from text length.

    This is a character-count heuristic, not a real tokenizer.

    Args:
        messages: Iterable of objects exposing a ``content`` attribute. Each
            ``content`` is converted with ``str()`` before it is measured.
        chars_per_token: Number of characters assumed to make up one token.
            Defaults to 4.

    Returns:
        The total character count floor-divided by ``chars_per_token``.

    Raises:
        ValueError: If ``chars_per_token`` is not positive.
    """
    if chars_per_token <= 0:
        raise ValueError("chars_per_token must be a positive number")
    total_chars = sum(len(str(message.content)) for message in messages)
    return total_chars // chars_per_token

# Run the test: one hotel request per city with the same config, printing the
# estimated token count, the message count and the messages themselves.
cities = ["Paris", "London", "Tokyo", "New York", "Dubai"]

for city in cities:
    request = {"messages": [HumanMessage(content=f"Find Hotels in {city}")]}
    response = agent.invoke(request, config=config)
    tokens = count_tokens(response["messages"])
    message_total = len(response["messages"])
    print(f"{city}: ~{tokens} tokens, {message_total} messages")
    print(f"{response['messages']}")

Paris: ~144 tokens, 4 messages
[HumanMessage(content='Find Hotels in Paris', additional_kwargs={}, response_metadata={}, id='4e357221-b8ac-4026-8e1e-f73ab002a64e'), AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 15, 'prompt_tokens': 53, 'total_tokens': 68, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-mini-2024-07-18', 'system_fingerprint': 'fp_f4ae844694', 'id': 'chatcmpl-D8hYsqi8pGsaoGyuyM9HszKOZOprH', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019c55d3-70a7-73e3-823b-6125a5c2f5d8-0', tool_calls=[{'name': 'search_hotel', 'args': {'city': 'Paris'}, 'id': 'call_ebRfD6a5WgG430PfQmyjuBQH', 'type': 'tool_call'}], invalid_tool_calls=[], usage_metadata={'input_tokens': 53,

### Human-in-the-Loop Middleware
Human-in-the-Loop middleware is a control layer in an AI / agent pipeline that:

* pauses the automated flow
* asks a human to review, approve, edit, or reject
* then continues execution based on that human input


In [8]:
from langchain.agents import create_agent
from langchain.agents.middleware import HumanInTheLoopMiddleware
from langgraph.checkpoint.memory import InMemorySaver

def read_email_tool(email_id: str)->str:
    """Mock function to read its email by its ID."""
    # Docstring kept verbatim: this function is handed to the agent as a tool,
    # so the text may be surfaced to the model -- TODO confirm before rewording.
    # No mailbox is touched; the reply simply echoes the requested id.
    mock_content = f"Email content for ID :{email_id}"
    return mock_content

def send_email_tool(recipient:str,subject:str,body:str)->str:
    """Mock function to send an email."""
    # Docstring kept verbatim: this function is handed to the agent as a tool,
    # so the text may be surfaced to the model -- TODO confirm before rewording.
    # Nothing is actually sent, and `body` is accepted but not echoed back.
    confirmation = f"Email sent to {recipient} with subject '{subject}'"
    return confirmation

In [9]:
# Per-tool review policy: send_email_tool lists approve / edit / reject as the
# allowed decisions; read_email_tool is set to False (presumably meaning it
# runs without pausing -- verify against the middleware docs).
email_review_policy = {
    "send_email_tool": {
        "allowed_decisions": ["approve", "edit", "reject"],
    },
    "read_email_tool": False,
}

agent = create_agent(
    model="gpt-4o",
    tools=[read_email_tool, send_email_tool],
    checkpointer=InMemorySaver(),
    middleware=[HumanInTheLoopMiddleware(interrupt_on=email_review_policy)],
)

In [10]:
from langchain_core.messages import HumanMessage
config = {"configurable": {"thread_id": "test-approve"}}

# Step 1: submit the request on the "test-approve" thread.
send_request = HumanMessage(
    content="Send email to anjali@gmail.com with subject 'Hello' and body 'How are you?'"
)
result = agent.invoke({"messages": [send_request]}, config=config)

In [11]:
# Display the value returned by the agent.invoke call in the previous cell.
result

{'messages': [HumanMessage(content="Send email to anjali@gmail.com with subject 'Hello' and body 'How are you?'", additional_kwargs={}, response_metadata={}, id='b17b8fda-35a5-4b8f-9a71-ef80f428af57'),
  AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 98, 'total_tokens': 127, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_3fef075e19', 'id': 'chatcmpl-D8hZUssEzZln1BlMbS04TzDZ07Msl', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019c55d4-065c-7e13-bb85-0ba314616530-0', tool_calls=[{'name': 'send_email_tool', 'args': {'recipient': 'anjali@gmail.com', 'subject': 'Hello', 'body': 'How are you?'}, 'id': 'call_wJ0I8i22CI

In [12]:
# Step 2: Approve -- only runs when the previous result carries an
# "__interrupt__" entry.
from langgraph.types import Command

if "__interrupt__" in result:
    print(" Paused! Approving...")

    # Resume the same thread with a single "approve" decision.
    approval = Command(resume={"decisions": [{"type": "approve"}]})
    result = agent.invoke(approval, config=config)

    print(f"Result : {result['messages'][-1].content}")

 Paused! Approving...
Result : The email has been sent to anjali@gmail.com with the subject "Hello".


In [14]:
# Display `result` again; the approve step reassigns it when an interrupt was pending.
result

{'messages': [HumanMessage(content="Send email to anjali@gmail.com with subject 'Hello' and body 'How are you?'", additional_kwargs={}, response_metadata={}, id='b17b8fda-35a5-4b8f-9a71-ef80f428af57'),
  AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 98, 'total_tokens': 127, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_3fef075e19', 'id': 'chatcmpl-D8hZUssEzZln1BlMbS04TzDZ07Msl', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019c55d4-065c-7e13-bb85-0ba314616530-0', tool_calls=[{'name': 'send_email_tool', 'args': {'recipient': 'anjali@gmail.com', 'subject': 'Hello', 'body': 'How are you?'}, 'id': 'call_wJ0I8i22CI

In [16]:
# Reject scenario
from langchain_core.messages import HumanMessage

config = {"configurable": {"thread_id": "test-reject"}}

# Step 1: submit the request on the "test-reject" thread.
send_request = HumanMessage(
    content="Send email to anjali@gmail.com with subject 'Hello' and body 'How are you?'"
)
result = agent.invoke({"messages": [send_request]}, config=config)

In [17]:
# Step 2: Reject -- only runs when the previous result carries an
# "__interrupt__" entry.
from langgraph.types import Command
if "__interrupt__" in result:
    # The status message names the decision that is actually sent below.
    print(" Paused! Rejecting...")

    # Resume the same thread with a single "reject" decision.
    result =agent.invoke(
        Command(
            resume={
                "decisions":[
                    {"type":"reject"}
                ]
            }
        ),
        config=config
    )

    print(f"Result : {result['messages'][-1].content}")

 Paused! Approving...
Result : It seems there was an issue with sending the email. Please let me know if there's anything else you would like to do.


In [18]:
# Display `result` again; the reject step reassigns it when an interrupt was pending.
result

{'messages': [HumanMessage(content="Send email to anjali@gmail.com with subject 'Hello' and body 'How are you?'", additional_kwargs={}, response_metadata={}, id='91255d5d-0374-459c-85b3-2261e008df06'),
  AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 29, 'prompt_tokens': 98, 'total_tokens': 127, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_ad98c18a04', 'id': 'chatcmpl-D8hclsQt60c5AkKGcqAgNLpaP2b6P', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019c55d7-1ce8-7093-86c4-24bc1d63ccda-0', tool_calls=[{'name': 'send_email_tool', 'args': {'recipient': 'anjali@gmail.com', 'subject': 'Hello', 'body': 'How are you?'}, 'id': 'call_ID576onwvu

In [27]:
# Re-create the agent with a new InMemorySaver for the edit scenario.
# Per-tool review policy: send_email_tool lists approve / edit / reject as the
# allowed decisions; read_email_tool is set to False (presumably meaning it
# runs without pausing -- verify against the middleware docs).
email_review_policy = {
    "send_email_tool": {
        "allowed_decisions": ["approve", "edit", "reject"],
    },
    "read_email_tool": False,
}

agent = create_agent(
    model="gpt-4o",
    tools=[read_email_tool, send_email_tool],
    checkpointer=InMemorySaver(),
    middleware=[HumanInTheLoopMiddleware(interrupt_on=email_review_policy)],
)

In [28]:
# Edit scenario
from langchain_core.messages import HumanMessage

config = {"configurable": {"thread_id": "test-edit"}}

# Step 1: submit the request on the "test-edit" thread.
send_request = HumanMessage(
    content="Send email to anjali@gmail.com with subject 'Test' and body 'Hello?'"
)
result = agent.invoke({"messages": [send_request]}, config=config)

In [30]:
# Step 2: Edit -- only runs when the previous result carries an
# "__interrupt__" entry.
from langgraph.types import Command
if "__interrupt__" in result:
    # The status message names the decision that is actually sent below.
    print(" Paused! Editing...")

    # Replacement tool call supplied by the human reviewer: same tool, with a
    # corrected recipient, subject and body.
    edited_action = {
        "name": "send_email_tool",
        "args": {
            "recipient": "correct@email.com",
            "subject": "Corrected Subject",
            "body": "This was edited by human before sending",
        },
    }

    # Resume the same thread with a single "edit" decision carrying that call.
    result = agent.invoke(
        Command(
            resume={
                "decisions": [
                    {"type": "edit", "edited_action": edited_action}
                ]
            }
        ),
        config=config
    )


In [31]:
# Display `result` again; the edit step reassigns it when an interrupt was pending.
result

{'messages': [HumanMessage(content="Send email to anjali@gmail.com with subject 'Test' and body 'Hello?'", additional_kwargs={}, response_metadata={}, id='573178ee-1e09-4363-973a-4c344ea1a0d3'),
  AIMessage(content='', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 27, 'prompt_tokens': 96, 'total_tokens': 123, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 0, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_provider': 'openai', 'model_name': 'gpt-4o-2024-08-06', 'system_fingerprint': 'fp_ad98c18a04', 'id': 'chatcmpl-D8ho2mzGxRvNkiG4zwfXngy5cB0nO', 'service_tier': 'default', 'finish_reason': 'tool_calls', 'logprobs': None}, id='lc_run--019c55e1-c9b1-7b12-94d7-b2f5b5d24bb8-0', tool_calls=[{'type': 'tool_call', 'name': 'send_email_tool', 'args': {'recipient': 'correct@email.com', 'subject': 'Corrected Subject', 'body': 'This was ed