In [None]:
import sys
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent)) 
from tool_monkey import MonkeyObserver, with_monkey, burst_rate_limit, progressive_rate_limit, create_tool_with_monkey, logger, setup_default_logging

In [2]:
# NOTE(review): 10 equals the stdlib logging.DEBUG value - presumably this turns on
# debug-level output from tool_monkey; confirm against setup_default_logging.
setup_default_logging(level=10)

# Rate Limiting Examples

This notebook demonstrates how agents behave when they hit API rate limits - one of the most common production failures for LLM agents.

Rate limits come in different flavors:
- **Burst limits:** Short-term spike protection (e.g., max 5 requests/second)
- **Quota limits:** Long-term usage caps (e.g., 100 requests/hour, then exhausted)

Both are simulated here using AI image generation as a realistic use case.

## Example 1: Burst Rate Limit

**Use Case:** Marketing agent generates multiple images rapidly. Hits burst rate limit (simulates DALL-E/Midjourney free tier limits).

**What Happens:**
1. User asks agent to generate 6 marketing images for a coffee shop
2. Agent generates creative prompts and calls the `generate_image` tool repeatedly
3. **Calls 1-2:** Images generated successfully ✅
4. **Call 3:** Rate limit hit! ❌ (simulated burst limit: the 3rd call is rejected with "retry after 5 seconds")
5. Agent sees error message in tool response
6. **Calls 4-7:** Images generated successfully ✅ (rate limit was temporary)
7. Agent completes task with 6 images (7 tool calls in total, one of which failed)

**What This Shows:**
- Burst rate limits are **temporary** - only affect a single request
- Agent can continue after the failure (not blocked permanently)
- Real pattern: Free tier AI image APIs (DALL-E, Midjourney) have strict burst limits
- No retry logic in this example - agent just handles the error and moves on

**Scenario Configuration:**
- `burst_rate_limit(on_call=3, retry_after=5.0)` - 3rd call fails
- Simulates: "Wait 5 seconds before retrying"


In [None]:
def bind_tools_burst_example():
    """Run the burst rate-limit demo with a manually driven tool-calling loop.

    Wraps ``base_image_gen_tool`` with the ``burst_rate_limit()`` scenario as a
    tool named ``generate_image``, binds it to the shared LLM and asks for six
    marketing images. Each tool call requested by the model is executed; if the
    call raises, the error text is sent back to the model as a tool message so
    it can decide how to continue. The loop runs for at most 10 LLM turns.

    Progress, the final model answer and ``observer.summary()`` are printed.

    Returns:
        None. All results are reported via ``print``.
    """
    from langchain_examples.shared.llm import llm
    from langchain_examples.shared.tools import base_image_gen_tool, ImageGenInput

    observer = MonkeyObserver()
    # NOTE(review): relies on the scenario defaults; the notebook text describes
    # them as on_call=3, retry_after=5.0 - confirm against burst_rate_limit.
    scenario = burst_rate_limit()
    image_gen_tool = create_tool_with_monkey(
        base_tool=base_image_gen_tool,
        scenario=scenario,
        observer=observer,
        tool_name="generate_image",
        args_schema=ImageGenInput,
    )
    system_prompt = """You are a creative marketing assistant with access to an AI image generation tool. 
  When asked to create images, you should call the generate_image tool multiple times to generate each image."""

    user_prompt = """Generate 6 unique marketing images for a new artisan coffee shop. 
  Come up with creative prompts and generate each image using the photorealistic style."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    llm_with_tools = llm.bind_tools([image_gen_tool])
    max_iterations = 10
    # Counts every tool call across all LLM turns, so parallel tool calls issued
    # in a single turn are numbered individually instead of sharing one label.
    call_number = 0
    for _ in range(max_iterations):
        ai_msg = llm_with_tools.invoke(messages)
        messages.append(ai_msg)
        if not ai_msg.tool_calls:
            print(f"Agent loop finished: {ai_msg.content}")
            break
        for tool_call in ai_msg.tool_calls:
            call_number += 1
            try:
                result = image_gen_tool.invoke(tool_call)
            except Exception as e:
                # Any exception from the tool is reported this way, not only
                # injected rate-limit failures.
                print(f"Rate limited: {e}")
                # Add error to messages so LLM sees it
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call["id"],
                    "content": f"Error: {e}"
                })
            else:
                messages.append(result)
                print(f"Generated image {call_number}")
    else:
        # The model was still requesting tool calls when the turn budget ran out.
        print(f"Agent loop stopped after {max_iterations} iterations without a final answer")
    print("\n" + "=" * 50)
    print("OBSERVER METRICS:")
    print("=" * 50)
    print(observer.summary())

# Run the burst rate-limit demo; its printed progress is the cell output below.
bind_tools_burst_example()

Ending call for base_image_gen_tool on success
Generated image 1
Ending call for base_image_gen_tool on success
Generated image 2
Ending call for base_image_gen_tool after exception
Rate limited: ʕ•͡-•ʔ Tool Monkey unleashed! ʕ•͡-•ʔ: Rate limit exceeded (burst). Retry after 5.0 seconds.
Ending call for base_image_gen_tool on success
Generated image 4
Ending call for base_image_gen_tool on success
Generated image 5
Ending call for base_image_gen_tool on success
Generated image 6
Ending call for base_image_gen_tool on success
Generated image 7
Agent loop finished: Here are the 6 unique marketing images for the new artisan coffee shop:

1. [A cozy artisan coffee shop with wooden furniture and warm lighting](https://fake-cdn.com/images/7942009920325075913.png)
2. [A steaming cup of artisan coffee with a latte art heart](https://fake-cdn.com/images/-2982640019527402376.png)
3. [A rustic chalkboard menu hanging on a brick wall in a coffee shop](https://fake-cdn.com/images/3697382546964471184

## Example 2: Progressive Rate Limit (Quota Exhaustion)

**Use Case:** Marketing agent generates images until quota is exhausted. Cannot generate more until quota resets.

**What Happens:**
1. User asks agent to generate 6 marketing images
2. Agent generates prompts and starts calling `image_gen_tool()`
3. **Images 1-5:** Generated successfully ✅ ✅ ✅ ✅ ✅ (quota allows 5 images)
4. **Image 6:** Rate limit hit! ❌ (quota exhausted)
5. Agent sees error: "Rate limit exceeded, retry after 60 seconds"
6. Agent cannot complete the task (quota would need to reset)

**What This Shows:**
- Progressive rate limits are **persistent** - quota is exhausted, not just a spike
- Agent gets blocked until quota resets (e.g., hourly/daily limit)
- Real pattern: OpenAI API, Anthropic API have monthly/hourly quotas
- Different from burst limits - this blocks ALL subsequent calls

**Scenario Configuration:**
- `progressive_rate_limit(quota=5, retry_after=60.0)` - calls 1-5 succeed, 6+ fail
- Simulates: "You've used 5/5 free images this hour, wait 60 seconds before retrying"


In [None]:
def bind_tools_progressive_example():
    """Run the quota-exhaustion demo with a manually driven tool-calling loop.

    Decorates ``base_image_gen_tool`` with ``with_monkey`` using the
    ``progressive_rate_limit()`` scenario, exposes it to the shared LLM as a
    tool named ``generate_image`` and asks for six marketing images. Each tool
    call requested by the model is executed; if the call raises, the error
    text is sent back to the model as a tool message. The loop runs for at
    most 10 LLM turns.

    Progress, the final model answer and ``observer.summary()`` are printed.

    Returns:
        None. All results are reported via ``print``.
    """
    from langchain_examples.shared.llm import llm
    from langchain_examples.shared.tools import base_image_gen_tool
    from langchain_core.tools import tool

    observer = MonkeyObserver()
    # NOTE(review): relies on the scenario defaults; the notebook text describes
    # them as quota=5, retry_after=60.0 - confirm against progressive_rate_limit.
    scenario = progressive_rate_limit()
    wrapped_tool = with_monkey(scenario, observer)(base_image_gen_tool)

    # Register the tool under the name the system prompt tells the model to
    # call ("generate_image"); a bare @tool would register it under the
    # function name "image_gen_tool" instead.
    @tool("generate_image")
    def image_gen_tool(prompt:str, style:str):
        """
        Make a call to DALL-E API to generate an image based on the given prompt and style.
        Args:
            prompt (str): The text prompt describing the desired image.
            style (str): The artistic style to apply to the generated image.
        """
        return wrapped_tool(prompt=prompt, style=style)

    system_prompt = """You are a creative marketing assistant with access to an AI image generation tool. 
  When asked to create images, you should call the generate_image tool multiple times to generate each image."""

    user_prompt = """Generate 6 unique marketing images for a new artisan coffee shop. 
  Come up with creative prompts and generate each image using the photorealistic style."""
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    llm_with_tools = llm.bind_tools([image_gen_tool])
    max_iterations = 10
    # Counts every tool call across all LLM turns, so parallel tool calls issued
    # in a single turn are numbered individually instead of sharing one label.
    call_number = 0
    for _ in range(max_iterations):
        ai_msg = llm_with_tools.invoke(messages)
        messages.append(ai_msg)
        if not ai_msg.tool_calls:
            print(f"Agent loop finished: {ai_msg.content}")
            break
        for tool_call in ai_msg.tool_calls:
            call_number += 1
            try:
                result = image_gen_tool.invoke(tool_call)
            except Exception as e:
                # Any exception from the tool is reported this way, not only
                # injected rate-limit failures.
                print(f"Rate limited: {e}")
                # Add error to messages so LLM sees it
                messages.append({
                    "role": "tool",
                    "tool_call_id": tool_call["id"],
                    "content": f"Error: {e}"
                })
            else:
                messages.append(result)
                print(f"Generated image {call_number}")
    else:
        # The model was still requesting tool calls when the turn budget ran out.
        print(f"Agent loop stopped after {max_iterations} iterations without a final answer")
    print("\n" + "=" * 50)
    print("OBSERVER METRICS:")
    print("=" * 50)
    print(observer.summary())

# Run the quota-exhaustion (progressive rate limit) demo.
bind_tools_progressive_example()