# Guardrail Agent

In [1]:
# Import required libraries and set up API key
from pydantic import BaseModel
from agents import (
    Agent,
    Runner,
    RunContextWrapper,
    TResponseInputItem,
    GuardrailFunctionOutput,
    InputGuardrailTripwireTriggered,
    OutputGuardrailTripwireTriggered,
    input_guardrail,
    output_guardrail,
)
import os
# Keep a key that is already exported; otherwise install the placeholder,
# which must be replaced with a real OpenAI API key before running the demos.
os.environ.setdefault("OPENAI_API_KEY", "YOUR_KEY_HERE")

In [2]:
# Define Pydantic models for guardrail checkers' outputs
# Structured verdict produced by the input guardrail checker agent
# (it is the checker's output_type).
class MathHomeworkCheck(BaseModel):
    # True when the checker judges the user input to be a math problem or
    # homework request; the input guardrail trips on this flag.
    is_math_homework: bool
    # Free-text field; presumably the checker's explanation of its verdict.
    reasoning: str

# Structured verdict produced by the output guardrail checker agent
# (it is the checker's output_type).
class MathContentCheck(BaseModel):
    # True when the checker judges the assistant's message to contain math
    # (equations, calculations, step-by-step work); the output guardrail trips on this flag.
    is_math: bool
    # Free-text field; presumably the checker's explanation of its verdict.
    reasoning: str

# Define lightweight classifier agents for input and output guardrails
# Classifier agent run by the input guardrail; its final output is a MathHomeworkCheck.
input_checker = Agent(
    name="Input Guardrail Checker",
    output_type=MathHomeworkCheck,
    instructions=(
        "Decide if the USER input is asking for math/homework help. "
        "Set is_math_homework=True if it looks like a math problem or homework request."
    ),
)

# Classifier agent run by the output guardrail; its final output is a MathContentCheck.
output_checker = Agent(
    name="Output Guardrail Checker",
    output_type=MathContentCheck,
    instructions=(
        "Given the ASSISTANT's final message, decide if it includes math content. "
        "Set is_math=True if it includes equations, calculations, or step-by-step math."
    ),
)

In [7]:
# Implement input guardrail to block math/homework requests
@input_guardrail
async def block_math_homework(
    ctx: RunContextWrapper[None],
    agent: Agent,
    user_input: str | list[TResponseInputItem],
) -> GuardrailFunctionOutput:
    """Input guardrail: trip when the checker agent flags a math/homework request.

    Args:
        ctx: Run context wrapper; its ``context`` is forwarded to the checker run.
        agent: Agent instance supplied by the SDK; not used by this guardrail.
        user_input: The input of the guarded run, passed unchanged to the checker.

    Returns:
        A GuardrailFunctionOutput whose ``output_info`` is the checker's verdict
        and whose ``tripwire_triggered`` mirrors ``is_math_homework``.
    """
    checker_run = await Runner.run(input_checker, user_input, context=ctx.context)
    verdict = checker_run.final_output
    return GuardrailFunctionOutput(
        output_info=verdict,
        tripwire_triggered=bool(verdict.is_math_homework),
    )


In [8]:
# Implement output guardrail to block math content in responses
@output_guardrail
async def block_mathy_outputs(
    ctx: RunContextWrapper[None],
    agent: Agent,
    output_message: BaseModel,
) -> GuardrailFunctionOutput:
    """Output guardrail: trip when the checker agent finds math in the final answer.

    Args:
        ctx: Run context wrapper; its ``context`` is forwarded to the checker run.
        agent: Agent instance supplied by the SDK; not used by this guardrail.
        output_message: The guarded agent's final output. Its ``response``
            attribute is checked when present; otherwise the stringified
            output is checked.

    Returns:
        A GuardrailFunctionOutput whose ``output_info`` is the checker's verdict
        and whose ``tripwire_triggered`` mirrors ``is_math``.
    """
    # Prefer the structured `response` field. Fall back to the stringified output
    # so that an output without that field is still screened, rather than the
    # checker being handed an empty string that would always pass.
    response_text = getattr(output_message, "response", None)
    if response_text is None:
        response_text = str(output_message)

    result = await Runner.run(output_checker, response_text, context=ctx.context)
    verdict = result.final_output
    return GuardrailFunctionOutput(
        output_info=verdict,
        tripwire_triggered=bool(verdict.is_math),
    )

In [9]:
# Define the main support agent with input and output guardrails
# Structured output type of the support agent (passed as its output_type).
class SupportMessage(BaseModel):
    # The reply text; the output guardrail reads this field and the demo cells print it.
    response: str

# Customer-facing agent: replies as a SupportMessage and is wrapped by both guardrails.
support_agent = Agent(
    name="SupportAgent",
    output_type=SupportMessage,
    input_guardrails=[block_math_homework],
    output_guardrails=[block_mathy_outputs],
    instructions=(
        "You are a friendly customer support agent for Acme. "
        "Answer questions concisely. If it’s about products or shipping, help normally."
    ),
)


In [10]:
# Run demo scenarios to show guardrails in action
import nest_asyncio
# Patch asyncio so event loops can be nested inside the notebook's already-running loop.
nest_asyncio.apply()


# Run a normal support question (should pass)
print("A) Normal support question (should PASS):")
# No try/except here: neither guardrail is expected to trip on a plain support question.
ok = await Runner.run(support_agent, "What’s your return policy for shoes?")
# The agent's output_type is SupportMessage, so the reply text is on `.response`.
print("→", ok.final_output.response, "\n")

A) Normal support question (should PASS):
→ **Acme Return Policy for Shoes:**

- **Time Frame:** Shoes can be returned within 30 days of purchase.
  
- **Condition:** Must be unworn, in their original packaging, and with tags attached.
  
- **Process:** Use the prepaid return label provided in your shipment. If you don’t have one, you can generate a new label through your online account.
  
- **Refund:** Once we receive the shoes, we’ll process your refund to the original payment method within 7-10 business days.

If you need further assistance, feel free to ask! 



In [11]:
# Run a math/homework input (should trip input guardrail)
print("B) Homework-style input (should TRIP input guardrail):")
try:
    await Runner.run(support_agent, "Solve for x: 2x + 3 = 11.")
except InputGuardrailTripwireTriggered as e:
    print("→ Input guardrail tripped:", e)

B) Homework-style input (should TRIP input guardrail):
→ Input guardrail tripped: Guardrail InputGuardrail triggered tripwire


In [13]:
# Run a prompt that causes mathy output (should trip output guardrail)
print("\nC) Prompt that causes mathy output (should TRIP output guardrail):")
try:
    await Runner.run(support_agent, "Can you calculate 17 * 23 and explain?")
except InputGuardrailTripwireTriggered as e:
    print("→ Output guardrail tripped:", e)


C) Prompt that causes mathy output (should TRIP output guardrail):
→ Output guardrail tripped: Guardrail InputGuardrail triggered tripwire
