# Baseten Agent Workshop

Welcome to the Baseten Agent Workshop!

To get started:

1. Create a [Baseten account](https://app.baseten.co/signup/)
2. Add [DeepSeek V3](https://app.baseten.co/model-apis/create) to your workspace
3. Create an [API key](https://app.baseten.co/settings/api_keys) and paste it below

Let's build an agent!


In [None]:
# ✅ Baseten model config
# These three constants are read by the later cells when they build the
# OpenAI-compatible client and pick the model.

API_KEY = "BASETEN-API-KEY-HERE" # Paste your API key in this string (avoid committing a real key)
BASE_URL = "https://inference.baseten.co/v1"  # passed as `base_url` to AsyncOpenAI
MODEL_NAME = "deepseek-ai/DeepSeek-V3"  # model used when no other model name is requested

In [None]:
# ✅ Install dependencies
# Each `!` line is run as a shell command by the notebook, not as Python.

!pip install openai-agents
!pip install openai
# NOTE(review): aiohttp is not imported by any cell visible here — confirm it is still needed.
!pip install aiohttp

In [None]:
# Create a custom model provider

import asyncio
import random
import uuid

from pydantic import BaseModel

from agents import (
    Agent,
    HandoffOutputItem,
    ItemHelpers,
    MessageOutputItem,
    RunContextWrapper,
    Runner,
    ToolCallItem,
    ToolCallOutputItem,
    TResponseInputItem,
    function_tool,
    handoff,
    trace,
    set_tracing_disabled,
    RunConfig,
    Model,
    ModelProvider,
    OpenAIChatCompletionsModel,
)
from agents.extensions.handoff_prompt import RECOMMENDED_PROMPT_PREFIX
from openai import AsyncOpenAI

# ✅ Disable the Agents SDK's built-in tracing (not needed in Colab).
# NOTE(review): `set_tracing_disabled` comes from the `agents` package, so this is
# that package's own tracing, not Datadog. Presumably traces would otherwise be
# exported to OpenAI, which needs an OpenAI key we don't have here — confirm.
set_tracing_disabled(True)

# ✅ OpenAI-compatible async client pointed at BASE_URL; the custom model provider below uses it
client = AsyncOpenAI(base_url=BASE_URL, api_key=API_KEY)

class CustomModelProvider(ModelProvider):
    """Model provider that builds chat-completions models on the shared `client`."""

    def get_model(self, model_name: str | None) -> Model:
        """Return a chat-completions model for `model_name`.

        Falls back to the module-level MODEL_NAME when `model_name` is
        None or empty.
        """
        resolved_name = model_name if model_name else MODEL_NAME
        return OpenAIChatCompletionsModel(model=resolved_name, openai_client=client)

# Single shared provider instance; passed to RunConfig(model_provider=...) when running agents.
CUSTOM_PROVIDER = CustomModelProvider()

In [None]:
# Define context and tools

# ✅ Context definition
# Mutable per-conversation state shared by the tools and the handoff hook.
# Every field starts as None and is filled in as the conversation progresses.
class AirlineAgentContext(BaseModel):
    # Not written by any code visible in this notebook.
    passenger_name: str | None = None
    # Written by the update_seat tool.
    confirmation_number: str | None = None
    # Written by the update_seat tool.
    seat_number: str | None = None
    # Written by on_seat_booking_handoff; update_seat requires it to be set.
    flight_number: str | None = None

# ✅ Tools
@function_tool(name_override="faq_lookup_tool", description_override="Lookup frequently asked questions.")
async def faq_lookup_tool(question: str) -> str:
    """Answer a frequently asked airline question by keyword matching.

    Args:
        question: The customer's question as free text.

    Returns:
        The canned answer for the first topic whose keyword appears in the
        question, or an apology when no keyword matches.
    """
    # Match case-insensitively so "Baggage", "WiFi", "PLANE", ... still hit a topic.
    normalized = question.casefold()
    if "bag" in normalized:  # "bag" also covers "baggage"
        return "You are allowed to bring one bag on the plane. It must be under 50 pounds and 22 inches x 14 inches x 9 inches."
    if "seats" in normalized or "plane" in normalized:
        return "There are 120 seats on the plane. 22 business class, 98 economy. Exit rows are 4 and 16. Economy Plus is 5-8."
    if "wifi" in normalized or "wi-fi" in normalized:
        return "We have free wifi on the plane, join Airline-Wifi"
    return "I'm sorry, I don't know the answer to that question."

@function_tool
async def update_seat(context: RunContextWrapper[AirlineAgentContext], confirmation_number: str, new_seat: str) -> str:
    """Update the seat for a given confirmation number.

    Args:
        confirmation_number: The confirmation number for the flight.
        new_seat: The new seat to update to.
    """
    # `context` wraps the shared AirlineAgentContext for this conversation.
    # The flight number is assigned by the seat-booking handoff hook. Check it
    # BEFORE mutating the context so a failed call leaves no half-applied update,
    # and raise explicitly instead of `assert` (asserts are stripped under -O).
    if context.context.flight_number is None:
        raise ValueError("Flight number is required")
    context.context.confirmation_number = confirmation_number
    context.context.seat_number = new_seat
    return f"Updated seat to {new_seat} for confirmation number {confirmation_number}"

# ✅ Handoff logic
async def on_seat_booking_handoff(context: RunContextWrapper[AirlineAgentContext]) -> None:
    """Handoff hook: store a random three-digit flight number on the shared context."""
    flight_digits = random.randint(100, 999)
    context.context.flight_number = f"FLT-{flight_digits}"

In [None]:
# ✅ Agent definitions
# FAQ specialist: answers questions using faq_lookup_tool.
# Its handoff back to the triage agent is appended once triage_agent exists (further down).
faq_agent = Agent[AirlineAgentContext](
    name="FAQ Agent",
    handoff_description="Answers airline-related questions.",
    instructions=f"""{RECOMMENDED_PROMPT_PREFIX}
You are an FAQ agent. If speaking to a customer, you were likely transferred here.
# Routine
1. Identify the last question.
2. Use the faq lookup tool.
3. If unsure, hand back to triage agent.""",
    tools=[faq_lookup_tool],
)

# Seat-booking specialist: collects a confirmation number and seat, then calls update_seat.
# Its handoff back to the triage agent is appended once triage_agent exists (further down).
seat_booking_agent = Agent[AirlineAgentContext](
    name="Seat Booking Agent",
    handoff_description="Can update a passenger's seat.",
    instructions=f"""{RECOMMENDED_PROMPT_PREFIX}
You are a seat booking agent.
# Routine
1. Ask for confirmation number.
2. Ask for desired seat.
3. Use the tool to update it.
If unsure, hand back to triage agent.""",
    tools=[update_seat],
)

# Entry-point agent: has no tools of its own and only routes to the two specialists.
triage_agent = Agent[AirlineAgentContext](
    name="Triage Agent",
    handoff_description="Routes questions to the correct department.",
    instructions=f"""{RECOMMENDED_PROMPT_PREFIX}
You are a triage agent. Route the user's query to the right specialized agent.""",
    handoffs=[
        faq_agent,
        # Wrapped in handoff(...) so on_seat_booking_handoff runs on transfer and
        # sets the flight number that update_seat requires.
        handoff(agent=seat_booking_agent, on_handoff=on_seat_booking_handoff),
    ],
)

# Let both specialists hand control back to triage. This is done after construction
# because triage_agent did not exist yet when the specialists were defined.
faq_agent.handoffs.append(triage_agent)
seat_booking_agent.handoffs.append(triage_agent)

In [None]:
# ✅ Run the assistant loop
async def main():
    """Run an interactive customer-service chat loop.

    Starts at the triage agent. On every turn it reads one line from the
    user, runs the current agent on the accumulated conversation, prints each
    item the run produced, and carries both the conversation history and the
    last active agent over to the next turn.

    The loop ends when the user types ``exit`` or ``quit``, when input is
    exhausted, or when the prompt is interrupted. Blank lines are ignored.
    """
    current_agent: Agent[AirlineAgentContext] = triage_agent
    input_items: list[TResponseInputItem] = []
    context = AirlineAgentContext()
    conversation_id = uuid.uuid4().hex[:16]
    # The run configuration is the same for every turn, so build it once.
    run_config = RunConfig(model_provider=CUSTOM_PROVIDER)

    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # stdin closed or the cell was interrupted: leave without a traceback.
            break

        command = user_input.strip().lower()
        if command in {"exit", "quit"}:
            break
        if not command:
            # Don't send empty messages to the model.
            continue

        with trace("Customer service", group_id=conversation_id):
            input_items.append({"content": user_input, "role": "user"})
            result = await Runner.run(
                current_agent,
                input_items,
                context=context,
                run_config=run_config,
            )

            for new_item in result.new_items:
                agent_name = new_item.agent.name
                if isinstance(new_item, MessageOutputItem):
                    print(f"{agent_name}: {ItemHelpers.text_message_output(new_item)}")
                elif isinstance(new_item, HandoffOutputItem):
                    print(f"➡️ Handoff from {new_item.source_agent.name} to {new_item.target_agent.name}")
                elif isinstance(new_item, ToolCallItem):
                    print(f"{agent_name}: (Calling a tool...)")
                elif isinstance(new_item, ToolCallOutputItem):
                    print(f"{agent_name}: Tool output → {new_item.output}")
                else:
                    print(f"{agent_name}: (Unhandled output type)")
            # Feed the full history (including this turn's outputs) into the next turn,
            # and continue with whichever agent handled the end of this one.
            input_items = result.to_input_list()
            current_agent = result.last_agent

# ✅ Run it (you’ll get a text prompt box below the cell)
await main()

In [None]:
# Benchmark 1: Performance test for a single request

import asyncio
import time
from openai import AsyncOpenAI

# ✅ Benchmark configuration
# Number of requests the concurrent benchmark sends in parallel.
NUM_REQUESTS = 5

# Client used by the benchmark functions; built from the same BASE_URL / API_KEY
# constants as the rest of the notebook.
client = AsyncOpenAI(base_url=BASE_URL, api_key=API_KEY)

# ✅ Benchmark 1: Single request - measure tokens per second
async def benchmark_single_request(prompt="Write a haiku about fireflies."):
    """Stream one chat completion and print latency and throughput figures.

    Args:
        prompt: Text sent as the single user message of the request.

    Prints the time to the first content chunk, the number of content chunks
    received, the chunks-per-second rate after the first chunk, and the total
    wall time. Chunks are used as an approximation of tokens: one streamed
    chunk is not guaranteed to carry exactly one token.
    """
    print("🔍 Benchmarking token throughput for a single request...")

    messages = [{"role": "user", "content": prompt}]
    start_time = time.perf_counter()
    first_token_time = None
    token_count = 0

    async for chunk in await client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        stream=True
    ):
        if not chunk.choices:
            continue
        # Count every non-empty piece of content. Whitespace-only pieces (for
        # example a newline between haiku lines) are generated output too, so
        # they count towards both the total and the first-token timestamp.
        content = chunk.choices[0].delta.content
        if not content:
            continue
        if first_token_time is None:
            first_token_time = time.perf_counter()
        token_count += 1

    end_time = time.perf_counter()
    total_time = end_time - start_time

    if first_token_time is None:
        # Nothing was streamed back: no meaningful latency or rate.
        ttfb = float('nan')
        tps = 0
    else:
        ttfb = first_token_time - start_time
        generation_time = end_time - first_token_time
        # Guard against a zero-length interval (e.g. a single chunk).
        tps = token_count / generation_time if generation_time > 0 else 0

    print(f"📥 Time to first token: {ttfb:.2f}s")
    print(f"📦 Total tokens: {token_count}")
    print(f"⚡️ Tokens/sec: {tps:.2f}")
    print(f"⏱ Total time: {total_time:.2f}s")

await benchmark_single_request()

In [None]:
# Benchmark 2: 5 concurrent requests
async def send_and_measure(index, prompt="Write a haiku about mountains."):
    """Send one streaming request and measure the time to its first content chunk.

    Args:
        index: Zero-based request number, used only for the printed label.
        prompt: Text sent as the single user message of the request.

    Returns:
        Seconds from sending the request to the first non-empty content chunk,
        or NaN when the stream ended without producing any content.
    """
    messages = [{"role": "user", "content": prompt}]
    start_time = time.perf_counter()
    first_token_time = None

    stream = await client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
        stream=True
    )
    try:
        async for chunk in stream:
            # Any non-empty content counts as the first token, including
            # whitespace-only pieces.
            if chunk.choices and chunk.choices[0].delta.content:
                first_token_time = time.perf_counter()
                break  # Stop after first token
    finally:
        # We usually stop reading mid-stream; close the response explicitly so
        # the underlying connection is released instead of left dangling.
        await stream.close()

    ttfb = first_token_time - start_time if first_token_time else float('nan')
    print(f"Request {index+1}: ⏱ Time to first token = {ttfb:.2f}s")
    return ttfb

async def benchmark_concurrent_requests():
    """Send NUM_REQUESTS streaming requests concurrently and print the mean time to first token.

    Requests that returned NaN (no content received) are excluded from the
    average so that one empty response does not turn the whole result into
    NaN. If no request produced a measurement, the average is printed as NaN.
    """
    import math  # local import: only needed for the NaN filter below

    print(f"\n🚀 Sending {NUM_REQUESTS} concurrent streaming requests...")
    ttfbs = await asyncio.gather(*(send_and_measure(i) for i in range(NUM_REQUESTS)))
    measured = [ttfb for ttfb in ttfbs if not math.isnan(ttfb)]
    # Also covers NUM_REQUESTS == 0, which would otherwise divide by zero.
    avg_ttfb = sum(measured) / len(measured) if measured else float('nan')
    print(f"\n📊 Average time to first token: {avg_ttfb:.2f}s")

# ✅ Call it
await benchmark_concurrent_requests()