Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions internal/golden/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,12 +24,6 @@ cd langchain-py-v1
python langchain.py
```

```bash
cd pydantic-ai-v1
python pydantic_ai_test.py
```


## Requirements

Before running a suite, ensure you have the appropriate API keys set as environment variables for that provider, along with `BRAINTRUST_API_KEY` if you want to log traces to Braintrust.
Expand Down
913 changes: 0 additions & 913 deletions internal/golden/pydantic-ai-v1/pydantic_ai_test.py

This file was deleted.

13 changes: 0 additions & 13 deletions internal/golden/pydantic-ai-v1/pyproject.toml

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":[{"type":"image_url","image_url":{"url":"data:image/png;base64,SU5WQUxJRF9QTkdfREFUQV9OT1RfQV9SRUFMX0lNQUdF"}},{"type":"text","text":"What''s
in this image?"}]}],"model":"gpt-4o-mini","max_completion_tokens":100,"stream":false}'
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Type:
- application/json
Host:
- api.openai.com
User-Agent:
- python-httpx/0.28.1
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: '{"error":{"message":"Could not process image. The image data is not
valid.","type":"invalid_request_error","param":null,"code":"invalid_image"}}'
headers:
Content-Type:
- application/json
status:
code: 400
message: Bad Request
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":"Here is a long text:\n\nThe quick
brown fox jumps over the lazy dog. <...repeated 20 times...>\n\nHow many times
does the word ''fox'' appear?"}],"model":"gpt-4o-mini","max_completion_tokens":100,"stream":false}'
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Type:
- application/json
Host:
- api.openai.com
User-Agent:
- python-httpx/0.28.1
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-long001","object":"chat.completion","created":1735000000,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_test","choices":[{"index":0,"message":{"role":"assistant","content":"The
word ''fox'' appears 20 times in the text.","refusal":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":120,"completion_tokens":14,"total_tokens":134,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}}'
headers:
Content-Type:
- application/json
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":"Write a haiku about coding."},{"role":"assistant","content":"Here
is a haiku:"},{"role":"user","content":"Write a haiku about coding."}],"model":"gpt-4o-mini","max_completion_tokens":200,"stream":false}'
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Type:
- application/json
Host:
- api.openai.com
User-Agent:
- python-httpx/0.28.1
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-pre001","object":"chat.completion","created":1735000000,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_test","choices":[{"index":0,"message":{"role":"assistant","content":"Whitespace
tells a tale\nFunctions dance in loops and turns\nClean code brings freedom","refusal":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":35,"completion_tokens":16,"total_tokens":51,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}}'
headers:
Content-Type:
- application/json
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":"What is AI?"}],"model":"gpt-4o-mini","max_completion_tokens":5,"stream":false}'
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Type:
- application/json
Host:
- api.openai.com
User-Agent:
- python-httpx/0.28.1
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-short001","object":"chat.completion","created":1735000000,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_test","choices":[{"index":0,"message":{"role":"assistant","content":"AI
stands for Artificial","refusal":null},"logprobs":null,"finish_reason":"length"}],"usage":{"prompt_tokens":11,"completion_tokens":5,"total_tokens":16,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}}'
headers:
Content-Type:
- application/json
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
interactions:
- request:
body: '{"messages":[{"role":"user","content":"Write a short story about a robot."}],"model":"gpt-4o-mini","max_completion_tokens":500,"stop":["END","\n\n"],"stream":false}'
headers:
Accept:
- application/json
Accept-Encoding:
- gzip, deflate
Connection:
- keep-alive
Content-Type:
- application/json
Host:
- api.openai.com
User-Agent:
- python-httpx/0.28.1
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: '{"id":"chatcmpl-stop001","object":"chat.completion","created":1735000000,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_test","choices":[{"index":0,"message":{"role":"assistant","content":"Once
upon a time, there was a small robot named Bolt who loved exploring the world.
Every day, Bolt would venture out to discover new things.","refusal":null},"logprobs":null,"finish_reason":"stop"}],"usage":{"prompt_tokens":16,"completion_tokens":31,"total_tokens":47,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}}}'
headers:
Content-Type:
- application/json
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
Expand Up @@ -1573,8 +1573,7 @@ def check_item(item):
async def test_agent_with_document_input(memory_logger):
"""Test that agents with document input (PDF) properly serialize attachments.

This specifically tests the scenario from test_document_input in the golden tests,
verifying that both agent_run and chat spans convert BinaryContent to Braintrust
Verifies that both agent_run and chat spans convert BinaryContent to Braintrust
attachments for document files like PDFs.
"""
from braintrust.logger import Attachment
Expand Down Expand Up @@ -2777,4 +2776,189 @@ def wrapped():
wrapper(wrapped, instance, (), {})

assert instance.call_count == 1
assert instance._async_producer == original_async_producer


@pytest.mark.vcr
@pytest.mark.asyncio
async def test_agent_with_stop_sequences(memory_logger):
    """Verify stop_sequences set on the Agent constructor surface in span metadata."""
    assert not memory_logger.pop()

    agent = Agent(
        MODEL,
        model_settings=ModelSettings(max_tokens=500, stop_sequences=["END", "\n\n"]),
    )

    t0 = time.time()
    result = await agent.run("Write a short story about a robot.")
    t1 = time.time()

    assert result.output

    spans = memory_logger.pop()
    assert len(spans) >= 2

    # Locate the top-level agent_run span (excluding chat spans).
    agent_span = None
    for span in spans:
        span_name = span["span_attributes"]["name"]
        if "agent_run" in span_name and "chat" not in span_name:
            agent_span = span
            break
    assert agent_span is not None, "agent_run span not found"
    assert agent_span["metadata"]["model"] == "gpt-4o-mini"

    # stop_sequences on the agent constructor → in metadata.model_settings
    assert "model_settings" in agent_span["metadata"]
    recorded_settings = agent_span["metadata"]["model_settings"]
    assert recorded_settings.get("stop_sequences") == ["END", "\n\n"]

    _assert_metrics_are_valid(agent_span["metrics"], t0, t1)


@pytest.mark.vcr
@pytest.mark.asyncio
async def test_agent_with_prefill(memory_logger):
    """Verify a partial assistant message at the end of message_history is traced.

    The 'prefill' pattern ends the history with an incomplete assistant message
    so the model continues from that point; the span input must include it.
    """
    from pydantic_ai.messages import ModelResponse, TextPart

    assert not memory_logger.pop()

    agent = Agent(MODEL, model_settings=ModelSettings(max_tokens=200))

    # History ending with a partial assistant response for the model to continue.
    history = [
        ModelRequest(parts=[UserPromptPart(content="Write a haiku about coding.")]),
        ModelResponse(parts=[TextPart(content="Here is a haiku:")]),
    ]

    t0 = time.time()
    result = await agent.run("Write a haiku about coding.", message_history=history)
    t1 = time.time()

    assert result.output

    spans = memory_logger.pop()
    assert len(spans) >= 2

    # Locate the top-level agent_run span (excluding chat spans).
    agent_span = None
    for span in spans:
        span_name = span["span_attributes"]["name"]
        if "agent_run" in span_name and "chat" not in span_name:
            agent_span = span
            break
    assert agent_span is not None, "agent_run span not found"
    assert agent_span["metadata"]["model"] == "gpt-4o-mini"

    # The prefill history (including the partial assistant TextPart) must appear
    # in the span input so that the trace is complete and auditable.
    serialized_input = str(agent_span["input"])
    assert "message_history" in serialized_input
    assert "Here is a haiku" in serialized_input
    assert agent_span["output"]

    _assert_metrics_are_valid(agent_span["metrics"], t0, t1)


@pytest.mark.vcr
@pytest.mark.asyncio
async def test_agent_with_short_max_tokens(memory_logger):
    """Verify a tiny max_tokens truncates the response without raising."""
    assert not memory_logger.pop()

    agent = Agent(MODEL)

    t0 = time.time()
    result = await agent.run("What is AI?", model_settings=ModelSettings(max_tokens=5))
    t1 = time.time()

    # Truncated responses are still valid output; no exception should be raised.
    assert result.output

    spans = memory_logger.pop()
    assert len(spans) >= 2

    # Locate the top-level agent_run span (excluding chat spans).
    agent_span = None
    for span in spans:
        span_name = span["span_attributes"]["name"]
        if "agent_run" in span_name and "chat" not in span_name:
            agent_span = span
            break
    assert agent_span is not None, "agent_run span not found"
    assert agent_span["metadata"]["model"] == "gpt-4o-mini"

    # max_tokens passed to run() → in input.model_settings
    assert "model_settings" in agent_span["input"]
    assert agent_span["input"]["model_settings"].get("max_tokens") == 5

    assert agent_span["output"]
    _assert_metrics_are_valid(agent_span["metrics"], t0, t1)


@pytest.mark.vcr
@pytest.mark.asyncio
async def test_agent_with_long_context(memory_logger):
    """Verify a large input prompt is handled and captured in the span input."""
    assert not memory_logger.pop()

    agent = Agent(MODEL, model_settings=ModelSettings(max_tokens=100))

    # Build a deliberately repetitive, oversized prompt.
    long_text = "The quick brown fox jumps over the lazy dog. " * 20
    prompt = f"Here is a long text:\n\n{long_text}\n\nHow many times does the word 'fox' appear?"

    t0 = time.time()
    result = await agent.run(prompt)
    t1 = time.time()

    assert result.output

    spans = memory_logger.pop()
    assert len(spans) >= 2

    # Locate the top-level agent_run span (excluding chat spans).
    agent_span = None
    for span in spans:
        span_name = span["span_attributes"]["name"]
        if "agent_run" in span_name and "chat" not in span_name:
            agent_span = span
            break
    assert agent_span is not None, "agent_run span not found"
    assert agent_span["metadata"]["model"] == "gpt-4o-mini"
    # The long prompt should be captured in the span input
    assert "fox" in str(agent_span["input"]).lower()
    assert agent_span["output"]

    _assert_metrics_are_valid(agent_span["metrics"], t0, t1)


@pytest.mark.vcr
@pytest.mark.asyncio
async def test_agent_with_error_handling(memory_logger):
    """Verify API errors from invalid binary content propagate to the caller.

    Sends corrupted image bytes; the API responds with a 400 error. The
    exception must reach the caller instead of being silently swallowed.
    """
    # NOTE(review): BinaryContent is imported via pydantic_ai.models.function
    # here — confirm this re-export is stable across pydantic_ai versions.
    from pydantic_ai.models.function import BinaryContent

    assert not memory_logger.pop()

    agent = Agent(MODEL, model_settings=ModelSettings(max_tokens=100))

    bogus_image_bytes = b"INVALID_PNG_DATA_NOT_A_REAL_IMAGE"

    # Broad Exception on purpose: the test asserts propagation, not the
    # concrete error type raised by the provider client.
    with pytest.raises(Exception):
        await agent.run(
            [
                BinaryContent(data=bogus_image_bytes, media_type="image/png"),
                "What's in this image?",
            ]
        )
Loading