diff --git a/sentry_sdk/integrations/openai_agents/patches/agent_run.py b/sentry_sdk/integrations/openai_agents/patches/agent_run.py index 6e7f0f2820..71883b2eef 100644 --- a/sentry_sdk/integrations/openai_agents/patches/agent_run.py +++ b/sentry_sdk/integrations/openai_agents/patches/agent_run.py @@ -3,6 +3,7 @@ from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable +from sentry_sdk.traces import StreamedSpan from sentry_sdk.utils import capture_internal_exceptions, reraise from ..spans import ( @@ -12,7 +13,7 @@ ) if TYPE_CHECKING: - from typing import Any, Awaitable, Callable, Optional + from typing import Any, Awaitable, Callable, Optional, Union from agents.run_internal.run_steps import SingleStepResult @@ -50,7 +51,7 @@ def _maybe_start_agent_span( should_run_agent_start_hooks: bool, span_kwargs: "dict[str, Any]", is_streaming: bool = False, -) -> "Optional[Span]": +) -> "Optional[Union[Span, StreamedSpan]]": """ Start an agent invocation span if conditions are met. Handles ending any existing span for a different agent. @@ -78,7 +79,12 @@ def _maybe_start_agent_span( context_wrapper._sentry_agent_span = span agent._sentry_agent_span = span - if is_streaming: + if not is_streaming: + return span + + if isinstance(span, StreamedSpan): + span.set_attribute(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) + else: span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) return span @@ -108,7 +114,11 @@ async def _run_single_turn( context_wrapper, agent, should_run_agent_start_hooks, kwargs ) - if span is None or span.timestamp is not None: + if ( + span is None + or (isinstance(span, StreamedSpan) and span.end_timestamp is not None) + or (not isinstance(span, StreamedSpan) and span.timestamp is not None) + ): return await original_run_single_turn(*args, **kwargs) try: @@ -188,7 +198,11 @@ async def _run_single_turn_streamed( is_streaming=True, ) - if span is None or span.timestamp is not None: + if ( + span is None + or (isinstance(span, StreamedSpan) and span.end_timestamp is not None) + or (not isinstance(span, StreamedSpan) and span.timestamp is not None) + ): return await original_run_single_turn_streamed(*args, **kwargs) try: diff --git a/sentry_sdk/integrations/openai_agents/patches/models.py b/sentry_sdk/integrations/openai_agents/patches/models.py index 1f684a6d60..634c9fdca1 100644 --- a/sentry_sdk/integrations/openai_agents/patches/models.py +++ b/sentry_sdk/integrations/openai_agents/patches/models.py @@ -6,6 +6,7 @@ import sentry_sdk from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable +from sentry_sdk.traces import StreamedSpan from sentry_sdk.tracing import BAGGAGE_HEADER_NAME from sentry_sdk.tracing_utils import ( add_sentry_baggage_to_headers, @@ -16,7 +17,7 @@ from ..spans import ai_client_span, update_ai_client_span if TYPE_CHECKING: - from typing import Any, Callable, Optional + from typing import Any, Callable, Optional, Union from sentry_sdk.tracing import Span @@ -34,11 +35,14 @@ def _set_response_model_on_agent_span( if response_model: agent_span = getattr(agent, "_sentry_agent_span", None) if agent_span: - agent_span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) + if isinstance(agent_span, StreamedSpan): + agent_span.set_attribute(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) + else: + agent_span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) def _inject_trace_propagation_headers( - hosted_tool: "HostedMCPTool", span: "Span" + hosted_tool: "HostedMCPTool", span: "Union[Span, StreamedSpan]" ) -> None: headers = hosted_tool.tool_config.get("headers") if headers is None: @@ -151,7 +155,12 @@ async def wrapped_stream_response(*args: "Any", **kwargs: "Any") -> "Any": for hosted_tool in hosted_tools: _inject_trace_propagation_headers(hosted_tool, span=span) - span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) + set_on_span = ( + span.set_attribute + if isinstance(span, StreamedSpan) + else span.set_data + ) + set_on_span(SPANDATA.GEN_AI_RESPONSE_STREAMING, True) streaming_response = None ttft_recorded = False @@ -162,9 +171,7 @@ async def wrapped_stream_response(*args: "Any", **kwargs: "Any") -> "Any": # Detect first content token (text delta event) if not ttft_recorded and hasattr(event, "delta"): ttft = time.perf_counter() - start_time - span.set_data( - SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft - ) + set_on_span(SPANDATA.GEN_AI_RESPONSE_TIME_TO_FIRST_TOKEN, ttft) ttft_recorded = True # Capture the full response from ResponseCompletedEvent diff --git a/sentry_sdk/integrations/openai_agents/patches/runner.py b/sentry_sdk/integrations/openai_agents/patches/runner.py index 6828ab4855..f5cc26060c 100644 --- a/sentry_sdk/integrations/openai_agents/patches/runner.py +++ b/sentry_sdk/integrations/openai_agents/patches/runner.py @@ -4,6 +4,7 @@ import sentry_sdk from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable +from sentry_sdk.traces import StreamedSpan from sentry_sdk.utils import capture_internal_exceptions, reraise from ..spans import agent_workflow_span, update_invoke_agent_span @@ -43,9 +44,15 @@ async def wrapper(*args: "Any", **kwargs: "Any") -> "Any": conversation_id = kwargs.get("conversation_id") if conversation_id: agent._sentry_conversation_id = conversation_id - workflow_span.set_data( - SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id - ) + + if isinstance(workflow_span, StreamedSpan): + workflow_span.set_attribute( + SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id + ) + else: + workflow_span.set_data( + SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id + ) args = (agent, *args[1:]) try: @@ -61,9 +68,15 @@ async def wrapper(*args: "Any", **kwargs: "Any") -> "Any": context_wrapper, "_sentry_agent_span", None ) - if ( - invoke_agent_span is not None - and invoke_agent_span.timestamp is None + if invoke_agent_span is not None and ( + ( + isinstance(invoke_agent_span, StreamedSpan) + and invoke_agent_span.end_timestamp is None + ) + or ( + not isinstance(invoke_agent_span, StreamedSpan) + and invoke_agent_span.timestamp is None + ) ): update_invoke_agent_span( span=invoke_agent_span, @@ -135,7 +148,12 @@ def wrapper(*args: "Any", **kwargs: "Any") -> "Any": # Set conversation ID on workflow span early so it's captured even on errors if conversation_id: - workflow_span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id) + if isinstance(workflow_span, StreamedSpan): + workflow_span.set_attribute( + SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id + ) + else: + workflow_span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id) # Store span on agent for cleanup agent._sentry_workflow_span = workflow_span diff --git a/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py b/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py index 7874ad2483..758f06db8d 100644 --- a/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py +++ b/sentry_sdk/integrations/openai_agents/spans/agent_workflow.py @@ -2,15 +2,28 @@ import sentry_sdk from sentry_sdk.ai.utils import get_start_span_function +from sentry_sdk.tracing_utils import has_span_streaming_enabled from ..consts import SPAN_ORIGIN if TYPE_CHECKING: + from typing import Union + import agents -def agent_workflow_span(agent: "agents.Agent") -> "sentry_sdk.tracing.Span": +def agent_workflow_span( + agent: "agents.Agent", +) -> "Union[sentry_sdk.tracing.Span, sentry_sdk.traces.StreamedSpan]": # Create a transaction or a span if an transaction is already active + span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + span = sentry_sdk.traces.start_span( + name=f"{agent.name} workflow", attributes={"sentry.origin": SPAN_ORIGIN} + ) + + return span + span = get_start_span_function()( name=f"{agent.name} workflow", origin=SPAN_ORIGIN, diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py index 564d325416..f4f02cb674 100644 --- a/sentry_sdk/integrations/openai_agents/spans/ai_client.py +++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py @@ -2,6 +2,8 @@ import sentry_sdk from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.traces import StreamedSpan +from sentry_sdk.tracing_utils import has_span_streaming_enabled from ..consts import SPAN_ORIGIN from ..utils import ( @@ -12,14 +14,14 @@ ) if TYPE_CHECKING: - from typing import Any, Optional + from typing import Any, Optional, Union from agents import Agent def ai_client_span( agent: "Agent", get_response_kwargs: "dict[str, Any]" -) -> "sentry_sdk.tracing.Span": +) -> "Union[sentry_sdk.tracing.Span, StreamedSpan]": # TODO-anton: implement other types of operations. Now "chat" is hardcoded. # Get model name from agent.model or fall back to request model (for when agent.model is None/default) model_name = None @@ -28,13 +30,24 @@ def ai_client_span( elif hasattr(agent, "_sentry_request_model"): model_name = agent._sentry_request_model - span = sentry_sdk.start_span( - op=OP.GEN_AI_CHAT, - name=f"chat {model_name}", - origin=SPAN_ORIGIN, - ) - # TODO-anton: remove hardcoded stuff and replace something that also works for embedding and so on - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "chat") + span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + span = sentry_sdk.traces.start_span( + name=f"chat {model_name}", + attributes={ + "sentry.op": OP.GEN_AI_CHAT, + "sentry.origin": SPAN_ORIGIN, + SPANDATA.GEN_AI_OPERATION_NAME: "chat", + }, + ) + else: + span = sentry_sdk.start_span( + op=OP.GEN_AI_CHAT, + name=f"chat {model_name}", + origin=SPAN_ORIGIN, + ) + # TODO-anton: remove hardcoded stuff and replace something that also works for embedding and so on + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "chat") _set_agent_data(span, agent) _set_input_data(span, get_response_kwargs) @@ -43,7 +56,7 @@ def ai_client_span( def update_ai_client_span( - span: "sentry_sdk.tracing.Span", + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", response: "Any", response_model: "Optional[str]" = None, agent: "Optional[Agent]" = None, @@ -55,13 +68,17 @@ def update_ai_client_span( if hasattr(response, "output") and response.output: _set_output_data(span, response) + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) + if response_model is not None: - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) + set_on_span(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) elif hasattr(response, "model") and response.model: - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, str(response.model)) + set_on_span(SPANDATA.GEN_AI_RESPONSE_MODEL, str(response.model)) # Set conversation ID from agent if available if agent: conv_id = getattr(agent, "_sentry_conversation_id", None) if conv_id: - span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + set_on_span(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) diff --git a/sentry_sdk/integrations/openai_agents/spans/execute_tool.py b/sentry_sdk/integrations/openai_agents/spans/execute_tool.py index 6e690d59bb..fd3a430951 100644 --- a/sentry_sdk/integrations/openai_agents/spans/execute_tool.py +++ b/sentry_sdk/integrations/openai_agents/spans/execute_tool.py @@ -3,39 +3,58 @@ import sentry_sdk from sentry_sdk.consts import OP, SPANDATA, SPANSTATUS from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.traces import SpanStatus, StreamedSpan +from sentry_sdk.tracing_utils import has_span_streaming_enabled from ..consts import SPAN_ORIGIN from ..utils import _set_agent_data if TYPE_CHECKING: - from typing import Any + from typing import Any, Union import agents def execute_tool_span( tool: "agents.Tool", *args: "Any", **kwargs: "Any" -) -> "sentry_sdk.tracing.Span": - span = sentry_sdk.start_span( - op=OP.GEN_AI_EXECUTE_TOOL, - name=f"execute_tool {tool.name}", - origin=SPAN_ORIGIN, - ) +) -> "Union[sentry_sdk.tracing.Span, StreamedSpan]": + span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + span = sentry_sdk.traces.start_span( + name=f"execute_tool {tool.name}", + attributes={ + "sentry.op": OP.GEN_AI_EXECUTE_TOOL, + "sentry.origin": SPAN_ORIGIN, + SPANDATA.GEN_AI_OPERATION_NAME: "execute_tool", + SPANDATA.GEN_AI_TOOL_NAME: tool.name, + SPANDATA.GEN_AI_TOOL_DESCRIPTION: tool.description, + }, + ) + + set_on_span = span.set_attribute + else: + span = sentry_sdk.start_span( + op=OP.GEN_AI_EXECUTE_TOOL, + name=f"execute_tool {tool.name}", + origin=SPAN_ORIGIN, + ) + + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "execute_tool") - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "execute_tool") + span.set_data(SPANDATA.GEN_AI_TOOL_NAME, tool.name) + span.set_data(SPANDATA.GEN_AI_TOOL_DESCRIPTION, tool.description) - span.set_data(SPANDATA.GEN_AI_TOOL_NAME, tool.name) - span.set_data(SPANDATA.GEN_AI_TOOL_DESCRIPTION, tool.description) + set_on_span = span.set_data if should_send_default_pii(): input = args[1] - span.set_data(SPANDATA.GEN_AI_TOOL_INPUT, input) + set_on_span(SPANDATA.GEN_AI_TOOL_INPUT, input) return span def update_execute_tool_span( - span: "sentry_sdk.tracing.Span", + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", agent: "agents.Agent", tool: "agents.Tool", result: "Any", @@ -45,12 +64,19 @@ def update_execute_tool_span( if isinstance(result, str) and result.startswith( "An error occurred while running the tool" ): - span.set_status(SPANSTATUS.INTERNAL_ERROR) + if isinstance(span, StreamedSpan): + span.status = SpanStatus.ERROR + else: + span.set_status(SPANSTATUS.INTERNAL_ERROR) + + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) if should_send_default_pii(): - span.set_data(SPANDATA.GEN_AI_TOOL_OUTPUT, result) + set_on_span(SPANDATA.GEN_AI_TOOL_OUTPUT, result) # Add conversation ID from agent conv_id = getattr(agent, "_sentry_conversation_id", None) if conv_id: - span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + set_on_span(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) diff --git a/sentry_sdk/integrations/openai_agents/spans/handoff.py b/sentry_sdk/integrations/openai_agents/spans/handoff.py index 979a4fb7df..ea91464afb 100644 --- a/sentry_sdk/integrations/openai_agents/spans/handoff.py +++ b/sentry_sdk/integrations/openai_agents/spans/handoff.py @@ -2,6 +2,7 @@ import sentry_sdk from sentry_sdk.consts import OP, SPANDATA +from sentry_sdk.tracing_utils import has_span_streaming_enabled from ..consts import SPAN_ORIGIN @@ -12,14 +13,29 @@ def handoff_span( context: "agents.RunContextWrapper", from_agent: "agents.Agent", to_agent_name: str ) -> None: - with sentry_sdk.start_span( - op=OP.GEN_AI_HANDOFF, - name=f"handoff from {from_agent.name} to {to_agent_name}", - origin=SPAN_ORIGIN, - ) as span: - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "handoff") + span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + with sentry_sdk.traces.start_span( + name=f"handoff from {from_agent.name} to {to_agent_name}", + attributes={ + "sentry.op": OP.GEN_AI_HANDOFF, + "sentry.origin": SPAN_ORIGIN, + SPANDATA.GEN_AI_OPERATION_NAME: "handoff", + }, + ) as span: + # Add conversation ID from agent + conv_id = getattr(from_agent, "_sentry_conversation_id", None) + if conv_id: + span.set_attribute(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + else: + with sentry_sdk.start_span( + op=OP.GEN_AI_HANDOFF, + name=f"handoff from {from_agent.name} to {to_agent_name}", + origin=SPAN_ORIGIN, + ) as span: + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "handoff") - # Add conversation ID from agent - conv_id = getattr(from_agent, "_sentry_conversation_id", None) - if conv_id: - span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + # Add conversation ID from agent + conv_id = getattr(from_agent, "_sentry_conversation_id", None) + if conv_id: + span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) diff --git a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py index 6f7dda3982..c21145ac4a 100644 --- a/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py +++ b/sentry_sdk/integrations/openai_agents/spans/invoke_agent.py @@ -9,29 +9,45 @@ ) from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.traces import StreamedSpan +from sentry_sdk.tracing_utils import ( + has_span_streaming_enabled, + should_truncate_gen_ai_input, +) from sentry_sdk.utils import safe_serialize from ..consts import SPAN_ORIGIN from ..utils import _set_agent_data, _set_usage_data if TYPE_CHECKING: - from typing import Any + from typing import Any, Union import agents def invoke_agent_span( context: "agents.RunContextWrapper", agent: "agents.Agent", kwargs: "dict[str, Any]" -) -> "sentry_sdk.tracing.Span": - start_span_function = get_start_span_function() - span = start_span_function( - op=OP.GEN_AI_INVOKE_AGENT, - name=f"invoke_agent {agent.name}", - origin=SPAN_ORIGIN, - ) - span.__enter__() - - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") +) -> "Union[sentry_sdk.tracing.Span, StreamedSpan]": + span_streaming = has_span_streaming_enabled(sentry_sdk.get_client().options) + if span_streaming: + span = sentry_sdk.traces.start_span( + name=f"invoke_agent {agent.name}", + attributes={ + "sentry.op": OP.GEN_AI_INVOKE_AGENT, + "sentry.origin": SPAN_ORIGIN, + SPANDATA.GEN_AI_OPERATION_NAME: "invoke_agent", + }, + ) + else: + start_span_function = get_start_span_function() + span = start_span_function( + op=OP.GEN_AI_INVOKE_AGENT, + name=f"invoke_agent {agent.name}", + origin=SPAN_ORIGIN, + ) + span.__enter__() + + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "invoke_agent") if should_send_default_pii(): messages = [] @@ -67,9 +83,9 @@ def invoke_agent_span( client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages_data = ( - normalized_messages - if client.options.get("stream_gen_ai_spans", False) - else truncate_and_annotate_messages(normalized_messages, span, scope) + truncate_and_annotate_messages(normalized_messages, span, scope) + if should_truncate_gen_ai_input(client.options) + else normalized_messages ) if messages_data is not None: set_data_normalized( @@ -85,7 +101,7 @@ def invoke_agent_span( def update_invoke_agent_span( - span: "sentry_sdk.tracing.Span", + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", context: "agents.RunContextWrapper", agent: "agents.Agent", output: "Any" = None, @@ -100,4 +116,7 @@ def update_invoke_agent_span( # Add conversation ID from agent conv_id = getattr(agent, "_sentry_conversation_id", None) if conv_id: - span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + if isinstance(span, StreamedSpan): + span.set_attribute(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) + else: + span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conv_id) diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py index 78f0a90f65..224a5f66ba 100644 --- a/sentry_sdk/integrations/openai_agents/utils.py +++ b/sentry_sdk/integrations/openai_agents/utils.py @@ -17,10 +17,12 @@ from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.traces import StreamedSpan +from sentry_sdk.tracing_utils import should_truncate_gen_ai_input from sentry_sdk.utils import event_from_exception, safe_serialize if TYPE_CHECKING: - from typing import Any + from typing import Any, Union from agents import TResponseInputItem, Usage @@ -42,17 +44,21 @@ def _capture_exception(exc: "Any") -> None: sentry_sdk.capture_event(event, hint=hint) -def _set_agent_data(span: "sentry_sdk.tracing.Span", agent: "agents.Agent") -> None: - span.set_data( +def _set_agent_data( + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", agent: "agents.Agent" +) -> None: + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) + + set_on_span( SPANDATA.GEN_AI_SYSTEM, "openai" ) # See footnote for https://opentelemetry.io/docs/specs/semconv/registry/attributes/gen-ai/#gen-ai-system for explanation why. - span.set_data(SPANDATA.GEN_AI_AGENT_NAME, agent.name) + set_on_span(SPANDATA.GEN_AI_AGENT_NAME, agent.name) if agent.model_settings.max_tokens: - span.set_data( - SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, agent.model_settings.max_tokens - ) + set_on_span(SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, agent.model_settings.max_tokens) # Get model name from agent.model or fall back to request model (for when agent.model is None/default) model_name = None @@ -62,51 +68,57 @@ def _set_agent_data(span: "sentry_sdk.tracing.Span", agent: "agents.Agent") -> N model_name = agent._sentry_request_model if model_name: - span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model_name) + set_on_span(SPANDATA.GEN_AI_REQUEST_MODEL, model_name) if agent.model_settings.presence_penalty: - span.set_data( + set_on_span( SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, agent.model_settings.presence_penalty, ) if agent.model_settings.temperature: - span.set_data( + set_on_span( SPANDATA.GEN_AI_REQUEST_TEMPERATURE, agent.model_settings.temperature ) if agent.model_settings.top_p: - span.set_data(SPANDATA.GEN_AI_REQUEST_TOP_P, agent.model_settings.top_p) + set_on_span(SPANDATA.GEN_AI_REQUEST_TOP_P, agent.model_settings.top_p) if agent.model_settings.frequency_penalty: - span.set_data( + set_on_span( SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, agent.model_settings.frequency_penalty, ) if len(agent.tools) > 0: - span.set_data( + set_on_span( SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize([vars(tool) for tool in agent.tools]), ) -def _set_usage_data(span: "sentry_sdk.tracing.Span", usage: "Usage") -> None: - span.set_data(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, usage.input_tokens) - span.set_data( +def _set_usage_data( + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", usage: "Usage" +) -> None: + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) + set_on_span(SPANDATA.GEN_AI_USAGE_INPUT_TOKENS, usage.input_tokens) + set_on_span( SPANDATA.GEN_AI_USAGE_INPUT_TOKENS_CACHED, usage.input_tokens_details.cached_tokens, ) - span.set_data(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, usage.output_tokens) - span.set_data( + set_on_span(SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS, usage.output_tokens) + set_on_span( SPANDATA.GEN_AI_USAGE_OUTPUT_TOKENS_REASONING, usage.output_tokens_details.reasoning_tokens, ) - span.set_data(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, usage.total_tokens) + set_on_span(SPANDATA.GEN_AI_USAGE_TOTAL_TOKENS, usage.total_tokens) def _set_input_data( - span: "sentry_sdk.tracing.Span", get_response_kwargs: "dict[str, Any]" + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", + get_response_kwargs: "dict[str, Any]", ) -> None: if not should_send_default_pii(): return @@ -131,10 +143,16 @@ def _set_input_data( instructions_text_parts += _transform_system_instructions(system_instructions) if len(instructions_text_parts) > 0: - span.set_data( - SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, - json.dumps(instructions_text_parts), - ) + if isinstance(span, StreamedSpan): + span.set_attribute( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + json.dumps(instructions_text_parts), + ) + else: + span.set_data( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, + json.dumps(instructions_text_parts), + ) non_system_messages = [ message for message in messages if not _is_system_instruction(message) @@ -173,9 +191,9 @@ def _set_input_data( client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages_data = ( - normalized_messages - if client.options.get("stream_gen_ai_spans", False) - else truncate_and_annotate_messages(normalized_messages, span, scope) + truncate_and_annotate_messages(normalized_messages, span, scope) + if should_truncate_gen_ai_input(client.options) + else normalized_messages ) if messages_data is not None: set_data_normalized( @@ -186,7 +204,9 @@ def _set_input_data( ) -def _set_output_data(span: "sentry_sdk.tracing.Span", result: "Any") -> None: +def _set_output_data( + span: "Union[sentry_sdk.tracing.Span, StreamedSpan]", result: "Any" +) -> None: if not should_send_default_pii(): return @@ -207,9 +227,16 @@ def _set_output_data(span: "sentry_sdk.tracing.Span", result: "Any") -> None: output_messages["response"].append(output_message.dict()) if len(output_messages["tool"]) > 0: - span.set_data( - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, safe_serialize(output_messages["tool"]) - ) + if isinstance(span, StreamedSpan): + span.set_attribute( + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + safe_serialize(output_messages["tool"]), + ) + else: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + safe_serialize(output_messages["tool"]), + ) if len(output_messages["response"]) > 0: set_data_normalized( diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py index 7ca0df5fa2..6deb2a888f 100644 --- a/tests/integrations/openai_agents/test_openai_agents.py +++ b/tests/integrations/openai_agents/test_openai_agents.py @@ -31,6 +31,7 @@ from sentry_sdk.integrations.logging import LoggingIntegration from sentry_sdk.integrations.openai_agents import OpenAIAgentsIntegration from sentry_sdk.integrations.openai_agents.utils import _set_input_data, safe_serialize +from sentry_sdk.integrations.stdlib import StdlibIntegration from sentry_sdk.utils import package_version, parse_version OPENAI_VERSION = package_version("openai") @@ -168,6 +169,7 @@ def test_agent_custom_model(): ) +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_agent_invocation_span_no_pii( @@ -178,6 +180,7 @@ async def test_agent_invocation_span_no_pii( nonstreaming_responses_model_response, get_model_response, stream_gen_ai_spans, + span_streaming, ): client = AsyncOpenAI(api_key="test-key") model = OpenAIResponsesModel(model="gpt-4", openai_client=client) @@ -187,7 +190,71 @@ async def test_agent_invocation_span_no_pii( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=False, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + sentry_sdk.flush() + spans = [item.payload for item in items] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert spans[2]["name"] == "test_agent workflow" + assert spans[2]["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + + assert ( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + ) + assert "gen_ai.request.messages" not in invoke_agent_span["attributes"] + assert "gen_ai.response.text" not in invoke_agent_span["attributes"] + + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -305,6 +372,7 @@ async def test_agent_invocation_span_no_pii( assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -493,6 +561,7 @@ async def test_agent_invocation_span( expected_request_messages, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that the integration creates spans for agent invocations. @@ -505,7 +574,78 @@ async def test_agent_invocation_span( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + result = await agents.Runner.run( + agent, + input, + run_config=test_run_config, + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + sentry_sdk.flush() + spans = [item.payload for item in items] + ai_client_span, invoke_agent_span, workflow_span = spans + + assert workflow_span["name"] == "test_agent workflow" + assert workflow_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + + if expected_system_instructions is None: + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] + else: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize(expected_system_instructions) + + assert ( + json.loads(ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + == expected_request_messages + ) + + assert ( + invoke_agent_span["attributes"]["gen_ai.response.text"] + == "Hello, how can I help you?" + ) + + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -642,6 +782,7 @@ async def test_agent_invocation_span( assert ai_client_span["data"]["gen_ai.request.top_p"] == 1.0 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_client_span_custom_model( @@ -652,6 +793,7 @@ async def test_client_span_custom_model( nonstreaming_responses_model_response, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that the integration uses the correct model name if a custom model is used. @@ -665,7 +807,7 @@ async def test_client_span_custom_model( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -673,8 +815,12 @@ async def test_client_span_custom_model( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span") @@ -686,6 +832,7 @@ async def test_client_span_custom_model( assert result is not None assert result.final_output == "Hello, how can I help you?" + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] ai_client_span = next( span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT @@ -721,6 +868,7 @@ async def test_client_span_custom_model( assert ai_client_span["data"]["gen_ai.request.model"] == "my-custom-model" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_agent_invocation_span_sync_no_pii( sentry_init, @@ -730,6 +878,7 @@ def test_agent_invocation_span_sync_no_pii( nonstreaming_responses_model_response, get_model_response, stream_gen_ai_spans, + span_streaming, sync_event_loop, ): """ @@ -743,7 +892,69 @@ def test_agent_invocation_span_sync_no_pii( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=False, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + result = agents.Runner.run_sync( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + sentry_sdk.flush() + spans = [item.payload for item in items] + + assert spans[2]["name"] == "test_agent workflow" + assert spans[2]["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ( + SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["attributes"] + ) + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -855,6 +1066,7 @@ def test_agent_invocation_span_sync_no_pii( assert SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS not in invoke_agent_span["data"] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "instructions,input,expected_system_instructions,expected_request_messages", @@ -1042,6 +1254,7 @@ def test_agent_invocation_span_sync( expected_request_messages, get_model_response, stream_gen_ai_spans, + span_streaming, sync_event_loop, ): """ @@ -1055,7 +1268,71 @@ def test_agent_invocation_span_sync( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + result = agents.Runner.run_sync( + agent, + input, + run_config=test_run_config, + ) + + assert result is not None + assert result.final_output == "Hello, how can I help you?" + + sentry_sdk.flush() + spans = [item.payload for item in items] + ai_client_span, invoke_agent_span, workflow_span = spans + + assert workflow_span["name"] == "test_agent workflow" + assert workflow_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + ) + assert invoke_agent_span["attributes"]["gen_ai.system"] == "openai" + assert invoke_agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert invoke_agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert invoke_agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert invoke_agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert invoke_agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span["attributes"]["gen_ai.request.top_p"] == 1.0 + + if expected_system_instructions is None: + assert "gen_ai.system_instructions" not in ai_client_span["attributes"] + else: + assert ai_client_span["attributes"][ + "gen_ai.system_instructions" + ] == safe_serialize(expected_system_instructions) + + assert ( + json.loads(ai_client_span["attributes"][SPANDATA.GEN_AI_REQUEST_MESSAGES]) + == expected_request_messages + ) + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -1174,6 +1451,7 @@ def test_agent_invocation_span_sync( ] == safe_serialize(expected_system_instructions) +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_handoff_span( @@ -1182,6 +1460,7 @@ async def test_handoff_span( capture_items, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that handoff spans are created when agents hand off to other agents. @@ -1275,7 +1554,7 @@ async def test_handoff_span( serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( primary_agent.model._client._client, "send", @@ -1283,8 +1562,12 @@ async def test_handoff_span( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("transaction", "span") @@ -1297,6 +1580,7 @@ async def test_handoff_span( assert result is not None + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] handoff_span = next( span @@ -1344,6 +1628,7 @@ async def test_handoff_span( assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_max_turns_before_handoff_span( @@ -1352,6 +1637,7 @@ async def test_max_turns_before_handoff_span( capture_items, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Example raising agents.exceptions.AgentsException after the agent invocation span is complete. @@ -1445,7 +1731,7 @@ async def test_max_turns_before_handoff_span( serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( primary_agent.model._client._client, "send", @@ -1453,8 +1739,12 @@ async def test_max_turns_before_handoff_span( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("transaction", "span") @@ -1467,6 +1757,7 @@ async def test_max_turns_before_handoff_span( max_turns=1, ) + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] handoff_span = next( span @@ -1514,6 +1805,7 @@ async def test_max_turns_before_handoff_span( assert handoff_span["data"]["gen_ai.operation.name"] == "handoff" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_span( @@ -1524,6 +1816,7 @@ async def test_tool_execution_span( get_model_response, responses_tool_call_model_responses, stream_gen_ai_spans, + span_streaming, ): """ Test tool execution span creation. @@ -1581,7 +1874,7 @@ def simple_test_tool(message: str) -> str: serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming: with patch.object( agent_with_tool.model._client._client, "send", @@ -1589,12 +1882,14 @@ def simple_test_tool(message: str) -> str: ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, ) - items = capture_items("transaction", "span") + items = capture_items("span") await agents.Runner.run( agent_with_tool, @@ -1602,18 +1897,21 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - (transaction,) = (item.payload for item in items if item.type == "transaction") - assert transaction["transaction"] == "test_agent workflow" - assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + sentry_sdk.flush() + spans = [item.payload for item in items] + + assert spans[4]["name"] == "test_agent workflow" + assert spans[4]["attributes"]["sentry.origin"] == "auto.ai.openai_agents" - spans = [item.payload for item in items if item.type == "span"] agent_span = next( span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT ) ai_client_span1, ai_client_span2 = ( - span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + span + for span in spans + if span["attributes"].get("sentry.op") == OP.GEN_AI_CHAT ) tool_span = next( span @@ -1813,7 +2111,7 @@ def simple_test_tool(message: str) -> str: assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 - else: + elif span_streaming or stream_gen_ai_spans: with patch.object( agent_with_tool.model._client._client, "send", @@ -1826,7 +2124,7 @@ def simple_test_tool(message: str) -> str: stream_gen_ai_spans=stream_gen_ai_spans, ) - events = capture_events() + items = capture_items("transaction", "span") await agents.Runner.run( agent_with_tool, @@ -1834,11 +2132,243 @@ def simple_test_tool(message: str) -> str: run_config=test_run_config, ) - (transaction,) = events - spans = transaction["spans"] + (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["transaction"] == "test_agent workflow" + assert transaction["contexts"]["trace"]["origin"] == "auto.ai.openai_agents" + + spans = [item.payload for item in items if item.type == "span"] agent_span = next( - span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT - ) + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span1, ai_client_span2 = ( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + tool_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_EXECUTE_TOOL + ) + + available_tool = { + "name": "simple_test_tool", + "description": "A simple tool", + "params_json_schema": { + "properties": {"message": {"title": "Message", "type": "string"}}, + "required": ["message"], + "title": "simple_test_tool_args", + "type": "object", + "additionalProperties": False, + }, + "on_invoke_tool": mock.ANY, + "strict_json_schema": True, + "is_enabled": True, + } + + if parse_version(OPENAI_AGENTS_VERSION) >= (0, 3, 3): + available_tool.update( + {"tool_input_guardrails": None, "tool_output_guardrails": None} + ) + + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 8, + ): + available_tool["needs_approval"] = False + if parse_version(OPENAI_AGENTS_VERSION) >= ( + 0, + 9, + 0, + ): + available_tool.update( + { + "timeout_seconds": None, + "timeout_behavior": "error_as_result", + "timeout_error_function": None, + } + ) + + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert agent_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert agent_span["attributes"]["gen_ai.operation.name"] == "invoke_agent" + + agent_span_available_tool = json.loads( + agent_span["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all(agent_span_available_tool[k] == v for k, v in available_tool.items()) + + assert agent_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert agent_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert agent_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert agent_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert agent_span["attributes"]["gen_ai.system"] == "openai" + + assert ai_client_span1["name"] == "chat gpt-4" + assert ai_client_span1["attributes"]["gen_ai.operation.name"] == "chat" + assert ai_client_span1["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span1["attributes"]["gen_ai.agent.name"] == "test_agent" + + ai_client_span1_available_tool = json.loads( + ai_client_span1["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all( + ai_client_span1_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span1["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span1["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + ] + ) + assert ai_client_span1["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span1["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span1["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens"] == 10 + assert ai_client_span1["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span1["attributes"]["gen_ai.usage.output_tokens"] == 5 + assert ( + ai_client_span1["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span1["attributes"]["gen_ai.usage.total_tokens"] == 15 + + tool_call = { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + "status": None, + } + + if OPENAI_VERSION >= (2, 25, 0): + tool_call["namespace"] = None + + assert json.loads( + ai_client_span1["attributes"]["gen_ai.response.tool_calls"] + ) == [tool_call] + + assert tool_span["name"] == "execute_tool simple_test_tool" + assert tool_span["attributes"]["gen_ai.agent.name"] == "test_agent" + assert tool_span["attributes"]["gen_ai.operation.name"] == "execute_tool" + + tool_span_available_tool = json.loads( + tool_span["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all(tool_span_available_tool[k] == v for k, v in available_tool.items()) + + assert tool_span["attributes"]["gen_ai.request.max_tokens"] == 100 + assert tool_span["attributes"]["gen_ai.request.model"] == "gpt-4" + assert tool_span["attributes"]["gen_ai.request.temperature"] == 0.7 + assert tool_span["attributes"]["gen_ai.request.top_p"] == 1.0 + assert tool_span["attributes"]["gen_ai.system"] == "openai" + assert tool_span["attributes"]["gen_ai.tool.description"] == "A simple tool" + assert tool_span["attributes"]["gen_ai.tool.input"] == '{"message": "hello"}' + assert tool_span["attributes"]["gen_ai.tool.name"] == "simple_test_tool" + assert ( + tool_span["attributes"]["gen_ai.tool.output"] == "Tool executed with: hello" + ) + assert ai_client_span2["name"] == "chat gpt-4" + assert ai_client_span2["attributes"]["gen_ai.agent.name"] == "test_agent" + assert ai_client_span2["attributes"]["gen_ai.operation.name"] == "chat" + + ai_client_span2_available_tool = json.loads( + ai_client_span2["attributes"]["gen_ai.request.available_tools"] + )[0] + + assert all( + ai_client_span2_available_tool[k] == v for k, v in available_tool.items() + ) + + assert ai_client_span2["attributes"]["gen_ai.request.max_tokens"] == 100 + assert ai_client_span2["attributes"][ + "gen_ai.request.messages" + ] == safe_serialize( + [ + { + "role": "user", + "content": [ + {"type": "text", "text": "Please use the simple test tool"} + ], + }, + { + "role": "assistant", + "content": [ + { + "arguments": '{"message": "hello"}', + "call_id": "call_123", + "name": "simple_test_tool", + "type": "function_call", + "id": "call_123", + } + ], + }, + { + "role": "tool", + "content": [ + { + "call_id": "call_123", + "output": "Tool executed with: hello", + "type": "function_call_output", + } + ], + }, + ] + ) + assert ai_client_span2["attributes"]["gen_ai.request.model"] == "gpt-4" + assert ai_client_span2["attributes"]["gen_ai.request.temperature"] == 0.7 + assert ai_client_span2["attributes"]["gen_ai.request.top_p"] == 1.0 + assert ( + ai_client_span2["attributes"]["gen_ai.response.text"] + == "Task completed using the tool" + ) + assert ai_client_span2["attributes"]["gen_ai.system"] == "openai" + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens.cached"] == 0 + assert ai_client_span2["attributes"]["gen_ai.usage.input_tokens"] == 15 + assert ( + ai_client_span2["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 0 + ) + assert ai_client_span2["attributes"]["gen_ai.usage.output_tokens"] == 10 + assert ai_client_span2["attributes"]["gen_ai.usage.total_tokens"] == 25 + + else: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + ) + + events = capture_events() + + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + agent_span = next( + span for span in spans if span["op"] == OP.GEN_AI_INVOKE_AGENT + ) ai_client_span1, ai_client_span2 = ( span for span in spans if span["op"] == OP.GEN_AI_CHAT ) @@ -2262,6 +2792,7 @@ async def test_hosted_mcp_tool_propagation_headers( assert hosted_mcp_tool["headers"]["baggage"] == expected_outgoing_baggage +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_model_behavior_error( @@ -2270,6 +2801,7 @@ async def test_model_behavior_error( capture_items, test_agent, stream_gen_ai_spans, + span_streaming, ): """ Example raising agents.exceptions.AgentsException before the agent invocation span is complete. @@ -2284,7 +2816,64 @@ def simple_test_tool(message: str) -> str: # Create agent with the tool agent_with_tool = test_agent.clone(tools=[simple_test_tool]) - if stream_gen_ai_spans: + if span_streaming: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a mock response that includes tool calls + tool_call = ResponseFunctionToolCall( + id="call_123", + call_id="call_123", + name="wrong_tool", + type="function_call", + arguments='{"message": "hello"}', + ) + + tool_response = ModelResponse( + output=[tool_call], + usage=Usage( + requests=1, input_tokens=10, output_tokens=5, total_tokens=15 + ), + response_id="resp_tool_123", + ) + + mock_get_response.side_effect = [tool_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + with pytest.raises(ModelBehaviorError): + await agents.Runner.run( + agent_with_tool, + "Please use the simple test tool", + run_config=test_run_config, + ) + + sentry_sdk.flush() + spans = [item.payload for item in items] + + ( + ai_client_span1, + agent_span, + workflow_span, + ) = spans + assert workflow_span["name"] == "test_agent workflow" + assert workflow_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + assert agent_span["name"] == "invoke_agent test_agent" + assert agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + # Error due to unrecognized tool in model response. + assert agent_span["status"] == "error" + elif span_streaming or stream_gen_ai_spans: with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" ) as mock_get_response: @@ -2394,6 +2983,7 @@ def simple_test_tool(message: str) -> str: assert agent_span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_error_handling( @@ -2402,12 +2992,61 @@ async def test_error_handling( capture_items, test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test error handling in agent execution. """ - if stream_gen_ai_spans: + if span_streaming: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.side_effect = Exception("Model Error") + + sentry_init( + integrations=[ + OpenAIAgentsIntegration(), + LoggingIntegration(event_level=logging.CRITICAL), + ], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("event", "span") + + with pytest.raises(Exception, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + (error_event,) = (item.payload for item in items if item.type == "event") + + assert error_event["exception"]["values"][0]["type"] == "Exception" + assert error_event["exception"]["values"][0]["value"] == "Model Error" + assert ( + error_event["exception"]["values"][0]["mechanism"]["type"] + == "openai_agents" + ) + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + (ai_client_span, invoke_agent_span, workflow_span) = spans + + assert workflow_span["name"] == "test_agent workflow" + assert workflow_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + + assert invoke_agent_span["name"] == "invoke_agent test_agent" + assert ( + invoke_agent_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + ) + + assert ai_client_span["name"] == "chat gpt-4" + assert ai_client_span["attributes"]["sentry.origin"] == "auto.ai.openai_agents" + assert ai_client_span["status"] == "error" + elif span_streaming or stream_gen_ai_spans: with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" ) as mock_get_response: @@ -2502,6 +3141,7 @@ async def test_error_handling( assert ai_client_span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_error_captures_input_data( @@ -2510,6 +3150,7 @@ async def test_error_captures_input_data( capture_items, test_agent, stream_gen_ai_spans, + span_streaming, ): """ Test that input data is captured even when the API call raises an exception. @@ -2529,7 +3170,7 @@ async def test_error_captures_input_data( request=model_request, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -2540,9 +3181,13 @@ async def test_error_captures_input_data( OpenAIAgentsIntegration(), LoggingIntegration(event_level=logging.CRITICAL), ], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("event", "span") @@ -2555,6 +3200,7 @@ async def test_error_captures_input_data( assert error_event["exception"]["values"][0]["type"] == "InternalServerError" assert error_event["exception"]["values"][0]["value"] == "Error code: 500" + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] ai_client_span = [ s for s in spans if s["attributes"].get("sentry.op", "") == "gen_ai.chat" @@ -2617,6 +3263,7 @@ async def test_error_captures_input_data( assert ai_client_span["data"]["gen_ai.request.messages"] == request_messages +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_span_status_error( @@ -2625,8 +3272,42 @@ async def test_span_status_error( capture_items, test_agent, stream_gen_ai_spans, + span_streaming, ): - if stream_gen_ai_spans: + if span_streaming: + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + mock_get_response.side_effect = ValueError("Model Error") + + sentry_init( + integrations=[ + OpenAIAgentsIntegration(), + LoggingIntegration(event_level=logging.CRITICAL), + ], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("event", "span") + + with pytest.raises(ValueError, match="Model Error"): + await agents.Runner.run( + test_agent, "Test input", run_config=test_run_config + ) + + (error,) = (item.payload for item in items if item.type == "event") + assert error["level"] == "error" + + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["status"] == "error" + + assert spans[2]["is_segment"] is True + assert spans[2]["status"] == "error" + elif span_streaming or stream_gen_ai_spans: with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" ) as mock_get_response: @@ -2637,6 +3318,7 @@ async def test_span_status_error( OpenAIAgentsIntegration(), LoggingIntegration(event_level=logging.CRITICAL), ], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, ) @@ -2655,6 +3337,7 @@ async def test_span_status_error( assert spans[0]["status"] == "error" (transaction,) = (item.payload for item in items if item.type == "transaction") + assert transaction["contexts"]["trace"]["status"] == "internal_error" else: with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}), patch( "agents.models.openai_responses.OpenAIResponsesModel.get_response" @@ -2681,10 +3364,10 @@ async def test_span_status_error( assert error["level"] == "error" assert transaction["spans"][0]["status"] == "internal_error" assert transaction["spans"][0]["tags"]["status"] == "internal_error" - - assert transaction["contexts"]["trace"]["status"] == "internal_error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_multiple_agents_asyncio( @@ -2695,6 +3378,7 @@ async def test_multiple_agents_asyncio( nonstreaming_responses_model_response, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that multiple agents can be run at the same time in asyncio tasks @@ -2708,7 +3392,38 @@ async def test_multiple_agents_asyncio( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + async def run(): + await agents.Runner.run( + starting_agent=agent, + input="Test input", + run_config=test_run_config, + ) + + await asyncio.gather(*[run() for _ in range(3)]) + + sentry_sdk.flush() + spans = [item.payload for item in items] + + assert spans[2]["name"] == "test_agent workflow" + assert spans[5]["name"] == "test_agent workflow" + assert spans[8]["name"] == "test_agent workflow" + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -2716,6 +3431,7 @@ async def test_multiple_agents_asyncio( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, ) @@ -2813,6 +3529,7 @@ def test_openai_agents_message_role_mapping( assert stored_messages[0]["role"] == expected_role +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_tool_execution_error_tracing( @@ -2823,6 +3540,7 @@ async def test_tool_execution_error_tracing( get_model_response, responses_tool_call_model_responses, stream_gen_ai_spans, + span_streaming, ): """ Test that tool execution errors are properly tracked via error tracing patch. @@ -2887,7 +3605,7 @@ def failing_tool(message: str) -> str: serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent_with_tool.model._client._client, "send", @@ -2895,9 +3613,13 @@ def failing_tool(message: str) -> str: ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span", "transaction") @@ -2909,6 +3631,7 @@ def failing_tool(message: str) -> str: run_config=test_run_config, ) + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] # Find the execute_tool span @@ -2975,6 +3698,7 @@ def failing_tool(message: str) -> str: assert execute_tool_span["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_span_includes_usage_data( @@ -2984,6 +3708,7 @@ async def test_invoke_agent_span_includes_usage_data( test_agent, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that invoke_agent spans include aggregated usage data from context_wrapper. @@ -3032,7 +3757,7 @@ async def test_invoke_agent_span_includes_usage_data( serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -3040,9 +3765,13 @@ async def test_invoke_agent_span_includes_usage_data( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span", "transaction") @@ -3052,6 +3781,7 @@ async def test_invoke_agent_span_includes_usage_data( assert result is not None + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( span @@ -3111,6 +3841,7 @@ async def test_invoke_agent_span_includes_usage_data( assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 5 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_ai_client_span_includes_response_model( @@ -3120,6 +3851,7 @@ async def test_ai_client_span_includes_response_model( test_agent, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that ai_client spans (gen_ai.chat) include the response model from the actual API response. @@ -3168,7 +3900,7 @@ async def test_ai_client_span_includes_response_model( serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -3176,9 +3908,13 @@ async def test_ai_client_span_includes_response_model( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span", "transaction") @@ -3188,6 +3924,7 @@ async def test_ai_client_span_includes_response_model( assert result is not None + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] ai_client_span = next( span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT @@ -3230,6 +3967,7 @@ async def test_ai_client_span_includes_response_model( assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_ai_client_span_response_model_with_chat_completions( @@ -3238,6 +3976,7 @@ async def test_ai_client_span_response_model_with_chat_completions( capture_items, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that response model is captured when using ChatCompletions API (not Responses API). @@ -3292,7 +4031,7 @@ async def test_ai_client_span_response_model_with_chat_completions( serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -3300,8 +4039,12 @@ async def test_ai_client_span_response_model_with_chat_completions( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span", "transaction") @@ -3312,6 +4055,7 @@ async def test_ai_client_span_response_model_with_chat_completions( assert result is not None + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] ai_client_span = next( span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT @@ -3353,6 +4097,7 @@ async def test_ai_client_span_response_model_with_chat_completions( ) +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_multiple_llm_calls_aggregate_usage( @@ -3362,6 +4107,7 @@ async def test_multiple_llm_calls_aggregate_usage( test_agent, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that invoke_agent spans show aggregated usage across multiple LLM calls @@ -3449,7 +4195,48 @@ def calculator(a: int, b: int) -> int: serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_call_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + result = await agents.Runner.run( + agent_with_tool, + "What is 5 + 3?", + run_config=test_run_config, + ) + + assert result is not None + + sentry_sdk.flush() + spans = [item.payload for item in items] + + invoke_agent_span = spans[3] + + # Verify invoke_agent span has aggregated usage from both API calls + # Total: 10 + 20 = 30 input tokens, 5 + 15 = 20 output tokens, 15 + 35 = 50 total + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens"] == 30 + assert invoke_agent_span["attributes"]["gen_ai.usage.output_tokens"] == 20 + assert invoke_agent_span["attributes"]["gen_ai.usage.total_tokens"] == 50 + # Cached tokens should be aggregated: 0 + 5 = 5 + assert invoke_agent_span["attributes"]["gen_ai.usage.input_tokens.cached"] == 5 + # Reasoning tokens should be aggregated: 0 + 3 = 3 + assert ( + invoke_agent_span["attributes"]["gen_ai.usage.output_tokens.reasoning"] == 3 + ) + elif span_streaming or stream_gen_ai_spans: with patch.object( agent_with_tool.model._client._client, "send", @@ -3525,6 +4312,7 @@ def calculator(a: int, b: int) -> int: assert invoke_agent_span["data"]["gen_ai.usage.output_tokens.reasoning"] == 3 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_span_includes_response_model( @@ -3534,6 +4322,7 @@ async def test_invoke_agent_span_includes_response_model( test_agent, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that invoke_agent spans include the response model from the API response. @@ -3581,7 +4370,7 @@ async def test_invoke_agent_span_includes_response_model( serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -3589,9 +4378,13 @@ async def test_invoke_agent_span_includes_response_model( ) as _: sentry_init( integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span", "transaction") @@ -3602,6 +4395,7 @@ async def test_invoke_agent_span_includes_response_model( assert result is not None + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] invoke_agent_span = next( span @@ -3665,6 +4459,7 @@ async def test_invoke_agent_span_includes_response_model( assert ai_client_span["data"]["gen_ai.response.model"] == "gpt-4.1-2025-04-14" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_invoke_agent_span_uses_last_response_model( @@ -3674,6 +4469,7 @@ async def test_invoke_agent_span_uses_last_response_model( test_agent, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that when an agent makes multiple LLM calls (e.g., with tools), @@ -3761,7 +4557,54 @@ def calculator(a: int, b: int) -> int: serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[first_response, second_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + send_default_pii=True, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + result = await agents.Runner.run( + agent_with_tool, + "What is 5 + 3?", + run_config=test_run_config, + ) + + assert result is not None + + sentry_sdk.flush() + spans = [item.payload for item in items] + + invoke_agent_span = spans[3] + first_ai_client_span = spans[0] + second_ai_client_span = spans[2] # After tool span + + # Invoke_agent span uses the LAST response model + assert "gen_ai.response.model" in invoke_agent_span["attributes"] + assert ( + invoke_agent_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + + # Each ai_client span has its own response model from the API + assert ( + first_ai_client_span["attributes"]["gen_ai.response.model"] == "gpt-4-0613" + ) + assert ( + second_ai_client_span["attributes"]["gen_ai.response.model"] + == "gpt-4.1-2025-04-14" + ) + elif span_streaming or stream_gen_ai_spans: with patch.object( agent_with_tool.model._client._client, "send", @@ -4089,6 +4932,7 @@ async def test_streaming_ttft_on_chat_span( assert chat_span._data.get(SPANDATA.GEN_AI_RESPONSE_STREAMING) is True +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( parse_version(OPENAI_AGENTS_VERSION) < (0, 4, 0), @@ -4103,6 +4947,7 @@ async def test_conversation_id_on_all_spans( nonstreaming_responses_model_response, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that gen_ai.conversation.id is set on all AI-related spans when passed to Runner.run(). @@ -4116,7 +4961,52 @@ async def test_conversation_id_on_all_spans( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + result = await agents.Runner.run( + agent, + "Test input", + run_config=test_run_config, + conversation_id="conv_test_123", + ) + + assert result is not None + + sentry_sdk.flush() + spans = [item.payload for item in items] + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + assert spans[2]["attributes"]["gen_ai.conversation.id"] == "conv_test_123" + + # Verify invoke_agent span has conversation_id + assert ( + invoke_agent_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" + ) + + # Verify ai_client span has conversation_id + assert ai_client_span["attributes"]["gen_ai.conversation.id"] == "conv_test_123" + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send", @@ -4126,6 +5016,9 @@ async def test_conversation_id_on_all_spans( integrations=[OpenAIAgentsIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={ + "trace_lifecycle": "stream" if span_streaming else "static" + }, ) items = capture_items("span", "transaction") @@ -4206,6 +5099,7 @@ async def test_conversation_id_on_all_spans( assert ai_client_span["data"]["gen_ai.conversation.id"] == "conv_test_123" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( parse_version(OPENAI_AGENTS_VERSION) < (0, 4, 0), @@ -4219,6 +5113,7 @@ async def test_conversation_id_on_tool_span( test_agent, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that gen_ai.conversation.id is set on tool execution spans when passed to Runner.run(). @@ -4305,7 +5200,52 @@ def simple_tool(message: str) -> str: serialize_pydantic=True, ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent_with_tool.model._client._client, + "send", + side_effect=[tool_response, final_response], + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + await agents.Runner.run( + agent_with_tool, + "Use the tool", + run_config=test_run_config, + conversation_id="conv_tool_test_456", + ) + + sentry_sdk.flush() + spans = [item.payload for item in items] + + # Find the tool span + tool_span = None + for span in spans: + if span.get("name", "").startswith("execute_tool"): + tool_span = span + break + + assert tool_span is not None + # Tool span should have the conversation_id passed to Runner.run() + assert tool_span["attributes"]["gen_ai.conversation.id"] == "conv_tool_test_456" + + # Workflow span should have the same conversation_id + workflow_span = spans[4] + assert workflow_span["is_segment"] is True + + assert ( + workflow_span["attributes"]["gen_ai.conversation.id"] + == "conv_tool_test_456" + ) + elif span_streaming or stream_gen_ai_spans: with patch.object( agent_with_tool.model._client._client, "send", @@ -4389,6 +5329,7 @@ def simple_tool(message: str) -> str: ) +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( parse_version(OPENAI_AGENTS_VERSION) < (0, 4, 0), @@ -4403,6 +5344,7 @@ async def test_no_conversation_id_when_not_provided( nonstreaming_responses_model_response, get_model_response, stream_gen_ai_spans, + span_streaming, ): """ Test that gen_ai.conversation.id is not set when not passed to Runner.run(). @@ -4416,7 +5358,49 @@ async def test_no_conversation_id_when_not_provided( nonstreaming_responses_model_response, serialize_pydantic=True ) - if stream_gen_ai_spans: + if span_streaming: + with patch.object( + agent.model._client._client, + "send", + return_value=response, + ) as _: + sentry_init( + integrations=[OpenAIAgentsIntegration()], + disabled_integrations=[StdlibIntegration], + traces_sample_rate=1.0, + stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream"}, + ) + + items = capture_items("span") + + # Don't pass conversation_id + result = await agents.Runner.run( + agent, "Test input", run_config=test_run_config + ) + + assert result is not None + + sentry_sdk.flush() + spans = [item.payload for item in items] + + workflow_span = spans[2] + assert workflow_span["is_segment"] is True + + invoke_agent_span = next( + span + for span in spans + if span["attributes"]["sentry.op"] == OP.GEN_AI_INVOKE_AGENT + ) + ai_client_span = next( + span for span in spans if span["attributes"]["sentry.op"] == OP.GEN_AI_CHAT + ) + + # Verify conversation_id is NOT set on any spans + assert "gen_ai.conversation.id" not in workflow_span.get("attributes", {}) + assert "gen_ai.conversation.id" not in invoke_agent_span.get("attributes", {}) + assert "gen_ai.conversation.id" not in ai_client_span.get("attributes", {}) + elif span_streaming or stream_gen_ai_spans: with patch.object( agent.model._client._client, "send",