diff --git a/sentry_sdk/integrations/openai_agents/spans/ai_client.py b/sentry_sdk/integrations/openai_agents/spans/ai_client.py
index d325ae86e3..cf99adc5fb 100644
--- a/sentry_sdk/integrations/openai_agents/spans/ai_client.py
+++ b/sentry_sdk/integrations/openai_agents/spans/ai_client.py
@@ -7,6 +7,7 @@
     _set_input_data,
     _set_output_data,
     _set_usage_data,
+    _create_mcp_execute_tool_spans,
 )
 
 from typing import TYPE_CHECKING
@@ -37,3 +38,4 @@ def update_ai_client_span(span, agent, get_response_kwargs, result):
     _set_usage_data(span, result.usage)
     _set_input_data(span, get_response_kwargs)
     _set_output_data(span, result)
+    _create_mcp_execute_tool_spans(span, result)
diff --git a/sentry_sdk/integrations/openai_agents/utils.py b/sentry_sdk/integrations/openai_agents/utils.py
index 73d2858e7f..b0ad6bf903 100644
--- a/sentry_sdk/integrations/openai_agents/utils.py
+++ b/sentry_sdk/integrations/openai_agents/utils.py
@@ -1,6 +1,6 @@
 import sentry_sdk
 from sentry_sdk.ai.utils import set_data_normalized
-from sentry_sdk.consts import SPANDATA
+from sentry_sdk.consts import SPANDATA, SPANSTATUS, OP
 from sentry_sdk.integrations import DidNotEnable
 from sentry_sdk.scope import should_send_default_pii
 from sentry_sdk.tracing_utils import set_span_errored
@@ -156,3 +156,31 @@ def _set_output_data(span, result):
         set_data_normalized(
             span, SPANDATA.GEN_AI_RESPONSE_TEXT, output_messages["response"]
         )
+
+
+def _create_mcp_execute_tool_spans(span, result):
+    # type: (sentry_sdk.tracing.Span, agents.ModelResponse) -> None
+    for output in result.output:
+        # Compare by class name so we don't need a hard import of McpCall,
+        # which may not be available in all versions of the agents package.
+        if output.__class__.__name__ == "McpCall":
+            with sentry_sdk.start_span(
+                op=OP.GEN_AI_EXECUTE_TOOL,
+                description=f"execute_tool {output.name}",
+                # The MCP call already ran inside the model request (its output
+                # is on the result), so backdate this span to the request start.
+                start_timestamp=span.start_timestamp,
+            ) as execute_tool_span:
+                set_data_normalized(execute_tool_span, SPANDATA.GEN_AI_TOOL_TYPE, "mcp")
+                set_data_normalized(
+                    execute_tool_span, SPANDATA.GEN_AI_TOOL_NAME, output.name
+                )
+                if should_send_default_pii():
+                    execute_tool_span.set_data(
+                        SPANDATA.GEN_AI_TOOL_INPUT, output.arguments
+                    )
+                    execute_tool_span.set_data(
+                        SPANDATA.GEN_AI_TOOL_OUTPUT, output.output
+                    )
+                if output.error:
+                    execute_tool_span.set_status(SPANSTATUS.ERROR)
diff --git a/tests/integrations/openai_agents/test_openai_agents.py b/tests/integrations/openai_agents/test_openai_agents.py
index bd7f15faff..1768971c99 100644
--- a/tests/integrations/openai_agents/test_openai_agents.py
+++ b/tests/integrations/openai_agents/test_openai_agents.py
@@ -15,6 +15,7 @@
     ModelSettings,
 )
 from agents.items import (
+    McpCall,
     ResponseOutputMessage,
     ResponseOutputText,
     ResponseFunctionToolCall,
@@ -683,6 +684,313 @@ async def test_span_status_error(sentry_init, capture_events, test_agent):
     assert transaction["contexts"]["trace"]["status"] == "error"
 
 
+@pytest.mark.asyncio
+async def test_mcp_tool_execution_spans(sentry_init, capture_events, test_agent):
+    """
+    Test that MCP (Model Context Protocol) tool calls create execute_tool spans.
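+
+    One gen_ai.execute_tool child span should be emitted for the McpCall output item.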
+ """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a McpCall object + mcp_call = McpCall( + id="mcp_call_123", + name="test_mcp_tool", + arguments='{"query": "search term"}', + output="MCP tool executed successfully", + error=None, + type="mcp_call", + server_label="test_server", + ) + + # Create a ModelResponse with an McpCall in the output + mcp_response = ModelResponse( + output=[mcp_call], + usage=Usage( + requests=1, + input_tokens=10, + output_tokens=5, + total_tokens=15, + ), + response_id="resp_mcp_123", + ) + + # Final response after MCP tool execution + final_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Task completed using MCP tool", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=Usage( + requests=1, + input_tokens=15, + output_tokens=10, + total_tokens=25, + ), + response_id="resp_final_123", + ) + + mock_get_response.side_effect = [mcp_response, final_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + await agents.Runner.run( + test_agent, + "Please use MCP tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if ( + span.get("description") == "execute_tool test_mcp_tool" + and span.get("data", {}).get("gen_ai.tool.type") == "mcp" + ): + mcp_tool_span = span + break + + # Verify the MCP tool span was created + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "search term"}' + assert ( + mcp_tool_span["data"]["gen_ai.tool.output"] == "MCP tool executed successfully" + ) + + # Verify no error status since error was None + assert mcp_tool_span.get("tags", {}).get("status") != "error" + + +@pytest.mark.asyncio +async def test_mcp_tool_execution_with_error(sentry_init, capture_events, test_agent): + """ + Test that MCP tool calls with errors are tracked with error status. 
+ """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a McpCall object with an error + mcp_call_with_error = McpCall( + id="mcp_call_error_123", + name="failing_mcp_tool", + arguments='{"query": "test"}', + output=None, + error="MCP tool execution failed", + type="mcp_call", + server_label="test_server", + ) + + # Create a ModelResponse with a failing McpCall + mcp_response = ModelResponse( + output=[mcp_call_with_error], + usage=Usage( + requests=1, + input_tokens=10, + output_tokens=5, + total_tokens=15, + ), + response_id="resp_mcp_error_123", + ) + + # Final response after error + final_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="The MCP tool encountered an error", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=Usage( + requests=1, + input_tokens=15, + output_tokens=10, + total_tokens=25, + ), + response_id="resp_final_error_123", + ) + + mock_get_response.side_effect = [mcp_response, final_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=True, + ) + + events = capture_events() + + await agents.Runner.run( + test_agent, + "Please use failing MCP tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + + # Find the MCP execute_tool span with error + mcp_tool_span = None + for span in spans: + if ( + span.get("description") == "execute_tool failing_mcp_tool" + and span.get("data", {}).get("gen_ai.tool.type") == "mcp" + ): + mcp_tool_span = span + break + + # Verify the MCP tool span was created with error status + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool failing_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "failing_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.input"] == '{"query": "test"}' + assert mcp_tool_span["data"]["gen_ai.tool.output"] is None + + # Verify error status was set + assert mcp_tool_span["tags"]["status"] == "error" + + +@pytest.mark.asyncio +async def test_mcp_tool_execution_without_pii(sentry_init, capture_events, test_agent): + """ + Test that MCP tool input/output are not included when send_default_pii is False. 
+ """ + + with patch.dict(os.environ, {"OPENAI_API_KEY": "test-key"}): + with patch( + "agents.models.openai_responses.OpenAIResponsesModel.get_response" + ) as mock_get_response: + # Create a McpCall object + mcp_call = McpCall( + id="mcp_call_pii_123", + name="test_mcp_tool", + arguments='{"query": "sensitive data"}', + output="Result with sensitive info", + error=None, + type="mcp_call", + server_label="test_server", + ) + + # Create a ModelResponse with an McpCall + mcp_response = ModelResponse( + output=[mcp_call], + usage=Usage( + requests=1, + input_tokens=10, + output_tokens=5, + total_tokens=15, + ), + response_id="resp_mcp_123", + ) + + # Final response + final_response = ModelResponse( + output=[ + ResponseOutputMessage( + id="msg_final", + type="message", + status="completed", + content=[ + ResponseOutputText( + text="Task completed", + type="output_text", + annotations=[], + ) + ], + role="assistant", + ) + ], + usage=Usage( + requests=1, + input_tokens=15, + output_tokens=10, + total_tokens=25, + ), + response_id="resp_final_123", + ) + + mock_get_response.side_effect = [mcp_response, final_response] + + sentry_init( + integrations=[OpenAIAgentsIntegration()], + traces_sample_rate=1.0, + send_default_pii=False, # PII disabled + ) + + events = capture_events() + + await agents.Runner.run( + test_agent, + "Please use MCP tool", + run_config=test_run_config, + ) + + (transaction,) = events + spans = transaction["spans"] + + # Find the MCP execute_tool span + mcp_tool_span = None + for span in spans: + if ( + span.get("description") == "execute_tool test_mcp_tool" + and span.get("data", {}).get("gen_ai.tool.type") == "mcp" + ): + mcp_tool_span = span + break + + # Verify the MCP tool span was created but without input/output + assert mcp_tool_span is not None, "MCP execute_tool span was not created" + assert mcp_tool_span["description"] == "execute_tool test_mcp_tool" + assert mcp_tool_span["data"]["gen_ai.tool.type"] == "mcp" + assert mcp_tool_span["data"]["gen_ai.tool.name"] == "test_mcp_tool" + + # Verify input and output are not included when send_default_pii is False + assert "gen_ai.tool.input" not in mcp_tool_span["data"] + assert "gen_ai.tool.output" not in mcp_tool_span["data"] + + @pytest.mark.asyncio async def test_multiple_agents_asyncio( sentry_init, capture_events, test_agent, mock_model_response