diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 295ab96d34..186c665ed1 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -34,6 +34,11 @@ from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.traces import StreamedSpan +from sentry_sdk.tracing_utils import ( + has_span_streaming_enabled, + should_truncate_gen_ai_input, +) from sentry_sdk.utils import ( capture_internal_exceptions, event_from_exception, @@ -157,7 +162,7 @@ def _has_attr_and_is_int( def _calculate_completions_token_usage( messages: "Optional[Iterable[ChatCompletionMessageParam]]", response: "Any", - span: "Span", + span: "Union[Span, StreamedSpan]", streaming_message_responses: "Optional[List[str]]", streaming_message_total_token_usage: "Optional[CompletionUsage]", count_tokens: "Callable[..., Any]", @@ -239,7 +244,7 @@ def _calculate_completions_token_usage( def _calculate_responses_token_usage( input: "Any", response: "Any", - span: "Span", + span: "Union[Span, StreamedSpan]", streaming_message_responses: "Optional[List[str]]", count_tokens: "Callable[..., Any]", ) -> None: @@ -315,7 +320,7 @@ def _calculate_responses_token_usage( def _set_responses_api_input_data( - span: "Span", + span: "Union[Span, StreamedSpan]", kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ) -> None: @@ -328,21 +333,24 @@ def _set_responses_api_input_data( span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools) ) + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) model = kwargs.get("model") if model is not None: - span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) + set_on_span(SPANDATA.GEN_AI_REQUEST_MODEL, model) max_tokens = kwargs.get("max_output_tokens") if max_tokens is not None and _is_given(max_tokens): - span.set_data(SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, max_tokens) + set_on_span(SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, max_tokens) temperature = kwargs.get("temperature") if temperature is not None and _is_given(temperature): - span.set_data(SPANDATA.GEN_AI_REQUEST_TEMPERATURE, temperature) + set_on_span(SPANDATA.GEN_AI_REQUEST_TEMPERATURE, temperature) top_p = kwargs.get("top_p") if top_p is not None and _is_given(top_p): - span.set_data(SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) + set_on_span(SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) conversation = kwargs.get("conversation") if conversation is not None and _is_given(conversation): @@ -352,7 +360,7 @@ def _set_responses_api_input_data( elif isinstance(conversation, dict): conversation_id = conversation.get("id") if conversation_id is not None: - span.set_data(SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id) + set_on_span(SPANDATA.GEN_AI_CONVERSATION_ID, conversation_id) if not should_send_default_pii() or not integration.include_prompts: set_data_normalized(span, SPANDATA.GEN_AI_OPERATION_NAME, "responses") @@ -363,7 +371,7 @@ def _set_responses_api_input_data( and explicit_instructions is not None and _is_given(explicit_instructions) ): - span.set_data( + set_on_span( SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, json.dumps( [ @@ -397,7 +405,7 @@ def _set_responses_api_input_data( instructions_text_parts += _transform_system_instructions(system_instructions) if len(instructions_text_parts) > 0: - span.set_data( + set_on_span( SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, json.dumps(instructions_text_parts), ) @@ -407,9 +415,9 @@ def _set_responses_api_input_data( client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages_data = ( - normalized_messages - if client.options.get("stream_gen_ai_spans", False) - else truncate_and_annotate_messages(normalized_messages, span, scope) + truncate_and_annotate_messages(normalized_messages, span, scope) + if should_truncate_gen_ai_input(client.options) + else normalized_messages ) if messages_data is not None: set_data_normalized( @@ -427,9 +435,9 @@ def _set_responses_api_input_data( client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages_data = ( - normalized_messages - if client.options.get("stream_gen_ai_spans", False) - else truncate_and_annotate_messages(normalized_messages, span, scope) + truncate_and_annotate_messages(normalized_messages, span, scope) + if should_truncate_gen_ai_input(client.options) + else normalized_messages ) if messages_data is not None: set_data_normalized( @@ -440,7 +448,7 @@ def _set_responses_api_input_data( def _set_completions_api_input_data( - span: "Span", + span: "Union[Span, StreamedSpan]", kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ) -> None: @@ -454,29 +462,32 @@ def _set_completions_api_input_data( span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools) ) + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) model = kwargs.get("model") if model is not None: - span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) + set_on_span(SPANDATA.GEN_AI_REQUEST_MODEL, model) max_tokens = kwargs.get("max_tokens") if max_tokens is not None and _is_given(max_tokens): - span.set_data(SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, max_tokens) + set_on_span(SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, max_tokens) presence_penalty = kwargs.get("presence_penalty") if presence_penalty is not None and _is_given(presence_penalty): - span.set_data(SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, presence_penalty) + set_on_span(SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, presence_penalty) frequency_penalty = kwargs.get("frequency_penalty") if frequency_penalty is not None and _is_given(frequency_penalty): - span.set_data(SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, frequency_penalty) + set_on_span(SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, frequency_penalty) temperature = kwargs.get("temperature") if temperature is not None and _is_given(temperature): - span.set_data(SPANDATA.GEN_AI_REQUEST_TEMPERATURE, temperature) + set_on_span(SPANDATA.GEN_AI_REQUEST_TEMPERATURE, temperature) top_p = kwargs.get("top_p") if top_p is not None and _is_given(top_p): - span.set_data(SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) + set_on_span(SPANDATA.GEN_AI_REQUEST_TOP_P, top_p) if ( not should_send_default_pii() @@ -491,9 +502,9 @@ def _set_completions_api_input_data( client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages_data = ( - normalized_messages - if client.options.get("stream_gen_ai_spans", False) - else truncate_and_annotate_messages(normalized_messages, span, scope) + truncate_and_annotate_messages(normalized_messages, span, scope) + if should_truncate_gen_ai_input(client.options) + else normalized_messages ) if messages_data is not None: set_data_normalized( @@ -512,7 +523,7 @@ def _set_completions_api_input_data( system_instructions = _get_system_instructions_completions(messages) if len(system_instructions) > 0: - span.set_data( + set_on_span( SPANDATA.GEN_AI_SYSTEM_INSTRUCTIONS, json.dumps(_transform_system_instructions(system_instructions)), ) @@ -527,9 +538,9 @@ def _set_completions_api_input_data( client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages_data = ( - normalized_messages - if client.options.get("stream_gen_ai_spans", False) - else truncate_and_annotate_messages(normalized_messages, span, scope) + truncate_and_annotate_messages(normalized_messages, span, scope) + if should_truncate_gen_ai_input(client.options) + else normalized_messages ) if messages_data is not None: set_data_normalized( @@ -540,7 +551,7 @@ def _set_completions_api_input_data( def _set_embeddings_input_data( - span: "Span", + span: "Union[Span, StreamedSpan]", kwargs: "dict[str, Any]", integration: "OpenAIIntegration", ) -> None: @@ -548,9 +559,12 @@ def _set_embeddings_input_data( "input" ) + set_on_span = ( + span.set_attribute if isinstance(span, StreamedSpan) else span.set_data + ) model = kwargs.get("model") if model is not None: - span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) + set_on_span(SPANDATA.GEN_AI_REQUEST_MODEL, model) if ( not should_send_default_pii() @@ -568,11 +582,9 @@ def _set_embeddings_input_data( client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages_data = ( - normalized_messages - if client.options.get("stream_gen_ai_spans", False) - else truncate_and_annotate_embedding_inputs( - normalized_messages, span, scope - ) + truncate_and_annotate_embedding_inputs(normalized_messages, span, scope) + if should_truncate_gen_ai_input(client.options) + else normalized_messages ) if messages_data is not None: set_data_normalized( @@ -594,11 +606,9 @@ def _set_embeddings_input_data( client = sentry_sdk.get_client() scope = sentry_sdk.get_current_scope() messages_data = ( - normalized_messages - if client.options.get("stream_gen_ai_spans", False) - else truncate_and_annotate_embedding_inputs( - normalized_messages, span, scope - ) + truncate_and_annotate_embedding_inputs(normalized_messages, span, scope) + if should_truncate_gen_ai_input(client.options) + else normalized_messages ) if messages_data is not None: set_data_normalized( @@ -609,7 +619,7 @@ def _set_embeddings_input_data( def _set_common_output_data( - span: "Span", + span: "Union[Span, StreamedSpan]", response: "Any", input: "Any", integration: "OpenAIIntegration", @@ -698,7 +708,8 @@ def _set_common_output_data( def _new_sync_chat_completion(f: "Any", *args: "Any", **kwargs: "Any") -> "Any": - integration = sentry_sdk.get_client().get_integration(OpenAIIntegration) + client = sentry_sdk.get_client() + integration = client.get_integration(OpenAIIntegration) if integration is None: return f(*args, **kwargs) @@ -714,18 +725,30 @@ def _new_sync_chat_completion(f: "Any", *args: "Any", **kwargs: "Any") -> "Any": model = kwargs.get("model") - span = get_start_span_function()( - op=consts.OP.GEN_AI_CHAT, - name=f"chat {model}", - origin=OpenAIIntegration.origin, - ) - span.__enter__() - - span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") - # Same bool handling as in https://github.com/openai/openai-python/blob/acd0c54d8a68efeedde0e5b4e6c310eef1ce7867/src/openai/resources/completions.py#L585 is_streaming_response = kwargs.get("stream", False) or False - span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, is_streaming_response) + + if has_span_streaming_enabled(client.options): + span = sentry_sdk.traces.start_span( + name=f"chat {model}", + attributes={ + "sentry.op": consts.OP.GEN_AI_CHAT, + "sentry.origin": OpenAIIntegration.origin, + SPANDATA.GEN_AI_SYSTEM: "openai", + SPANDATA.GEN_AI_RESPONSE_STREAMING: is_streaming_response, + }, + ) + + else: + span = get_start_span_function()( + op=consts.OP.GEN_AI_CHAT, + name=f"chat {model}", + origin=OpenAIIntegration.origin, + ) + span.__enter__() + + span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") + span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, is_streaming_response) _set_completions_api_input_data(span, kwargs, integration) @@ -766,7 +789,8 @@ def _new_sync_chat_completion(f: "Any", *args: "Any", **kwargs: "Any") -> "Any": async def _new_async_chat_completion(f: "Any", *args: "Any", **kwargs: "Any") -> "Any": - integration = sentry_sdk.get_client().get_integration(OpenAIIntegration) + client = sentry_sdk.get_client() + integration = client.get_integration(OpenAIIntegration) if integration is None: return await f(*args, **kwargs) @@ -782,18 +806,29 @@ async def _new_async_chat_completion(f: "Any", *args: "Any", **kwargs: "Any") -> model = kwargs.get("model") - span = get_start_span_function()( - op=consts.OP.GEN_AI_CHAT, - name=f"chat {model}", - origin=OpenAIIntegration.origin, - ) - span.__enter__() - - span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") - # Same bool handling as in https://github.com/openai/openai-python/blob/acd0c54d8a68efeedde0e5b4e6c310eef1ce7867/src/openai/resources/completions.py#L585 is_streaming_response = kwargs.get("stream", False) or False - span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, is_streaming_response) + + if has_span_streaming_enabled(client.options): + span = sentry_sdk.traces.start_span( + name=f"chat {model}", + attributes={ + "sentry.op": consts.OP.GEN_AI_CHAT, + "sentry.origin": OpenAIIntegration.origin, + SPANDATA.GEN_AI_SYSTEM: "openai", + SPANDATA.GEN_AI_RESPONSE_STREAMING: is_streaming_response, + }, + ) + else: + span = get_start_span_function()( + op=consts.OP.GEN_AI_CHAT, + name=f"chat {model}", + origin=OpenAIIntegration.origin, + ) + span.__enter__() + + span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") + span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, is_streaming_response) _set_completions_api_input_data(span, kwargs, integration) @@ -833,7 +868,7 @@ async def _new_async_chat_completion(f: "Any", *args: "Any", **kwargs: "Any") -> def _set_completions_api_output_data( - span: "Span", + span: "Union[Span, StreamedSpan]", response: "Any", kwargs: "dict[str, Any]", integration: "OpenAIIntegration", @@ -854,7 +889,7 @@ def _set_completions_api_output_data( def _wrap_synchronous_completions_chunk_iterator( - span: "Span", + span: "Union[Span, StreamedSpan]", integration: "OpenAIIntegration", start_time: "Optional[float]", messages: "Optional[Iterable[ChatCompletionMessageParam]]", @@ -872,7 +907,10 @@ def _wrap_synchronous_completions_chunk_iterator( streaming_message_total_token_usage = None for x in old_iterator: - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model) + if isinstance(span, StreamedSpan): + span.set_attribute(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model) + else: + span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model) with capture_internal_exceptions(): if hasattr(x, "choices") and x.choices is not None: @@ -916,7 +954,7 @@ def _wrap_synchronous_completions_chunk_iterator( async def _wrap_asynchronous_completions_chunk_iterator( - span: "Span", + span: "Union[Span, StreamedSpan]", integration: "OpenAIIntegration", start_time: "Optional[float]", messages: "Optional[Iterable[ChatCompletionMessageParam]]", @@ -934,7 +972,10 @@ async def _wrap_asynchronous_completions_chunk_iterator( streaming_message_total_token_usage = None async for x in old_iterator: - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model) + if isinstance(span, StreamedSpan): + span.set_attribute(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model) + else: + span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.model) with capture_internal_exceptions(): if hasattr(x, "choices") and x.choices is not None: @@ -978,7 +1019,7 @@ async def _wrap_asynchronous_completions_chunk_iterator( def _wrap_synchronous_responses_event_iterator( - span: "Span", + span: "Union[Span, StreamedSpan]", integration: "OpenAIIntegration", start_time: "Optional[float]", input: "Optional[Union[str, ResponseInputParam]]", @@ -1005,7 +1046,10 @@ def _wrap_synchronous_responses_event_iterator( data_buf[0].append(x.delta or "") if isinstance(x, ResponseCompletedEvent): - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.response.model) + if isinstance(span, StreamedSpan): + span.set_attribute(SPANDATA.GEN_AI_RESPONSE_MODEL, x.response.model) + else: + span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.response.model) _calculate_responses_token_usage( input=input, @@ -1042,7 +1086,7 @@ def _wrap_synchronous_responses_event_iterator( async def _wrap_asynchronous_responses_event_iterator( - span: "Span", + span: "Union[Span, StreamedSpan]", integration: "OpenAIIntegration", start_time: "Optional[float]", input: "Optional[Union[str, ResponseInputParam]]", @@ -1069,7 +1113,10 @@ async def _wrap_asynchronous_responses_event_iterator( data_buf[0].append(x.delta or "") if isinstance(x, ResponseCompletedEvent): - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.response.model) + if isinstance(span, StreamedSpan): + span.set_attribute(SPANDATA.GEN_AI_RESPONSE_MODEL, x.response.model) + else: + span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, x.response.model) _calculate_responses_token_usage( input=input, @@ -1104,7 +1151,7 @@ async def _wrap_asynchronous_responses_event_iterator( def _set_responses_api_output_data( - span: "Span", + span: "Union[Span, StreamedSpan]", response: "Any", kwargs: "dict[str, Any]", integration: "OpenAIIntegration", @@ -1125,7 +1172,7 @@ def _set_responses_api_output_data( def _set_embeddings_output_data( - span: "Span", + span: "Union[Span, StreamedSpan]", response: "Any", kwargs: "dict[str, Any]", integration: "OpenAIIntegration", @@ -1172,65 +1219,117 @@ async def _sentry_patched_create_async(*args: "Any", **kwargs: "Any") -> "Any": def _new_sync_embeddings_create(f: "Any", *args: "Any", **kwargs: "Any") -> "Any": - integration = sentry_sdk.get_client().get_integration(OpenAIIntegration) + client = sentry_sdk.get_client() + integration = client.get_integration(OpenAIIntegration) if integration is None: return f(*args, **kwargs) model = kwargs.get("model") - with get_start_span_function()( - op=consts.OP.GEN_AI_EMBEDDINGS, - name=f"embeddings {model}", - origin=OpenAIIntegration.origin, - ) as span: - span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") - _set_embeddings_input_data(span, kwargs, integration) + if has_span_streaming_enabled(client.options): + with sentry_sdk.traces.start_span( + name=f"embeddings {model}", + attributes={ + "sentry.op": consts.OP.GEN_AI_EMBEDDINGS, + "sentry.origin": OpenAIIntegration.origin, + SPANDATA.GEN_AI_SYSTEM: "openai", + }, + ) as span: + _set_embeddings_input_data(span, kwargs, integration) + + try: + response = f(*args, **kwargs) + except Exception as exc: + exc_info = sys.exc_info() + with capture_internal_exceptions(): + _capture_exception(exc) + reraise(*exc_info) + + _set_embeddings_output_data( + span, response, kwargs, integration, finish_span=False + ) - try: - response = f(*args, **kwargs) - except Exception as exc: - exc_info = sys.exc_info() - with capture_internal_exceptions(): - _capture_exception(exc) - reraise(*exc_info) - - _set_embeddings_output_data( - span, response, kwargs, integration, finish_span=False - ) + return response + else: + with get_start_span_function()( + op=consts.OP.GEN_AI_EMBEDDINGS, + name=f"embeddings {model}", + origin=OpenAIIntegration.origin, + ) as span: + span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") + _set_embeddings_input_data(span, kwargs, integration) + + try: + response = f(*args, **kwargs) + except Exception as exc: + exc_info = sys.exc_info() + with capture_internal_exceptions(): + _capture_exception(exc) + reraise(*exc_info) + + _set_embeddings_output_data( + span, response, kwargs, integration, finish_span=False + ) - return response + return response async def _new_async_embeddings_create( f: "Any", *args: "Any", **kwargs: "Any" ) -> "Any": - integration = sentry_sdk.get_client().get_integration(OpenAIIntegration) + client = sentry_sdk.get_client() + integration = client.get_integration(OpenAIIntegration) if integration is None: return await f(*args, **kwargs) model = kwargs.get("model") - with get_start_span_function()( - op=consts.OP.GEN_AI_EMBEDDINGS, - name=f"embeddings {model}", - origin=OpenAIIntegration.origin, - ) as span: - span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") - _set_embeddings_input_data(span, kwargs, integration) + if has_span_streaming_enabled(client.options): + with sentry_sdk.traces.start_span( + name=f"embeddings {model}", + attributes={ + "sentry.op": consts.OP.GEN_AI_EMBEDDINGS, + "sentry.origin": OpenAIIntegration.origin, + SPANDATA.GEN_AI_SYSTEM: "openai", + }, + ) as span: + _set_embeddings_input_data(span, kwargs, integration) + + try: + response = await f(*args, **kwargs) + except Exception as exc: + exc_info = sys.exc_info() + with capture_internal_exceptions(): + _capture_exception(exc) + reraise(*exc_info) + + _set_embeddings_output_data( + span, response, kwargs, integration, finish_span=False + ) - try: - response = await f(*args, **kwargs) - except Exception as exc: - exc_info = sys.exc_info() - with capture_internal_exceptions(): - _capture_exception(exc) - reraise(*exc_info) - - _set_embeddings_output_data( - span, response, kwargs, integration, finish_span=False - ) + return response + else: + with get_start_span_function()( + op=consts.OP.GEN_AI_EMBEDDINGS, + name=f"embeddings {model}", + origin=OpenAIIntegration.origin, + ) as span: + span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") + _set_embeddings_input_data(span, kwargs, integration) + + try: + response = await f(*args, **kwargs) + except Exception as exc: + exc_info = sys.exc_info() + with capture_internal_exceptions(): + _capture_exception(exc) + reraise(*exc_info) + + _set_embeddings_output_data( + span, response, kwargs, integration, finish_span=False + ) - return response + return response def _wrap_embeddings_create(f: "Any") -> "Any": @@ -1258,24 +1357,36 @@ async def _sentry_patched_create_async(*args: "Any", **kwargs: "Any") -> "Any": def _new_sync_responses_create(f: "Any", *args: "Any", **kwargs: "Any") -> "Any": - integration = sentry_sdk.get_client().get_integration(OpenAIIntegration) + client = sentry_sdk.get_client() + integration = client.get_integration(OpenAIIntegration) if integration is None: return f(*args, **kwargs) model = kwargs.get("model") - span = get_start_span_function()( - op=consts.OP.GEN_AI_RESPONSES, - name=f"responses {model}", - origin=OpenAIIntegration.origin, - ) - span.__enter__() - - span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") - # Same bool handling as in https://github.com/openai/openai-python/blob/acd0c54d8a68efeedde0e5b4e6c310eef1ce7867/src/openai/resources/responses/responses.py#L940 is_streaming_response = kwargs.get("stream", False) or False - span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, is_streaming_response) + + if has_span_streaming_enabled(client.options): + span = sentry_sdk.traces.start_span( + name=f"responses {model}", + attributes={ + "sentry.op": consts.OP.GEN_AI_RESPONSES, + "sentry.origin": OpenAIIntegration.origin, + SPANDATA.GEN_AI_SYSTEM: "openai", + SPANDATA.GEN_AI_RESPONSE_STREAMING: is_streaming_response, + }, + ) + else: + span = get_start_span_function()( + op=consts.OP.GEN_AI_RESPONSES, + name=f"responses {model}", + origin=OpenAIIntegration.origin, + ) + span.__enter__() + + span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") + span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, is_streaming_response) _set_responses_api_input_data(span, kwargs, integration) @@ -1316,24 +1427,36 @@ def _new_sync_responses_create(f: "Any", *args: "Any", **kwargs: "Any") -> "Any" async def _new_async_responses_create(f: "Any", *args: "Any", **kwargs: "Any") -> "Any": - integration = sentry_sdk.get_client().get_integration(OpenAIIntegration) + client = sentry_sdk.get_client() + integration = client.get_integration(OpenAIIntegration) if integration is None: return await f(*args, **kwargs) model = kwargs.get("model") - span = get_start_span_function()( - op=consts.OP.GEN_AI_RESPONSES, - name=f"responses {model}", - origin=OpenAIIntegration.origin, - ) - span.__enter__() - - span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") - # Same bool handling as in https://github.com/openai/openai-python/blob/acd0c54d8a68efeedde0e5b4e6c310eef1ce7867/src/openai/resources/responses/responses.py#L940 is_streaming_response = kwargs.get("stream", False) or False - span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, is_streaming_response) + + if has_span_streaming_enabled(client.options): + span = sentry_sdk.traces.start_span( + name=f"responses {model}", + attributes={ + "sentry.op": consts.OP.GEN_AI_RESPONSES, + "sentry.origin": OpenAIIntegration.origin, + SPANDATA.GEN_AI_SYSTEM: "openai", + SPANDATA.GEN_AI_RESPONSE_STREAMING: is_streaming_response, + }, + ) + else: + span = get_start_span_function()( + op=consts.OP.GEN_AI_RESPONSES, + name=f"responses {model}", + origin=OpenAIIntegration.origin, + ) + span.__enter__() + + span.set_data(SPANDATA.GEN_AI_SYSTEM, "openai") + span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, is_streaming_response) _set_responses_api_input_data(span, kwargs, integration) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 7f18356be9..dc2a046fa7 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -2,6 +2,7 @@ import pytest +import sentry_sdk from sentry_sdk.utils import package_version try: @@ -50,6 +51,7 @@ _calculate_completions_token_usage, _calculate_responses_token_usage, ) +from sentry_sdk.integrations.stdlib import StdlibIntegration from sentry_sdk.utils import safe_serialize try: @@ -104,6 +106,7 @@ async def __call__(self, *args, **kwargs): ) +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -121,12 +124,15 @@ def test_nonstreaming_chat_completion_no_prompts( include_prompts, nonstreaming_chat_completions_model_response, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=send_default_pii, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -144,7 +150,7 @@ def test_nonstreaming_chat_completion_no_prompts( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -166,6 +172,7 @@ def test_nonstreaming_chat_completion_no_prompts( ) assert response == "the model response" + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -230,6 +237,7 @@ def test_nonstreaming_chat_completion_no_prompts( assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "get_messages,expected_system_instructions", @@ -317,12 +325,15 @@ def test_nonstreaming_chat_completion( expected_system_instructions, nonstreaming_chat_completions_model_response, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -340,7 +351,7 @@ def test_nonstreaming_chat_completion( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -359,6 +370,7 @@ def test_nonstreaming_chat_completion( ) assert response == "the model response" + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -432,6 +444,7 @@ def test_nonstreaming_chat_completion( assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -450,12 +463,15 @@ async def test_nonstreaming_chat_completion_async_no_prompts( include_prompts, nonstreaming_chat_completions_model_response, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=send_default_pii, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -473,7 +489,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -492,6 +508,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( response = response.choices[0].message.content assert response == "the model response" + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -553,6 +570,7 @@ async def test_nonstreaming_chat_completion_async_no_prompts( assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -641,12 +659,15 @@ async def test_nonstreaming_chat_completion_async( expected_system_instructions, nonstreaming_chat_completions_model_response, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -664,7 +685,7 @@ async def test_nonstreaming_chat_completion_async( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -680,6 +701,7 @@ async def test_nonstreaming_chat_completion_async( response = response.choices[0].message.content assert response == "the model response" + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -760,6 +782,7 @@ def tiktoken_encoding_if_installed(): # noinspection PyTypeChecker +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -778,6 +801,7 @@ def test_streaming_chat_completion_no_prompts( get_model_response, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[ @@ -786,9 +810,11 @@ def test_streaming_chat_completion_no_prompts( tiktoken_encoding_name=tiktoken_encoding_if_installed(), ) ], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=send_default_pii, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -839,7 +865,7 @@ def test_streaming_chat_completion_no_prompts( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -865,6 +891,7 @@ def test_streaming_chat_completion_no_prompts( ) assert response_string == "hello world" + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -947,6 +974,7 @@ def test_streaming_chat_completion_no_prompts( pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), @@ -959,13 +987,16 @@ def test_streaming_chat_completion_with_usage_in_stream( get_model_response, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): """When stream_options=include_usage is set, token usage comes from the final chunk's usage field.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=False)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=False, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -1008,7 +1039,7 @@ def test_streaming_chat_completion_with_usage_in_stream( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -1025,6 +1056,7 @@ def test_streaming_chat_completion_with_usage_in_stream( for _ in response_stream: pass + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 @@ -1056,6 +1088,7 @@ def test_streaming_chat_completion_with_usage_in_stream( assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), @@ -1068,13 +1101,16 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( get_model_response, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): """Token usage from the stream is recorded even when no content is produced (e.g. content filter).""" sentry_init( integrations=[OpenAIIntegration(include_prompts=False)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=False, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -1098,7 +1134,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -1115,6 +1151,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( for _ in response_stream: pass + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 @@ -1146,6 +1183,7 @@ def test_streaming_chat_completion_empty_content_preserves_token_usage( assert span["data"]["gen_ai.usage.total_tokens"] == 20 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), @@ -1160,13 +1198,16 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy async_iterator, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): """Token usage from the stream is recorded even when no content is produced - async variant.""" sentry_init( integrations=[OpenAIIntegration(include_prompts=False)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=False, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -1192,7 +1233,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -1209,6 +1250,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy async for _ in response_stream: pass + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 @@ -1240,6 +1282,7 @@ async def test_streaming_chat_completion_empty_content_preserves_token_usage_asy assert span["data"]["gen_ai.usage.total_tokens"] == 20 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), @@ -1254,13 +1297,16 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( async_iterator, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): """When stream_options=include_usage is set, token usage comes from the final chunk's usage field (async).""" sentry_init( integrations=[OpenAIIntegration(include_prompts=False)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=False, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -1305,7 +1351,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -1322,6 +1368,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( async for _ in response_stream: pass + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"]["gen_ai.usage.input_tokens"] == 20 @@ -1354,6 +1401,7 @@ async def test_streaming_chat_completion_async_with_usage_in_stream( # noinspection PyTypeChecker +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "get_messages,expected_system_instructions,expected_output_tokens,expected_input_tokens", @@ -1450,6 +1498,7 @@ def test_streaming_chat_completion( get_model_response, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[ @@ -1458,9 +1507,11 @@ def test_streaming_chat_completion( tiktoken_encoding_name=tiktoken_encoding_if_installed(), ) ], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -1511,7 +1562,7 @@ def test_streaming_chat_completion( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -1533,6 +1584,7 @@ def test_streaming_chat_completion( map(lambda x: x.choices[0].delta.content, response_stream) ) assert response_string == "hello world" + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -1637,6 +1689,7 @@ def test_streaming_chat_completion( # noinspection PyTypeChecker +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -1657,6 +1710,7 @@ async def test_streaming_chat_completion_async_no_prompts( async_iterator, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[ @@ -1665,9 +1719,11 @@ async def test_streaming_chat_completion_async_no_prompts( tiktoken_encoding_name=tiktoken_encoding_if_installed(), ) ], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=send_default_pii, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -1720,7 +1776,7 @@ async def test_streaming_chat_completion_async_no_prompts( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -1747,6 +1803,7 @@ async def test_streaming_chat_completion_async_no_prompts( response_string += x.choices[0].delta.content assert response_string == "hello world" + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -1833,6 +1890,7 @@ async def test_streaming_chat_completion_async_no_prompts( # noinspection PyTypeChecker +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -1931,6 +1989,7 @@ async def test_streaming_chat_completion_async( async_iterator, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[ @@ -1939,9 +1998,11 @@ async def test_streaming_chat_completion_async( tiktoken_encoding_name=tiktoken_encoding_if_installed(), ) ], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -1995,7 +2056,7 @@ async def test_streaming_chat_completion_async( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -2019,6 +2080,7 @@ async def test_streaming_chat_completion_async( response_string += x.choices[0].delta.content assert response_string == "hello world" + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -2124,20 +2186,42 @@ async def test_streaming_chat_completion_async( pass # if tiktoken is not installed, we can't guarantee token usage will be calculated properly +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_bad_chat_completion( sentry_init, capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("event", "span") + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock( + side_effect=OpenAIError("API rate limit reached") + ) + with pytest.raises(OpenAIError): + client.chat.completions.create( + model="some-model", + messages=[{"role": "system", "content": "hello"}], + ) + + (event,) = (item.payload for item in items if item.type == "event") + sentry_sdk.flush() + (span,) = (item.payload for item in items if item.type == "span") + assert event["level"] == "error" + assert span["status"] == "error" + elif stream_gen_ai_spans: items = capture_items("event", "transaction") client = OpenAI(api_key="z") @@ -2152,6 +2236,8 @@ def test_bad_chat_completion( (event,) = (item.payload for item in items if item.type == "event") (transaction,) = (item.payload for item in items if item.type == "transaction") + assert event["level"] == "error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" else: events = capture_events() @@ -2166,25 +2252,28 @@ def test_bad_chat_completion( ) (event, transaction) = events - - assert event["level"] == "error" - assert transaction["contexts"]["trace"]["status"] == "internal_error" + assert event["level"] == "error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_span_status_error( sentry_init, capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("event", "transaction", "span") with start_transaction(name="test"): @@ -2201,6 +2290,7 @@ def test_span_status_error( (error,) = (item.payload for item in items if item.type == "event") assert error["level"] == "error" + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] assert spans[0]["status"] == "error" else: @@ -2223,6 +2313,7 @@ def test_span_status_error( assert transaction["spans"][0]["tags"]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_bad_chat_completion_async( @@ -2230,18 +2321,35 @@ async def test_bad_chat_completion_async( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") client.chat.completions._post = AsyncMock( side_effect=OpenAIError("API rate limit reached") ) - if stream_gen_ai_spans: + + if span_streaming: + items = capture_items("event", "span") + + with pytest.raises(OpenAIError): + await client.chat.completions.create( + model="some-model", messages=[{"role": "system", "content": "hello"}] + ) + + (event,) = (item.payload for item in items if item.type == "event") + sentry_sdk.flush() + (span,) = (item.payload for item in items if item.type == "span") + assert event["level"] == "error" + assert span["status"] == "error" + elif stream_gen_ai_spans: items = capture_items("event", "transaction") with pytest.raises(OpenAIError): @@ -2251,6 +2359,8 @@ async def test_bad_chat_completion_async( (event,) = (item.payload for item in items if item.type == "event") (transaction,) = (item.payload for item in items if item.type == "transaction") + assert event["level"] == "error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" else: events = capture_events() @@ -2260,11 +2370,11 @@ async def test_bad_chat_completion_async( ) (event, transaction) = events - - assert event["level"] == "error" - assert transaction["contexts"]["trace"]["status"] == "internal_error" + assert event["level"] == "error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -2281,12 +2391,15 @@ def test_embeddings_create_no_pii( send_default_pii, include_prompts, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=send_default_pii, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -2303,7 +2416,7 @@ def test_embeddings_create_no_pii( client.embeddings._post = mock.Mock(return_value=returned_embedding) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -2313,6 +2426,7 @@ def test_embeddings_create_no_pii( assert len(response.data[0].embedding) == 3 + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -2348,6 +2462,7 @@ def test_embeddings_create_no_pii( assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "get_input,expected_embeddings_input", @@ -2425,12 +2540,15 @@ def test_embeddings_create( get_input, expected_embeddings_input, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -2447,7 +2565,7 @@ def test_embeddings_create( client.embeddings._post = mock.Mock(return_value=returned_embedding) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -2457,6 +2575,7 @@ def test_embeddings_create( assert len(response.data[0].embedding) == 3 + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -2498,6 +2617,7 @@ def test_embeddings_create( assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -2515,12 +2635,15 @@ async def test_embeddings_create_async_no_pii( send_default_pii, include_prompts, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=send_default_pii, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -2537,7 +2660,7 @@ async def test_embeddings_create_async_no_pii( client.embeddings._post = AsyncMock(return_value=returned_embedding) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -2547,6 +2670,7 @@ async def test_embeddings_create_async_no_pii( assert len(response.data[0].embedding) == 3 + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -2582,6 +2706,7 @@ async def test_embeddings_create_async_no_pii( assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -2660,12 +2785,15 @@ async def test_embeddings_create_async( get_input, expected_embeddings_input, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -2682,7 +2810,7 @@ async def test_embeddings_create_async( client.embeddings._post = AsyncMock(return_value=returned_embedding) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -2692,6 +2820,7 @@ async def test_embeddings_create_async( assert len(response.data[0].embedding) == 3 + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.embeddings" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -2733,6 +2862,7 @@ async def test_embeddings_create_async( assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -2745,12 +2875,15 @@ def test_embeddings_create_raises_error( send_default_pii, include_prompts, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=send_default_pii, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -2759,7 +2892,18 @@ def test_embeddings_create_raises_error( side_effect=OpenAIError("API rate limit reached") ) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("event", "span") + + with pytest.raises(OpenAIError): + client.embeddings.create(input="hello", model="text-embedding-3-large") + + (event,) = (item.payload for item in items if item.type == "event") + sentry_sdk.flush() + (span,) = (item.payload for item in items if item.type == "span") + assert event["level"] == "error" + assert span["status"] == "error" + elif stream_gen_ai_spans: items = capture_items("event", "transaction") with pytest.raises(OpenAIError): @@ -2767,6 +2911,8 @@ def test_embeddings_create_raises_error( (event,) = (item.payload for item in items if item.type == "event") (transaction,) = (item.payload for item in items if item.type == "transaction") + assert event["level"] == "error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" else: events = capture_events() @@ -2774,11 +2920,11 @@ def test_embeddings_create_raises_error( client.embeddings.create(input="hello", model="text-embedding-3-large") (event, transaction) = events - - assert event["level"] == "error" - assert transaction["contexts"]["trace"]["status"] == "internal_error" + assert event["level"] == "error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -2792,12 +2938,15 @@ async def test_embeddings_create_raises_error_async( send_default_pii, include_prompts, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=include_prompts)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=send_default_pii, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -2806,7 +2955,20 @@ async def test_embeddings_create_raises_error_async( side_effect=OpenAIError("API rate limit reached") ) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("event", "span") + + with pytest.raises(OpenAIError): + await client.embeddings.create( + input="hello", model="text-embedding-3-large" + ) + + (event,) = (item.payload for item in items if item.type == "event") + sentry_sdk.flush() + (span,) = (item.payload for item in items if item.type == "span") + assert event["level"] == "error" + assert span["status"] == "error" + elif stream_gen_ai_spans: items = capture_items("event", "transaction") with pytest.raises(OpenAIError): @@ -2816,6 +2978,8 @@ async def test_embeddings_create_raises_error_async( (event,) = (item.payload for item in items if item.type == "event") (transaction,) = (item.payload for item in items if item.type == "transaction") + assert event["level"] == "error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" else: events = capture_events() @@ -2825,11 +2989,11 @@ async def test_embeddings_create_raises_error_async( ) (event, transaction) = events - - assert event["level"] == "error" - assert transaction["contexts"]["trace"]["status"] == "internal_error" + assert event["level"] == "error" + assert transaction["contexts"]["trace"]["status"] == "internal_error" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_span_origin_nonstreaming_chat( sentry_init, @@ -2837,11 +3001,13 @@ def test_span_origin_nonstreaming_chat( capture_items, nonstreaming_chat_completions_model_response, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration()], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -2859,7 +3025,7 @@ def test_span_origin_nonstreaming_chat( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") with start_transaction(name="openai tx"): @@ -2870,6 +3036,7 @@ def test_span_origin_nonstreaming_chat( (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: @@ -2886,6 +3053,7 @@ def test_span_origin_nonstreaming_chat( assert event["spans"][0]["origin"] == "auto.ai.openai" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_span_origin_nonstreaming_chat_async( @@ -2894,11 +3062,14 @@ async def test_span_origin_nonstreaming_chat_async( capture_items, nonstreaming_chat_completions_model_response, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -2916,7 +3087,7 @@ async def test_span_origin_nonstreaming_chat_async( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") with start_transaction(name="openai tx"): @@ -2927,6 +3098,7 @@ async def test_span_origin_nonstreaming_chat_async( (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: @@ -2943,17 +3115,21 @@ async def test_span_origin_nonstreaming_chat_async( assert event["spans"][0]["origin"] == "auto.ai.openai" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_span_origin_streaming_chat( sentry_init, capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -2994,7 +3170,7 @@ def test_span_origin_streaming_chat( ), ] - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") client.chat.completions._post = mock.Mock(return_value=returned_stream) @@ -3008,6 +3184,7 @@ def test_span_origin_streaming_chat( (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: @@ -3027,6 +3204,7 @@ def test_span_origin_streaming_chat( assert event["spans"][0]["origin"] == "auto.ai.openai" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_span_origin_streaming_chat_async( @@ -3035,11 +3213,14 @@ async def test_span_origin_streaming_chat_async( capture_items, async_iterator, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -3086,7 +3267,7 @@ async def test_span_origin_streaming_chat_async( client.chat.completions._post = AsyncMock(return_value=returned_stream) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") with start_transaction(name="openai tx"): @@ -3101,6 +3282,7 @@ async def test_span_origin_streaming_chat_async( (event,) = (item.payload for item in items if item.type == "transaction") assert event["contexts"]["trace"]["origin"] == "manual" + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: @@ -3121,17 +3303,21 @@ async def test_span_origin_streaming_chat_async( assert event["spans"][0]["origin"] == "auto.ai.openai" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_span_origin_embeddings( sentry_init, capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -3148,7 +3334,7 @@ def test_span_origin_embeddings( client.embeddings._post = mock.Mock(return_value=returned_embedding) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") with start_transaction(name="openai tx"): @@ -3157,6 +3343,7 @@ def test_span_origin_embeddings( (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: @@ -3171,6 +3358,7 @@ def test_span_origin_embeddings( assert event["spans"][0]["origin"] == "auto.ai.openai" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_span_origin_embeddings_async( @@ -3178,11 +3366,14 @@ async def test_span_origin_embeddings_async( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -3199,7 +3390,7 @@ async def test_span_origin_embeddings_async( client.embeddings._post = AsyncMock(return_value=returned_embedding) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("transaction", "span") with start_transaction(name="openai tx"): @@ -3210,6 +3401,7 @@ async def test_span_origin_embeddings_async( (event,) = [item.payload for item in items if item.type == "transaction"] assert event["contexts"]["trace"]["origin"] == "manual" + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] assert spans[0]["attributes"]["sentry.origin"] == "auto.ai.openai" else: @@ -3588,6 +3780,7 @@ def count_tokens(msg): ) +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_ai_client_span_responses_api_no_pii( @@ -3595,17 +3788,67 @@ def test_ai_client_span_responses_api_no_pii( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("span") + + with sentry_sdk.traces.start_span(name="openai tx"): + client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + + sentry_sdk.flush() + spans = [item.payload for item in items] + + assert len(spans) == 2 + assert spans[0]["attributes"] == { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.system": "openai", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", + "server.address": mock.ANY, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + assert "gen_ai.system_instructions" not in spans[0]["attributes"] + assert "gen_ai.request.messages" not in spans[0]["attributes"] + assert "gen_ai.response.text" not in spans[0]["attributes"] + elif stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -3693,6 +3936,7 @@ def test_ai_client_span_responses_api_no_pii( assert "gen_ai.response.text" not in spans[0]["data"] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "instructions,input,expected_system_instructions,expected_request_messages", @@ -3847,18 +4091,74 @@ def test_ai_client_span_responses_api( expected_system_instructions, expected_request_messages, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("span") + + with sentry_sdk.traces.start_span(name="openai tx"): + client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + + sentry_sdk.flush() + spans = [item.payload for item in items] + + assert len(spans) == 2 + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.system": "openai", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.request.messages": safe_serialize(expected_request_messages), + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", + "server.address": mock.ANY, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if expected_system_instructions is not None: + expected_data["gen_ai.system_instructions"] = safe_serialize( + expected_system_instructions + ) + + assert spans[0]["attributes"] == expected_data + elif stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -3958,6 +4258,7 @@ def test_ai_client_span_responses_api( assert spans[0]["data"] == expected_data +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "conversation, expected_id", @@ -3976,17 +4277,20 @@ def test_responses_api_conversation_id( conversation, expected_id, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") client.responses._post = mock.Mock(return_value=EXAMPLE_RESPONSE) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -3996,6 +4300,7 @@ def test_responses_api_conversation_id( conversation=conversation, ) + sentry_sdk.flush() (span,) = (item.payload for item in items if item.type == "span") if expected_id is None: @@ -4021,6 +4326,7 @@ def test_responses_api_conversation_id( assert span["data"]["gen_ai.conversation.id"] == expected_id +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_error_in_responses_api( @@ -4028,12 +4334,15 @@ def test_error_in_responses_api( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -4041,7 +4350,29 @@ def test_error_in_responses_api( side_effect=OpenAIError("API rate limit reached") ) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("event", "span") + + with sentry_sdk.traces.start_span(name="openai tx"), pytest.raises(OpenAIError): + client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + ) + + # make sure the span where the error occurred is captured + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + + (error_event,) = (item.payload for item in items if item.type == "event") + + assert error_event["level"] == "error" + assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + + assert spans[1]["is_segment"] is True + assert error_event["contexts"]["trace"]["trace_id"] == spans[1]["trace_id"] + elif stream_gen_ai_spans: items = capture_items("event", "transaction", "span") with start_transaction(name="openai tx"), pytest.raises(OpenAIError): @@ -4063,6 +4394,10 @@ def test_error_in_responses_api( (transaction_event,) = ( item.payload for item in items if item.type == "transaction" ) + assert ( + error_event["contexts"]["trace"]["trace_id"] + == transaction_event["contexts"]["trace"]["trace_id"] + ) else: events = capture_events() @@ -4081,13 +4416,13 @@ def test_error_in_responses_api( assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "OpenAIError" - - assert ( - error_event["contexts"]["trace"]["trace_id"] - == transaction_event["contexts"]["trace"]["trace_id"] - ) + assert ( + error_event["contexts"]["trace"]["trace_id"] + == transaction_event["contexts"]["trace"]["trace_id"] + ) +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") @@ -4243,18 +4578,74 @@ async def test_ai_client_span_responses_async_api( expected_system_instructions, expected_request_messages, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") client.responses._post = AsyncMock(return_value=EXAMPLE_RESPONSE) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("span") + + with sentry_sdk.traces.start_span(name="openai tx"): + await client.responses.create( + model="gpt-4o", + instructions=instructions, + input=input, + max_output_tokens=100, + temperature=0.7, + top_p=0.9, + ) + + sentry_sdk.flush() + spans = [item.payload for item in items] + + assert len(spans) == 2 + + expected_data = { + "gen_ai.operation.name": "responses", + "gen_ai.request.max_tokens": 100, + "gen_ai.request.temperature": 0.7, + "gen_ai.request.top_p": 0.9, + "gen_ai.request.messages": safe_serialize(expected_request_messages), + "gen_ai.request.model": "gpt-4o", + "gen_ai.response.model": "response-model-id", + "gen_ai.response.streaming": False, + "gen_ai.system": "openai", + "gen_ai.usage.input_tokens": 20, + "gen_ai.usage.input_tokens.cached": 5, + "gen_ai.usage.output_tokens": 10, + "gen_ai.usage.output_tokens.reasoning": 8, + "gen_ai.usage.total_tokens": 30, + "gen_ai.response.text": "the model response", + "sentry.environment": "production", + "sentry.op": "gen_ai.responses", + "sentry.origin": "auto.ai.openai", + "sentry.release": mock.ANY, + "sentry.sdk.name": "sentry.python", + "sentry.sdk.version": mock.ANY, + "sentry.segment.id": mock.ANY, + "sentry.segment.name": "openai tx", + "server.address": mock.ANY, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if expected_system_instructions is not None: + expected_data["gen_ai.system_instructions"] = safe_serialize( + expected_system_instructions + ) + + assert spans[0]["attributes"] == expected_data + elif stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -4354,6 +4745,7 @@ async def test_ai_client_span_responses_async_api( assert spans[0]["data"] == expected_data +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -4512,12 +4904,15 @@ async def test_ai_client_span_streaming_responses_async_api( async_iterator, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -4525,14 +4920,19 @@ async def test_ai_client_span_streaming_responses_async_api( async_iterator(server_side_event_chunks(EXAMPLE_RESPONSES_STREAM)) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") + ctx = ( + sentry_sdk.traces.start_span(name="openai tx") + if span_streaming + else start_transaction(name="openai tx") + ) with mock.patch.object( client.responses._client._client, "send", return_value=returned_stream, - ), start_transaction(name="openai tx"): + ), ctx: result = await client.responses.create( model="gpt-4o", instructions=instructions, @@ -4545,11 +4945,12 @@ async def test_ai_client_span_streaming_responses_async_api( async for _ in result: pass + sentry_sdk.flush() spans = [item.payload for item in items if item.type == "span"] spans = [ span for span in spans - if span["attributes"]["sentry.op"] == OP.GEN_AI_RESPONSES + if span["attributes"].get("sentry.op") == OP.GEN_AI_RESPONSES ] assert len(spans) == 1 @@ -4647,6 +5048,7 @@ async def test_ai_client_span_streaming_responses_async_api( assert spans[0]["data"] == expected_data +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") @@ -4655,12 +5057,15 @@ async def test_error_in_responses_async_api( capture_events, capture_items, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -4668,7 +5073,29 @@ async def test_error_in_responses_async_api( side_effect=OpenAIError("API rate limit reached") ) - if stream_gen_ai_spans: + if span_streaming: + items = capture_items("event", "span") + + with sentry_sdk.traces.start_span(name="openai tx"), pytest.raises(OpenAIError): + await client.responses.create( + model="gpt-4o", + instructions="You are a coding assistant that talks like a pirate.", + input="How do I check if a Python object is an instance of a class?", + ) + + # make sure the span where the error occurred is captured + sentry_sdk.flush() + spans = [item.payload for item in items if item.type == "span"] + assert spans[0]["attributes"]["sentry.op"] == "gen_ai.responses" + + (error_event,) = (item.payload for item in items if item.type == "event") + + assert error_event["level"] == "error" + assert error_event["exception"]["values"][0]["type"] == "OpenAIError" + + assert spans[1]["is_segment"] is True + assert error_event["contexts"]["trace"]["trace_id"] == spans[1]["trace_id"] + elif stream_gen_ai_spans: items = capture_items("event", "transaction", "span") with start_transaction(name="openai tx"), pytest.raises(OpenAIError): @@ -4690,6 +5117,10 @@ async def test_error_in_responses_async_api( (transaction_event,) = ( item.payload for item in items if item.type == "transaction" ) + assert ( + error_event["contexts"]["trace"]["trace_id"] + == transaction_event["contexts"]["trace"]["trace_id"] + ) else: events = capture_events() @@ -4708,11 +5139,10 @@ async def test_error_in_responses_async_api( assert error_event["level"] == "error" assert error_event["exception"]["values"][0]["type"] == "OpenAIError" - - assert ( - error_event["contexts"]["trace"]["trace_id"] - == transaction_event["contexts"]["trace"]["trace_id"] - ) + assert ( + error_event["contexts"]["trace"]["trace_id"] + == transaction_event["contexts"]["trace"]["trace_id"] + ) if SKIP_RESPONSES_TESTS: @@ -4788,6 +5218,7 @@ async def test_error_in_responses_async_api( ] +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "send_default_pii, include_prompts", @@ -4803,6 +5234,7 @@ def test_streaming_responses_api( get_model_response, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[ @@ -4810,9 +5242,11 @@ def test_streaming_responses_api( include_prompts=include_prompts, ) ], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=send_default_pii, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -4822,7 +5256,7 @@ def test_streaming_responses_api( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -4846,6 +5280,7 @@ def test_streaming_responses_api( assert response_string == "hello world" + sentry_sdk.flush() (span,) = (item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.responses" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -4911,6 +5346,7 @@ def test_streaming_responses_api( assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.parametrize( @@ -4928,6 +5364,7 @@ async def test_streaming_responses_api_async( async_iterator, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[ @@ -4935,9 +5372,11 @@ async def test_streaming_responses_api_async( include_prompts=include_prompts, ) ], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=send_default_pii, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -4945,7 +5384,7 @@ async def test_streaming_responses_api_async( async_iterator(server_side_event_chunks(EXAMPLE_RESPONSES_STREAM)) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -4969,6 +5408,7 @@ async def test_streaming_responses_api_async( assert response_string == "hello world" + sentry_sdk.flush() (span,) = (item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.responses" assert span["attributes"][SPANDATA.GEN_AI_SYSTEM] == "openai" @@ -5034,6 +5474,7 @@ async def test_streaming_responses_api_async( assert span["data"]["gen_ai.usage.total_tokens"] == 30 +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif( OPENAI_VERSION <= (1, 1, 0), @@ -5050,11 +5491,14 @@ def test_empty_tools_in_chat_completion( tools, nonstreaming_chat_completions_model_response, stream_gen_ai_spans, + span_streaming, ): sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -5072,7 +5516,7 @@ def test_empty_tools_in_chat_completion( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): @@ -5082,6 +5526,7 @@ def test_empty_tools_in_chat_completion( tools=tools, ) + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert "gen_ai.request.available_tools" not in span["attributes"] @@ -5102,6 +5547,7 @@ def test_empty_tools_in_chat_completion( # Test messages with mixed roles including "ai" that should be mapped to "assistant" +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.parametrize( "test_message,expected_role", @@ -5125,14 +5571,17 @@ def test_openai_message_role_mapping( expected_role, nonstreaming_chat_completions_model_response, stream_gen_ai_spans, + span_streaming, ): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, send_default_pii=True, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -5152,13 +5601,14 @@ def test_openai_message_role_mapping( test_messages = [test_message] - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) # Verify that the span was created correctly + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["attributes"] @@ -5247,6 +5697,7 @@ def test_openai_message_truncation( # noinspection PyTypeChecker +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) def test_streaming_chat_completion_ttft( sentry_init, @@ -5255,14 +5706,17 @@ def test_streaming_chat_completion_ttft( get_model_response, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): """ Test that streaming chat completions capture time-to-first-token (TTFT). """ sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -5300,7 +5754,7 @@ def test_streaming_chat_completion_ttft( ), ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -5317,6 +5771,7 @@ def test_streaming_chat_completion_ttft( for _ in response_stream: pass + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" @@ -5353,6 +5808,7 @@ def test_streaming_chat_completion_ttft( # noinspection PyTypeChecker +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio async def test_streaming_chat_completion_ttft_async( @@ -5363,14 +5819,17 @@ async def test_streaming_chat_completion_ttft_async( async_iterator, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): """ Test that async streaming chat completions capture time-to-first-token (TTFT). """ sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -5410,7 +5869,7 @@ async def test_streaming_chat_completion_ttft_async( ) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -5427,6 +5886,7 @@ async def test_streaming_chat_completion_ttft_async( async for _ in response_stream: pass + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.chat" @@ -5463,6 +5923,7 @@ async def test_streaming_chat_completion_ttft_async( # noinspection PyTypeChecker +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") def test_streaming_responses_api_ttft( @@ -5472,14 +5933,17 @@ def test_streaming_responses_api_ttft( get_model_response, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): """ Test that streaming responses API captures time-to-first-token (TTFT). """ sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = OpenAI(api_key="z") @@ -5487,7 +5951,7 @@ def test_streaming_responses_api_ttft( server_side_event_chunks(EXAMPLE_RESPONSES_STREAM) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -5504,6 +5968,7 @@ def test_streaming_responses_api_ttft( for _ in response_stream: pass + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.responses" @@ -5540,6 +6005,7 @@ def test_streaming_responses_api_ttft( # noinspection PyTypeChecker +@pytest.mark.parametrize("span_streaming", [True, False]) @pytest.mark.parametrize("stream_gen_ai_spans", [True, False]) @pytest.mark.asyncio @pytest.mark.skipif(SKIP_RESPONSES_TESTS, reason="Responses API not available") @@ -5551,14 +6017,17 @@ async def test_streaming_responses_api_ttft_async( async_iterator, server_side_event_chunks, stream_gen_ai_spans, + span_streaming, ): """ Test that async streaming responses API captures time-to-first-token (TTFT). """ sentry_init( integrations=[OpenAIIntegration()], + disabled_integrations=[StdlibIntegration], traces_sample_rate=1.0, stream_gen_ai_spans=stream_gen_ai_spans, + _experiments={"trace_lifecycle": "stream" if span_streaming else "static"}, ) client = AsyncOpenAI(api_key="z") @@ -5566,7 +6035,7 @@ async def test_streaming_responses_api_ttft_async( async_iterator(server_side_event_chunks(EXAMPLE_RESPONSES_STREAM)) ) - if stream_gen_ai_spans: + if span_streaming or stream_gen_ai_spans: items = capture_items("span") with mock.patch.object( @@ -5583,6 +6052,7 @@ async def test_streaming_responses_api_ttft_async( async for _ in response_stream: pass + sentry_sdk.flush() span = next(item.payload for item in items if item.type == "span") assert span["attributes"]["sentry.op"] == "gen_ai.responses"