From 88cd97eac65d8d8c67b3400c1fc4cf3414380f26 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 3 Sep 2025 10:14:11 +0200 Subject: [PATCH 01/43] Rename attributes --- sentry_sdk/consts.py | 1 + sentry_sdk/integrations/huggingface_hub.py | 38 +++++++++++++--------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index d7a0603a10..6c82cffc90 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -794,6 +794,7 @@ class OP: GEN_AI_CHAT = "gen_ai.chat" GEN_AI_EMBEDDINGS = "gen_ai.embeddings" GEN_AI_EXECUTE_TOOL = "gen_ai.execute_tool" + GEN_AI_GENERATE_TEXT = "gen_ai.generate_text" GEN_AI_HANDOFF = "gen_ai.handoff" GEN_AI_PIPELINE = "gen_ai.pipeline" GEN_AI_INVOKE_AGENT = "gen_ai.invoke_agent" diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 2dfcb5925a..22099c5559 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -1,15 +1,14 @@ from functools import wraps -from sentry_sdk import consts -from sentry_sdk.ai.monitoring import record_token_usage -from sentry_sdk.ai.utils import set_data_normalized -from sentry_sdk.consts import SPANDATA from typing import Any, Iterable, Callable import sentry_sdk -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.ai.monitoring import record_token_usage +from sentry_sdk.ai.utils import set_data_normalized +from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration +from sentry_sdk.scope import should_send_default_pii from sentry_sdk.utils import ( capture_internal_exceptions, event_from_exception, @@ -34,6 +33,8 @@ def __init__(self, include_prompts=True): @staticmethod def setup_once(): # type: () -> None + + # Other tasks that can be called: https://huggingface.co/docs/huggingface_hub/guides/inference#supported-providers-and-tasks huggingface_hub.inference._client.InferenceClient.text_generation = ( _wrap_text_generation( huggingface_hub.inference._client.InferenceClient.text_generation @@ -70,15 +71,22 @@ def new_text_generation(*args, **kwargs): # invalid call, let it return error return f(*args, **kwargs) - model = kwargs.get("model") + client = args[0] + model = client.model or kwargs.get("model") or "" streaming = kwargs.get("stream") span = sentry_sdk.start_span( - op=consts.OP.HUGGINGFACE_HUB_CHAT_COMPLETIONS_CREATE, - name="Text Generation", + op=OP.GEN_AI_GENERATE_TEXT, + name=f"generate_text {model}", origin=HuggingfaceHubIntegration.origin, ) span.__enter__() + + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "generate_text") + if model: + span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) + span.set_data(SPANDATA.GEN_AI_SYSTEM, "TODO!!!!!") + try: res = f(*args, **kwargs) except Exception as e: @@ -88,16 +96,15 @@ def new_text_generation(*args, **kwargs): with capture_internal_exceptions(): if should_send_default_pii() and integration.include_prompts: - set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, prompt) + set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt) - set_data_normalized(span, SPANDATA.AI_MODEL_ID, model) - set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) + span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, streaming) if isinstance(res, str): if should_send_default_pii() and integration.include_prompts: set_data_normalized( span, - SPANDATA.AI_RESPONSES, + SPANDATA.GEN_AI_RESPONSE_TEXT, [res], ) span.__exit__(None, None, None) @@ -107,7 +114,7 @@ def 
new_text_generation(*args, **kwargs):
         if should_send_default_pii() and integration.include_prompts:
             set_data_normalized(
                 span,
-                SPANDATA.AI_RESPONSES,
+                SPANDATA.GEN_AI_RESPONSE_TEXT,
                 [res.generated_text],
             )
         if res.details is not None and res.details.generated_tokens > 0:
@@ -120,7 +127,6 @@ def new_text_generation(*args, **kwargs):
 
         if not isinstance(res, Iterable):
             # we only know how to deal with strings and iterables, ignore
-            set_data_normalized(span, "unknown_response", True)
             span.__exit__(None, None, None)
             return res
 
@@ -145,7 +151,7 @@ def new_details_iterator():
                         and integration.include_prompts
                     ):
                         set_data_normalized(
-                            span, SPANDATA.AI_RESPONSES, "".join(data_buf)
+                            span, SPANDATA.GEN_AI_RESPONSE_TEXT, "".join(data_buf)
                         )
                     if tokens_used > 0:
                         record_token_usage(
@@ -172,7 +178,7 @@ def new_iterator():
                         and integration.include_prompts
                     ):
                         set_data_normalized(
-                            span, SPANDATA.AI_RESPONSES, "".join(data_buf)
+                            span, SPANDATA.GEN_AI_RESPONSE_TEXT, "".join(data_buf)
                         )
                     span.__exit__(None, None, None)

From 54164cd1462c0cf713211ef37a6edc1b40953c0f Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Wed, 3 Sep 2025 10:56:38 +0200
Subject: [PATCH 02/43] text generation done.

---
 sentry_sdk/integrations/huggingface_hub.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py
index 22099c5559..a4ad16fdcb 100644
--- a/sentry_sdk/integrations/huggingface_hub.py
+++ b/sentry_sdk/integrations/huggingface_hub.py
@@ -85,7 +85,6 @@ def new_text_generation(*args, **kwargs):
         span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "generate_text")
         if model:
             span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model)
-        span.set_data(SPANDATA.GEN_AI_SYSTEM, "TODO!!!!!")
 
         try:
             res = f(*args, **kwargs)

From 05ef7e338ff5b11ee4beb23dc3451ab248ce6c1f Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Wed, 3 Sep 2025 12:25:13 +0200
Subject: [PATCH 03/43] First version of supporting chat-completion

---
 sentry_sdk/integrations/huggingface_hub.py | 54 ++++++++++++++++++----
 1 file changed, 45 insertions(+), 9 deletions(-)

diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py
index a4ad16fdcb..1c399e6082 100644
--- a/sentry_sdk/integrations/huggingface_hub.py
+++ b/sentry_sdk/integrations/huggingface_hub.py
@@ -17,7 +17,7 @@
 try:
     import huggingface_hub.inference._client
 
-    from huggingface_hub import ChatCompletionStreamOutput, TextGenerationOutput
+    from huggingface_hub import ChatCompletionOutput, TextGenerationOutput
 
 except ImportError:
     raise DidNotEnable("Huggingface not installed")
@@ -40,6 +40,11 @@ def setup_once():
                 huggingface_hub.inference._client.InferenceClient.text_generation
             )
         )
+        huggingface_hub.inference._client.InferenceClient.chat_completion = (
+            _wrap_text_generation(
+                huggingface_hub.inference._client.InferenceClient.chat_completion
+            )
+        )
 
 
 def _capture_exception(exc):
@@ -63,12 +68,14 @@ def new_text_generation(*args, **kwargs):
 
         if "prompt" in kwargs:
             prompt = kwargs["prompt"]
+        elif "messages" in kwargs:
+            prompt = kwargs["messages"]
         elif len(args) >= 2:
             kwargs["prompt"] = args[1]
             prompt = kwargs["prompt"]
             args = (args[0],) + args[2:]
         else:
-            # invalid call, let it return error
+            # invalid call, don't instrument, let it return error
             return f(*args, **kwargs)
 
         client = args[0]
@@ -95,7 +102,9 @@ def new_text_generation(*args, **kwargs):
 
         with capture_internal_exceptions():
             if should_send_default_pii() and integration.include_prompts:
-                set_data_normalized(span, 
SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt) + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False + ) span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, streaming) @@ -104,17 +113,20 @@ def new_text_generation(*args, **kwargs): set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, - [res], + res, ) span.__exit__(None, None, None) return res if isinstance(res, TextGenerationOutput): if should_send_default_pii() and integration.include_prompts: + import ipdb + + ipdb.set_trace() set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, - [res.generated_text], + res.generated_text, ) if res.details is not None and res.details.generated_tokens > 0: record_token_usage( @@ -124,15 +136,35 @@ def new_text_generation(*args, **kwargs): span.__exit__(None, None, None) return res + if isinstance(res, ChatCompletionOutput): + if should_send_default_pii() and integration.include_prompts: + text_response = "".join( + [x.get("message", {}).get("content") for x in res.choices] + ) + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) + if hasattr(res, "usage") and res.usage is not None: + record_token_usage( + span, + input_tokens=res.usage.prompt_tokens, + output_tokens=res.usage.completion_tokens, + total_tokens=res.usage.total_tokens, + ) + span.__exit__(None, None, None) + return res + if not isinstance(res, Iterable): # we only know how to deal with strings and iterables, ignore span.__exit__(None, None, None) return res if kwargs.get("details", False): - # res is Iterable[TextGenerationStreamOutput] + def new_details_iterator(): - # type: () -> Iterable[ChatCompletionStreamOutput] + # type: () -> Iterable[Any] with capture_internal_exceptions(): tokens_used = 0 data_buf: list[str] = [] @@ -150,7 +182,9 @@ def new_details_iterator(): and integration.include_prompts ): set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, "".join(data_buf) + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + "".join(data_buf), ) if tokens_used > 0: record_token_usage( @@ -177,7 +211,9 @@ def new_iterator(): and integration.include_prompts ): set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, "".join(data_buf) + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + "".join(data_buf), ) span.__exit__(None, None, None) From d43d17fae203c2642589121ded2f40f42672b2a7 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 3 Sep 2025 12:42:12 +0200 Subject: [PATCH 04/43] Cleanup --- sentry_sdk/integrations/huggingface_hub.py | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 1c399e6082..d8ae2a2285 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -36,13 +36,15 @@ def setup_once(): # Other tasks that can be called: https://huggingface.co/docs/huggingface_hub/guides/inference#supported-providers-and-tasks huggingface_hub.inference._client.InferenceClient.text_generation = ( - _wrap_text_generation( - huggingface_hub.inference._client.InferenceClient.text_generation + _wrap_huggingface_task( + huggingface_hub.inference._client.InferenceClient.text_generation, + OP.GEN_AI_GENERATE_TEXT, ) ) huggingface_hub.inference._client.InferenceClient.chat_completion = ( - _wrap_text_generation( - huggingface_hub.inference._client.InferenceClient.chat_completion + _wrap_huggingface_task( + huggingface_hub.inference._client.InferenceClient.chat_completion, + OP.GEN_AI_CHAT, ) ) @@ -57,8 +59,8 @@ def 
_capture_exception(exc): sentry_sdk.capture_event(event, hint=hint) -def _wrap_text_generation(f): - # type: (Callable[..., Any]) -> Callable[..., Any] +def _wrap_huggingface_task(f, op): + # type: (Callable[..., Any], str) -> Callable[..., Any] @wraps(f) def new_text_generation(*args, **kwargs): # type: (*Any, **Any) -> Any @@ -81,21 +83,23 @@ def new_text_generation(*args, **kwargs): client = args[0] model = client.model or kwargs.get("model") or "" streaming = kwargs.get("stream") + operation_name = op.split(".")[-1] span = sentry_sdk.start_span( - op=OP.GEN_AI_GENERATE_TEXT, - name=f"generate_text {model}", + op=op, + name=f"{operation_name} {model}", origin=HuggingfaceHubIntegration.origin, ) span.__enter__() - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "generate_text") + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, operation_name) if model: span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) try: res = f(*args, **kwargs) except Exception as e: + span.set_status("error") _capture_exception(e) span.__exit__(None, None, None) raise e from None @@ -120,9 +124,6 @@ def new_text_generation(*args, **kwargs): if isinstance(res, TextGenerationOutput): if should_send_default_pii() and integration.include_prompts: - import ipdb - - ipdb.set_trace() set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, From 27c851dd627c6b5916d1c9cbefcaa8bd8309f2a8 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 3 Sep 2025 12:47:26 +0200 Subject: [PATCH 05/43] better format of dict in span data --- sentry_sdk/ai/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index cf52cba6e8..d6e5293a68 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -1,3 +1,5 @@ +import json + from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -33,4 +35,4 @@ def set_data_normalized(span, key, value, unpack=True): if isinstance(normalized, (int, float, bool, str)): span.set_data(key, normalized) else: - span.set_data(key, str(normalized)) + span.set_data(key, json.dumps(normalized)) From ade94106f6c4b2365dcd7348340b43088759f60f Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 3 Sep 2025 13:44:48 +0200 Subject: [PATCH 06/43] fix test --- tests/integrations/huggingface_hub/test_huggingface_hub.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index df0c6c6d76..8a50dd0fe2 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -68,7 +68,7 @@ def test_nonstreaming_chat_completion( tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] - assert span["op"] == "ai.chat_completions.create.huggingface_hub" + assert span["op"] == "gen_ai.generate_text" if send_default_pii and include_prompts: assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES] @@ -127,7 +127,7 @@ def test_streaming_chat_completion( tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] - assert span["op"] == "ai.chat_completions.create.huggingface_hub" + assert span["op"] == "gen_ai.generate_text" if send_default_pii and include_prompts: assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES] From afa687cfc08e6bba52a1b481d954e2a53db15084 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 3 Sep 2025 15:04:38 +0200 Subject: [PATCH 07/43] attributes for huggingface requests/responses --- 
sentry_sdk/integrations/huggingface_hub.py | 115 +++++++++++++++------ 1 file changed, 84 insertions(+), 31 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index d8ae2a2285..5a5cbe61d6 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -62,7 +62,7 @@ def _capture_exception(exc): def _wrap_huggingface_task(f, op): # type: (Callable[..., Any], str) -> Callable[..., Any] @wraps(f) - def new_text_generation(*args, **kwargs): + def new_huggingface_task(*args, **kwargs): # type: (*Any, **Any) -> Any integration = sentry_sdk.get_client().get_integration(HuggingfaceHubIntegration) if integration is None: @@ -82,7 +82,6 @@ def new_text_generation(*args, **kwargs): client = args[0] model = client.model or kwargs.get("model") or "" - streaming = kwargs.get("stream") operation_name = op.split(".")[-1] span = sentry_sdk.start_span( @@ -93,9 +92,29 @@ def new_text_generation(*args, **kwargs): span.__enter__() span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, operation_name) + if model: span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) + # Input attributes + attribute_mapping = { + "tools": SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, + "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, + "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, + "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, + "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE, + "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P, + "top_k": SPANDATA.GEN_AI_REQUEST_TOP_K, + "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING, + } + for attribute, span_attribute in attribute_mapping.items(): + value = kwargs.get(attribute, None) + if value is not None: + if isinstance(value, (int, float, bool, str)): + span.set_data(span_attribute, value) + else: + set_data_normalized(span, span_attribute, value, unpack=False) + try: res = f(*args, **kwargs) except Exception as e: @@ -105,30 +124,56 @@ def new_text_generation(*args, **kwargs): raise e from None with capture_internal_exceptions(): + # Output attributes + if hasattr(res, "model"): + model = res.model + if model: + span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model) + + if hasattr(res, "details") and res.details is not None: + finish_reason = getattr(res.details, "finish_reason", None) + if finish_reason: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason + ) + + try: + tool_calls = res.choices[0].message.tool_calls + except Exception: + tool_calls = [] + + if len(tool_calls) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + tool_calls, + unpack=False, + ) + if should_send_default_pii() and integration.include_prompts: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False ) - span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, streaming) - if isinstance(res, str): if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - res, - ) + if res: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + res, + ) span.__exit__(None, None, None) return res if isinstance(res, TextGenerationOutput): if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - res.generated_text, - ) + if res.generated_text: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + res.generated_text, + ) if res.details is not None and res.details.generated_tokens > 0: 
record_token_usage( span, @@ -140,13 +185,17 @@ def new_text_generation(*args, **kwargs): if isinstance(res, ChatCompletionOutput): if should_send_default_pii() and integration.include_prompts: text_response = "".join( - [x.get("message", {}).get("content") for x in res.choices] - ) - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - text_response, + [ + x.get("message", {}).get("content", None) or "" + for x in res.choices + ] ) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) if hasattr(res, "usage") and res.usage is not None: record_token_usage( span, @@ -182,11 +231,13 @@ def new_details_iterator(): and should_send_default_pii() and integration.include_prompts ): - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - "".join(data_buf), - ) + text_response = "".join(data_buf) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) if tokens_used > 0: record_token_usage( span, @@ -211,13 +262,15 @@ def new_iterator(): and should_send_default_pii() and integration.include_prompts ): - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - "".join(data_buf), - ) + text_response = "".join(data_buf) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) span.__exit__(None, None, None) return new_iterator() - return new_text_generation + return new_huggingface_task From 8b439a33da14514045e83c3069d1bbbce6d35d6e Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 4 Sep 2025 08:31:06 +0200 Subject: [PATCH 08/43] cleanup --- sentry_sdk/integrations/huggingface_hub.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 5a5cbe61d6..9e7f6ff2d5 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -207,7 +207,6 @@ def new_huggingface_task(*args, **kwargs): return res if not isinstance(res, Iterable): - # we only know how to deal with strings and iterables, ignore span.__exit__(None, None, None) return res From 4ca5442dc106e330bdd88f2ab882af32eec9e792 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 4 Sep 2025 14:27:28 +0200 Subject: [PATCH 09/43] updated tests --- .../huggingface_hub/test_huggingface_hub.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 8a50dd0fe2..897c4bb223 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -71,11 +71,11 @@ def test_nonstreaming_chat_completion( assert span["op"] == "gen_ai.generate_text" if send_default_pii and include_prompts: - assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES] - assert "the model response" in span["data"][SPANDATA.AI_RESPONSES] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: - assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] - assert SPANDATA.AI_RESPONSES not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] if details_arg: assert span["data"]["gen_ai.usage.total_tokens"] == 10 @@ -130,11 +130,11 @@ def test_streaming_chat_completion( assert span["op"] == 
"gen_ai.generate_text" if send_default_pii and include_prompts: - assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES] - assert "the model response" in span["data"][SPANDATA.AI_RESPONSES] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: - assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] - assert SPANDATA.AI_RESPONSES not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] if details_arg: assert span["data"]["gen_ai.usage.total_tokens"] == 10 From 9b1d23a48a4104c7de9d24f3260eecfc73881519 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 4 Sep 2025 14:31:05 +0200 Subject: [PATCH 10/43] better tests --- .../huggingface_hub/test_huggingface_hub.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 897c4bb223..a9dc450168 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -8,7 +8,6 @@ from huggingface_hub.errors import OverloadedError from sentry_sdk import start_transaction -from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration @@ -71,11 +70,11 @@ def test_nonstreaming_chat_completion( assert span["op"] == "gen_ai.generate_text" if send_default_pii and include_prompts: - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["data"]["gen_ai.request.messages"] + assert "the model response" in span["data"]["gen_ai.response.text"] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] if details_arg: assert span["data"]["gen_ai.usage.total_tokens"] == 10 @@ -130,11 +129,11 @@ def test_streaming_chat_completion( assert span["op"] == "gen_ai.generate_text" if send_default_pii and include_prompts: - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["data"]["gen_ai.request.messages"] + assert "the model response" in span["data"]["gen_ai.response.text"] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] if details_arg: assert span["data"]["gen_ai.usage.total_tokens"] == 10 From 8451b6d996d9778c8185f3a99dd3f9507deca489 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 4 Sep 2025 14:41:25 +0200 Subject: [PATCH 11/43] do this in separate pr --- sentry_sdk/ai/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index d6e5293a68..cf52cba6e8 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -1,5 +1,3 @@ -import json - from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -35,4 +33,4 @@ def set_data_normalized(span, key, value, unpack=True): if isinstance(normalized, (int, float, bool, str)): span.set_data(key, normalized) else: - 
span.set_data(key, json.dumps(normalized)) + span.set_data(key, str(normalized)) From 75e55c1c3872b6957dc52470a98e2bce46a7263c Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 12:20:04 +0200 Subject: [PATCH 12/43] First step of refactoring test --- scripts/populate_tox/config.py | 3 + sentry_sdk/integrations/huggingface_hub.py | 2 +- .../old_test_huggingface_hub.py | 185 +++++++++++ .../huggingface_hub/test_huggingface_hub.py | 297 +++++++++--------- tox.ini | 31 +- 5 files changed, 346 insertions(+), 172 deletions(-) create mode 100644 tests/integrations/huggingface_hub/old_test_huggingface_hub.py diff --git a/scripts/populate_tox/config.py b/scripts/populate_tox/config.py index b05c4297f1..38d3e7fc09 100644 --- a/scripts/populate_tox/config.py +++ b/scripts/populate_tox/config.py @@ -155,6 +155,9 @@ }, "huggingface_hub": { "package": "huggingface_hub", + "deps": { + "*": ["responses"], + }, }, "langchain-base": { "package": "langchain", diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 9e7f6ff2d5..618af608c5 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -142,7 +142,7 @@ def new_huggingface_task(*args, **kwargs): except Exception: tool_calls = [] - if len(tool_calls) > 0: + if tool_calls is not None and len(tool_calls) > 0: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, diff --git a/tests/integrations/huggingface_hub/old_test_huggingface_hub.py b/tests/integrations/huggingface_hub/old_test_huggingface_hub.py new file mode 100644 index 0000000000..a9dc450168 --- /dev/null +++ b/tests/integrations/huggingface_hub/old_test_huggingface_hub.py @@ -0,0 +1,185 @@ +import itertools +from unittest import mock + +import pytest +from huggingface_hub import ( + InferenceClient, +) +from huggingface_hub.errors import OverloadedError + +from sentry_sdk import start_transaction +from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration + + +def mock_client_post(client, post_mock): + # huggingface-hub==0.28.0 deprecates the `post` method + # so patch `_inner_post` instead + if hasattr(client, "post"): + client.post = post_mock + if hasattr(client, "_inner_post"): + client._inner_post = post_mock + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts, details_arg", + itertools.product([True, False], repeat=3), +) +def test_nonstreaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts, details_arg +): + sentry_init( + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = InferenceClient(model="https://") + + if details_arg: + post_mock = mock.Mock( + return_value=b"""[{ + "generated_text": "the model response", + "details": { + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [], + "tokens": [] + } + }]""" + ) + else: + post_mock = mock.Mock( + return_value=b'[{"generated_text": "the model response"}]' + ) + mock_client_post(client, post_mock) + + with start_transaction(name="huggingface_hub tx"): + response = client.text_generation( + prompt="hello", + details=details_arg, + stream=False, + ) + if details_arg: + assert response.generated_text == "the model response" + else: + assert response == "the model response" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.generate_text" + + if 
send_default_pii and include_prompts: + assert "hello" in span["data"]["gen_ai.request.messages"] + assert "the model response" in span["data"]["gen_ai.response.text"] + else: + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] + + if details_arg: + assert span["data"]["gen_ai.usage.total_tokens"] == 10 + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts, details_arg", + itertools.product([True, False], repeat=3), +) +def test_streaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts, details_arg +): + sentry_init( + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = InferenceClient(model="https://") + + post_mock = mock.Mock( + return_value=[ + b"""data:{ + "token":{"id":1, "special": false, "text": "the model "} + }""", + b"""data:{ + "token":{"id":2, "special": false, "text": "response"}, + "details":{"finish_reason": "length", "generated_tokens": 10, "seed": 0} + }""", + ] + ) + mock_client_post(client, post_mock) + + with start_transaction(name="huggingface_hub tx"): + response = list( + client.text_generation( + prompt="hello", + details=details_arg, + stream=True, + ) + ) + assert len(response) == 2 + if details_arg: + assert response[0].token.text + response[1].token.text == "the model response" + else: + assert response[0] + response[1] == "the model response" + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.generate_text" + + if send_default_pii and include_prompts: + assert "hello" in span["data"]["gen_ai.request.messages"] + assert "the model response" in span["data"]["gen_ai.response.text"] + else: + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] + + if details_arg: + assert span["data"]["gen_ai.usage.total_tokens"] == 10 + + +def test_bad_chat_completion(sentry_init, capture_events): + sentry_init(integrations=[HuggingfaceHubIntegration()], traces_sample_rate=1.0) + events = capture_events() + + client = InferenceClient(model="https://") + post_mock = mock.Mock(side_effect=OverloadedError("The server is overloaded")) + mock_client_post(client, post_mock) + + with pytest.raises(OverloadedError): + client.text_generation(prompt="hello") + + (event,) = events + assert event["level"] == "error" + + +def test_span_origin(sentry_init, capture_events): + sentry_init( + integrations=[HuggingfaceHubIntegration()], + traces_sample_rate=1.0, + ) + events = capture_events() + + client = InferenceClient(model="https://") + post_mock = mock.Mock( + return_value=[ + b"""data:{ + "token":{"id":1, "special": false, "text": "the model "} + }""", + ] + ) + mock_client_post(client, post_mock) + + with start_transaction(name="huggingface_hub tx"): + list( + client.text_generation( + prompt="hello", + stream=True, + ) + ) + + (event,) = events + + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.huggingface_hub" diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index a9dc450168..192b1eead2 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -1,185 +1,170 @@ -import itertools from unittest import mock - import pytest -from 
huggingface_hub import ( - InferenceClient, -) -from huggingface_hub.errors import OverloadedError - -from sentry_sdk import start_transaction -from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration - - -def mock_client_post(client, post_mock): - # huggingface-hub==0.28.0 deprecates the `post` method - # so patch `_inner_post` instead - if hasattr(client, "post"): - client.post = post_mock - if hasattr(client, "_inner_post"): - client._inner_post = post_mock - - -@pytest.mark.parametrize( - "send_default_pii, include_prompts, details_arg", - itertools.product([True, False], repeat=3), -) -def test_nonstreaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts, details_arg -): - sentry_init( - integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - traces_sample_rate=1.0, - send_default_pii=send_default_pii, - ) - events = capture_events() +import responses + +from huggingface_hub import InferenceClient + +import sentry_sdk + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any + + +@pytest.fixture +def mock_hf_text_generation_api(): + # type: () -> Any + """Mock HuggingFace text generation API""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + f"https://huggingface.co/api/models/{model_name}", + json={ + "id": model_name, + "pipeline_tag": "text-generation", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "text-generation", + } + }, + }, + status=200, + ) - client = InferenceClient(model="https://") - - if details_arg: - post_mock = mock.Mock( - return_value=b"""[{ - "generated_text": "the model response", - "details": { - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [], - "tokens": [] + # Mock text generation endpoint + rsps.add( + responses.POST, + f"https://router.huggingface.co/hf-inference/models/{model_name}", + json=[ + { + "generated_text": "Mocked response", + "details": { + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [], + "tokens": [], + }, } - }]""" + ], + status=200, ) - else: - post_mock = mock.Mock( - return_value=b'[{"generated_text": "the model response"}]' - ) - mock_client_post(client, post_mock) - with start_transaction(name="huggingface_hub tx"): - response = client.text_generation( - prompt="hello", - details=details_arg, - stream=False, + yield rsps + + +@pytest.fixture +def mock_hf_chat_completion_api(): + # type: () -> Any + """Mock HuggingFace chat completion API""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + f"https://huggingface.co/api/models/{model_name}", + json={ + "id": model_name, + "pipeline_tag": "conversational", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "conversational", + } + }, + }, + status=200, ) - if details_arg: - assert response.generated_text == "the model response" - else: - assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.generate_text" - if send_default_pii and include_prompts: - assert "hello" in span["data"]["gen_ai.request.messages"] - assert "the model response" in span["data"]["gen_ai.response.text"] - else: - assert 
"gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] + # Mock chat completion endpoint + rsps.add( + responses.POST, + f"https://router.huggingface.co/hf-inference/models/{model_name}/v1/chat/completions", + json={ + "id": f"{model_name}-123", + "created": 1234567890, + "model": "test-model-123", + "system_fingerprint": "fp_123", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "Hello! How can I help you today?", + }, + "logprobs": None, + } + ], + "usage": { + "completion_tokens": 8, + "prompt_tokens": 10, + "total_tokens": 18, + }, + }, + status=200, + ) - if details_arg: - assert span["data"]["gen_ai.usage.total_tokens"] == 10 + yield rsps -@pytest.mark.parametrize( - "send_default_pii, include_prompts, details_arg", - itertools.product([True, False], repeat=3), -) -def test_streaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts, details_arg -): +def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_api): + # type: (Any, Any, Any) -> None sentry_init( - integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, - send_default_pii=send_default_pii, ) events = capture_events() - client = InferenceClient(model="https://") - - post_mock = mock.Mock( - return_value=[ - b"""data:{ - "token":{"id":1, "special": false, "text": "the model "} - }""", - b"""data:{ - "token":{"id":2, "special": false, "text": "response"}, - "details":{"finish_reason": "length", "generated_tokens": 10, "seed": 0} - }""", - ] - ) - mock_client_post(client, post_mock) - - with start_transaction(name="huggingface_hub tx"): - response = list( - client.text_generation( - prompt="hello", - details=details_arg, - stream=True, - ) - ) - assert len(response) == 2 - if details_arg: - assert response[0].token.text + response[1].token.text == "the model response" - else: - assert response[0] + response[1] == "the model response" + client = InferenceClient(model="test-model") + + with sentry_sdk.start_transaction(name="test_tx"): + response = client.text_generation(prompt="Hello") + + # Verify the response + assert response == "Mocked response" + # Verify Sentry integration worked tx = events[0] - assert tx["type"] == "transaction" span = tx["spans"][0] assert span["op"] == "gen_ai.generate_text" - if send_default_pii and include_prompts: - assert "hello" in span["data"]["gen_ai.request.messages"] - assert "the model response" in span["data"]["gen_ai.response.text"] - else: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] - if details_arg: - assert span["data"]["gen_ai.usage.total_tokens"] == 10 - - -def test_bad_chat_completion(sentry_init, capture_events): - sentry_init(integrations=[HuggingfaceHubIntegration()], traces_sample_rate=1.0) - events = capture_events() - - client = InferenceClient(model="https://") - post_mock = mock.Mock(side_effect=OverloadedError("The server is overloaded")) - mock_client_post(client, post_mock) - - with pytest.raises(OverloadedError): - client.text_generation(prompt="hello") - - (event,) = events - assert event["level"] == "error" - - -def test_span_origin(sentry_init, capture_events): +def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_api): + # type: (Any, Any, Any) -> None sentry_init( - integrations=[HuggingfaceHubIntegration()], traces_sample_rate=1.0, ) events = capture_events() - client = 
InferenceClient(model="https://") - post_mock = mock.Mock( - return_value=[ - b"""data:{ - "token":{"id":1, "special": false, "text": "the model "} - }""", - ] - ) - mock_client_post(client, post_mock) - - with start_transaction(name="huggingface_hub tx"): - list( - client.text_generation( - prompt="hello", - stream=True, - ) - ) + client = InferenceClient(model="test-model") - (event,) = events + # Create a chat-style prompt using text generation + chat_prompt = "Human: Hello\nAssistant:" - assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.huggingface_hub" + with sentry_sdk.start_transaction(name="test_chat_style_tx"): + client.chat_completion( + model="test-model", + messages=[{"role": "user", "content": chat_prompt}], + ) + + tx = events[0] + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["data"] == { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.model": "test-model-123", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } diff --git a/tox.ini b/tox.ini index 335007664a..01f86be521 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ # The file (and all resulting CI YAMLs) then need to be regenerated via # "scripts/generate-test-files.sh". # -# Last generated: 2025-09-05T07:52:27.350774+00:00 +# Last generated: 2025-09-08T07:12:48.167820+00:00 [tox] requires = @@ -128,8 +128,8 @@ envlist = {py3.8,py3.11,py3.12}-openai-notiktoken-v1.71.0 {py3.8,py3.12,py3.13}-openai-notiktoken-v1.106.1 - {py3.9,py3.12,py3.13}-langgraph-v0.6.6 - {py3.10,py3.12,py3.13}-langgraph-v1.0.0a2 + {py3.9,py3.12,py3.13}-langgraph-v0.6.7 + {py3.10,py3.12,py3.13}-langgraph-v1.0.0a3 {py3.10,py3.11,py3.12}-openai_agents-v0.0.19 {py3.10,py3.12,py3.13}-openai_agents-v0.1.0 @@ -146,7 +146,7 @@ envlist = {py3.6,py3.7}-boto3-v1.12.49 {py3.6,py3.9,py3.10}-boto3-v1.20.54 {py3.7,py3.11,py3.12}-boto3-v1.28.85 - {py3.9,py3.12,py3.13}-boto3-v1.40.24 + {py3.9,py3.12,py3.13}-boto3-v1.40.25 {py3.6,py3.7,py3.8}-chalice-v1.16.0 {py3.6,py3.7,py3.8}-chalice-v1.21.9 @@ -205,7 +205,7 @@ envlist = {py3.6,py3.9,py3.10}-gql-v3.4.1 {py3.7,py3.11,py3.12}-gql-v3.5.3 {py3.9,py3.12,py3.13}-gql-v4.0.0 - {py3.9,py3.12,py3.13}-gql-v4.1.0b0 + {py3.9,py3.12,py3.13}-gql-v4.2.0b0 {py3.6,py3.9,py3.10}-graphene-v3.3 {py3.8,py3.12,py3.13}-graphene-v3.4.3 @@ -213,7 +213,7 @@ envlist = {py3.8,py3.10,py3.11}-strawberry-v0.209.8 {py3.8,py3.11,py3.12}-strawberry-v0.233.3 {py3.9,py3.12,py3.13}-strawberry-v0.257.0 - {py3.9,py3.12,py3.13}-strawberry-v0.281.0 + {py3.9,py3.12,py3.13}-strawberry-v0.282.0 # ~~~ Network ~~~ @@ -251,7 +251,7 @@ envlist = {py3.8,py3.9}-spark-v3.0.3 {py3.8,py3.10,py3.11}-spark-v3.5.6 - {py3.9,py3.12,py3.13}-spark-v4.0.0 + {py3.9,py3.12,py3.13}-spark-v4.0.1 # ~~~ Web 1 ~~~ @@ -325,7 +325,7 @@ envlist = {py3.7,py3.12,py3.13}-typer-v0.15.4 {py3.7,py3.12,py3.13}-typer-v0.16.1 - {py3.7,py3.12,py3.13}-typer-v0.17.3 + {py3.7,py3.12,py3.13}-typer-v0.17.4 @@ -515,8 +515,8 @@ deps = openai-notiktoken-v1.0.1: httpx<0.28 openai-notiktoken-v1.36.1: httpx<0.28 - langgraph-v0.6.6: langgraph==0.6.6 - langgraph-v1.0.0a2: langgraph==1.0.0a2 + langgraph-v0.6.7: langgraph==0.6.7 + langgraph-v1.0.0a3: langgraph==1.0.0a3 openai_agents-v0.0.19: openai-agents==0.0.19 openai_agents-v0.1.0: openai-agents==0.1.0 @@ -528,13 +528,14 @@ deps = huggingface_hub-v0.30.2: 
huggingface_hub==0.30.2 huggingface_hub-v0.34.4: huggingface_hub==0.34.4 huggingface_hub-v0.35.0rc0: huggingface_hub==0.35.0rc0 + huggingface_hub: responses # ~~~ Cloud ~~~ boto3-v1.12.49: boto3==1.12.49 boto3-v1.20.54: boto3==1.20.54 boto3-v1.28.85: boto3==1.28.85 - boto3-v1.40.24: boto3==1.40.24 + boto3-v1.40.25: boto3==1.40.25 {py3.7,py3.8}-boto3: urllib3<2.0.0 chalice-v1.16.0: chalice==1.16.0 @@ -601,7 +602,7 @@ deps = gql-v3.4.1: gql[all]==3.4.1 gql-v3.5.3: gql[all]==3.5.3 gql-v4.0.0: gql[all]==4.0.0 - gql-v4.1.0b0: gql[all]==4.1.0b0 + gql-v4.2.0b0: gql[all]==4.2.0b0 graphene-v3.3: graphene==3.3 graphene-v3.4.3: graphene==3.4.3 @@ -614,7 +615,7 @@ deps = strawberry-v0.209.8: strawberry-graphql[fastapi,flask]==0.209.8 strawberry-v0.233.3: strawberry-graphql[fastapi,flask]==0.233.3 strawberry-v0.257.0: strawberry-graphql[fastapi,flask]==0.257.0 - strawberry-v0.281.0: strawberry-graphql[fastapi,flask]==0.281.0 + strawberry-v0.282.0: strawberry-graphql[fastapi,flask]==0.282.0 strawberry: httpx strawberry-v0.209.8: pydantic<2.11 strawberry-v0.233.3: pydantic<2.11 @@ -667,7 +668,7 @@ deps = spark-v3.0.3: pyspark==3.0.3 spark-v3.5.6: pyspark==3.5.6 - spark-v4.0.0: pyspark==4.0.0 + spark-v4.0.1: pyspark==4.0.1 # ~~~ Web 1 ~~~ @@ -810,7 +811,7 @@ deps = typer-v0.15.4: typer==0.15.4 typer-v0.16.1: typer==0.16.1 - typer-v0.17.3: typer==0.17.3 + typer-v0.17.4: typer==0.17.4 From ddc622a98ec465b7eab380b5b461ce77fcd2b8b9 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 12:30:04 +0200 Subject: [PATCH 13/43] cleanup --- .../huggingface_hub/test_huggingface_hub.py | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 192b1eead2..48abe1aaf4 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -117,45 +117,43 @@ def mock_hf_chat_completion_api(): def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_api): # type: (Any, Any, Any) -> None - sentry_init( - traces_sample_rate=1.0, - ) + sentry_init(traces_sample_rate=1.0) events = capture_events() client = InferenceClient(model="test-model") - with sentry_sdk.start_transaction(name="test_tx"): - response = client.text_generation(prompt="Hello") + with sentry_sdk.start_transaction(name="test"): + client.text_generation(prompt="Hello") - # Verify the response - assert response == "Mocked response" + (transaction,) = events + (span,) = transaction["spans"] - # Verify Sentry integration worked - tx = events[0] - span = tx["spans"][0] assert span["op"] == "gen_ai.generate_text" + assert span["description"] == "generate_text test-model" + assert span["data"] == { + "gen_ai.operation.name": "generate_text", + "gen_ai.request.model": "test-model", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_api): # type: (Any, Any, Any) -> None - sentry_init( - traces_sample_rate=1.0, - ) + sentry_init(traces_sample_rate=1.0) events = capture_events() - client = InferenceClient(model="test-model") - - # Create a chat-style prompt using text generation - chat_prompt = "Human: Hello\nAssistant:" + client = InferenceClient() - with sentry_sdk.start_transaction(name="test_chat_style_tx"): + with sentry_sdk.start_transaction(name="test"): client.chat_completion( model="test-model", - messages=[{"role": "user", 
"content": chat_prompt}], + messages=[{"role": "user", "content": "Hello!"}], ) - tx = events[0] - span = tx["spans"][0] + (transaction,) = events + (span,) = transaction["spans"] + assert span["op"] == "gen_ai.chat" assert span["description"] == "chat test-model" assert span["data"] == { From b9a63da0869877cd1df71416d1c7bed87110ca22 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 13:35:56 +0200 Subject: [PATCH 14/43] asserts --- .../huggingface_hub/test_huggingface_hub.py | 46 +++++++++++-------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 48abe1aaf4..6db5ac86a4 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -41,17 +41,15 @@ def mock_hf_text_generation_api(): rsps.add( responses.POST, f"https://router.huggingface.co/hf-inference/models/{model_name}", - json=[ - { - "generated_text": "Mocked response", - "details": { - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [], - "tokens": [], - }, - } - ], + json={ + "generated_text": "Mocked response", + "details": { + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [], + "tokens": [], + }, + }, status=200, ) @@ -88,9 +86,9 @@ def mock_hf_chat_completion_api(): responses.POST, f"https://router.huggingface.co/hf-inference/models/{model_name}/v1/chat/completions", json={ - "id": f"{model_name}-123", + "id": "xyz-123", "created": 1234567890, - "model": "test-model-123", + "model": f"{model_name}-123", "system_fingerprint": "fp_123", "choices": [ { @@ -100,7 +98,7 @@ def mock_hf_chat_completion_api(): "role": "assistant", "content": "Hello! 
How can I help you today?", }, - "logprobs": None, + # "logprobs": None, } ], "usage": { @@ -120,10 +118,16 @@ def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_ap sentry_init(traces_sample_rate=1.0) events = capture_events() - client = InferenceClient(model="test-model") + client = InferenceClient( + model="test-model", + ) with sentry_sdk.start_transaction(name="test"): - client.text_generation(prompt="Hello") + client.text_generation( + prompt="Hello", + stream=False, + details=True, + ) (transaction,) = events (span,) = transaction["spans"] @@ -133,6 +137,9 @@ def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_ap assert span["data"] == { "gen_ai.operation.name": "generate_text", "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": False, + "gen_ai.usage.total_tokens": 10, "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -143,12 +150,14 @@ def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_ap sentry_init(traces_sample_rate=1.0) events = capture_events() - client = InferenceClient() + client = InferenceClient( + model="test-model", + ) with sentry_sdk.start_transaction(name="test"): client.chat_completion( - model="test-model", messages=[{"role": "user", "content": "Hello!"}], + stream=False, ) (transaction,) = events @@ -160,6 +169,7 @@ def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_ap "gen_ai.operation.name": "chat", "gen_ai.request.model": "test-model", "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": False, "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 8, "gen_ai.usage.total_tokens": 18, From 881d74b8981f102eec6916b07ab13a0e8cea31eb Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 13:40:44 +0200 Subject: [PATCH 15/43] finish reason --- sentry_sdk/integrations/huggingface_hub.py | 14 ++++++++++---- .../huggingface_hub/test_huggingface_hub.py | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 618af608c5..e1e7a2058e 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -130,12 +130,18 @@ def new_huggingface_task(*args, **kwargs): if model: span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model) + finish_reason = None if hasattr(res, "details") and res.details is not None: finish_reason = getattr(res.details, "finish_reason", None) - if finish_reason: - span.set_data( - SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason - ) + + if finish_reason is None: + try: + finish_reason = res.choices[0].finish_reason + except Exception: + pass + + if finish_reason: + span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason) try: tool_calls = res.choices[0].message.tool_calls diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 6db5ac86a4..2355a40e3e 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -98,7 +98,6 @@ def mock_hf_chat_completion_api(): "role": "assistant", "content": "Hello! 
How can I help you today?",
                         },
-                        # "logprobs": None,
                     }
                 ],
                 "usage": {
@@ -168,6 +167,7 @@ def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_ap
     assert span["data"] == {
         "gen_ai.operation.name": "chat",
         "gen_ai.request.model": "test-model",
+        "gen_ai.response.finish_reasons": "stop",
         "gen_ai.response.model": "test-model-123",
         "gen_ai.response.streaming": False,

From fa2cb56586d29665b08d850ed47ad09b022b33e8 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 8 Sep 2025 14:03:08 +0200
Subject: [PATCH 16/43] assert

---
 tests/integrations/huggingface_hub/test_huggingface_hub.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index 2355a40e3e..1b17a1f009 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -142,6 +142,8 @@ def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_ap
         "thread.id": mock.ANY,
         "thread.name": mock.ANY,
     }
+    # text generation does not set the response model
+    assert "gen_ai.response.model" not in span["data"]
 

From 893d1ffee471f686463608e3bc84c9ef84566f41 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 8 Sep 2025 14:25:43 +0200
Subject: [PATCH 17/43] old versions

---
 .../huggingface_hub/test_huggingface_hub.py   | 21 +++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index 1b17a1f009..7c4f61128c 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -5,6 +5,7 @@
 from huggingface_hub import InferenceClient
 
 import sentry_sdk
+from sentry_sdk.utils import package_version
 
 from typing import TYPE_CHECKING
 
@@ -12,6 +13,18 @@
     from typing import Any
 
 
+HF_VERSION = package_version("huggingface-hub")
+
+if HF_VERSION and HF_VERSION < (0, 30, 0):
+    MODEL_ENDPOINT = "https://api-inference.huggingface.co/models/{model_name}"
+    INFERENCE_ENDPOINT = "https://api-inference.huggingface.co/models/{model_name}"
+else:
+    MODEL_ENDPOINT = "https://huggingface.co/api/models/{model_name}"
+    INFERENCE_ENDPOINT = (
+        "https://router.huggingface.co/hf-inference/models/{model_name}"
+    )
+
+
 @pytest.fixture
 def mock_hf_text_generation_api():
     # type: () -> Any
@@ -22,7 +35,7 @@ def mock_hf_text_generation_api():
         # Mock model info endpoint
         rsps.add(
             responses.GET,
-            f"https://huggingface.co/api/models/{model_name}",
+            MODEL_ENDPOINT.format(model_name=model_name),
             json={
                 "id": model_name,
                 "pipeline_tag": "text-generation",
@@ -40,7 +53,7 @@ def mock_hf_text_generation_api():
         # Mock text generation endpoint
         rsps.add(
             responses.POST,
-            f"https://router.huggingface.co/hf-inference/models/{model_name}",
+            INFERENCE_ENDPOINT.format(model_name=model_name),
             json={
                 "generated_text": "Mocked response",
@@ -79,7 +92,7 @@ def mock_hf_chat_completion_api():
         # Mock model info endpoint
         rsps.add(
             responses.GET,
-            f"https://huggingface.co/api/models/{model_name}",
+            MODEL_ENDPOINT.format(model_name=model_name),
             json={
                 "id": model_name,
                 "pipeline_tag": "conversational",
@@ -97,7 +110,7 @@ def mock_hf_chat_completion_api():
         # Mock chat completion endpoint
         rsps.add(
             responses.POST,
-            
f"https://router.huggingface.co/hf-inference/models/{model_name}/v1/chat/completions", + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", json={ "id": "xyz-123", "created": 1234567890, From 44d0a9e8fa1fc0831119024ec2368bc40250124e Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 14:42:51 +0200 Subject: [PATCH 18/43] bump --- sentry_sdk/integrations/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py index 7f202221a7..af5db856b2 100644 --- a/sentry_sdk/integrations/__init__.py +++ b/sentry_sdk/integrations/__init__.py @@ -141,7 +141,7 @@ def iter_default_integrations(with_auto_enabling_integrations): "gql": (3, 4, 1), "graphene": (3, 3), "grpc": (1, 32, 0), # grpcio - "huggingface_hub": (0, 22), + "huggingface_hub": (0, 23), "langchain": (0, 1, 0), "langgraph": (0, 6, 6), "launchdarkly": (9, 8, 0), From 3855f56653fc21d2959d368af41933479912f5eb Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 14:55:22 +0200 Subject: [PATCH 19/43] bump --- sentry_sdk/integrations/__init__.py | 2 +- tox.ini | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py index af5db856b2..2f5a1f397e 100644 --- a/sentry_sdk/integrations/__init__.py +++ b/sentry_sdk/integrations/__init__.py @@ -141,7 +141,7 @@ def iter_default_integrations(with_auto_enabling_integrations): "gql": (3, 4, 1), "graphene": (3, 3), "grpc": (1, 32, 0), # grpcio - "huggingface_hub": (0, 23), + "huggingface_hub": (0, 24, 7), "langchain": (0, 1, 0), "langgraph": (0, 6, 6), "launchdarkly": (9, 8, 0), diff --git a/tox.ini b/tox.ini index 326bdc431e..6026278fb9 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ # The file (and all resulting CI YAMLs) then need to be regenerated via # "scripts/generate-test-files.sh". 
# -# Last generated: 2025-09-08T11:35:09.849536+00:00 +# Last generated: 2025-09-08T12:54:55.709539+00:00 [tox] requires = @@ -130,8 +130,8 @@ envlist = {py3.10,py3.12,py3.13}-openai_agents-v0.1.0 {py3.10,py3.12,py3.13}-openai_agents-v0.2.11 - {py3.8,py3.10,py3.11}-huggingface_hub-v0.22.2 - {py3.8,py3.11,py3.12}-huggingface_hub-v0.26.5 + {py3.8,py3.10,py3.11}-huggingface_hub-v0.24.7 + {py3.8,py3.12,py3.13}-huggingface_hub-v0.27.1 {py3.8,py3.12,py3.13}-huggingface_hub-v0.30.2 {py3.8,py3.12,py3.13}-huggingface_hub-v0.34.4 {py3.8,py3.12,py3.13}-huggingface_hub-v0.35.0rc0 @@ -509,8 +509,8 @@ deps = openai_agents-v0.2.11: openai-agents==0.2.11 openai_agents: pytest-asyncio - huggingface_hub-v0.22.2: huggingface_hub==0.22.2 - huggingface_hub-v0.26.5: huggingface_hub==0.26.5 + huggingface_hub-v0.24.7: huggingface_hub==0.24.7 + huggingface_hub-v0.27.1: huggingface_hub==0.27.1 huggingface_hub-v0.30.2: huggingface_hub==0.30.2 huggingface_hub-v0.34.4: huggingface_hub==0.34.4 huggingface_hub-v0.35.0rc0: huggingface_hub==0.35.0rc0 From e45a9c7f8b88d8a731b31d168eabd6bfd11699c6 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 15:14:26 +0200 Subject: [PATCH 20/43] pii testing --- .../huggingface_hub/test_huggingface_hub.py | 53 +++++++++++++++---- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 7c4f61128c..dfa423371a 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -55,7 +55,7 @@ def mock_hf_text_generation_api(): responses.POST, INFERENCE_ENDPOINT.format(model_name=model_name), json={ - "generated_text": "Mocked response", + "generated_text": "[mocked] Hello! How can i help you?", "details": { "finish_reason": "length", "generated_tokens": 10, @@ -109,7 +109,7 @@ def mock_hf_chat_completion_api(): "finish_reason": "stop", "message": { "role": "assistant", - "content": "Hello! How can I help you today?", + "content": "[mocked] Hello! How can I help you today?", }, } ], @@ -125,9 +125,12 @@ def mock_hf_chat_completion_api(): yield rsps -def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_api): - # type: (Any, Any, Any) -> None - sentry_init(traces_sample_rate=1.0) +@pytest.mark.parametrize("send_default_pii", [True, False]) +def test_text_generation( + sentry_init, capture_events, send_default_pii, mock_hf_text_generation_api +): + # type: (Any, Any, Any, Any) -> None + sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) events = capture_events() client = InferenceClient( @@ -146,7 +149,8 @@ def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_ap assert span["op"] == "gen_ai.generate_text" assert span["description"] == "generate_text test-model" - assert span["data"] == { + + expected_data = { "gen_ai.operation.name": "generate_text", "gen_ai.request.model": "test-model", "gen_ai.response.finish_reasons": "length", @@ -155,13 +159,27 @@ def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_ap "thread.id": mock.ANY, "thread.name": mock.ANY, } + + if send_default_pii: + expected_data["gen_ai.request.messages"] = "Hello" + expected_data["gen_ai.response.text"] = "[mocked] Hello! How can i help you?" 
+
+    if not send_default_pii:
+        assert "gen_ai.request.messages" not in expected_data
+        assert "gen_ai.response.text" not in expected_data
+
+    assert span["data"] == expected_data
+
     # text generation does not set the response model
     assert "gen_ai.response.model" not in span["data"]


-def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_api):
-    # type: (Any, Any, Any) -> None
-    sentry_init(traces_sample_rate=1.0)
+@pytest.mark.parametrize("send_default_pii", [True, False])
+def test_chat_completion(
+    sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api
+):
+    # type: (Any, Any, Any, Any) -> None
+    sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii)
     events = capture_events()
 
     client = InferenceClient(
@@ -179,7 +197,8 @@ def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_ap
 
     assert span["op"] == "gen_ai.chat"
     assert span["description"] == "chat test-model"
-    assert span["data"] == {
+
+    expected_data = {
         "gen_ai.operation.name": "chat",
         "gen_ai.request.model": "test-model",
         "gen_ai.response.finish_reasons": "stop",
@@ -191,3 +210,17 @@ def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_ap
         "thread.id": mock.ANY,
         "thread.name": mock.ANY,
     }
+
+    if send_default_pii:
+        expected_data["gen_ai.request.messages"] = (
+            '[{"role": "user", "content": "Hello!"}]'
+        )
+        expected_data["gen_ai.response.text"] = (
+            "[mocked] Hello! How can I help you today?"
+        )
+
+    if not send_default_pii:
+        assert "gen_ai.request.messages" not in expected_data
+        assert "gen_ai.response.text" not in expected_data
+
+    assert span["data"] == expected_data

From cce5d8512781bcea8235d0441188ed34379c3add Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 8 Sep 2025 17:34:12 +0200
Subject: [PATCH 21/43] streaming text generation response

---
 sentry_sdk/integrations/huggingface_hub.py    |  9 ++
 .../huggingface_hub/test_huggingface_hub.py   | 93 +++++++++++++++++++
 2 files changed, 102 insertions(+)

diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py
index e1e7a2058e..214dc21210 100644
--- a/sentry_sdk/integrations/huggingface_hub.py
+++ b/sentry_sdk/integrations/huggingface_hub.py
@@ -230,7 +230,16 @@ def new_details_iterator():
                             x.details, "generated_tokens"
                         ):
                             tokens_used = x.details.generated_tokens
+                        if hasattr(x, "details") and hasattr(
+                            x.details, "finish_reason"
+                        ):
+                            span.set_data(
+                                SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS,
+                                x.details.finish_reason,
+                            )
+
                         yield x
+
                         if (
                             len(data_buf) > 0
                             and should_send_default_pii()
diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index dfa423371a..db655b525b 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -69,6 +69,49 @@ def mock_hf_text_generation_api():
         yield rsps
 
 
+@pytest.fixture
+def mock_hf_text_generation_api_streaming():
+    # type: () -> Any
+    """Mock streaming HuggingFace text generation API"""
+    with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
+        model_name = "test-model"
+
+        # Mock model info endpoint
+        rsps.add(
+            responses.GET,
+            MODEL_ENDPOINT.format(model_name=model_name),
+            json={
+                "id": model_name,
+                "pipeline_tag": "text-generation",
+                "inferenceProviderMapping": {
+                    "hf-inference": {
+                        "status": "live",
+                        "providerId": model_name,
+                        "task": "text-generation",
+                    }
+                },
+            },
+            status=200,
+        )
+
+        # Mock text 
generation endpoint for streaming + streaming_response = b'data:{"token":{"id":1, "special": false, "text": "the mocked "}}\n\ndata:{"token":{"id":2, "special": false, "text": "model response"}, "details":{"finish_reason": "length", "generated_tokens": 10, "seed": 0}}\n\n' + + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name), + body=streaming_response, + status=200, + headers={ + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + + yield rsps + + @pytest.fixture def mock_hf_chat_completion_api(): # type: () -> Any @@ -174,6 +217,56 @@ def test_text_generation( assert "gen_ai.response.model" not in span["data"] +@pytest.mark.parametrize("send_default_pii", [True, False]) +def test_text_generation_streaming( + sentry_init, capture_events, send_default_pii, mock_hf_text_generation_api_streaming +): + # type: (Any, Any, Any, Any) -> None + sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + events = capture_events() + + client = InferenceClient( + model="test-model", + ) + + with sentry_sdk.start_transaction(name="test"): + for _ in client.text_generation( + prompt="Hello", + stream=True, + details=True, + ): + pass + + (transaction,) = events + (span,) = transaction["spans"] + + assert span["op"] == "gen_ai.generate_text" + assert span["description"] == "generate_text test-model" + + expected_data = { + "gen_ai.operation.name": "generate_text", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii: + expected_data["gen_ai.request.messages"] = "Hello" + expected_data["gen_ai.response.text"] = "the mocked model response" + + if not send_default_pii: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["data"] == expected_data + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["data"] + + @pytest.mark.parametrize("send_default_pii", [True, False]) def test_chat_completion( sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api From 07d611c3256b53be531e125eb5c7bf662b92c36d Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 20:10:56 +0200 Subject: [PATCH 22/43] chat completion streaming --- sentry_sdk/integrations/huggingface_hub.py | 51 +++++++-- .../huggingface_hub/test_huggingface_hub.py | 100 ++++++++++++++++++ 2 files changed, 141 insertions(+), 10 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 214dc21210..0eb51904db 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -1,6 +1,5 @@ from functools import wraps - from typing import Any, Iterable, Callable import sentry_sdk @@ -17,7 +16,11 @@ try: import huggingface_hub.inference._client - from huggingface_hub import ChatCompletionOutput, TextGenerationOutput + from huggingface_hub import ( + ChatCompletionOutput, + TextGenerationOutput, + ChatCompletionStreamOutput, + ) except ImportError: raise DidNotEnable("Huggingface not installed") @@ -217,7 +220,7 @@ def new_huggingface_task(*args, **kwargs): return res if kwargs.get("details", False): - + # text-generation stream output def new_details_iterator(): # type: () -> Iterable[Any] with capture_internal_exceptions(): @@ -257,20 
+260,47 @@ def new_details_iterator(): span, total_tokens=tokens_used, ) + span.__exit__(None, None, None) return new_details_iterator() else: - # res is Iterable[str] - + # chat-completion stream output def new_iterator(): # type: () -> Iterable[str] - data_buf: list[str] = [] with capture_internal_exceptions(): - for s in res: - if isinstance(s, str): - data_buf.append(s) - yield s + data_buf: list[str] = [] + for chunk in res: + if isinstance(chunk, ChatCompletionStreamOutput): + for choice in chunk.choices: + data_buf.append(choice.delta.content) + + if ( + hasattr(choice, "finish_reason") + and choice.finish_reason is not None + ): + span.set_data( + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, + choice.finish_reason, + ) + if hasattr(chunk, "model") and chunk.model is not None: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_MODEL, chunk.model + ) + + if hasattr(chunk, "usage") and chunk.usage is not None: + record_token_usage( + span, + input_tokens=chunk.usage.prompt_tokens, + output_tokens=chunk.usage.completion_tokens, + total_tokens=chunk.usage.total_tokens, + ) + + elif isinstance(chunk, str): + data_buf.append(chunk) + + yield chunk + if ( len(data_buf) > 0 and should_send_default_pii() @@ -283,6 +313,7 @@ def new_iterator(): SPANDATA.GEN_AI_RESPONSE_TEXT, text_response, ) + span.__exit__(None, None, None) return new_iterator() diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index db655b525b..02a9a32b3d 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -168,6 +168,53 @@ def mock_hf_chat_completion_api(): yield rsps +@pytest.fixture +def mock_hf_chat_completion_api_streaming(): + # type: () -> Any + """Mock streaming HuggingFace chat completion API""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={ + "id": model_name, + "pipeline_tag": "conversational", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "conversational", + } + }, + }, + status=200, + ) + + # Mock chat completion streaming endpoint + streaming_chat_response = ( + b'data:{"id":"xyz-123","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"the mocked "},"index":0,"finish_reason":null}],"usage":null}\n\n' + b'data:{"id":"xyz-124","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"model response"},"index":0,"finish_reason":"stop"}],"usage":{"prompt_tokens":183,"completion_tokens":14,"total_tokens":197}}\n\n' + # b'data:[DONE]\n\n' + ) + + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + body=streaming_chat_response, + status=200, + headers={ + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + + yield rsps + + @pytest.mark.parametrize("send_default_pii", [True, False]) def test_text_generation( sentry_init, capture_events, send_default_pii, mock_hf_text_generation_api @@ -317,3 +364,56 @@ def test_chat_completion( assert "gen_ai.response.text" not in expected_data assert span["data"] == expected_data + + +@pytest.mark.parametrize("send_default_pii", [True, False]) 
+def test_chat_completion_streaming(
+    sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api_streaming
+):
+    # type: (Any, Any, Any, Any) -> None
+    sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii)
+    events = capture_events()
+
+    client = InferenceClient(
+        model="test-model",
+    )
+
+    with sentry_sdk.start_transaction(name="test"):
+        response = client.chat_completion(
+            messages=[{"role": "user", "content": "Hello!"}],
+            stream=True,
+        )
+
+        # consume the stream so the span is finished
+        for _ in response:
+            pass
+
+    (transaction,) = events
+    (span,) = transaction["spans"]
+
+    assert span["op"] == "gen_ai.chat"
+    assert span["description"] == "chat test-model"
+
+    expected_data = {
+        "gen_ai.operation.name": "chat",
+        "gen_ai.request.model": "test-model",
+        "gen_ai.response.finish_reasons": "stop",
+        "gen_ai.response.model": "test-model-123",
+        "gen_ai.response.streaming": True,
+        "gen_ai.usage.input_tokens": 183,
+        "gen_ai.usage.output_tokens": 14,
+        "gen_ai.usage.total_tokens": 197,
+        "thread.id": mock.ANY,
+        "thread.name": mock.ANY,
+    }
+
+    if send_default_pii:
+        expected_data["gen_ai.request.messages"] = (
+            '[{"role": "user", "content": "Hello!"}]'
+        )
+        expected_data["gen_ai.response.text"] = "the mocked model response"
+
+    if not send_default_pii:
+        assert "gen_ai.request.messages" not in expected_data
+        assert "gen_ai.response.text" not in expected_data
+
+    assert span["data"] == expected_data

From 8d54b811764be00fb033b628fe3da182cc4789aa Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 8 Sep 2025 20:12:02 +0200
Subject: [PATCH 23/43] cleanup

---
 tests/integrations/huggingface_hub/test_huggingface_hub.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index 02a9a32b3d..1a79f569d1 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -197,7 +197,6 @@ def mock_hf_chat_completion_api_streaming():
     streaming_chat_response = (
         b'data:{"id":"xyz-123","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"the mocked "},"index":0,"finish_reason":null}],"usage":null}\n\n'
         b'data:{"id":"xyz-124","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"model response"},"index":0,"finish_reason":"stop"}],"usage":{"prompt_tokens":183,"completion_tokens":14,"total_tokens":197}}\n\n'
-        # b'data:[DONE]\n\n'
     )

From d8c89a37cefd7b0cca22bc3364ce4c828cdc01fc Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 8 Sep 2025 20:23:44 +0200
Subject: [PATCH 24/43] make it work with older huggingface sdk

---
 .../integrations/huggingface_hub/test_huggingface_hub.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index 1a79f569d1..7131459bfd 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -398,12 +398,14 @@ def test_chat_completion_streaming(
         "gen_ai.response.finish_reasons": "stop",
         "gen_ai.response.model": "test-model-123",
         "gen_ai.response.streaming": True,
-        "gen_ai.usage.input_tokens": 183,
-        "gen_ai.usage.output_tokens": 14,
-        "gen_ai.usage.total_tokens": 197,
         "thread.id": mock.ANY,
         "thread.name": mock.ANY,
    }
+    # 
usage is not available in older versions of the library + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_data["gen_ai.usage.input_tokens"] = (183,) + expected_data["gen_ai.usage.output_tokens"] = (14,) + expected_data["gen_ai.usage.total_tokens"] = (197,) if send_default_pii: expected_data["gen_ai.request.messages"] = ( From f4f6d6dbe7e3020b1623f4ebde57c94ed20e866f Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 20:24:00 +0200 Subject: [PATCH 25/43] make it work with older huggingface sdk --- tests/integrations/huggingface_hub/test_huggingface_hub.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 7131459bfd..8753ab847d 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -403,9 +403,9 @@ def test_chat_completion_streaming( } # usage is not available in older versions of the library if HF_VERSION and HF_VERSION >= (0, 26, 0): - expected_data["gen_ai.usage.input_tokens"] = (183,) - expected_data["gen_ai.usage.output_tokens"] = (14,) - expected_data["gen_ai.usage.total_tokens"] = (197,) + expected_data["gen_ai.usage.input_tokens"] = 183 + expected_data["gen_ai.usage.output_tokens"] = 14 + expected_data["gen_ai.usage.total_tokens"] = 197 if send_default_pii: expected_data["gen_ai.request.messages"] = ( From e5ffe0c8d9cec917eb25dc6257122d67ed78b02a Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 20:51:07 +0200 Subject: [PATCH 26/43] testing for error --- .../huggingface_hub/test_huggingface_hub.py | 101 +++++++++++++++--- 1 file changed, 89 insertions(+), 12 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 8753ab847d..537be1c5b8 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -2,6 +2,7 @@ import pytest import responses +import huggingface_hub from huggingface_hub import InferenceClient import sentry_sdk @@ -69,6 +70,48 @@ def mock_hf_text_generation_api(): yield rsps +@pytest.fixture +def mock_hf_api_with_errors(): + # type: () -> Any + """Mock HuggingFace API that always raises errors for any request""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint with error + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={"error": "Model not found"}, + status=404, + ) + + # Mock text generation endpoint with error + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name), + json={"error": "Internal server error", "message": "Something went wrong"}, + status=500, + ) + + # Mock chat completion endpoint with error + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + json={"error": "Service unavailable", "message": "Chat completion failed"}, + status=503, + ) + + # Catch-all pattern for any other model requests + rsps.add( + responses.GET, + "https://huggingface.co/api/models/test-model-error", + json={"error": "Generic model error"}, + status=500, + ) + + yield rsps + + @pytest.fixture def mock_hf_text_generation_api_streaming(): # type: () -> Any @@ -222,9 +265,7 @@ def test_text_generation( sentry_init(traces_sample_rate=1.0, 
send_default_pii=send_default_pii) events = capture_events() - client = InferenceClient( - model="test-model", - ) + client = InferenceClient(model="test-model") with sentry_sdk.start_transaction(name="test"): client.text_generation( @@ -271,9 +312,7 @@ def test_text_generation_streaming( sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) events = capture_events() - client = InferenceClient( - model="test-model", - ) + client = InferenceClient(model="test-model") with sentry_sdk.start_transaction(name="test"): for _ in client.text_generation( @@ -321,9 +360,7 @@ def test_chat_completion( sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) events = capture_events() - client = InferenceClient( - model="test-model", - ) + client = InferenceClient(model="test-model") with sentry_sdk.start_transaction(name="test"): client.chat_completion( @@ -373,9 +410,7 @@ def test_chat_completion_streaming( sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) events = capture_events() - client = InferenceClient( - model="test-model", - ) + client = InferenceClient(model="test-model") with sentry_sdk.start_transaction(name="test"): response = client.chat_completion( @@ -418,3 +453,45 @@ def test_chat_completion_streaming( assert "gen_ai.response.text" not in expected_data assert span["data"] == expected_data + + +def test_chat_completion_api_error( + sentry_init, capture_events, mock_hf_api_with_errors +): + # type: (Any, Any, Any) -> None + sentry_init(traces_sample_rate=1.0) + events = capture_events() + + client = InferenceClient(model="test-model") + + with sentry_sdk.start_transaction(name="test"): + with pytest.raises(huggingface_hub.errors.HfHubHTTPError): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + ) + + ( + error, + transaction, + ) = events + + assert error["exception"]["values"][0]["mechanism"]["type"] == "huggingface_hub" + assert not error["exception"]["values"][0]["mechanism"]["handled"] + + (span,) = transaction["spans"] + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span.get("tags", {}).get("status") == "error" + + assert ( + error["contexts"]["trace"]["trace_id"] + == transaction["contexts"]["trace"]["trace_id"] + ) + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + assert span["data"] == expected_data From c8e62df5a0dd7b6bfc17963fdc53dbe16aa14ad3 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 10:25:40 +0200 Subject: [PATCH 27/43] fixed hanging test --- .../huggingface_hub/test_huggingface_hub.py | 4 ++-- tox.ini | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 537be1c5b8..4b2e9e5711 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -97,8 +97,8 @@ def mock_hf_api_with_errors(): rsps.add( responses.POST, INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", - json={"error": "Service unavailable", "message": "Chat completion failed"}, - status=503, + json={"error": "Internal server error", "message": "Something went wrong"}, + status=500, ) # Catch-all pattern for any other model requests diff --git a/tox.ini b/tox.ini index 6026278fb9..1bc9757b9a 100644 --- a/tox.ini 
+++ b/tox.ini @@ -10,7 +10,7 @@ # The file (and all resulting CI YAMLs) then need to be regenerated via # "scripts/generate-test-files.sh". # -# Last generated: 2025-09-08T12:54:55.709539+00:00 +# Last generated: 2025-09-09T08:24:12.875177+00:00 [tox] requires = @@ -116,12 +116,12 @@ envlist = {py3.8,py3.11,py3.12}-openai-base-v1.0.1 {py3.8,py3.11,py3.12}-openai-base-v1.36.1 {py3.8,py3.11,py3.12}-openai-base-v1.71.0 - {py3.8,py3.12,py3.13}-openai-base-v1.106.1 + {py3.8,py3.12,py3.13}-openai-base-v1.107.0 {py3.8,py3.11,py3.12}-openai-notiktoken-v1.0.1 {py3.8,py3.11,py3.12}-openai-notiktoken-v1.36.1 {py3.8,py3.11,py3.12}-openai-notiktoken-v1.71.0 - {py3.8,py3.12,py3.13}-openai-notiktoken-v1.106.1 + {py3.8,py3.12,py3.13}-openai-notiktoken-v1.107.0 {py3.9,py3.12,py3.13}-langgraph-v0.6.7 {py3.10,py3.12,py3.13}-langgraph-v1.0.0a3 @@ -141,7 +141,7 @@ envlist = {py3.6,py3.7}-boto3-v1.12.49 {py3.6,py3.9,py3.10}-boto3-v1.20.54 {py3.7,py3.11,py3.12}-boto3-v1.28.85 - {py3.9,py3.12,py3.13}-boto3-v1.40.25 + {py3.9,py3.12,py3.13}-boto3-v1.40.26 {py3.6,py3.7,py3.8}-chalice-v1.16.0 {py3.6,py3.7,py3.8}-chalice-v1.21.9 @@ -487,7 +487,7 @@ deps = openai-base-v1.0.1: openai==1.0.1 openai-base-v1.36.1: openai==1.36.1 openai-base-v1.71.0: openai==1.71.0 - openai-base-v1.106.1: openai==1.106.1 + openai-base-v1.107.0: openai==1.107.0 openai-base: pytest-asyncio openai-base: tiktoken openai-base-v1.0.1: httpx<0.28 @@ -496,7 +496,7 @@ deps = openai-notiktoken-v1.0.1: openai==1.0.1 openai-notiktoken-v1.36.1: openai==1.36.1 openai-notiktoken-v1.71.0: openai==1.71.0 - openai-notiktoken-v1.106.1: openai==1.106.1 + openai-notiktoken-v1.107.0: openai==1.107.0 openai-notiktoken: pytest-asyncio openai-notiktoken-v1.0.1: httpx<0.28 openai-notiktoken-v1.36.1: httpx<0.28 @@ -521,7 +521,7 @@ deps = boto3-v1.12.49: boto3==1.12.49 boto3-v1.20.54: boto3==1.20.54 boto3-v1.28.85: boto3==1.28.85 - boto3-v1.40.25: boto3==1.40.25 + boto3-v1.40.26: boto3==1.40.26 {py3.7,py3.8}-boto3: urllib3<2.0.0 chalice-v1.16.0: chalice==1.16.0 From 06f3746e5a08bc8a39609edd509b5b33381610b0 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 10:38:45 +0200 Subject: [PATCH 28/43] fix test --- .../integrations/huggingface_hub/test_huggingface_hub.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 4b2e9e5711..dd0f5ad333 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -2,7 +2,6 @@ import pytest import responses -import huggingface_hub from huggingface_hub import InferenceClient import sentry_sdk @@ -10,6 +9,12 @@ from typing import TYPE_CHECKING +try: + from huggingface_hub.utils._errors import HfHubHTTPError +except ImportError: + from huggingface_hub.errors import HfHubHTTPError + + if TYPE_CHECKING: from typing import Any @@ -465,7 +470,7 @@ def test_chat_completion_api_error( client = InferenceClient(model="test-model") with sentry_sdk.start_transaction(name="test"): - with pytest.raises(huggingface_hub.errors.HfHubHTTPError): + with pytest.raises(HfHubHTTPError): client.chat_completion( messages=[{"role": "user", "content": "Hello!"}], ) From 711cf515301bffeacf460a5276ec07929b8b2f0e Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 11:13:59 +0200 Subject: [PATCH 29/43] Tool calls test --- .../huggingface_hub/test_huggingface_hub.py | 133 ++++++++++++++++++ 1 file changed, 133 
insertions(+) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index dd0f5ad333..961196dbda 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -216,6 +216,71 @@ def mock_hf_chat_completion_api(): yield rsps +@pytest.fixture +def mock_hf_chat_completion_api_tools(): + # type: () -> Any + """Mock HuggingFace chat completion API""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={ + "id": model_name, + "pipeline_tag": "conversational", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "conversational", + } + }, + }, + status=200, + ) + + # Mock chat completion endpoint + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + json={ + "id": "xyz-123", + "created": 1234567890, + "model": f"{model_name}-123", + "system_fingerprint": "fp_123", + "choices": [ + { + "index": 0, + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "tool_calls": [ + { + "id": "call_123", + "type": "function", + "function": { + "name": "get_weather", + "arguments": {"location": "Paris"}, + }, + } + ], + }, + } + ], + "usage": { + "completion_tokens": 8, + "prompt_tokens": 10, + "total_tokens": 18, + }, + }, + status=200, + ) + + yield rsps + + @pytest.fixture def mock_hf_chat_completion_api_streaming(): # type: () -> Any @@ -500,3 +565,71 @@ def test_chat_completion_api_error( "thread.name": mock.ANY, } assert span["data"] == expected_data + + +@pytest.mark.parametrize("send_default_pii", [True, False]) +def test_chat_completion_with_tools( + sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api_tools +): + # type: (Any, Any, Any, Any) -> None + sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + events = capture_events() + + client = InferenceClient(model="test-model") + + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"], + }, + }, + } + ] + + with sentry_sdk.start_transaction(name="test"): + client.chat_completion( + messages=[{"role": "user", "content": "What is the weather in Paris?"}], + tools=tools, + tool_choice="auto", + ) + + (transaction,) = events + (span,) = transaction["spans"] + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": ' + '"get_weather", "description": "Get current ' + 'weather", "parameters": {"type": "object", ' + '"properties": {"location": {"type": ' + '"string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.tool_calls": '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]', + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "thread.id": mock.ANY, + 
"thread.name": mock.ANY, + } + + if send_default_pii: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + + if not send_default_pii: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["data"] == expected_data From 550234004f9781fb40519f996747bb56f8a8762e Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 11:14:35 +0200 Subject: [PATCH 30/43] Tool calls test --- tests/integrations/huggingface_hub/test_huggingface_hub.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 961196dbda..74650a20b4 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -607,11 +607,7 @@ def test_chat_completion_with_tools( expected_data = { "gen_ai.operation.name": "chat", - "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": ' - '"get_weather", "description": "Get current ' - 'weather", "parameters": {"type": "object", ' - '"properties": {"location": {"type": ' - '"string"}}, "required": ["location"]}}}]', + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', "gen_ai.request.model": "test-model", "gen_ai.response.finish_reasons": "tool_calls", "gen_ai.response.model": "test-model-123", From 00c9727f2b8c8531c0267e0ffca0d8dc7af5a136 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 11:26:47 +0200 Subject: [PATCH 31/43] Some test improvements --- .../integrations/huggingface_hub/test_huggingface_hub.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 74650a20b4..91f9e7e892 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -339,7 +339,7 @@ def test_text_generation( with sentry_sdk.start_transaction(name="test"): client.text_generation( - prompt="Hello", + "Hello", stream=False, details=True, ) @@ -349,6 +349,7 @@ def test_text_generation( assert span["op"] == "gen_ai.generate_text" assert span["description"] == "generate_text test-model" + assert span["origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "generate_text", @@ -397,6 +398,7 @@ def test_text_generation_streaming( assert span["op"] == "gen_ai.generate_text" assert span["description"] == "generate_text test-model" + assert span["origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "generate_text", @@ -443,6 +445,7 @@ def test_chat_completion( assert span["op"] == "gen_ai.chat" assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -496,6 +499,7 @@ def test_chat_completion_streaming( assert span["op"] == "gen_ai.chat" assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -552,6 +556,7 @@ def test_chat_completion_api_error( assert span["op"] == "gen_ai.chat" 
assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" assert span.get("tags", {}).get("status") == "error" assert ( @@ -604,6 +609,7 @@ def test_chat_completion_with_tools( assert span["op"] == "gen_ai.chat" assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", From 81cbbeb478e6c412de375d8e00a73643189bc591 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 11:47:16 +0200 Subject: [PATCH 32/43] tools improvements --- sentry_sdk/integrations/huggingface_hub.py | 72 ++++++++++++------- .../huggingface_hub/test_huggingface_hub.py | 4 +- 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 0eb51904db..413c3c05b4 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -146,24 +146,24 @@ def new_huggingface_task(*args, **kwargs): if finish_reason: span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason) - try: - tool_calls = res.choices[0].message.tool_calls - except Exception: - tool_calls = [] - - if tool_calls is not None and len(tool_calls) > 0: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, - tool_calls, - unpack=False, - ) - if should_send_default_pii() and integration.include_prompts: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False ) + try: + tool_calls = res.choices[0].message.tool_calls + except Exception: + tool_calls = [] + + if tool_calls is not None and len(tool_calls) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + tool_calls, + unpack=False, + ) + if isinstance(res, str): if should_send_default_pii() and integration.include_prompts: if res: @@ -226,22 +226,22 @@ def new_details_iterator(): with capture_internal_exceptions(): tokens_used = 0 data_buf: list[str] = [] - for x in res: - if hasattr(x, "token") and hasattr(x.token, "text"): - data_buf.append(x.token.text) - if hasattr(x, "details") and hasattr( - x.details, "generated_tokens" + for chunk in res: + if hasattr(chunk, "token") and hasattr(chunk.token, "text"): + data_buf.append(chunk.token.text) + if hasattr(chunk, "details") and hasattr( + chunk.details, "generated_tokens" ): - tokens_used = x.details.generated_tokens - if hasattr(x, "details") and hasattr( - x.details, "finish_reason" + tokens_used = chunk.details.generated_tokens + if hasattr(chunk, "details") and hasattr( + chunk.details, "finish_reason" ): span.set_data( SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, - x.details.finish_reason, + chunk.details.finish_reason, ) - yield x + yield chunk if ( len(data_buf) > 0 @@ -273,7 +273,12 @@ def new_iterator(): for chunk in res: if isinstance(chunk, ChatCompletionStreamOutput): for choice in chunk.choices: - data_buf.append(choice.delta.content) + if ( + hasattr(choice, "delta") + and hasattr(choice.delta, "content") + and choice.delta.content is not None + ): + data_buf.append(choice.delta.content) if ( hasattr(choice, "finish_reason") @@ -283,6 +288,22 @@ def new_iterator(): SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, choice.finish_reason, ) + if ( + hasattr(choice, "delta") + and hasattr(choice.delta, "tool_calls") + and choice.delta.tool_calls is not None + ): + if ( + should_send_default_pii() + and integration.include_prompts + ): + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + 
choice.delta.tool_calls, + unpack=False, + ) + if hasattr(chunk, "model") and chunk.model is not None: span.set_data( SPANDATA.GEN_AI_RESPONSE_MODEL, chunk.model @@ -297,7 +318,8 @@ def new_iterator(): ) elif isinstance(chunk, str): - data_buf.append(chunk) + if chunk is not None: + data_buf.append(chunk) yield chunk diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 91f9e7e892..d345791b7f 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -617,7 +617,6 @@ def test_chat_completion_with_tools( "gen_ai.request.model": "test-model", "gen_ai.response.finish_reasons": "tool_calls", "gen_ai.response.model": "test-model-123", - "gen_ai.response.tool_calls": '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]', "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 8, "gen_ai.usage.total_tokens": 18, @@ -629,6 +628,9 @@ def test_chat_completion_with_tools( expected_data["gen_ai.request.messages"] = ( '[{"role": "user", "content": "What is the weather in Paris?"}]' ) + expected_data["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]' + ) if not send_default_pii: assert "gen_ai.request.messages" not in expected_data From 52b21c586603d34b2e02f00e309dcd3b0c7b4ba8 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 12:06:55 +0200 Subject: [PATCH 33/43] better tests --- .../huggingface_hub/test_huggingface_hub.py | 96 ++++++++++++++----- 1 file changed, 71 insertions(+), 25 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index d345791b7f..b66f3911be 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -6,6 +6,7 @@ import sentry_sdk from sentry_sdk.utils import package_version +from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration from typing import TYPE_CHECKING @@ -328,11 +329,20 @@ def mock_hf_chat_completion_api_streaming(): @pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation( - sentry_init, capture_events, send_default_pii, mock_hf_text_generation_api + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_text_generation_api, ): - # type: (Any, Any, Any, Any) -> None - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) events = capture_events() client = InferenceClient(model="test-model") @@ -361,11 +371,11 @@ def test_text_generation( "thread.name": mock.ANY, } - if send_default_pii: + if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = "Hello" expected_data["gen_ai.response.text"] = "[mocked] Hello! How can i help you?" 
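(The tool-call expectations in these tests compare against one serialized string because set_data_normalized(..., unpack=False) stores the whole list under a single span-data key. A stripped-down illustration of that storage shape, with json.dumps standing in for the SDK's own normalization:

import json

span_data = {}
tool_calls = [{"id": "call_123", "type": "function"}]

# unpack=False behaviour, roughly: one key whose value is the entire
# list rendered as a string, instead of one key per list element.
span_data["gen_ai.response.tool_calls"] = json.dumps(tool_calls)

assert span_data["gen_ai.response.tool_calls"] == (
    '[{"id": "call_123", "type": "function"}]'
)
)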
- if not send_default_pii: + if not send_default_pii or not include_prompts: assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data @@ -376,11 +386,20 @@ def test_text_generation( @pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation_streaming( - sentry_init, capture_events, send_default_pii, mock_hf_text_generation_api_streaming + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_text_generation_api_streaming, ): - # type: (Any, Any, Any, Any) -> None - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) events = capture_events() client = InferenceClient(model="test-model") @@ -410,11 +429,11 @@ def test_text_generation_streaming( "thread.name": mock.ANY, } - if send_default_pii: + if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = "Hello" expected_data["gen_ai.response.text"] = "the mocked model response" - if not send_default_pii: + if not send_default_pii or not include_prompts: assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data @@ -425,11 +444,20 @@ def test_text_generation_streaming( @pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion( - sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api, ): - # type: (Any, Any, Any, Any) -> None - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) events = capture_events() client = InferenceClient(model="test-model") @@ -460,7 +488,7 @@ def test_chat_completion( "thread.name": mock.ANY, } - if send_default_pii: + if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = ( '[{"role": "user", "content": "Hello!"}]' ) @@ -468,7 +496,7 @@ def test_chat_completion( "[mocked] Hello! How can I help you today?" 
) - if not send_default_pii: + if not send_default_pii or not include_prompts: assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data @@ -476,11 +504,20 @@ def test_chat_completion( @pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming( - sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api_streaming + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_streaming, ): - # type: (Any, Any, Any, Any) -> None - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) events = capture_events() client = InferenceClient(model="test-model") @@ -516,13 +553,13 @@ def test_chat_completion_streaming( expected_data["gen_ai.usage.output_tokens"] = 14 expected_data["gen_ai.usage.total_tokens"] = 197 - if send_default_pii: + if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = ( '[{"role": "user", "content": "Hello!"}]' ) expected_data["gen_ai.response.text"] = "the mocked model response" - if not send_default_pii: + if not send_default_pii or not include_prompts: assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data @@ -573,11 +610,20 @@ def test_chat_completion_api_error( @pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_with_tools( - sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api_tools + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_tools, ): - # type: (Any, Any, Any, Any) -> None - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) events = capture_events() client = InferenceClient(model="test-model") @@ -624,7 +670,7 @@ def test_chat_completion_with_tools( "thread.name": mock.ANY, } - if send_default_pii: + if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = ( '[{"role": "user", "content": "What is the weather in Paris?"}]' ) @@ -632,7 +678,7 @@ def test_chat_completion_with_tools( '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]' ) - if not send_default_pii: + if not send_default_pii or not include_prompts: assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data From bedc050d4f0d41ab7c16ad55f339aba40c51d363 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 13:00:35 +0200 Subject: [PATCH 34/43] more test coverage --- .../huggingface_hub/test_huggingface_hub.py | 132 +++++++++++++++++- 1 file changed, 131 insertions(+), 1 deletion(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index b66f3911be..db4e4c3754 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ 
b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -220,7 +220,7 @@ def mock_hf_chat_completion_api(): @pytest.fixture def mock_hf_chat_completion_api_tools(): # type: () -> Any - """Mock HuggingFace chat completion API""" + """Mock HuggingFace chat completion API with tool calls.""" with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: model_name = "test-model" @@ -328,6 +328,52 @@ def mock_hf_chat_completion_api_streaming(): yield rsps +@pytest.fixture +def mock_hf_chat_completion_api_streaming_tools(): + # type: () -> Any + """Mock streaming HuggingFace chat completion API with tool calls.""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={ + "id": model_name, + "pipeline_tag": "conversational", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "conversational", + } + }, + }, + status=200, + ) + + # Mock chat completion streaming endpoint + streaming_chat_response = ( + b'data:{"id":"xyz-123","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"response with tool calls follows"},"index":0,"finish_reason":null}],"usage":null}\n\n' + b'data:{"id":"xyz-124","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","tool_calls": [{"id": "call_123","type": "function","function": {"name": "get_weather", "arguments": {"location": "Paris"}}}]},"index":0,"finish_reason":"tool_calls"}],"usage":{"prompt_tokens":183,"completion_tokens":14,"total_tokens":197}}\n\n' + ) + + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + body=streaming_chat_response, + status=200, + headers={ + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + + yield rsps + + @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation( @@ -681,5 +727,89 @@ def test_chat_completion_with_tools( if not send_default_pii or not include_prompts: assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data + assert "gen_ai.response.tool_calls" not in expected_data + + assert span["data"] == expected_data + + +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_chat_completion_streaming_with_tools( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_streaming_tools, +): + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) + events = capture_events() + + client = InferenceClient(model="test-model") + + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"], + }, + }, + } + ] + + with sentry_sdk.start_transaction(name="test"): + response = client.chat_completion( + messages=[{"role": "user", "content": "What is the weather in Paris?"}], + stream=True, + tools=tools, 
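+            # "auto" leaves it to the model whether to call a tool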
+            tool_choice="auto",
+        )
+
+        # consume the stream so the span is finished
+        for _ in response:
+            pass
+
+    (transaction,) = events
+    (span,) = transaction["spans"]
+
+    assert span["op"] == "gen_ai.chat"
+    assert span["description"] == "chat test-model"
+    assert span["origin"] == "auto.ai.huggingface_hub"
+
+    expected_data = {
+        "gen_ai.operation.name": "chat",
+        "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]',
+        "gen_ai.request.model": "test-model",
+        "gen_ai.response.finish_reasons": "tool_calls",
+        "gen_ai.response.model": "test-model-123",
+        "gen_ai.response.streaming": True,
+        "gen_ai.usage.input_tokens": 183,
+        "gen_ai.usage.output_tokens": 14,
+        "gen_ai.usage.total_tokens": 197,
+        "thread.id": mock.ANY,
+        "thread.name": mock.ANY,
+    }
+
+    if send_default_pii and include_prompts:
+        expected_data["gen_ai.request.messages"] = (
+            '[{"role": "user", "content": "What is the weather in Paris?"}]'
+        )
+        expected_data["gen_ai.response.text"] = "response with tool calls follows"
+        expected_data["gen_ai.response.tool_calls"] = (
+            '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather"}, "id": "call_123", "type": "function", "index": "None"}]'
+        )
+
+    if not send_default_pii or not include_prompts:
+        assert "gen_ai.request.messages" not in expected_data
+        assert "gen_ai.response.text" not in expected_data
+        assert "gen_ai.response.tool_calls" not in expected_data
+
+    assert span["data"] == expected_data

From e8717fdff174f629fe9c6afe0c459166283533a9 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Tue, 9 Sep 2025 13:07:12 +0200
Subject: [PATCH 35/43] usage assertions only on newer huggingface_hub

---
 .../integrations/huggingface_hub/test_huggingface_hub.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index db4e4c3754..c2fff8f35b 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -791,13 +791,15 @@ def test_chat_completion_streaming_with_tools(
         "gen_ai.response.finish_reasons": "tool_calls",
         "gen_ai.response.model": "test-model-123",
         "gen_ai.response.streaming": True,
-        "gen_ai.usage.input_tokens": 183,
-        "gen_ai.usage.output_tokens": 14,
-        "gen_ai.usage.total_tokens": 197,
         "thread.id": mock.ANY,
         "thread.name": mock.ANY,
     }
 
+    if HF_VERSION and HF_VERSION >= (0, 26, 0):
+        expected_data["gen_ai.usage.input_tokens"] = 183
+        expected_data["gen_ai.usage.output_tokens"] = 14
+        expected_data["gen_ai.usage.total_tokens"] = 197
+
     if send_default_pii and include_prompts:
         expected_data["gen_ai.request.messages"] = (
             '[{"role": "user", "content": "What is the weather in Paris?"}]'

From c837e14ca6da68701fed1c3a5582e0c2f7c10924 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Tue, 9 Sep 2025 13:19:51 +0200
Subject: [PATCH 36/43] delete old tests

---
 .../old_test_huggingface_hub.py | 185 ------------------
 1 file changed, 185 deletions(-)
 delete mode 100644 tests/integrations/huggingface_hub/old_test_huggingface_hub.py

diff --git a/tests/integrations/huggingface_hub/old_test_huggingface_hub.py b/tests/integrations/huggingface_hub/old_test_huggingface_hub.py
deleted file mode 100644
index a9dc450168..0000000000
--- a/tests/integrations/huggingface_hub/old_test_huggingface_hub.py
+++ /dev/null
@@ -1,185 +0,0 @@
-import itertools
-from unittest import mock
-
-import pytest -from huggingface_hub import ( - InferenceClient, -) -from huggingface_hub.errors import OverloadedError - -from sentry_sdk import start_transaction -from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration - - -def mock_client_post(client, post_mock): - # huggingface-hub==0.28.0 deprecates the `post` method - # so patch `_inner_post` instead - if hasattr(client, "post"): - client.post = post_mock - if hasattr(client, "_inner_post"): - client._inner_post = post_mock - - -@pytest.mark.parametrize( - "send_default_pii, include_prompts, details_arg", - itertools.product([True, False], repeat=3), -) -def test_nonstreaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts, details_arg -): - sentry_init( - integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - traces_sample_rate=1.0, - send_default_pii=send_default_pii, - ) - events = capture_events() - - client = InferenceClient(model="https://") - - if details_arg: - post_mock = mock.Mock( - return_value=b"""[{ - "generated_text": "the model response", - "details": { - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [], - "tokens": [] - } - }]""" - ) - else: - post_mock = mock.Mock( - return_value=b'[{"generated_text": "the model response"}]' - ) - mock_client_post(client, post_mock) - - with start_transaction(name="huggingface_hub tx"): - response = client.text_generation( - prompt="hello", - details=details_arg, - stream=False, - ) - if details_arg: - assert response.generated_text == "the model response" - else: - assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.generate_text" - - if send_default_pii and include_prompts: - assert "hello" in span["data"]["gen_ai.request.messages"] - assert "the model response" in span["data"]["gen_ai.response.text"] - else: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] - - if details_arg: - assert span["data"]["gen_ai.usage.total_tokens"] == 10 - - -@pytest.mark.parametrize( - "send_default_pii, include_prompts, details_arg", - itertools.product([True, False], repeat=3), -) -def test_streaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts, details_arg -): - sentry_init( - integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - traces_sample_rate=1.0, - send_default_pii=send_default_pii, - ) - events = capture_events() - - client = InferenceClient(model="https://") - - post_mock = mock.Mock( - return_value=[ - b"""data:{ - "token":{"id":1, "special": false, "text": "the model "} - }""", - b"""data:{ - "token":{"id":2, "special": false, "text": "response"}, - "details":{"finish_reason": "length", "generated_tokens": 10, "seed": 0} - }""", - ] - ) - mock_client_post(client, post_mock) - - with start_transaction(name="huggingface_hub tx"): - response = list( - client.text_generation( - prompt="hello", - details=details_arg, - stream=True, - ) - ) - assert len(response) == 2 - if details_arg: - assert response[0].token.text + response[1].token.text == "the model response" - else: - assert response[0] + response[1] == "the model response" - - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.generate_text" - - if send_default_pii and include_prompts: - assert "hello" in span["data"]["gen_ai.request.messages"] - assert "the model response" 
in span["data"]["gen_ai.response.text"] - else: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] - - if details_arg: - assert span["data"]["gen_ai.usage.total_tokens"] == 10 - - -def test_bad_chat_completion(sentry_init, capture_events): - sentry_init(integrations=[HuggingfaceHubIntegration()], traces_sample_rate=1.0) - events = capture_events() - - client = InferenceClient(model="https://") - post_mock = mock.Mock(side_effect=OverloadedError("The server is overloaded")) - mock_client_post(client, post_mock) - - with pytest.raises(OverloadedError): - client.text_generation(prompt="hello") - - (event,) = events - assert event["level"] == "error" - - -def test_span_origin(sentry_init, capture_events): - sentry_init( - integrations=[HuggingfaceHubIntegration()], - traces_sample_rate=1.0, - ) - events = capture_events() - - client = InferenceClient(model="https://") - post_mock = mock.Mock( - return_value=[ - b"""data:{ - "token":{"id":1, "special": false, "text": "the model "} - }""", - ] - ) - mock_client_post(client, post_mock) - - with start_transaction(name="huggingface_hub tx"): - list( - client.text_generation( - prompt="hello", - stream=True, - ) - ) - - (event,) = events - - assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.huggingface_hub" From 78a31921a681cb564dace06b884751f85d03a5cc Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 13:58:02 +0200 Subject: [PATCH 37/43] cleanup --- sentry_sdk/integrations/huggingface_hub.py | 55 +++++++++++++++++----- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 413c3c05b4..9f37b6f56e 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -1,7 +1,5 @@ from functools import wraps -from typing import Any, Iterable, Callable - import sentry_sdk from sentry_sdk.ai.monitoring import record_token_usage from sentry_sdk.ai.utils import set_data_normalized @@ -13,6 +11,12 @@ event_from_exception, ) +from typing import Iterable, TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Callable + + try: import huggingface_hub.inference._client @@ -100,6 +104,11 @@ def new_huggingface_task(*args, **kwargs): span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) # Input attributes + if should_send_default_pii() and integration.include_prompts: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False + ) + attribute_mapping = { "tools": SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, @@ -110,6 +119,7 @@ def new_huggingface_task(*args, **kwargs): "top_k": SPANDATA.GEN_AI_REQUEST_TOP_K, "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING, } + for attribute, span_attribute in attribute_mapping.items(): value = kwargs.get(attribute, None) if value is not None: @@ -118,21 +128,23 @@ def new_huggingface_task(*args, **kwargs): else: set_data_normalized(span, span_attribute, value, unpack=False) + # LLM Execution try: res = f(*args, **kwargs) except Exception as e: + # Error Handling span.set_status("error") _capture_exception(e) span.__exit__(None, None, None) raise e from None + # Output attributes with capture_internal_exceptions(): - # Output attributes - if hasattr(res, "model"): - model = res.model - if model: - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model) + # Response Model + if 
hasattr(res, "model") and res.model is not None: + span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, res.model) + # Finish Reason finish_reason = None if hasattr(res, "details") and res.details is not None: finish_reason = getattr(res.details, "finish_reason", None) @@ -146,11 +158,9 @@ def new_huggingface_task(*args, **kwargs): if finish_reason: span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason) + # Request Messages if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False - ) - + # Response Tool Calls try: tool_calls = res.choices[0].message.tool_calls except Exception: @@ -164,6 +174,7 @@ def new_huggingface_task(*args, **kwargs): unpack=False, ) + # Response Text if isinstance(res, str): if should_send_default_pii() and integration.include_prompts: if res: @@ -172,10 +183,12 @@ def new_huggingface_task(*args, **kwargs): SPANDATA.GEN_AI_RESPONSE_TEXT, res, ) + span.__exit__(None, None, None) return res if isinstance(res, TextGenerationOutput): + # Response Text if should_send_default_pii() and integration.include_prompts: if res.generated_text: set_data_normalized( @@ -183,15 +196,18 @@ def new_huggingface_task(*args, **kwargs): SPANDATA.GEN_AI_RESPONSE_TEXT, res.generated_text, ) + # Usage if res.details is not None and res.details.generated_tokens > 0: record_token_usage( span, total_tokens=res.details.generated_tokens, ) + span.__exit__(None, None, None) return res if isinstance(res, ChatCompletionOutput): + # Response Text if should_send_default_pii() and integration.include_prompts: text_response = "".join( [ @@ -205,6 +221,7 @@ def new_huggingface_task(*args, **kwargs): SPANDATA.GEN_AI_RESPONSE_TEXT, text_response, ) + # Usage if hasattr(res, "usage") and res.usage is not None: record_token_usage( span, @@ -212,6 +229,7 @@ def new_huggingface_task(*args, **kwargs): output_tokens=res.usage.completion_tokens, total_tokens=res.usage.total_tokens, ) + span.__exit__(None, None, None) return res @@ -226,13 +244,18 @@ def new_details_iterator(): with capture_internal_exceptions(): tokens_used = 0 data_buf: list[str] = [] + for chunk in res: if hasattr(chunk, "token") and hasattr(chunk.token, "text"): data_buf.append(chunk.token.text) + + # Usage if hasattr(chunk, "details") and hasattr( chunk.details, "generated_tokens" ): tokens_used = chunk.details.generated_tokens + + # Finish Reason if hasattr(chunk, "details") and hasattr( chunk.details, "finish_reason" ): @@ -243,6 +266,7 @@ def new_details_iterator(): yield chunk + # Response Text if ( len(data_buf) > 0 and should_send_default_pii() @@ -255,6 +279,7 @@ def new_details_iterator(): SPANDATA.GEN_AI_RESPONSE_TEXT, text_response, ) + # Usage if tokens_used > 0: record_token_usage( span, @@ -264,12 +289,14 @@ def new_details_iterator(): span.__exit__(None, None, None) return new_details_iterator() + else: # chat-completion stream output def new_iterator(): # type: () -> Iterable[str] with capture_internal_exceptions(): data_buf: list[str] = [] + for chunk in res: if isinstance(chunk, ChatCompletionStreamOutput): for choice in chunk.choices: @@ -280,6 +307,7 @@ def new_iterator(): ): data_buf.append(choice.delta.content) + # Finish Reason if ( hasattr(choice, "finish_reason") and choice.finish_reason is not None @@ -288,6 +316,8 @@ def new_iterator(): SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, choice.finish_reason, ) + + # Response Tool Calls if ( hasattr(choice, "delta") and hasattr(choice.delta, "tool_calls") @@ -304,11 +334,13 @@ def 
new_iterator(): unpack=False, ) + # Response Model if hasattr(chunk, "model") and chunk.model is not None: span.set_data( SPANDATA.GEN_AI_RESPONSE_MODEL, chunk.model ) + # Usage if hasattr(chunk, "usage") and chunk.usage is not None: record_token_usage( span, @@ -323,6 +355,7 @@ def new_iterator(): yield chunk + # Response Text if ( len(data_buf) > 0 and should_send_default_pii() From a35a84d318b87db3e17a16d09b6164c34a49db5a Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 14:22:41 +0200 Subject: [PATCH 38/43] cleanup --- sentry_sdk/integrations/huggingface_hub.py | 160 +++++++++++---------- 1 file changed, 86 insertions(+), 74 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 9f37b6f56e..536114e0fc 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -23,7 +23,6 @@ from huggingface_hub import ( ChatCompletionOutput, TextGenerationOutput, - ChatCompletionStreamOutput, ) except ImportError: raise DidNotEnable("Huggingface not installed") @@ -242,49 +241,55 @@ def new_huggingface_task(*args, **kwargs): def new_details_iterator(): # type: () -> Iterable[Any] with capture_internal_exceptions(): + finish_reason = None tokens_used = 0 - data_buf: list[str] = [] + response_text_buffer: list[str] = [] for chunk in res: - if hasattr(chunk, "token") and hasattr(chunk.token, "text"): - data_buf.append(chunk.token.text) - - # Usage - if hasattr(chunk, "details") and hasattr( - chunk.details, "generated_tokens" + if ( + hasattr(chunk, "token") + and hasattr(chunk.token, "text") + and chunk.token.text is not None ): - tokens_used = chunk.details.generated_tokens + response_text_buffer.append(chunk.token.text) - # Finish Reason - if hasattr(chunk, "details") and hasattr( - chunk.details, "finish_reason" - ): - span.set_data( - SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, - chunk.details.finish_reason, + details = getattr(chunk, "details", None) + if details is not None: + finish_reason = getattr(details, "finish_reason", None) + + generated_tokens = getattr( + details, "generated_tokens", None ) + if generated_tokens is not None: + tokens_used = generated_tokens yield chunk - # Response Text + if finish_reason: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, + finish_reason, + ) + + if tokens_used > 0: + record_token_usage( + span, + total_tokens=tokens_used, + ) + if ( - len(data_buf) > 0 - and should_send_default_pii() + should_send_default_pii() and integration.include_prompts + and len(response_text_buffer) > 0 ): - text_response = "".join(data_buf) + text_response = "".join(response_text_buffer) if text_response: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, text_response, ) - # Usage - if tokens_used > 0: - record_token_usage( - span, - total_tokens=tokens_used, - ) span.__exit__(None, None, None) @@ -294,81 +299,88 @@ def new_details_iterator(): # chat-completion stream output def new_iterator(): # type: () -> Iterable[str] - with capture_internal_exceptions(): - data_buf: list[str] = [] + finish_reason = None + response_model = None + response_text_buffer: list[str] = [] + tool_calls = None + usage = None + with capture_internal_exceptions(): for chunk in res: - if isinstance(chunk, ChatCompletionStreamOutput): + if hasattr(chunk, "model") and chunk.model is not None: + response_model = chunk.model + + if hasattr(chunk, "usage") and chunk.usage is not None: + usage = chunk.usage + + if isinstance(chunk, str): + if chunk is 
not None: + response_text_buffer.append(chunk) + + if hasattr(chunk, "choices") and chunk.choices is not None: for choice in chunk.choices: if ( hasattr(choice, "delta") and hasattr(choice.delta, "content") and choice.delta.content is not None ): - data_buf.append(choice.delta.content) + response_text_buffer.append( + choice.delta.content + ) - # Finish Reason if ( hasattr(choice, "finish_reason") and choice.finish_reason is not None ): - span.set_data( - SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, - choice.finish_reason, - ) + finish_reason = choice.finish_reason - # Response Tool Calls if ( hasattr(choice, "delta") and hasattr(choice.delta, "tool_calls") and choice.delta.tool_calls is not None ): - if ( - should_send_default_pii() - and integration.include_prompts - ): - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, - choice.delta.tool_calls, - unpack=False, - ) - - # Response Model - if hasattr(chunk, "model") and chunk.model is not None: - span.set_data( - SPANDATA.GEN_AI_RESPONSE_MODEL, chunk.model - ) + tool_calls = choice.delta.tool_calls - # Usage - if hasattr(chunk, "usage") and chunk.usage is not None: - record_token_usage( - span, - input_tokens=chunk.usage.prompt_tokens, - output_tokens=chunk.usage.completion_tokens, - total_tokens=chunk.usage.total_tokens, - ) + yield chunk - elif isinstance(chunk, str): - if chunk is not None: - data_buf.append(chunk) + if response_model is not None: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_MODEL, response_model + ) - yield chunk + if finish_reason is not None: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, + choice.finish_reason, + ) - # Response Text - if ( - len(data_buf) > 0 - and should_send_default_pii() - and integration.include_prompts - ): - text_response = "".join(data_buf) - if text_response: + if should_send_default_pii() and integration.include_prompts: + if tool_calls is not None: set_data_normalized( span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - text_response, + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + tool_calls, + unpack=False, ) + if len(response_text_buffer) > 0: + text_response = "".join(response_text_buffer) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) + + if usage is not None: + record_token_usage( + span, + input_tokens=usage.prompt_tokens, + output_tokens=usage.completion_tokens, + total_tokens=usage.total_tokens, + ) + span.__exit__(None, None, None) return new_iterator() From 6bd766e56d1c52a2aa30387e5c3ead6ff8aa7782 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 14:29:05 +0200 Subject: [PATCH 39/43] cleanup --- sentry_sdk/integrations/huggingface_hub.py | 51 +++++++++++----------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 536114e0fc..f46a9ed49b 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -240,11 +240,11 @@ def new_huggingface_task(*args, **kwargs): # text-generation stream output def new_details_iterator(): # type: () -> Iterable[Any] - with capture_internal_exceptions(): - finish_reason = None - tokens_used = 0 - response_text_buffer: list[str] = [] + finish_reason = None + response_text_buffer: list[str] = [] + tokens_used = 0 + with capture_internal_exceptions(): for chunk in res: if ( hasattr(chunk, "token") @@ -253,44 +253,43 @@ def new_details_iterator(): ): response_text_buffer.append(chunk.token.text) - details = 
getattr(chunk, "details", None) - if details is not None: - finish_reason = getattr(details, "finish_reason", None) + if hasattr(chunk, "details") and hasattr( + chunk.details, "finish_reason" + ): + finish_reason = chunk.details.finish_reason - generated_tokens = getattr( - details, "generated_tokens", None - ) - if generated_tokens is not None: - tokens_used = generated_tokens + if ( + hasattr(chunk, "details") + and hasattr(chunk.details, "generated_tokens") + and chunk.details.generated_tokens is not None + ): + tokens_used = chunk.details.generated_tokens yield chunk - if finish_reason: + if finish_reason is not None: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason, ) + if should_send_default_pii() and integration.include_prompts: + if len(response_text_buffer) > 0: + text_response = "".join(response_text_buffer) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) + if tokens_used > 0: record_token_usage( span, total_tokens=tokens_used, ) - if ( - should_send_default_pii() - and integration.include_prompts - and len(response_text_buffer) > 0 - ): - text_response = "".join(response_text_buffer) - if text_response: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - text_response, - ) - span.__exit__(None, None, None) return new_details_iterator() From e294e83ea6f3bb480093d5f503b0b800bf5c660c Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 15:08:07 +0200 Subject: [PATCH 40/43] cleanup --- sentry_sdk/integrations/huggingface_hub.py | 159 ++++++++++----------- 1 file changed, 74 insertions(+), 85 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index f46a9ed49b..d94b2b2214 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -1,3 +1,4 @@ +import inspect from functools import wraps import sentry_sdk @@ -11,19 +12,13 @@ event_from_exception, ) -from typing import Iterable, TYPE_CHECKING +from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Callable - + from typing import Any, Callable, Iterable try: import huggingface_hub.inference._client - - from huggingface_hub import ( - ChatCompletionOutput, - TextGenerationOutput, - ) except ImportError: raise DidNotEnable("Huggingface not installed") @@ -138,33 +133,62 @@ def new_huggingface_task(*args, **kwargs): raise e from None # Output attributes - with capture_internal_exceptions(): - # Response Model - if hasattr(res, "model") and res.model is not None: - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, res.model) + finish_reason = None + response_model = None + response_text_buffer: list[str] = [] + tokens_used = 0 + tool_calls = None + usage = None - # Finish Reason - finish_reason = None - if hasattr(res, "details") and res.details is not None: - finish_reason = getattr(res.details, "finish_reason", None) + with capture_internal_exceptions(): + if isinstance(res, str) and res is not None: + response_text_buffer.append(res) - if finish_reason is None: - try: - finish_reason = res.choices[0].finish_reason - except Exception: - pass + if hasattr(res, "generated_text") and res.generated_text is not None: + response_text_buffer.append(res.generated_text) - if finish_reason: - span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason) + if hasattr(res, "model") and res.model is not None: + response_model = res.model + + if hasattr(res, "details") and hasattr(res.details, 
"finish_reason"): + finish_reason = res.details.finish_reason + + if ( + hasattr(res, "details") + and hasattr(res.details, "generated_tokens") + and res.details.generated_tokens is not None + ): + tokens_used = res.details.generated_tokens + + if hasattr(res, "usage") and res.usage is not None: + usage = res.usage + + if hasattr(res, "choices") and res.choices is not None: + for choice in res.choices: + if hasattr(choice, "finish_reason"): + finish_reason = choice.finish_reason + if hasattr(choice, "message") and hasattr( + choice.message, "tool_calls" + ): + tool_calls = choice.message.tool_calls + if ( + hasattr(choice, "message") + and hasattr(choice.message, "content") + and choice.message.content is not None + ): + response_text_buffer.append(choice.message.content) + + if response_model is not None: + span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) + + if finish_reason is not None: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, + finish_reason, + ) - # Request Messages if should_send_default_pii() and integration.include_prompts: - # Response Tool Calls - try: - tool_calls = res.choices[0].message.tool_calls - except Exception: - tool_calls = [] - if tool_calls is not None and len(tool_calls) > 0: set_data_normalized( span, @@ -173,66 +197,31 @@ def new_huggingface_task(*args, **kwargs): unpack=False, ) - # Response Text - if isinstance(res, str): - if should_send_default_pii() and integration.include_prompts: - if res: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - res, - ) - - span.__exit__(None, None, None) - return res - - if isinstance(res, TextGenerationOutput): - # Response Text - if should_send_default_pii() and integration.include_prompts: - if res.generated_text: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - res.generated_text, - ) - # Usage - if res.details is not None and res.details.generated_tokens > 0: - record_token_usage( - span, - total_tokens=res.details.generated_tokens, - ) - - span.__exit__(None, None, None) - return res - - if isinstance(res, ChatCompletionOutput): - # Response Text - if should_send_default_pii() and integration.include_prompts: - text_response = "".join( - [ - x.get("message", {}).get("content", None) or "" - for x in res.choices - ] - ) + if len(response_text_buffer) > 0: + text_response = "".join(response_text_buffer) if text_response: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, text_response, ) - # Usage - if hasattr(res, "usage") and res.usage is not None: - record_token_usage( - span, - input_tokens=res.usage.prompt_tokens, - output_tokens=res.usage.completion_tokens, - total_tokens=res.usage.total_tokens, - ) - span.__exit__(None, None, None) - return res - - if not isinstance(res, Iterable): + if usage is not None: + record_token_usage( + span, + input_tokens=usage.prompt_tokens, + output_tokens=usage.completion_tokens, + total_tokens=usage.total_tokens, + ) + elif tokens_used > 0: + record_token_usage( + span, + total_tokens=tokens_used, + ) + + # If the response is not a generator (meaning a streaming response) + # we are done and can return the response + if not inspect.isgenerator(res): span.__exit__(None, None, None) return res @@ -351,11 +340,11 @@ def new_iterator(): set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, - choice.finish_reason, + finish_reason, ) if should_send_default_pii() and integration.include_prompts: - if tool_calls is not None: + if tool_calls is not None and len(tool_calls) > 0: 
                        set_data_normalized(
                             span,
                             SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,

From 96b8f89377d80c6df4e277e3c01acd5de11c48ba Mon Sep 17 00:00:00 2001
From: Anton Pirker <anton.pirker@sentry.io>
Date: Tue, 9 Sep 2025 15:34:44 +0200
Subject: [PATCH 41/43] cleanup

---
 .../huggingface_hub/test_huggingface_hub.py   | 26 +++++++++----------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index c2fff8f35b..6e1296987f 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -569,14 +569,13 @@ def test_chat_completion_streaming(
     client = InferenceClient(model="test-model")
 
     with sentry_sdk.start_transaction(name="test"):
-        response = client.chat_completion(
-            messages=[{"role": "user", "content": "Hello!"}],
-            stream=True,
+        _ = list(
+            client.chat_completion(
+                messages=[{"role": "user", "content": "Hello!"}],
+                stream=True,
+            )
         )
 
-        for x in response:
-            print(x)
-
     (transaction,) = events
     (span,) = transaction["spans"]
 
@@ -767,16 +766,15 @@ def test_chat_completion_streaming_with_tools(
     ]
 
     with sentry_sdk.start_transaction(name="test"):
-        response = client.chat_completion(
-            messages=[{"role": "user", "content": "What is the weather in Paris?"}],
-            stream=True,
-            tools=tools,
-            tool_choice="auto",
+        _ = list(
+            client.chat_completion(
+                messages=[{"role": "user", "content": "What is the weather in Paris?"}],
+                stream=True,
+                tools=tools,
+                tool_choice="auto",
+            )
         )
 
-        for x in response:
-            print(x)
-
     (transaction,) = events
     (span,) = transaction["spans"]

From f5574cf6ad4c0454bf7c3c5be1074183acb4b3d5 Mon Sep 17 00:00:00 2001
From: Anton Pirker <anton.pirker@sentry.io>
Date: Tue, 9 Sep 2025 15:39:40 +0200
Subject: [PATCH 42/43] cleanup

---
 sentry_sdk/integrations/huggingface_hub.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py
index d94b2b2214..3b152cb16a 100644
--- a/sentry_sdk/integrations/huggingface_hub.py
+++ b/sentry_sdk/integrations/huggingface_hub.py
@@ -74,9 +74,8 @@ def new_huggingface_task(*args, **kwargs):
         elif "messages" in kwargs:
             prompt = kwargs["messages"]
         elif len(args) >= 2:
-            kwargs["prompt"] = args[1]
-            prompt = kwargs["prompt"]
-            args = (args[0],) + args[2:]
+            if isinstance(args[1], str):
+                prompt = args[1]
         else:
             # invalid call, don't instrument, let it return error
             return f(*args, **kwargs)

From bd5b15f1df82d557915df3018e153113edc9a8ad Mon Sep 17 00:00:00 2001
From: Anton Pirker <anton.pirker@sentry.io>
Date: Tue, 9 Sep 2025 15:47:28 +0200
Subject: [PATCH 43/43] cleanup

---
 sentry_sdk/integrations/huggingface_hub.py                 | 6 ++++--
 tests/integrations/huggingface_hub/test_huggingface_hub.py | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py
index 3b152cb16a..cb76ccf507 100644
--- a/sentry_sdk/integrations/huggingface_hub.py
+++ b/sentry_sdk/integrations/huggingface_hub.py
@@ -69,14 +69,16 @@ def new_huggingface_task(*args, **kwargs):
         if integration is None:
             return f(*args, **kwargs)
 
+        prompt = None
         if "prompt" in kwargs:
             prompt = kwargs["prompt"]
         elif "messages" in kwargs:
             prompt = kwargs["messages"]
         elif len(args) >= 2:
-            if isinstance(args[1], str):
+            if isinstance(args[1], str) or isinstance(args[1], list):
                 prompt = args[1]
-        else:
+
+        if prompt is None:
             # invalid call, don't instrument, let it return error
             return f(*args, **kwargs)
 
diff --git 
a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 6e1296987f..86f9c10109 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -571,7 +571,7 @@ def test_chat_completion_streaming( with sentry_sdk.start_transaction(name="test"): _ = list( client.chat_completion( - messages=[{"role": "user", "content": "Hello!"}], + [{"role": "user", "content": "Hello!"}], stream=True, ) )
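
Taken together, the end state of this series can be exercised with a minimal
sketch like the one below. It mirrors the test setup above; assumptions are
that no DSN is configured (so nothing is actually sent), that "test-model"
and the prompts are placeholders, and that a real run would need a reachable
inference provider, since the tests mock the HTTP layer.

    import sentry_sdk
    from huggingface_hub import InferenceClient
    from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration

    # send_default_pii together with include_prompts gates whether
    # gen_ai.request.messages / gen_ai.response.text end up on the span.
    sentry_sdk.init(
        # dsn="...",  # set a real DSN to actually send the transaction
        integrations=[HuggingfaceHubIntegration(include_prompts=True)],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )

    client = InferenceClient(model="test-model")  # placeholder model

    with sentry_sdk.start_transaction(name="demo"):
        # Non-streaming call: the span is closed when the call returns.
        client.chat_completion([{"role": "user", "content": "Hello!"}])

        # Streaming call: response text, finish reason, and token usage are
        # only finalized once the generator is fully consumed.
        _ = list(
            client.chat_completion(
                [{"role": "user", "content": "Hello!"}],
                stream=True,
            )
        )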