From 88cd97eac65d8d8c67b3400c1fc4cf3414380f26 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 3 Sep 2025 10:14:11 +0200 Subject: [PATCH 01/43] Rename attributes --- sentry_sdk/consts.py | 1 + sentry_sdk/integrations/huggingface_hub.py | 38 +++++++++++++--------- 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/sentry_sdk/consts.py b/sentry_sdk/consts.py index d7a0603a10..6c82cffc90 100644 --- a/sentry_sdk/consts.py +++ b/sentry_sdk/consts.py @@ -794,6 +794,7 @@ class OP: GEN_AI_CHAT = "gen_ai.chat" GEN_AI_EMBEDDINGS = "gen_ai.embeddings" GEN_AI_EXECUTE_TOOL = "gen_ai.execute_tool" + GEN_AI_GENERATE_TEXT = "gen_ai.generate_text" GEN_AI_HANDOFF = "gen_ai.handoff" GEN_AI_PIPELINE = "gen_ai.pipeline" GEN_AI_INVOKE_AGENT = "gen_ai.invoke_agent" diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 2dfcb5925a..22099c5559 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -1,15 +1,14 @@ from functools import wraps -from sentry_sdk import consts -from sentry_sdk.ai.monitoring import record_token_usage -from sentry_sdk.ai.utils import set_data_normalized -from sentry_sdk.consts import SPANDATA from typing import Any, Iterable, Callable import sentry_sdk -from sentry_sdk.scope import should_send_default_pii +from sentry_sdk.ai.monitoring import record_token_usage +from sentry_sdk.ai.utils import set_data_normalized +from sentry_sdk.consts import OP, SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration +from sentry_sdk.scope import should_send_default_pii from sentry_sdk.utils import ( capture_internal_exceptions, event_from_exception, @@ -34,6 +33,8 @@ def __init__(self, include_prompts=True): @staticmethod def setup_once(): # type: () -> None + + # Other tasks that can be called: https://huggingface.co/docs/huggingface_hub/guides/inference#supported-providers-and-tasks huggingface_hub.inference._client.InferenceClient.text_generation = ( _wrap_text_generation( huggingface_hub.inference._client.InferenceClient.text_generation @@ -70,15 +71,22 @@ def new_text_generation(*args, **kwargs): # invalid call, let it return error return f(*args, **kwargs) - model = kwargs.get("model") + client = args[0] + model = client.model or kwargs.get("model") or "" streaming = kwargs.get("stream") span = sentry_sdk.start_span( - op=consts.OP.HUGGINGFACE_HUB_CHAT_COMPLETIONS_CREATE, - name="Text Generation", + op=OP.GEN_AI_GENERATE_TEXT, + name=f"generate_text {model}", origin=HuggingfaceHubIntegration.origin, ) span.__enter__() + + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "generate_text") + if model: + span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) + span.set_data(SPANDATA.GEN_AI_SYSTEM, "TODO!!!!!") + try: res = f(*args, **kwargs) except Exception as e: @@ -88,16 +96,15 @@ def new_text_generation(*args, **kwargs): with capture_internal_exceptions(): if should_send_default_pii() and integration.include_prompts: - set_data_normalized(span, SPANDATA.AI_INPUT_MESSAGES, prompt) + set_data_normalized(span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt) - set_data_normalized(span, SPANDATA.AI_MODEL_ID, model) - set_data_normalized(span, SPANDATA.AI_STREAMING, streaming) + span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, streaming) if isinstance(res, str): if should_send_default_pii() and integration.include_prompts: set_data_normalized( span, - SPANDATA.AI_RESPONSES, + SPANDATA.GEN_AI_RESPONSE_TEXT, [res], ) span.__exit__(None, None, None) @@ -107,7 +114,7 @@ def 
new_text_generation(*args, **kwargs):
         if should_send_default_pii() and integration.include_prompts:
             set_data_normalized(
                 span,
-                SPANDATA.AI_RESPONSES,
+                SPANDATA.GEN_AI_RESPONSE_TEXT,
                 [res.generated_text],
             )
         if res.details is not None and res.details.generated_tokens > 0:
@@ -120,7 +127,6 @@ def new_text_generation(*args, **kwargs):
 
         if not isinstance(res, Iterable):
             # we only know how to deal with strings and iterables, ignore
-            set_data_normalized(span, "unknown_response", True)
             span.__exit__(None, None, None)
             return res
 
@@ -145,7 +151,7 @@ def new_details_iterator():
                         and integration.include_prompts
                     ):
                         set_data_normalized(
-                            span, SPANDATA.AI_RESPONSES, "".join(data_buf)
+                            span, SPANDATA.GEN_AI_RESPONSE_TEXT, "".join(data_buf)
                         )
                     if tokens_used > 0:
                         record_token_usage(
@@ -172,7 +178,7 @@ def new_iterator():
                         and integration.include_prompts
                     ):
                         set_data_normalized(
-                            span, SPANDATA.AI_RESPONSES, "".join(data_buf)
+                            span, SPANDATA.GEN_AI_RESPONSE_TEXT, "".join(data_buf)
                         )
                     span.__exit__(None, None, None)

From 54164cd1462c0cf713211ef37a6edc1b40953c0f Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Wed, 3 Sep 2025 10:56:38 +0200
Subject: [PATCH 02/43] text generation done.

---
 sentry_sdk/integrations/huggingface_hub.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py
index 22099c5559..a4ad16fdcb 100644
--- a/sentry_sdk/integrations/huggingface_hub.py
+++ b/sentry_sdk/integrations/huggingface_hub.py
@@ -85,7 +85,6 @@ def new_text_generation(*args, **kwargs):
         span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "generate_text")
         if model:
             span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model)
-        span.set_data(SPANDATA.GEN_AI_SYSTEM, "TODO!!!!!")
 
         try:
             res = f(*args, **kwargs)

From 05ef7e338ff5b11ee4beb23dc3451ab248ce6c1f Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Wed, 3 Sep 2025 12:25:13 +0200
Subject: [PATCH 03/43] First version of supporting chat-completion

---
 sentry_sdk/integrations/huggingface_hub.py | 54 ++++++++++++++++++----
 1 file changed, 45 insertions(+), 9 deletions(-)

diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py
index a4ad16fdcb..1c399e6082 100644
--- a/sentry_sdk/integrations/huggingface_hub.py
+++ b/sentry_sdk/integrations/huggingface_hub.py
@@ -17,7 +17,7 @@
 try:
     import huggingface_hub.inference._client
 
-    from huggingface_hub import ChatCompletionStreamOutput, TextGenerationOutput
+    from huggingface_hub import ChatCompletionOutput, TextGenerationOutput
 
 except ImportError:
     raise DidNotEnable("Huggingface not installed")
@@ -40,6 +40,11 @@ def setup_once():
                 huggingface_hub.inference._client.InferenceClient.text_generation
             )
         )
+        huggingface_hub.inference._client.InferenceClient.chat_completion = (
+            _wrap_text_generation(
+                huggingface_hub.inference._client.InferenceClient.chat_completion
+            )
+        )
 
 
 def _capture_exception(exc):
@@ -63,12 +68,14 @@ def new_text_generation(*args, **kwargs):
 
         if "prompt" in kwargs:
             prompt = kwargs["prompt"]
+        elif "messages" in kwargs:
+            prompt = kwargs["messages"]
         elif len(args) >= 2:
             kwargs["prompt"] = args[1]
             prompt = kwargs["prompt"]
             args = (args[0],) + args[2:]
         else:
-            # invalid call, let it return error
+            # invalid call, don't instrument, let it return error
             return f(*args, **kwargs)
 
         client = args[0]
@@ -95,7 +102,9 @@ def new_text_generation(*args, **kwargs):
 
         with capture_internal_exceptions():
             if should_send_default_pii() and integration.include_prompts:
-                set_data_normalized(span, 
SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt) + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False + ) span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, streaming) @@ -104,17 +113,20 @@ def new_text_generation(*args, **kwargs): set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, - [res], + res, ) span.__exit__(None, None, None) return res if isinstance(res, TextGenerationOutput): if should_send_default_pii() and integration.include_prompts: + import ipdb + + ipdb.set_trace() set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, - [res.generated_text], + res.generated_text, ) if res.details is not None and res.details.generated_tokens > 0: record_token_usage( @@ -124,15 +136,35 @@ def new_text_generation(*args, **kwargs): span.__exit__(None, None, None) return res + if isinstance(res, ChatCompletionOutput): + if should_send_default_pii() and integration.include_prompts: + text_response = "".join( + [x.get("message", {}).get("content") for x in res.choices] + ) + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) + if hasattr(res, "usage") and res.usage is not None: + record_token_usage( + span, + input_tokens=res.usage.prompt_tokens, + output_tokens=res.usage.completion_tokens, + total_tokens=res.usage.total_tokens, + ) + span.__exit__(None, None, None) + return res + if not isinstance(res, Iterable): # we only know how to deal with strings and iterables, ignore span.__exit__(None, None, None) return res if kwargs.get("details", False): - # res is Iterable[TextGenerationStreamOutput] + def new_details_iterator(): - # type: () -> Iterable[ChatCompletionStreamOutput] + # type: () -> Iterable[Any] with capture_internal_exceptions(): tokens_used = 0 data_buf: list[str] = [] @@ -150,7 +182,9 @@ def new_details_iterator(): and integration.include_prompts ): set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, "".join(data_buf) + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + "".join(data_buf), ) if tokens_used > 0: record_token_usage( @@ -177,7 +211,9 @@ def new_iterator(): and integration.include_prompts ): set_data_normalized( - span, SPANDATA.GEN_AI_RESPONSE_TEXT, "".join(data_buf) + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + "".join(data_buf), ) span.__exit__(None, None, None) From d43d17fae203c2642589121ded2f40f42672b2a7 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 3 Sep 2025 12:42:12 +0200 Subject: [PATCH 04/43] Cleanup --- sentry_sdk/integrations/huggingface_hub.py | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 1c399e6082..d8ae2a2285 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -36,13 +36,15 @@ def setup_once(): # Other tasks that can be called: https://huggingface.co/docs/huggingface_hub/guides/inference#supported-providers-and-tasks huggingface_hub.inference._client.InferenceClient.text_generation = ( - _wrap_text_generation( - huggingface_hub.inference._client.InferenceClient.text_generation + _wrap_huggingface_task( + huggingface_hub.inference._client.InferenceClient.text_generation, + OP.GEN_AI_GENERATE_TEXT, ) ) huggingface_hub.inference._client.InferenceClient.chat_completion = ( - _wrap_text_generation( - huggingface_hub.inference._client.InferenceClient.chat_completion + _wrap_huggingface_task( + huggingface_hub.inference._client.InferenceClient.chat_completion, + OP.GEN_AI_CHAT, ) ) @@ -57,8 +59,8 @@ def 
_capture_exception(exc): sentry_sdk.capture_event(event, hint=hint) -def _wrap_text_generation(f): - # type: (Callable[..., Any]) -> Callable[..., Any] +def _wrap_huggingface_task(f, op): + # type: (Callable[..., Any], str) -> Callable[..., Any] @wraps(f) def new_text_generation(*args, **kwargs): # type: (*Any, **Any) -> Any @@ -81,21 +83,23 @@ def new_text_generation(*args, **kwargs): client = args[0] model = client.model or kwargs.get("model") or "" streaming = kwargs.get("stream") + operation_name = op.split(".")[-1] span = sentry_sdk.start_span( - op=OP.GEN_AI_GENERATE_TEXT, - name=f"generate_text {model}", + op=op, + name=f"{operation_name} {model}", origin=HuggingfaceHubIntegration.origin, ) span.__enter__() - span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, "generate_text") + span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, operation_name) if model: span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) try: res = f(*args, **kwargs) except Exception as e: + span.set_status("error") _capture_exception(e) span.__exit__(None, None, None) raise e from None @@ -120,9 +124,6 @@ def new_text_generation(*args, **kwargs): if isinstance(res, TextGenerationOutput): if should_send_default_pii() and integration.include_prompts: - import ipdb - - ipdb.set_trace() set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, From 27c851dd627c6b5916d1c9cbefcaa8bd8309f2a8 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 3 Sep 2025 12:47:26 +0200 Subject: [PATCH 05/43] better format of dict in span data --- sentry_sdk/ai/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index cf52cba6e8..d6e5293a68 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -1,3 +1,5 @@ +import json + from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -33,4 +35,4 @@ def set_data_normalized(span, key, value, unpack=True): if isinstance(normalized, (int, float, bool, str)): span.set_data(key, normalized) else: - span.set_data(key, str(normalized)) + span.set_data(key, json.dumps(normalized)) From ade94106f6c4b2365dcd7348340b43088759f60f Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 3 Sep 2025 13:44:48 +0200 Subject: [PATCH 06/43] fix test --- tests/integrations/huggingface_hub/test_huggingface_hub.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index df0c6c6d76..8a50dd0fe2 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -68,7 +68,7 @@ def test_nonstreaming_chat_completion( tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] - assert span["op"] == "ai.chat_completions.create.huggingface_hub" + assert span["op"] == "gen_ai.generate_text" if send_default_pii and include_prompts: assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES] @@ -127,7 +127,7 @@ def test_streaming_chat_completion( tx = events[0] assert tx["type"] == "transaction" span = tx["spans"][0] - assert span["op"] == "ai.chat_completions.create.huggingface_hub" + assert span["op"] == "gen_ai.generate_text" if send_default_pii and include_prompts: assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES] From afa687cfc08e6bba52a1b481d954e2a53db15084 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Wed, 3 Sep 2025 15:04:38 +0200 Subject: [PATCH 07/43] attributes for huggingface requests/responses --- 
sentry_sdk/integrations/huggingface_hub.py | 115 +++++++++++++++------ 1 file changed, 84 insertions(+), 31 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index d8ae2a2285..5a5cbe61d6 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -62,7 +62,7 @@ def _capture_exception(exc): def _wrap_huggingface_task(f, op): # type: (Callable[..., Any], str) -> Callable[..., Any] @wraps(f) - def new_text_generation(*args, **kwargs): + def new_huggingface_task(*args, **kwargs): # type: (*Any, **Any) -> Any integration = sentry_sdk.get_client().get_integration(HuggingfaceHubIntegration) if integration is None: @@ -82,7 +82,6 @@ def new_text_generation(*args, **kwargs): client = args[0] model = client.model or kwargs.get("model") or "" - streaming = kwargs.get("stream") operation_name = op.split(".")[-1] span = sentry_sdk.start_span( @@ -93,9 +92,29 @@ def new_text_generation(*args, **kwargs): span.__enter__() span.set_data(SPANDATA.GEN_AI_OPERATION_NAME, operation_name) + if model: span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) + # Input attributes + attribute_mapping = { + "tools": SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, + "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, + "max_tokens": SPANDATA.GEN_AI_REQUEST_MAX_TOKENS, + "presence_penalty": SPANDATA.GEN_AI_REQUEST_PRESENCE_PENALTY, + "temperature": SPANDATA.GEN_AI_REQUEST_TEMPERATURE, + "top_p": SPANDATA.GEN_AI_REQUEST_TOP_P, + "top_k": SPANDATA.GEN_AI_REQUEST_TOP_K, + "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING, + } + for attribute, span_attribute in attribute_mapping.items(): + value = kwargs.get(attribute, None) + if value is not None: + if isinstance(value, (int, float, bool, str)): + span.set_data(span_attribute, value) + else: + set_data_normalized(span, span_attribute, value, unpack=False) + try: res = f(*args, **kwargs) except Exception as e: @@ -105,30 +124,56 @@ def new_text_generation(*args, **kwargs): raise e from None with capture_internal_exceptions(): + # Output attributes + if hasattr(res, "model"): + model = res.model + if model: + span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model) + + if hasattr(res, "details") and res.details is not None: + finish_reason = getattr(res.details, "finish_reason", None) + if finish_reason: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason + ) + + try: + tool_calls = res.choices[0].message.tool_calls + except Exception: + tool_calls = [] + + if len(tool_calls) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + tool_calls, + unpack=False, + ) + if should_send_default_pii() and integration.include_prompts: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False ) - span.set_data(SPANDATA.GEN_AI_RESPONSE_STREAMING, streaming) - if isinstance(res, str): if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - res, - ) + if res: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + res, + ) span.__exit__(None, None, None) return res if isinstance(res, TextGenerationOutput): if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - res.generated_text, - ) + if res.generated_text: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + res.generated_text, + ) if res.details is not None and res.details.generated_tokens > 0: 
record_token_usage( span, @@ -140,13 +185,17 @@ def new_text_generation(*args, **kwargs): if isinstance(res, ChatCompletionOutput): if should_send_default_pii() and integration.include_prompts: text_response = "".join( - [x.get("message", {}).get("content") for x in res.choices] - ) - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - text_response, + [ + x.get("message", {}).get("content", None) or "" + for x in res.choices + ] ) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) if hasattr(res, "usage") and res.usage is not None: record_token_usage( span, @@ -182,11 +231,13 @@ def new_details_iterator(): and should_send_default_pii() and integration.include_prompts ): - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - "".join(data_buf), - ) + text_response = "".join(data_buf) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) if tokens_used > 0: record_token_usage( span, @@ -211,13 +262,15 @@ def new_iterator(): and should_send_default_pii() and integration.include_prompts ): - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - "".join(data_buf), - ) + text_response = "".join(data_buf) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) span.__exit__(None, None, None) return new_iterator() - return new_text_generation + return new_huggingface_task From 8b439a33da14514045e83c3069d1bbbce6d35d6e Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 4 Sep 2025 08:31:06 +0200 Subject: [PATCH 08/43] cleanup --- sentry_sdk/integrations/huggingface_hub.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 5a5cbe61d6..9e7f6ff2d5 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -207,7 +207,6 @@ def new_huggingface_task(*args, **kwargs): return res if not isinstance(res, Iterable): - # we only know how to deal with strings and iterables, ignore span.__exit__(None, None, None) return res From 4ca5442dc106e330bdd88f2ab882af32eec9e792 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 4 Sep 2025 14:27:28 +0200 Subject: [PATCH 09/43] updated tests --- .../huggingface_hub/test_huggingface_hub.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 8a50dd0fe2..897c4bb223 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -71,11 +71,11 @@ def test_nonstreaming_chat_completion( assert span["op"] == "gen_ai.generate_text" if send_default_pii and include_prompts: - assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES] - assert "the model response" in span["data"][SPANDATA.AI_RESPONSES] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: - assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] - assert SPANDATA.AI_RESPONSES not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] if details_arg: assert span["data"]["gen_ai.usage.total_tokens"] == 10 @@ -130,11 +130,11 @@ def test_streaming_chat_completion( assert span["op"] == 
"gen_ai.generate_text" if send_default_pii and include_prompts: - assert "hello" in span["data"][SPANDATA.AI_INPUT_MESSAGES] - assert "the model response" in span["data"][SPANDATA.AI_RESPONSES] + assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] else: - assert SPANDATA.AI_INPUT_MESSAGES not in span["data"] - assert SPANDATA.AI_RESPONSES not in span["data"] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] + assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] if details_arg: assert span["data"]["gen_ai.usage.total_tokens"] == 10 From 9b1d23a48a4104c7de9d24f3260eecfc73881519 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 4 Sep 2025 14:31:05 +0200 Subject: [PATCH 10/43] better tests --- .../huggingface_hub/test_huggingface_hub.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 897c4bb223..a9dc450168 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -8,7 +8,6 @@ from huggingface_hub.errors import OverloadedError from sentry_sdk import start_transaction -from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration @@ -71,11 +70,11 @@ def test_nonstreaming_chat_completion( assert span["op"] == "gen_ai.generate_text" if send_default_pii and include_prompts: - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["data"]["gen_ai.request.messages"] + assert "the model response" in span["data"]["gen_ai.response.text"] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] if details_arg: assert span["data"]["gen_ai.usage.total_tokens"] == 10 @@ -130,11 +129,11 @@ def test_streaming_chat_completion( assert span["op"] == "gen_ai.generate_text" if send_default_pii and include_prompts: - assert "hello" in span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] - assert "the model response" in span["data"][SPANDATA.GEN_AI_RESPONSE_TEXT] + assert "hello" in span["data"]["gen_ai.request.messages"] + assert "the model response" in span["data"]["gen_ai.response.text"] else: - assert SPANDATA.GEN_AI_REQUEST_MESSAGES not in span["data"] - assert SPANDATA.GEN_AI_RESPONSE_TEXT not in span["data"] + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] if details_arg: assert span["data"]["gen_ai.usage.total_tokens"] == 10 From 8451b6d996d9778c8185f3a99dd3f9507deca489 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Thu, 4 Sep 2025 14:41:25 +0200 Subject: [PATCH 11/43] do this in separate pr --- sentry_sdk/ai/utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index d6e5293a68..cf52cba6e8 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -1,5 +1,3 @@ -import json - from typing import TYPE_CHECKING if TYPE_CHECKING: @@ -35,4 +33,4 @@ def set_data_normalized(span, key, value, unpack=True): if isinstance(normalized, (int, float, bool, str)): span.set_data(key, normalized) else: - 
span.set_data(key, json.dumps(normalized)) + span.set_data(key, str(normalized)) From 75e55c1c3872b6957dc52470a98e2bce46a7263c Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 12:20:04 +0200 Subject: [PATCH 12/43] First step of refactoring test --- scripts/populate_tox/config.py | 3 + sentry_sdk/integrations/huggingface_hub.py | 2 +- .../old_test_huggingface_hub.py | 185 +++++++++++ .../huggingface_hub/test_huggingface_hub.py | 297 +++++++++--------- tox.ini | 31 +- 5 files changed, 346 insertions(+), 172 deletions(-) create mode 100644 tests/integrations/huggingface_hub/old_test_huggingface_hub.py diff --git a/scripts/populate_tox/config.py b/scripts/populate_tox/config.py index b05c4297f1..38d3e7fc09 100644 --- a/scripts/populate_tox/config.py +++ b/scripts/populate_tox/config.py @@ -155,6 +155,9 @@ }, "huggingface_hub": { "package": "huggingface_hub", + "deps": { + "*": ["responses"], + }, }, "langchain-base": { "package": "langchain", diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 9e7f6ff2d5..618af608c5 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -142,7 +142,7 @@ def new_huggingface_task(*args, **kwargs): except Exception: tool_calls = [] - if len(tool_calls) > 0: + if tool_calls is not None and len(tool_calls) > 0: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, diff --git a/tests/integrations/huggingface_hub/old_test_huggingface_hub.py b/tests/integrations/huggingface_hub/old_test_huggingface_hub.py new file mode 100644 index 0000000000..a9dc450168 --- /dev/null +++ b/tests/integrations/huggingface_hub/old_test_huggingface_hub.py @@ -0,0 +1,185 @@ +import itertools +from unittest import mock + +import pytest +from huggingface_hub import ( + InferenceClient, +) +from huggingface_hub.errors import OverloadedError + +from sentry_sdk import start_transaction +from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration + + +def mock_client_post(client, post_mock): + # huggingface-hub==0.28.0 deprecates the `post` method + # so patch `_inner_post` instead + if hasattr(client, "post"): + client.post = post_mock + if hasattr(client, "_inner_post"): + client._inner_post = post_mock + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts, details_arg", + itertools.product([True, False], repeat=3), +) +def test_nonstreaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts, details_arg +): + sentry_init( + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = InferenceClient(model="https://") + + if details_arg: + post_mock = mock.Mock( + return_value=b"""[{ + "generated_text": "the model response", + "details": { + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [], + "tokens": [] + } + }]""" + ) + else: + post_mock = mock.Mock( + return_value=b'[{"generated_text": "the model response"}]' + ) + mock_client_post(client, post_mock) + + with start_transaction(name="huggingface_hub tx"): + response = client.text_generation( + prompt="hello", + details=details_arg, + stream=False, + ) + if details_arg: + assert response.generated_text == "the model response" + else: + assert response == "the model response" + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.generate_text" + + if 
send_default_pii and include_prompts: + assert "hello" in span["data"]["gen_ai.request.messages"] + assert "the model response" in span["data"]["gen_ai.response.text"] + else: + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] + + if details_arg: + assert span["data"]["gen_ai.usage.total_tokens"] == 10 + + +@pytest.mark.parametrize( + "send_default_pii, include_prompts, details_arg", + itertools.product([True, False], repeat=3), +) +def test_streaming_chat_completion( + sentry_init, capture_events, send_default_pii, include_prompts, details_arg +): + sentry_init( + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + ) + events = capture_events() + + client = InferenceClient(model="https://") + + post_mock = mock.Mock( + return_value=[ + b"""data:{ + "token":{"id":1, "special": false, "text": "the model "} + }""", + b"""data:{ + "token":{"id":2, "special": false, "text": "response"}, + "details":{"finish_reason": "length", "generated_tokens": 10, "seed": 0} + }""", + ] + ) + mock_client_post(client, post_mock) + + with start_transaction(name="huggingface_hub tx"): + response = list( + client.text_generation( + prompt="hello", + details=details_arg, + stream=True, + ) + ) + assert len(response) == 2 + if details_arg: + assert response[0].token.text + response[1].token.text == "the model response" + else: + assert response[0] + response[1] == "the model response" + + tx = events[0] + assert tx["type"] == "transaction" + span = tx["spans"][0] + assert span["op"] == "gen_ai.generate_text" + + if send_default_pii and include_prompts: + assert "hello" in span["data"]["gen_ai.request.messages"] + assert "the model response" in span["data"]["gen_ai.response.text"] + else: + assert "gen_ai.request.messages" not in span["data"] + assert "gen_ai.response.text" not in span["data"] + + if details_arg: + assert span["data"]["gen_ai.usage.total_tokens"] == 10 + + +def test_bad_chat_completion(sentry_init, capture_events): + sentry_init(integrations=[HuggingfaceHubIntegration()], traces_sample_rate=1.0) + events = capture_events() + + client = InferenceClient(model="https://") + post_mock = mock.Mock(side_effect=OverloadedError("The server is overloaded")) + mock_client_post(client, post_mock) + + with pytest.raises(OverloadedError): + client.text_generation(prompt="hello") + + (event,) = events + assert event["level"] == "error" + + +def test_span_origin(sentry_init, capture_events): + sentry_init( + integrations=[HuggingfaceHubIntegration()], + traces_sample_rate=1.0, + ) + events = capture_events() + + client = InferenceClient(model="https://") + post_mock = mock.Mock( + return_value=[ + b"""data:{ + "token":{"id":1, "special": false, "text": "the model "} + }""", + ] + ) + mock_client_post(client, post_mock) + + with start_transaction(name="huggingface_hub tx"): + list( + client.text_generation( + prompt="hello", + stream=True, + ) + ) + + (event,) = events + + assert event["contexts"]["trace"]["origin"] == "manual" + assert event["spans"][0]["origin"] == "auto.ai.huggingface_hub" diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index a9dc450168..192b1eead2 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -1,185 +1,170 @@ -import itertools from unittest import mock - import pytest -from 
huggingface_hub import ( - InferenceClient, -) -from huggingface_hub.errors import OverloadedError - -from sentry_sdk import start_transaction -from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration - - -def mock_client_post(client, post_mock): - # huggingface-hub==0.28.0 deprecates the `post` method - # so patch `_inner_post` instead - if hasattr(client, "post"): - client.post = post_mock - if hasattr(client, "_inner_post"): - client._inner_post = post_mock - - -@pytest.mark.parametrize( - "send_default_pii, include_prompts, details_arg", - itertools.product([True, False], repeat=3), -) -def test_nonstreaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts, details_arg -): - sentry_init( - integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - traces_sample_rate=1.0, - send_default_pii=send_default_pii, - ) - events = capture_events() +import responses + +from huggingface_hub import InferenceClient + +import sentry_sdk + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any + + +@pytest.fixture +def mock_hf_text_generation_api(): + # type: () -> Any + """Mock HuggingFace text generation API""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + f"https://huggingface.co/api/models/{model_name}", + json={ + "id": model_name, + "pipeline_tag": "text-generation", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "text-generation", + } + }, + }, + status=200, + ) - client = InferenceClient(model="https://") - - if details_arg: - post_mock = mock.Mock( - return_value=b"""[{ - "generated_text": "the model response", - "details": { - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [], - "tokens": [] + # Mock text generation endpoint + rsps.add( + responses.POST, + f"https://router.huggingface.co/hf-inference/models/{model_name}", + json=[ + { + "generated_text": "Mocked response", + "details": { + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [], + "tokens": [], + }, } - }]""" + ], + status=200, ) - else: - post_mock = mock.Mock( - return_value=b'[{"generated_text": "the model response"}]' - ) - mock_client_post(client, post_mock) - with start_transaction(name="huggingface_hub tx"): - response = client.text_generation( - prompt="hello", - details=details_arg, - stream=False, + yield rsps + + +@pytest.fixture +def mock_hf_chat_completion_api(): + # type: () -> Any + """Mock HuggingFace chat completion API""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + f"https://huggingface.co/api/models/{model_name}", + json={ + "id": model_name, + "pipeline_tag": "conversational", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "conversational", + } + }, + }, + status=200, ) - if details_arg: - assert response.generated_text == "the model response" - else: - assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.generate_text" - if send_default_pii and include_prompts: - assert "hello" in span["data"]["gen_ai.request.messages"] - assert "the model response" in span["data"]["gen_ai.response.text"] - else: - assert 
"gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] + # Mock chat completion endpoint + rsps.add( + responses.POST, + f"https://router.huggingface.co/hf-inference/models/{model_name}/v1/chat/completions", + json={ + "id": f"{model_name}-123", + "created": 1234567890, + "model": "test-model-123", + "system_fingerprint": "fp_123", + "choices": [ + { + "index": 0, + "finish_reason": "stop", + "message": { + "role": "assistant", + "content": "Hello! How can I help you today?", + }, + "logprobs": None, + } + ], + "usage": { + "completion_tokens": 8, + "prompt_tokens": 10, + "total_tokens": 18, + }, + }, + status=200, + ) - if details_arg: - assert span["data"]["gen_ai.usage.total_tokens"] == 10 + yield rsps -@pytest.mark.parametrize( - "send_default_pii, include_prompts, details_arg", - itertools.product([True, False], repeat=3), -) -def test_streaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts, details_arg -): +def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_api): + # type: (Any, Any, Any) -> None sentry_init( - integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], traces_sample_rate=1.0, - send_default_pii=send_default_pii, ) events = capture_events() - client = InferenceClient(model="https://") - - post_mock = mock.Mock( - return_value=[ - b"""data:{ - "token":{"id":1, "special": false, "text": "the model "} - }""", - b"""data:{ - "token":{"id":2, "special": false, "text": "response"}, - "details":{"finish_reason": "length", "generated_tokens": 10, "seed": 0} - }""", - ] - ) - mock_client_post(client, post_mock) - - with start_transaction(name="huggingface_hub tx"): - response = list( - client.text_generation( - prompt="hello", - details=details_arg, - stream=True, - ) - ) - assert len(response) == 2 - if details_arg: - assert response[0].token.text + response[1].token.text == "the model response" - else: - assert response[0] + response[1] == "the model response" + client = InferenceClient(model="test-model") + + with sentry_sdk.start_transaction(name="test_tx"): + response = client.text_generation(prompt="Hello") + + # Verify the response + assert response == "Mocked response" + # Verify Sentry integration worked tx = events[0] - assert tx["type"] == "transaction" span = tx["spans"][0] assert span["op"] == "gen_ai.generate_text" - if send_default_pii and include_prompts: - assert "hello" in span["data"]["gen_ai.request.messages"] - assert "the model response" in span["data"]["gen_ai.response.text"] - else: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] - if details_arg: - assert span["data"]["gen_ai.usage.total_tokens"] == 10 - - -def test_bad_chat_completion(sentry_init, capture_events): - sentry_init(integrations=[HuggingfaceHubIntegration()], traces_sample_rate=1.0) - events = capture_events() - - client = InferenceClient(model="https://") - post_mock = mock.Mock(side_effect=OverloadedError("The server is overloaded")) - mock_client_post(client, post_mock) - - with pytest.raises(OverloadedError): - client.text_generation(prompt="hello") - - (event,) = events - assert event["level"] == "error" - - -def test_span_origin(sentry_init, capture_events): +def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_api): + # type: (Any, Any, Any) -> None sentry_init( - integrations=[HuggingfaceHubIntegration()], traces_sample_rate=1.0, ) events = capture_events() - client = 
InferenceClient(model="https://") - post_mock = mock.Mock( - return_value=[ - b"""data:{ - "token":{"id":1, "special": false, "text": "the model "} - }""", - ] - ) - mock_client_post(client, post_mock) - - with start_transaction(name="huggingface_hub tx"): - list( - client.text_generation( - prompt="hello", - stream=True, - ) - ) + client = InferenceClient(model="test-model") - (event,) = events + # Create a chat-style prompt using text generation + chat_prompt = "Human: Hello\nAssistant:" - assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.huggingface_hub" + with sentry_sdk.start_transaction(name="test_chat_style_tx"): + client.chat_completion( + model="test-model", + messages=[{"role": "user", "content": chat_prompt}], + ) + + tx = events[0] + span = tx["spans"][0] + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span["data"] == { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "gen_ai.response.model": "test-model-123", + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } diff --git a/tox.ini b/tox.ini index 335007664a..01f86be521 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ # The file (and all resulting CI YAMLs) then need to be regenerated via # "scripts/generate-test-files.sh". # -# Last generated: 2025-09-05T07:52:27.350774+00:00 +# Last generated: 2025-09-08T07:12:48.167820+00:00 [tox] requires = @@ -128,8 +128,8 @@ envlist = {py3.8,py3.11,py3.12}-openai-notiktoken-v1.71.0 {py3.8,py3.12,py3.13}-openai-notiktoken-v1.106.1 - {py3.9,py3.12,py3.13}-langgraph-v0.6.6 - {py3.10,py3.12,py3.13}-langgraph-v1.0.0a2 + {py3.9,py3.12,py3.13}-langgraph-v0.6.7 + {py3.10,py3.12,py3.13}-langgraph-v1.0.0a3 {py3.10,py3.11,py3.12}-openai_agents-v0.0.19 {py3.10,py3.12,py3.13}-openai_agents-v0.1.0 @@ -146,7 +146,7 @@ envlist = {py3.6,py3.7}-boto3-v1.12.49 {py3.6,py3.9,py3.10}-boto3-v1.20.54 {py3.7,py3.11,py3.12}-boto3-v1.28.85 - {py3.9,py3.12,py3.13}-boto3-v1.40.24 + {py3.9,py3.12,py3.13}-boto3-v1.40.25 {py3.6,py3.7,py3.8}-chalice-v1.16.0 {py3.6,py3.7,py3.8}-chalice-v1.21.9 @@ -205,7 +205,7 @@ envlist = {py3.6,py3.9,py3.10}-gql-v3.4.1 {py3.7,py3.11,py3.12}-gql-v3.5.3 {py3.9,py3.12,py3.13}-gql-v4.0.0 - {py3.9,py3.12,py3.13}-gql-v4.1.0b0 + {py3.9,py3.12,py3.13}-gql-v4.2.0b0 {py3.6,py3.9,py3.10}-graphene-v3.3 {py3.8,py3.12,py3.13}-graphene-v3.4.3 @@ -213,7 +213,7 @@ envlist = {py3.8,py3.10,py3.11}-strawberry-v0.209.8 {py3.8,py3.11,py3.12}-strawberry-v0.233.3 {py3.9,py3.12,py3.13}-strawberry-v0.257.0 - {py3.9,py3.12,py3.13}-strawberry-v0.281.0 + {py3.9,py3.12,py3.13}-strawberry-v0.282.0 # ~~~ Network ~~~ @@ -251,7 +251,7 @@ envlist = {py3.8,py3.9}-spark-v3.0.3 {py3.8,py3.10,py3.11}-spark-v3.5.6 - {py3.9,py3.12,py3.13}-spark-v4.0.0 + {py3.9,py3.12,py3.13}-spark-v4.0.1 # ~~~ Web 1 ~~~ @@ -325,7 +325,7 @@ envlist = {py3.7,py3.12,py3.13}-typer-v0.15.4 {py3.7,py3.12,py3.13}-typer-v0.16.1 - {py3.7,py3.12,py3.13}-typer-v0.17.3 + {py3.7,py3.12,py3.13}-typer-v0.17.4 @@ -515,8 +515,8 @@ deps = openai-notiktoken-v1.0.1: httpx<0.28 openai-notiktoken-v1.36.1: httpx<0.28 - langgraph-v0.6.6: langgraph==0.6.6 - langgraph-v1.0.0a2: langgraph==1.0.0a2 + langgraph-v0.6.7: langgraph==0.6.7 + langgraph-v1.0.0a3: langgraph==1.0.0a3 openai_agents-v0.0.19: openai-agents==0.0.19 openai_agents-v0.1.0: openai-agents==0.1.0 @@ -528,13 +528,14 @@ deps = huggingface_hub-v0.30.2: 
huggingface_hub==0.30.2 huggingface_hub-v0.34.4: huggingface_hub==0.34.4 huggingface_hub-v0.35.0rc0: huggingface_hub==0.35.0rc0 + huggingface_hub: responses # ~~~ Cloud ~~~ boto3-v1.12.49: boto3==1.12.49 boto3-v1.20.54: boto3==1.20.54 boto3-v1.28.85: boto3==1.28.85 - boto3-v1.40.24: boto3==1.40.24 + boto3-v1.40.25: boto3==1.40.25 {py3.7,py3.8}-boto3: urllib3<2.0.0 chalice-v1.16.0: chalice==1.16.0 @@ -601,7 +602,7 @@ deps = gql-v3.4.1: gql[all]==3.4.1 gql-v3.5.3: gql[all]==3.5.3 gql-v4.0.0: gql[all]==4.0.0 - gql-v4.1.0b0: gql[all]==4.1.0b0 + gql-v4.2.0b0: gql[all]==4.2.0b0 graphene-v3.3: graphene==3.3 graphene-v3.4.3: graphene==3.4.3 @@ -614,7 +615,7 @@ deps = strawberry-v0.209.8: strawberry-graphql[fastapi,flask]==0.209.8 strawberry-v0.233.3: strawberry-graphql[fastapi,flask]==0.233.3 strawberry-v0.257.0: strawberry-graphql[fastapi,flask]==0.257.0 - strawberry-v0.281.0: strawberry-graphql[fastapi,flask]==0.281.0 + strawberry-v0.282.0: strawberry-graphql[fastapi,flask]==0.282.0 strawberry: httpx strawberry-v0.209.8: pydantic<2.11 strawberry-v0.233.3: pydantic<2.11 @@ -667,7 +668,7 @@ deps = spark-v3.0.3: pyspark==3.0.3 spark-v3.5.6: pyspark==3.5.6 - spark-v4.0.0: pyspark==4.0.0 + spark-v4.0.1: pyspark==4.0.1 # ~~~ Web 1 ~~~ @@ -810,7 +811,7 @@ deps = typer-v0.15.4: typer==0.15.4 typer-v0.16.1: typer==0.16.1 - typer-v0.17.3: typer==0.17.3 + typer-v0.17.4: typer==0.17.4 From ddc622a98ec465b7eab380b5b461ce77fcd2b8b9 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 12:30:04 +0200 Subject: [PATCH 13/43] cleanup --- .../huggingface_hub/test_huggingface_hub.py | 40 +++++++++---------- 1 file changed, 19 insertions(+), 21 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 192b1eead2..48abe1aaf4 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -117,45 +117,43 @@ def mock_hf_chat_completion_api(): def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_api): # type: (Any, Any, Any) -> None - sentry_init( - traces_sample_rate=1.0, - ) + sentry_init(traces_sample_rate=1.0) events = capture_events() client = InferenceClient(model="test-model") - with sentry_sdk.start_transaction(name="test_tx"): - response = client.text_generation(prompt="Hello") + with sentry_sdk.start_transaction(name="test"): + client.text_generation(prompt="Hello") - # Verify the response - assert response == "Mocked response" + (transaction,) = events + (span,) = transaction["spans"] - # Verify Sentry integration worked - tx = events[0] - span = tx["spans"][0] assert span["op"] == "gen_ai.generate_text" + assert span["description"] == "generate_text test-model" + assert span["data"] == { + "gen_ai.operation.name": "generate_text", + "gen_ai.request.model": "test-model", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_api): # type: (Any, Any, Any) -> None - sentry_init( - traces_sample_rate=1.0, - ) + sentry_init(traces_sample_rate=1.0) events = capture_events() - client = InferenceClient(model="test-model") - - # Create a chat-style prompt using text generation - chat_prompt = "Human: Hello\nAssistant:" + client = InferenceClient() - with sentry_sdk.start_transaction(name="test_chat_style_tx"): + with sentry_sdk.start_transaction(name="test"): client.chat_completion( model="test-model", - messages=[{"role": "user", 
"content": chat_prompt}], + messages=[{"role": "user", "content": "Hello!"}], ) - tx = events[0] - span = tx["spans"][0] + (transaction,) = events + (span,) = transaction["spans"] + assert span["op"] == "gen_ai.chat" assert span["description"] == "chat test-model" assert span["data"] == { From b9a63da0869877cd1df71416d1c7bed87110ca22 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 13:35:56 +0200 Subject: [PATCH 14/43] asserts --- .../huggingface_hub/test_huggingface_hub.py | 46 +++++++++++-------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 48abe1aaf4..6db5ac86a4 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -41,17 +41,15 @@ def mock_hf_text_generation_api(): rsps.add( responses.POST, f"https://router.huggingface.co/hf-inference/models/{model_name}", - json=[ - { - "generated_text": "Mocked response", - "details": { - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [], - "tokens": [], - }, - } - ], + json={ + "generated_text": "Mocked response", + "details": { + "finish_reason": "length", + "generated_tokens": 10, + "prefill": [], + "tokens": [], + }, + }, status=200, ) @@ -88,9 +86,9 @@ def mock_hf_chat_completion_api(): responses.POST, f"https://router.huggingface.co/hf-inference/models/{model_name}/v1/chat/completions", json={ - "id": f"{model_name}-123", + "id": "xyz-123", "created": 1234567890, - "model": "test-model-123", + "model": f"{model_name}-123", "system_fingerprint": "fp_123", "choices": [ { @@ -100,7 +98,7 @@ def mock_hf_chat_completion_api(): "role": "assistant", "content": "Hello! 
How can I help you today?", }, - "logprobs": None, + # "logprobs": None, } ], "usage": { @@ -120,10 +118,16 @@ def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_ap sentry_init(traces_sample_rate=1.0) events = capture_events() - client = InferenceClient(model="test-model") + client = InferenceClient( + model="test-model", + ) with sentry_sdk.start_transaction(name="test"): - client.text_generation(prompt="Hello") + client.text_generation( + prompt="Hello", + stream=False, + details=True, + ) (transaction,) = events (span,) = transaction["spans"] @@ -133,6 +137,9 @@ def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_ap assert span["data"] == { "gen_ai.operation.name": "generate_text", "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": False, + "gen_ai.usage.total_tokens": 10, "thread.id": mock.ANY, "thread.name": mock.ANY, } @@ -143,12 +150,14 @@ def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_ap sentry_init(traces_sample_rate=1.0) events = capture_events() - client = InferenceClient() + client = InferenceClient( + model="test-model", + ) with sentry_sdk.start_transaction(name="test"): client.chat_completion( - model="test-model", messages=[{"role": "user", "content": "Hello!"}], + stream=False, ) (transaction,) = events @@ -160,6 +169,7 @@ def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_ap "gen_ai.operation.name": "chat", "gen_ai.request.model": "test-model", "gen_ai.response.model": "test-model-123", + "gen_ai.response.streaming": False, "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 8, "gen_ai.usage.total_tokens": 18, From 881d74b8981f102eec6916b07ab13a0e8cea31eb Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 13:40:44 +0200 Subject: [PATCH 15/43] finish reason --- sentry_sdk/integrations/huggingface_hub.py | 14 ++++++++++---- .../huggingface_hub/test_huggingface_hub.py | 2 +- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 618af608c5..e1e7a2058e 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -130,12 +130,18 @@ def new_huggingface_task(*args, **kwargs): if model: span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model) + finish_reason = None if hasattr(res, "details") and res.details is not None: finish_reason = getattr(res.details, "finish_reason", None) - if finish_reason: - span.set_data( - SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason - ) + + if finish_reason is None: + try: + finish_reason = res.choices[0].finish_reason + except Exception: + pass + + if finish_reason: + span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason) try: tool_calls = res.choices[0].message.tool_calls diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 6db5ac86a4..2355a40e3e 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -98,7 +98,6 @@ def mock_hf_chat_completion_api(): "role": "assistant", "content": "Hello! 
How can I help you today?",
                         },
-                        # "logprobs": None,
                     }
                 ],
                 "usage": {
@@ -168,6 +167,7 @@ def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_ap
     assert span["data"] == {
         "gen_ai.operation.name": "chat",
         "gen_ai.request.model": "test-model",
+        "gen_ai.response.finish_reasons": "stop",
         "gen_ai.response.model": "test-model-123",
         "gen_ai.response.streaming": False,

From fa2cb56586d29665b08d850ed47ad09b022b33e8 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 8 Sep 2025 14:03:08 +0200
Subject: [PATCH 16/43] assert

---
 tests/integrations/huggingface_hub/test_huggingface_hub.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index 2355a40e3e..1b17a1f009 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -142,6 +142,8 @@ def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_ap
         "thread.id": mock.ANY,
         "thread.name": mock.ANY,
     }
+    # text generation does not set the response model
+    assert "gen_ai.response.model" not in span["data"]
 

From 893d1ffee471f686463608e3bc84c9ef84566f41 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 8 Sep 2025 14:25:43 +0200
Subject: [PATCH 17/43] old versions

---
 .../huggingface_hub/test_huggingface_hub.py   | 21 +++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index 1b17a1f009..7c4f61128c 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -5,6 +5,7 @@
 from huggingface_hub import InferenceClient
 
 import sentry_sdk
+from sentry_sdk.utils import package_version
 
 from typing import TYPE_CHECKING
 
@@ -12,6 +13,18 @@
     from typing import Any
 
 
+HF_VERSION = package_version("huggingface-hub")
+
+if HF_VERSION and HF_VERSION < (0, 30, 0):
+    MODEL_ENDPOINT = "https://api-inference.huggingface.co/models/{model_name}"
+    INFERENCE_ENDPOINT = "https://api-inference.huggingface.co/models/{model_name}"
+else:
+    MODEL_ENDPOINT = "https://huggingface.co/api/models/{model_name}"
+    INFERENCE_ENDPOINT = (
+        "https://router.huggingface.co/hf-inference/models/{model_name}"
+    )
+
+
 @pytest.fixture
 def mock_hf_text_generation_api():
     # type: () -> Any
@@ -22,7 +35,7 @@ def mock_hf_text_generation_api():
         # Mock model info endpoint
         rsps.add(
             responses.GET,
-            f"https://huggingface.co/api/models/{model_name}",
+            MODEL_ENDPOINT.format(model_name=model_name),
             json={
                 "id": model_name,
                 "pipeline_tag": "text-generation",
@@ -40,7 +53,7 @@ def mock_hf_text_generation_api():
         # Mock text generation endpoint
         rsps.add(
             responses.POST,
-            f"https://router.huggingface.co/hf-inference/models/{model_name}",
+            INFERENCE_ENDPOINT.format(model_name=model_name),
             json={
                 "generated_text": "Mocked response",
@@ -79,7 +92,7 @@ def mock_hf_chat_completion_api():
         # Mock model info endpoint
         rsps.add(
             responses.GET,
-            f"https://huggingface.co/api/models/{model_name}",
+            MODEL_ENDPOINT.format(model_name=model_name),
             json={
                 "id": model_name,
                 "pipeline_tag": "conversational",
@@ -97,7 +110,7 @@ def mock_hf_chat_completion_api():
         # Mock chat completion endpoint
         rsps.add(
             responses.POST,
-            
f"https://router.huggingface.co/hf-inference/models/{model_name}/v1/chat/completions", + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", json={ "id": "xyz-123", "created": 1234567890, From 44d0a9e8fa1fc0831119024ec2368bc40250124e Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 14:42:51 +0200 Subject: [PATCH 18/43] bump --- sentry_sdk/integrations/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py index 7f202221a7..af5db856b2 100644 --- a/sentry_sdk/integrations/__init__.py +++ b/sentry_sdk/integrations/__init__.py @@ -141,7 +141,7 @@ def iter_default_integrations(with_auto_enabling_integrations): "gql": (3, 4, 1), "graphene": (3, 3), "grpc": (1, 32, 0), # grpcio - "huggingface_hub": (0, 22), + "huggingface_hub": (0, 23), "langchain": (0, 1, 0), "langgraph": (0, 6, 6), "launchdarkly": (9, 8, 0), From 3855f56653fc21d2959d368af41933479912f5eb Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 14:55:22 +0200 Subject: [PATCH 19/43] bump --- sentry_sdk/integrations/__init__.py | 2 +- tox.ini | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sentry_sdk/integrations/__init__.py b/sentry_sdk/integrations/__init__.py index af5db856b2..2f5a1f397e 100644 --- a/sentry_sdk/integrations/__init__.py +++ b/sentry_sdk/integrations/__init__.py @@ -141,7 +141,7 @@ def iter_default_integrations(with_auto_enabling_integrations): "gql": (3, 4, 1), "graphene": (3, 3), "grpc": (1, 32, 0), # grpcio - "huggingface_hub": (0, 23), + "huggingface_hub": (0, 24, 7), "langchain": (0, 1, 0), "langgraph": (0, 6, 6), "launchdarkly": (9, 8, 0), diff --git a/tox.ini b/tox.ini index 326bdc431e..6026278fb9 100644 --- a/tox.ini +++ b/tox.ini @@ -10,7 +10,7 @@ # The file (and all resulting CI YAMLs) then need to be regenerated via # "scripts/generate-test-files.sh". 
# -# Last generated: 2025-09-08T11:35:09.849536+00:00 +# Last generated: 2025-09-08T12:54:55.709539+00:00 [tox] requires = @@ -130,8 +130,8 @@ envlist = {py3.10,py3.12,py3.13}-openai_agents-v0.1.0 {py3.10,py3.12,py3.13}-openai_agents-v0.2.11 - {py3.8,py3.10,py3.11}-huggingface_hub-v0.22.2 - {py3.8,py3.11,py3.12}-huggingface_hub-v0.26.5 + {py3.8,py3.10,py3.11}-huggingface_hub-v0.24.7 + {py3.8,py3.12,py3.13}-huggingface_hub-v0.27.1 {py3.8,py3.12,py3.13}-huggingface_hub-v0.30.2 {py3.8,py3.12,py3.13}-huggingface_hub-v0.34.4 {py3.8,py3.12,py3.13}-huggingface_hub-v0.35.0rc0 @@ -509,8 +509,8 @@ deps = openai_agents-v0.2.11: openai-agents==0.2.11 openai_agents: pytest-asyncio - huggingface_hub-v0.22.2: huggingface_hub==0.22.2 - huggingface_hub-v0.26.5: huggingface_hub==0.26.5 + huggingface_hub-v0.24.7: huggingface_hub==0.24.7 + huggingface_hub-v0.27.1: huggingface_hub==0.27.1 huggingface_hub-v0.30.2: huggingface_hub==0.30.2 huggingface_hub-v0.34.4: huggingface_hub==0.34.4 huggingface_hub-v0.35.0rc0: huggingface_hub==0.35.0rc0 From e45a9c7f8b88d8a731b31d168eabd6bfd11699c6 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 15:14:26 +0200 Subject: [PATCH 20/43] pii testing --- .../huggingface_hub/test_huggingface_hub.py | 53 +++++++++++++++---- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 7c4f61128c..dfa423371a 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -55,7 +55,7 @@ def mock_hf_text_generation_api(): responses.POST, INFERENCE_ENDPOINT.format(model_name=model_name), json={ - "generated_text": "Mocked response", + "generated_text": "[mocked] Hello! How can i help you?", "details": { "finish_reason": "length", "generated_tokens": 10, @@ -109,7 +109,7 @@ def mock_hf_chat_completion_api(): "finish_reason": "stop", "message": { "role": "assistant", - "content": "Hello! How can I help you today?", + "content": "[mocked] Hello! How can I help you today?", }, } ], @@ -125,9 +125,12 @@ def mock_hf_chat_completion_api(): yield rsps -def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_api): - # type: (Any, Any, Any) -> None - sentry_init(traces_sample_rate=1.0) +@pytest.mark.parametrize("send_default_pii", [True, False]) +def test_text_generation( + sentry_init, capture_events, send_default_pii, mock_hf_text_generation_api +): + # type: (Any, Any, Any, Any) -> None + sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) events = capture_events() client = InferenceClient( @@ -146,7 +149,8 @@ def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_ap assert span["op"] == "gen_ai.generate_text" assert span["description"] == "generate_text test-model" - assert span["data"] == { + + expected_data = { "gen_ai.operation.name": "generate_text", "gen_ai.request.model": "test-model", "gen_ai.response.finish_reasons": "length", @@ -155,13 +159,27 @@ def test_text_generation(sentry_init, capture_events, mock_hf_text_generation_ap "thread.id": mock.ANY, "thread.name": mock.ANY, } + + if send_default_pii: + expected_data["gen_ai.request.messages"] = "Hello" + expected_data["gen_ai.response.text"] = "[mocked] Hello! How can i help you?" 
+
+    if not send_default_pii:
+        assert "gen_ai.request.messages" not in expected_data
+        assert "gen_ai.response.text" not in expected_data
+
+    assert span["data"] == expected_data
+
     # text generation does not set the response model
     assert "gen_ai.response.model" not in span["data"]


-def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_api):
-    # type: (Any, Any, Any) -> None
-    sentry_init(traces_sample_rate=1.0)
+@pytest.mark.parametrize("send_default_pii", [True, False])
+def test_chat_completion(
+    sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api
+):
+    # type: (Any, Any, Any, Any) -> None
+    sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii)
     events = capture_events()
 
     client = InferenceClient(
@@ -179,7 +197,8 @@ def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_ap
 
     assert span["op"] == "gen_ai.chat"
     assert span["description"] == "chat test-model"
-    assert span["data"] == {
+
+    expected_data = {
         "gen_ai.operation.name": "chat",
         "gen_ai.request.model": "test-model",
         "gen_ai.response.finish_reasons": "stop",
@@ -191,3 +210,17 @@ def test_chat_completion(sentry_init, capture_events, mock_hf_chat_completion_ap
         "thread.id": mock.ANY,
         "thread.name": mock.ANY,
     }
+
+    if send_default_pii:
+        expected_data["gen_ai.request.messages"] = (
+            '[{"role": "user", "content": "Hello!"}]'
+        )
+        expected_data["gen_ai.response.text"] = (
+            "[mocked] Hello! How can I help you today?"
+        )
+
+    if not send_default_pii:
+        assert "gen_ai.request.messages" not in expected_data
+        assert "gen_ai.response.text" not in expected_data
+
+    assert span["data"] == expected_data

From cce5d8512781bcea8235d0441188ed34379c3add Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 8 Sep 2025 17:34:12 +0200
Subject: [PATCH 21/43] streaming text generation response

---
 sentry_sdk/integrations/huggingface_hub.py    |  9 ++
 .../huggingface_hub/test_huggingface_hub.py   | 93 +++++++++++++++++++
 2 files changed, 102 insertions(+)

diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py
index e1e7a2058e..214dc21210 100644
--- a/sentry_sdk/integrations/huggingface_hub.py
+++ b/sentry_sdk/integrations/huggingface_hub.py
@@ -230,7 +230,16 @@ def new_details_iterator():
                             x.details, "generated_tokens"
                         ):
                             tokens_used = x.details.generated_tokens
+                        if hasattr(x, "details") and hasattr(
+                            x.details, "finish_reason"
+                        ):
+                            span.set_data(
+                                SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS,
+                                x.details.finish_reason,
+                            )
+
                         yield x
+
                         if (
                             len(data_buf) > 0
                             and should_send_default_pii()
diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index dfa423371a..db655b525b 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -69,6 +69,49 @@ def mock_hf_text_generation_api():
         yield rsps
 
 
+@pytest.fixture
+def mock_hf_text_generation_api_streaming():
+    # type: () -> Any
+    """Mock streaming HuggingFace text generation API"""
+    with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:
+        model_name = "test-model"
+
+        # Mock model info endpoint
+        rsps.add(
+            responses.GET,
+            MODEL_ENDPOINT.format(model_name=model_name),
+            json={
+                "id": model_name,
+                "pipeline_tag": "text-generation",
+                "inferenceProviderMapping": {
+                    "hf-inference": {
+                        "status": "live",
+                        "providerId": model_name,
+                        "task": "text-generation",
+                    }
+                },
+            },
+            status=200,
+        )
+
+        # Mock text 
generation endpoint for streaming + streaming_response = b'data:{"token":{"id":1, "special": false, "text": "the mocked "}}\n\ndata:{"token":{"id":2, "special": false, "text": "model response"}, "details":{"finish_reason": "length", "generated_tokens": 10, "seed": 0}}\n\n' + + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name), + body=streaming_response, + status=200, + headers={ + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + + yield rsps + + @pytest.fixture def mock_hf_chat_completion_api(): # type: () -> Any @@ -174,6 +217,56 @@ def test_text_generation( assert "gen_ai.response.model" not in span["data"] +@pytest.mark.parametrize("send_default_pii", [True, False]) +def test_text_generation_streaming( + sentry_init, capture_events, send_default_pii, mock_hf_text_generation_api_streaming +): + # type: (Any, Any, Any, Any) -> None + sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + events = capture_events() + + client = InferenceClient( + model="test-model", + ) + + with sentry_sdk.start_transaction(name="test"): + for _ in client.text_generation( + prompt="Hello", + stream=True, + details=True, + ): + pass + + (transaction,) = events + (span,) = transaction["spans"] + + assert span["op"] == "gen_ai.generate_text" + assert span["description"] == "generate_text test-model" + + expected_data = { + "gen_ai.operation.name": "generate_text", + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "length", + "gen_ai.response.streaming": True, + "gen_ai.usage.total_tokens": 10, + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + + if send_default_pii: + expected_data["gen_ai.request.messages"] = "Hello" + expected_data["gen_ai.response.text"] = "the mocked model response" + + if not send_default_pii: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["data"] == expected_data + + # text generation does not set the response model + assert "gen_ai.response.model" not in span["data"] + + @pytest.mark.parametrize("send_default_pii", [True, False]) def test_chat_completion( sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api From 07d611c3256b53be531e125eb5c7bf662b92c36d Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 20:10:56 +0200 Subject: [PATCH 22/43] chat completion streaming --- sentry_sdk/integrations/huggingface_hub.py | 51 +++++++-- .../huggingface_hub/test_huggingface_hub.py | 100 ++++++++++++++++++ 2 files changed, 141 insertions(+), 10 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 214dc21210..0eb51904db 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -1,6 +1,5 @@ from functools import wraps - from typing import Any, Iterable, Callable import sentry_sdk @@ -17,7 +16,11 @@ try: import huggingface_hub.inference._client - from huggingface_hub import ChatCompletionOutput, TextGenerationOutput + from huggingface_hub import ( + ChatCompletionOutput, + TextGenerationOutput, + ChatCompletionStreamOutput, + ) except ImportError: raise DidNotEnable("Huggingface not installed") @@ -217,7 +220,7 @@ def new_huggingface_task(*args, **kwargs): return res if kwargs.get("details", False): - + # text-generation stream output def new_details_iterator(): # type: () -> Iterable[Any] with capture_internal_exceptions(): @@ -257,20 
+260,47 @@ def new_details_iterator(): span, total_tokens=tokens_used, ) + span.__exit__(None, None, None) return new_details_iterator() else: - # res is Iterable[str] - + # chat-completion stream output def new_iterator(): # type: () -> Iterable[str] - data_buf: list[str] = [] with capture_internal_exceptions(): - for s in res: - if isinstance(s, str): - data_buf.append(s) - yield s + data_buf: list[str] = [] + for chunk in res: + if isinstance(chunk, ChatCompletionStreamOutput): + for choice in chunk.choices: + data_buf.append(choice.delta.content) + + if ( + hasattr(choice, "finish_reason") + and choice.finish_reason is not None + ): + span.set_data( + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, + choice.finish_reason, + ) + if hasattr(chunk, "model") and chunk.model is not None: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_MODEL, chunk.model + ) + + if hasattr(chunk, "usage") and chunk.usage is not None: + record_token_usage( + span, + input_tokens=chunk.usage.prompt_tokens, + output_tokens=chunk.usage.completion_tokens, + total_tokens=chunk.usage.total_tokens, + ) + + elif isinstance(chunk, str): + data_buf.append(chunk) + + yield chunk + if ( len(data_buf) > 0 and should_send_default_pii() @@ -283,6 +313,7 @@ def new_iterator(): SPANDATA.GEN_AI_RESPONSE_TEXT, text_response, ) + span.__exit__(None, None, None) return new_iterator() diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index db655b525b..02a9a32b3d 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -168,6 +168,53 @@ def mock_hf_chat_completion_api(): yield rsps +@pytest.fixture +def mock_hf_chat_completion_api_streaming(): + # type: () -> Any + """Mock streaming HuggingFace chat completion API""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={ + "id": model_name, + "pipeline_tag": "conversational", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "conversational", + } + }, + }, + status=200, + ) + + # Mock chat completion streaming endpoint + streaming_chat_response = ( + b'data:{"id":"xyz-123","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"the mocked "},"index":0,"finish_reason":null}],"usage":null}\n\n' + b'data:{"id":"xyz-124","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"model response"},"index":0,"finish_reason":"stop"}],"usage":{"prompt_tokens":183,"completion_tokens":14,"total_tokens":197}}\n\n' + # b'data:[DONE]\n\n' + ) + + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + body=streaming_chat_response, + status=200, + headers={ + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + + yield rsps + + @pytest.mark.parametrize("send_default_pii", [True, False]) def test_text_generation( sentry_init, capture_events, send_default_pii, mock_hf_text_generation_api @@ -317,3 +364,56 @@ def test_chat_completion( assert "gen_ai.response.text" not in expected_data assert span["data"] == expected_data + + +@pytest.mark.parametrize("send_default_pii", [True, False]) 
+def test_chat_completion_streaming(
+    sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api_streaming
+):
+    # type: (Any, Any, Any, Any) -> None
+    sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii)
+    events = capture_events()
+
+    client = InferenceClient(
+        model="test-model",
+    )
+
+    with sentry_sdk.start_transaction(name="test"):
+        response = client.chat_completion(
+            messages=[{"role": "user", "content": "Hello!"}],
+            stream=True,
+        )
+
+        # consume the stream so the span is finished
+        for _ in response:
+            pass
+
+    (transaction,) = events
+    (span,) = transaction["spans"]
+
+    assert span["op"] == "gen_ai.chat"
+    assert span["description"] == "chat test-model"
+
+    expected_data = {
+        "gen_ai.operation.name": "chat",
+        "gen_ai.request.model": "test-model",
+        "gen_ai.response.finish_reasons": "stop",
+        "gen_ai.response.model": "test-model-123",
+        "gen_ai.response.streaming": True,
+        "gen_ai.usage.input_tokens": 183,
+        "gen_ai.usage.output_tokens": 14,
+        "gen_ai.usage.total_tokens": 197,
+        "thread.id": mock.ANY,
+        "thread.name": mock.ANY,
+    }
+
+    if send_default_pii:
+        expected_data["gen_ai.request.messages"] = (
+            '[{"role": "user", "content": "Hello!"}]'
+        )
+        expected_data["gen_ai.response.text"] = "the mocked model response"
+
+    if not send_default_pii:
+        assert "gen_ai.request.messages" not in expected_data
+        assert "gen_ai.response.text" not in expected_data
+
+    assert span["data"] == expected_data

From 8d54b811764be00fb033b628fe3da182cc4789aa Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 8 Sep 2025 20:12:02 +0200
Subject: [PATCH 23/43] cleanup

---
 tests/integrations/huggingface_hub/test_huggingface_hub.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index 02a9a32b3d..1a79f569d1 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -197,7 +197,6 @@ def mock_hf_chat_completion_api_streaming():
     streaming_chat_response = (
         b'data:{"id":"xyz-123","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"the mocked "},"index":0,"finish_reason":null}],"usage":null}\n\n'
         b'data:{"id":"xyz-124","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"model response"},"index":0,"finish_reason":"stop"}],"usage":{"prompt_tokens":183,"completion_tokens":14,"total_tokens":197}}\n\n'
-        # b'data:[DONE]\n\n'
     )

From d8c89a37cefd7b0cca22bc3364ce4c828cdc01fc Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Mon, 8 Sep 2025 20:23:44 +0200
Subject: [PATCH 24/43] make it work with older huggingface sdk

---
 .../integrations/huggingface_hub/test_huggingface_hub.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index 1a79f569d1..7131459bfd 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -398,12 +398,14 @@ def test_chat_completion_streaming(
         "gen_ai.response.finish_reasons": "stop",
         "gen_ai.response.model": "test-model-123",
         "gen_ai.response.streaming": True,
-        "gen_ai.usage.input_tokens": 183,
-        "gen_ai.usage.output_tokens": 14,
-        "gen_ai.usage.total_tokens": 197,
         "thread.id": mock.ANY,
         "thread.name": mock.ANY,
    }
+    # 
usage is not available in older versions of the library + if HF_VERSION and HF_VERSION >= (0, 26, 0): + expected_data["gen_ai.usage.input_tokens"] = (183,) + expected_data["gen_ai.usage.output_tokens"] = (14,) + expected_data["gen_ai.usage.total_tokens"] = (197,) if send_default_pii: expected_data["gen_ai.request.messages"] = ( From f4f6d6dbe7e3020b1623f4ebde57c94ed20e866f Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 20:24:00 +0200 Subject: [PATCH 25/43] make it work with older huggingface sdk --- tests/integrations/huggingface_hub/test_huggingface_hub.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 7131459bfd..8753ab847d 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -403,9 +403,9 @@ def test_chat_completion_streaming( } # usage is not available in older versions of the library if HF_VERSION and HF_VERSION >= (0, 26, 0): - expected_data["gen_ai.usage.input_tokens"] = (183,) - expected_data["gen_ai.usage.output_tokens"] = (14,) - expected_data["gen_ai.usage.total_tokens"] = (197,) + expected_data["gen_ai.usage.input_tokens"] = 183 + expected_data["gen_ai.usage.output_tokens"] = 14 + expected_data["gen_ai.usage.total_tokens"] = 197 if send_default_pii: expected_data["gen_ai.request.messages"] = ( From e5ffe0c8d9cec917eb25dc6257122d67ed78b02a Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 8 Sep 2025 20:51:07 +0200 Subject: [PATCH 26/43] testing for error --- .../huggingface_hub/test_huggingface_hub.py | 101 +++++++++++++++--- 1 file changed, 89 insertions(+), 12 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 8753ab847d..537be1c5b8 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -2,6 +2,7 @@ import pytest import responses +import huggingface_hub from huggingface_hub import InferenceClient import sentry_sdk @@ -69,6 +70,48 @@ def mock_hf_text_generation_api(): yield rsps +@pytest.fixture +def mock_hf_api_with_errors(): + # type: () -> Any + """Mock HuggingFace API that always raises errors for any request""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint with error + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={"error": "Model not found"}, + status=404, + ) + + # Mock text generation endpoint with error + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name), + json={"error": "Internal server error", "message": "Something went wrong"}, + status=500, + ) + + # Mock chat completion endpoint with error + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + json={"error": "Service unavailable", "message": "Chat completion failed"}, + status=503, + ) + + # Catch-all pattern for any other model requests + rsps.add( + responses.GET, + "https://huggingface.co/api/models/test-model-error", + json={"error": "Generic model error"}, + status=500, + ) + + yield rsps + + @pytest.fixture def mock_hf_text_generation_api_streaming(): # type: () -> Any @@ -222,9 +265,7 @@ def test_text_generation( sentry_init(traces_sample_rate=1.0, 
send_default_pii=send_default_pii) events = capture_events() - client = InferenceClient( - model="test-model", - ) + client = InferenceClient(model="test-model") with sentry_sdk.start_transaction(name="test"): client.text_generation( @@ -271,9 +312,7 @@ def test_text_generation_streaming( sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) events = capture_events() - client = InferenceClient( - model="test-model", - ) + client = InferenceClient(model="test-model") with sentry_sdk.start_transaction(name="test"): for _ in client.text_generation( @@ -321,9 +360,7 @@ def test_chat_completion( sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) events = capture_events() - client = InferenceClient( - model="test-model", - ) + client = InferenceClient(model="test-model") with sentry_sdk.start_transaction(name="test"): client.chat_completion( @@ -373,9 +410,7 @@ def test_chat_completion_streaming( sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) events = capture_events() - client = InferenceClient( - model="test-model", - ) + client = InferenceClient(model="test-model") with sentry_sdk.start_transaction(name="test"): response = client.chat_completion( @@ -418,3 +453,45 @@ def test_chat_completion_streaming( assert "gen_ai.response.text" not in expected_data assert span["data"] == expected_data + + +def test_chat_completion_api_error( + sentry_init, capture_events, mock_hf_api_with_errors +): + # type: (Any, Any, Any) -> None + sentry_init(traces_sample_rate=1.0) + events = capture_events() + + client = InferenceClient(model="test-model") + + with sentry_sdk.start_transaction(name="test"): + with pytest.raises(huggingface_hub.errors.HfHubHTTPError): + client.chat_completion( + messages=[{"role": "user", "content": "Hello!"}], + ) + + ( + error, + transaction, + ) = events + + assert error["exception"]["values"][0]["mechanism"]["type"] == "huggingface_hub" + assert not error["exception"]["values"][0]["mechanism"]["handled"] + + (span,) = transaction["spans"] + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + assert span.get("tags", {}).get("status") == "error" + + assert ( + error["contexts"]["trace"]["trace_id"] + == transaction["contexts"]["trace"]["trace_id"] + ) + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.model": "test-model", + "thread.id": mock.ANY, + "thread.name": mock.ANY, + } + assert span["data"] == expected_data From c8e62df5a0dd7b6bfc17963fdc53dbe16aa14ad3 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 10:25:40 +0200 Subject: [PATCH 27/43] fixed hanging test --- .../huggingface_hub/test_huggingface_hub.py | 4 ++-- tox.ini | 14 +++++++------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 537be1c5b8..4b2e9e5711 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -97,8 +97,8 @@ def mock_hf_api_with_errors(): rsps.add( responses.POST, INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", - json={"error": "Service unavailable", "message": "Chat completion failed"}, - status=503, + json={"error": "Internal server error", "message": "Something went wrong"}, + status=500, ) # Catch-all pattern for any other model requests diff --git a/tox.ini b/tox.ini index 6026278fb9..1bc9757b9a 100644 --- a/tox.ini 
+++ b/tox.ini @@ -10,7 +10,7 @@ # The file (and all resulting CI YAMLs) then need to be regenerated via # "scripts/generate-test-files.sh". # -# Last generated: 2025-09-08T12:54:55.709539+00:00 +# Last generated: 2025-09-09T08:24:12.875177+00:00 [tox] requires = @@ -116,12 +116,12 @@ envlist = {py3.8,py3.11,py3.12}-openai-base-v1.0.1 {py3.8,py3.11,py3.12}-openai-base-v1.36.1 {py3.8,py3.11,py3.12}-openai-base-v1.71.0 - {py3.8,py3.12,py3.13}-openai-base-v1.106.1 + {py3.8,py3.12,py3.13}-openai-base-v1.107.0 {py3.8,py3.11,py3.12}-openai-notiktoken-v1.0.1 {py3.8,py3.11,py3.12}-openai-notiktoken-v1.36.1 {py3.8,py3.11,py3.12}-openai-notiktoken-v1.71.0 - {py3.8,py3.12,py3.13}-openai-notiktoken-v1.106.1 + {py3.8,py3.12,py3.13}-openai-notiktoken-v1.107.0 {py3.9,py3.12,py3.13}-langgraph-v0.6.7 {py3.10,py3.12,py3.13}-langgraph-v1.0.0a3 @@ -141,7 +141,7 @@ envlist = {py3.6,py3.7}-boto3-v1.12.49 {py3.6,py3.9,py3.10}-boto3-v1.20.54 {py3.7,py3.11,py3.12}-boto3-v1.28.85 - {py3.9,py3.12,py3.13}-boto3-v1.40.25 + {py3.9,py3.12,py3.13}-boto3-v1.40.26 {py3.6,py3.7,py3.8}-chalice-v1.16.0 {py3.6,py3.7,py3.8}-chalice-v1.21.9 @@ -487,7 +487,7 @@ deps = openai-base-v1.0.1: openai==1.0.1 openai-base-v1.36.1: openai==1.36.1 openai-base-v1.71.0: openai==1.71.0 - openai-base-v1.106.1: openai==1.106.1 + openai-base-v1.107.0: openai==1.107.0 openai-base: pytest-asyncio openai-base: tiktoken openai-base-v1.0.1: httpx<0.28 @@ -496,7 +496,7 @@ deps = openai-notiktoken-v1.0.1: openai==1.0.1 openai-notiktoken-v1.36.1: openai==1.36.1 openai-notiktoken-v1.71.0: openai==1.71.0 - openai-notiktoken-v1.106.1: openai==1.106.1 + openai-notiktoken-v1.107.0: openai==1.107.0 openai-notiktoken: pytest-asyncio openai-notiktoken-v1.0.1: httpx<0.28 openai-notiktoken-v1.36.1: httpx<0.28 @@ -521,7 +521,7 @@ deps = boto3-v1.12.49: boto3==1.12.49 boto3-v1.20.54: boto3==1.20.54 boto3-v1.28.85: boto3==1.28.85 - boto3-v1.40.25: boto3==1.40.25 + boto3-v1.40.26: boto3==1.40.26 {py3.7,py3.8}-boto3: urllib3<2.0.0 chalice-v1.16.0: chalice==1.16.0 From 06f3746e5a08bc8a39609edd509b5b33381610b0 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 10:38:45 +0200 Subject: [PATCH 28/43] fix test --- .../integrations/huggingface_hub/test_huggingface_hub.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 4b2e9e5711..dd0f5ad333 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -2,7 +2,6 @@ import pytest import responses -import huggingface_hub from huggingface_hub import InferenceClient import sentry_sdk @@ -10,6 +9,12 @@ from typing import TYPE_CHECKING +try: + from huggingface_hub.utils._errors import HfHubHTTPError +except ImportError: + from huggingface_hub.errors import HfHubHTTPError + + if TYPE_CHECKING: from typing import Any @@ -465,7 +470,7 @@ def test_chat_completion_api_error( client = InferenceClient(model="test-model") with sentry_sdk.start_transaction(name="test"): - with pytest.raises(huggingface_hub.errors.HfHubHTTPError): + with pytest.raises(HfHubHTTPError): client.chat_completion( messages=[{"role": "user", "content": "Hello!"}], ) From 711cf515301bffeacf460a5276ec07929b8b2f0e Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 11:13:59 +0200 Subject: [PATCH 29/43] Tool calls test --- .../huggingface_hub/test_huggingface_hub.py | 133 ++++++++++++++++++ 1 file changed, 133 
insertions(+) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index dd0f5ad333..961196dbda 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -216,6 +216,71 @@ def mock_hf_chat_completion_api(): yield rsps +@pytest.fixture +def mock_hf_chat_completion_api_tools(): + # type: () -> Any + """Mock HuggingFace chat completion API""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={ + "id": model_name, + "pipeline_tag": "conversational", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "conversational", + } + }, + }, + status=200, + ) + + # Mock chat completion endpoint + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + json={ + "id": "xyz-123", + "created": 1234567890, + "model": f"{model_name}-123", + "system_fingerprint": "fp_123", + "choices": [ + { + "index": 0, + "finish_reason": "tool_calls", + "message": { + "role": "assistant", + "tool_calls": [ + { + "id": "call_123", + "type": "function", + "function": { + "name": "get_weather", + "arguments": {"location": "Paris"}, + }, + } + ], + }, + } + ], + "usage": { + "completion_tokens": 8, + "prompt_tokens": 10, + "total_tokens": 18, + }, + }, + status=200, + ) + + yield rsps + + @pytest.fixture def mock_hf_chat_completion_api_streaming(): # type: () -> Any @@ -500,3 +565,71 @@ def test_chat_completion_api_error( "thread.name": mock.ANY, } assert span["data"] == expected_data + + +@pytest.mark.parametrize("send_default_pii", [True, False]) +def test_chat_completion_with_tools( + sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api_tools +): + # type: (Any, Any, Any, Any) -> None + sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + events = capture_events() + + client = InferenceClient(model="test-model") + + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"], + }, + }, + } + ] + + with sentry_sdk.start_transaction(name="test"): + client.chat_completion( + messages=[{"role": "user", "content": "What is the weather in Paris?"}], + tools=tools, + tool_choice="auto", + ) + + (transaction,) = events + (span,) = transaction["spans"] + + assert span["op"] == "gen_ai.chat" + assert span["description"] == "chat test-model" + + expected_data = { + "gen_ai.operation.name": "chat", + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": ' + '"get_weather", "description": "Get current ' + 'weather", "parameters": {"type": "object", ' + '"properties": {"location": {"type": ' + '"string"}}, "required": ["location"]}}}]', + "gen_ai.request.model": "test-model", + "gen_ai.response.finish_reasons": "tool_calls", + "gen_ai.response.model": "test-model-123", + "gen_ai.response.tool_calls": '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]', + "gen_ai.usage.input_tokens": 10, + "gen_ai.usage.output_tokens": 8, + "gen_ai.usage.total_tokens": 18, + "thread.id": mock.ANY, + 
"thread.name": mock.ANY, + } + + if send_default_pii: + expected_data["gen_ai.request.messages"] = ( + '[{"role": "user", "content": "What is the weather in Paris?"}]' + ) + + if not send_default_pii: + assert "gen_ai.request.messages" not in expected_data + assert "gen_ai.response.text" not in expected_data + + assert span["data"] == expected_data From 550234004f9781fb40519f996747bb56f8a8762e Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 11:14:35 +0200 Subject: [PATCH 30/43] Tool calls test --- tests/integrations/huggingface_hub/test_huggingface_hub.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 961196dbda..74650a20b4 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -607,11 +607,7 @@ def test_chat_completion_with_tools( expected_data = { "gen_ai.operation.name": "chat", - "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": ' - '"get_weather", "description": "Get current ' - 'weather", "parameters": {"type": "object", ' - '"properties": {"location": {"type": ' - '"string"}}, "required": ["location"]}}}]', + "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]', "gen_ai.request.model": "test-model", "gen_ai.response.finish_reasons": "tool_calls", "gen_ai.response.model": "test-model-123", From 00c9727f2b8c8531c0267e0ffca0d8dc7af5a136 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 11:26:47 +0200 Subject: [PATCH 31/43] Some test improvements --- .../integrations/huggingface_hub/test_huggingface_hub.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 74650a20b4..91f9e7e892 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -339,7 +339,7 @@ def test_text_generation( with sentry_sdk.start_transaction(name="test"): client.text_generation( - prompt="Hello", + "Hello", stream=False, details=True, ) @@ -349,6 +349,7 @@ def test_text_generation( assert span["op"] == "gen_ai.generate_text" assert span["description"] == "generate_text test-model" + assert span["origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "generate_text", @@ -397,6 +398,7 @@ def test_text_generation_streaming( assert span["op"] == "gen_ai.generate_text" assert span["description"] == "generate_text test-model" + assert span["origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "generate_text", @@ -443,6 +445,7 @@ def test_chat_completion( assert span["op"] == "gen_ai.chat" assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -496,6 +499,7 @@ def test_chat_completion_streaming( assert span["op"] == "gen_ai.chat" assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", @@ -552,6 +556,7 @@ def test_chat_completion_api_error( assert span["op"] == "gen_ai.chat" 
assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" assert span.get("tags", {}).get("status") == "error" assert ( @@ -604,6 +609,7 @@ def test_chat_completion_with_tools( assert span["op"] == "gen_ai.chat" assert span["description"] == "chat test-model" + assert span["origin"] == "auto.ai.huggingface_hub" expected_data = { "gen_ai.operation.name": "chat", From 81cbbeb478e6c412de375d8e00a73643189bc591 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 11:47:16 +0200 Subject: [PATCH 32/43] tools improvements --- sentry_sdk/integrations/huggingface_hub.py | 72 ++++++++++++------- .../huggingface_hub/test_huggingface_hub.py | 4 +- 2 files changed, 50 insertions(+), 26 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 0eb51904db..413c3c05b4 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -146,24 +146,24 @@ def new_huggingface_task(*args, **kwargs): if finish_reason: span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason) - try: - tool_calls = res.choices[0].message.tool_calls - except Exception: - tool_calls = [] - - if tool_calls is not None and len(tool_calls) > 0: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, - tool_calls, - unpack=False, - ) - if should_send_default_pii() and integration.include_prompts: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False ) + try: + tool_calls = res.choices[0].message.tool_calls + except Exception: + tool_calls = [] + + if tool_calls is not None and len(tool_calls) > 0: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + tool_calls, + unpack=False, + ) + if isinstance(res, str): if should_send_default_pii() and integration.include_prompts: if res: @@ -226,22 +226,22 @@ def new_details_iterator(): with capture_internal_exceptions(): tokens_used = 0 data_buf: list[str] = [] - for x in res: - if hasattr(x, "token") and hasattr(x.token, "text"): - data_buf.append(x.token.text) - if hasattr(x, "details") and hasattr( - x.details, "generated_tokens" + for chunk in res: + if hasattr(chunk, "token") and hasattr(chunk.token, "text"): + data_buf.append(chunk.token.text) + if hasattr(chunk, "details") and hasattr( + chunk.details, "generated_tokens" ): - tokens_used = x.details.generated_tokens - if hasattr(x, "details") and hasattr( - x.details, "finish_reason" + tokens_used = chunk.details.generated_tokens + if hasattr(chunk, "details") and hasattr( + chunk.details, "finish_reason" ): span.set_data( SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, - x.details.finish_reason, + chunk.details.finish_reason, ) - yield x + yield chunk if ( len(data_buf) > 0 @@ -273,7 +273,12 @@ def new_iterator(): for chunk in res: if isinstance(chunk, ChatCompletionStreamOutput): for choice in chunk.choices: - data_buf.append(choice.delta.content) + if ( + hasattr(choice, "delta") + and hasattr(choice.delta, "content") + and choice.delta.content is not None + ): + data_buf.append(choice.delta.content) if ( hasattr(choice, "finish_reason") @@ -283,6 +288,22 @@ def new_iterator(): SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, choice.finish_reason, ) + if ( + hasattr(choice, "delta") + and hasattr(choice.delta, "tool_calls") + and choice.delta.tool_calls is not None + ): + if ( + should_send_default_pii() + and integration.include_prompts + ): + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + 
choice.delta.tool_calls, + unpack=False, + ) + if hasattr(chunk, "model") and chunk.model is not None: span.set_data( SPANDATA.GEN_AI_RESPONSE_MODEL, chunk.model @@ -297,7 +318,8 @@ def new_iterator(): ) elif isinstance(chunk, str): - data_buf.append(chunk) + if chunk is not None: + data_buf.append(chunk) yield chunk diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 91f9e7e892..d345791b7f 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -617,7 +617,6 @@ def test_chat_completion_with_tools( "gen_ai.request.model": "test-model", "gen_ai.response.finish_reasons": "tool_calls", "gen_ai.response.model": "test-model-123", - "gen_ai.response.tool_calls": '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]', "gen_ai.usage.input_tokens": 10, "gen_ai.usage.output_tokens": 8, "gen_ai.usage.total_tokens": 18, @@ -629,6 +628,9 @@ def test_chat_completion_with_tools( expected_data["gen_ai.request.messages"] = ( '[{"role": "user", "content": "What is the weather in Paris?"}]' ) + expected_data["gen_ai.response.tool_calls"] = ( + '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]' + ) if not send_default_pii: assert "gen_ai.request.messages" not in expected_data From 52b21c586603d34b2e02f00e309dcd3b0c7b4ba8 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 12:06:55 +0200 Subject: [PATCH 33/43] better tests --- .../huggingface_hub/test_huggingface_hub.py | 96 ++++++++++++++----- 1 file changed, 71 insertions(+), 25 deletions(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index d345791b7f..b66f3911be 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -6,6 +6,7 @@ import sentry_sdk from sentry_sdk.utils import package_version +from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration from typing import TYPE_CHECKING @@ -328,11 +329,20 @@ def mock_hf_chat_completion_api_streaming(): @pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation( - sentry_init, capture_events, send_default_pii, mock_hf_text_generation_api + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_text_generation_api, ): - # type: (Any, Any, Any, Any) -> None - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) events = capture_events() client = InferenceClient(model="test-model") @@ -361,11 +371,11 @@ def test_text_generation( "thread.name": mock.ANY, } - if send_default_pii: + if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = "Hello" expected_data["gen_ai.response.text"] = "[mocked] Hello! How can i help you?" 
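(The tool-call expectations in these tests compare against one serialized string because set_data_normalized(..., unpack=False) stores the whole list under a single span-data key. A stripped-down illustration of that storage shape, with json.dumps standing in for the SDK's own normalization:

import json

span_data = {}
tool_calls = [{"id": "call_123", "type": "function"}]

# unpack=False behaviour, roughly: one key whose value is the entire
# list rendered as a string, instead of one key per list element.
span_data["gen_ai.response.tool_calls"] = json.dumps(tool_calls)

assert span_data["gen_ai.response.tool_calls"] == (
    '[{"id": "call_123", "type": "function"}]'
)
)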
- if not send_default_pii: + if not send_default_pii or not include_prompts: assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data @@ -376,11 +386,20 @@ def test_text_generation( @pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation_streaming( - sentry_init, capture_events, send_default_pii, mock_hf_text_generation_api_streaming + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_text_generation_api_streaming, ): - # type: (Any, Any, Any, Any) -> None - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) events = capture_events() client = InferenceClient(model="test-model") @@ -410,11 +429,11 @@ def test_text_generation_streaming( "thread.name": mock.ANY, } - if send_default_pii: + if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = "Hello" expected_data["gen_ai.response.text"] = "the mocked model response" - if not send_default_pii: + if not send_default_pii or not include_prompts: assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data @@ -425,11 +444,20 @@ def test_text_generation_streaming( @pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion( - sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api, ): - # type: (Any, Any, Any, Any) -> None - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) events = capture_events() client = InferenceClient(model="test-model") @@ -460,7 +488,7 @@ def test_chat_completion( "thread.name": mock.ANY, } - if send_default_pii: + if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = ( '[{"role": "user", "content": "Hello!"}]' ) @@ -468,7 +496,7 @@ def test_chat_completion( "[mocked] Hello! How can I help you today?" 
) - if not send_default_pii: + if not send_default_pii or not include_prompts: assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data @@ -476,11 +504,20 @@ def test_chat_completion( @pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_streaming( - sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api_streaming + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_streaming, ): - # type: (Any, Any, Any, Any) -> None - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) events = capture_events() client = InferenceClient(model="test-model") @@ -516,13 +553,13 @@ def test_chat_completion_streaming( expected_data["gen_ai.usage.output_tokens"] = 14 expected_data["gen_ai.usage.total_tokens"] = 197 - if send_default_pii: + if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = ( '[{"role": "user", "content": "Hello!"}]' ) expected_data["gen_ai.response.text"] = "the mocked model response" - if not send_default_pii: + if not send_default_pii or not include_prompts: assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data @@ -573,11 +610,20 @@ def test_chat_completion_api_error( @pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) def test_chat_completion_with_tools( - sentry_init, capture_events, send_default_pii, mock_hf_chat_completion_api_tools + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_tools, ): - # type: (Any, Any, Any, Any) -> None - sentry_init(traces_sample_rate=1.0, send_default_pii=send_default_pii) + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) events = capture_events() client = InferenceClient(model="test-model") @@ -624,7 +670,7 @@ def test_chat_completion_with_tools( "thread.name": mock.ANY, } - if send_default_pii: + if send_default_pii and include_prompts: expected_data["gen_ai.request.messages"] = ( '[{"role": "user", "content": "What is the weather in Paris?"}]' ) @@ -632,7 +678,7 @@ def test_chat_completion_with_tools( '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather", "description": "None"}, "id": "call_123", "type": "function"}]' ) - if not send_default_pii: + if not send_default_pii or not include_prompts: assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data From bedc050d4f0d41ab7c16ad55f339aba40c51d363 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 13:00:35 +0200 Subject: [PATCH 34/43] more test coverage --- .../huggingface_hub/test_huggingface_hub.py | 132 +++++++++++++++++- 1 file changed, 131 insertions(+), 1 deletion(-) diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index b66f3911be..db4e4c3754 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ 
b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -220,7 +220,7 @@ def mock_hf_chat_completion_api(): @pytest.fixture def mock_hf_chat_completion_api_tools(): # type: () -> Any - """Mock HuggingFace chat completion API""" + """Mock HuggingFace chat completion API with tool calls.""" with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: model_name = "test-model" @@ -328,6 +328,52 @@ def mock_hf_chat_completion_api_streaming(): yield rsps +@pytest.fixture +def mock_hf_chat_completion_api_streaming_tools(): + # type: () -> Any + """Mock streaming HuggingFace chat completion API with tool calls.""" + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + model_name = "test-model" + + # Mock model info endpoint + rsps.add( + responses.GET, + MODEL_ENDPOINT.format(model_name=model_name), + json={ + "id": model_name, + "pipeline_tag": "conversational", + "inferenceProviderMapping": { + "hf-inference": { + "status": "live", + "providerId": model_name, + "task": "conversational", + } + }, + }, + status=200, + ) + + # Mock chat completion streaming endpoint + streaming_chat_response = ( + b'data:{"id":"xyz-123","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","content":"response with tool calls follows"},"index":0,"finish_reason":null}],"usage":null}\n\n' + b'data:{"id":"xyz-124","created":1234567890,"model":"test-model-123","system_fingerprint":"fp_123","choices":[{"delta":{"role":"assistant","tool_calls": [{"id": "call_123","type": "function","function": {"name": "get_weather", "arguments": {"location": "Paris"}}}]},"index":0,"finish_reason":"tool_calls"}],"usage":{"prompt_tokens":183,"completion_tokens":14,"total_tokens":197}}\n\n' + ) + + rsps.add( + responses.POST, + INFERENCE_ENDPOINT.format(model_name=model_name) + "/v1/chat/completions", + body=streaming_chat_response, + status=200, + headers={ + "Content-Type": "text/event-stream", + "Cache-Control": "no-cache", + "Connection": "keep-alive", + }, + ) + + yield rsps + + @pytest.mark.parametrize("send_default_pii", [True, False]) @pytest.mark.parametrize("include_prompts", [True, False]) def test_text_generation( @@ -681,5 +727,89 @@ def test_chat_completion_with_tools( if not send_default_pii or not include_prompts: assert "gen_ai.request.messages" not in expected_data assert "gen_ai.response.text" not in expected_data + assert "gen_ai.response.tool_calls" not in expected_data + + assert span["data"] == expected_data + + +@pytest.mark.parametrize("send_default_pii", [True, False]) +@pytest.mark.parametrize("include_prompts", [True, False]) +def test_chat_completion_streaming_with_tools( + sentry_init, + capture_events, + send_default_pii, + include_prompts, + mock_hf_chat_completion_api_streaming_tools, +): + # type: (Any, Any, Any, Any, Any) -> None + sentry_init( + traces_sample_rate=1.0, + send_default_pii=send_default_pii, + integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], + ) + events = capture_events() + + client = InferenceClient(model="test-model") + + tools = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather", + "parameters": { + "type": "object", + "properties": {"location": {"type": "string"}}, + "required": ["location"], + }, + }, + } + ] + + with sentry_sdk.start_transaction(name="test"): + response = client.chat_completion( + messages=[{"role": "user", "content": "What is the weather in Paris?"}], + stream=True, + tools=tools, 
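+            # "auto" leaves it to the model whether to call a tool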
+            tool_choice="auto",
+        )
+
+        # consume the stream so the span is finished
+        for _ in response:
+            pass
+
+    (transaction,) = events
+    (span,) = transaction["spans"]
+
+    assert span["op"] == "gen_ai.chat"
+    assert span["description"] == "chat test-model"
+    assert span["origin"] == "auto.ai.huggingface_hub"
+
+    expected_data = {
+        "gen_ai.operation.name": "chat",
+        "gen_ai.request.available_tools": '[{"type": "function", "function": {"name": "get_weather", "description": "Get current weather", "parameters": {"type": "object", "properties": {"location": {"type": "string"}}, "required": ["location"]}}}]',
+        "gen_ai.request.model": "test-model",
+        "gen_ai.response.finish_reasons": "tool_calls",
+        "gen_ai.response.model": "test-model-123",
+        "gen_ai.response.streaming": True,
+        "gen_ai.usage.input_tokens": 183,
+        "gen_ai.usage.output_tokens": 14,
+        "gen_ai.usage.total_tokens": 197,
+        "thread.id": mock.ANY,
+        "thread.name": mock.ANY,
+    }
+
+    if send_default_pii and include_prompts:
+        expected_data["gen_ai.request.messages"] = (
+            '[{"role": "user", "content": "What is the weather in Paris?"}]'
+        )
+        expected_data["gen_ai.response.text"] = "response with tool calls follows"
+        expected_data["gen_ai.response.tool_calls"] = (
+            '[{"function": {"arguments": {"location": "Paris"}, "name": "get_weather"}, "id": "call_123", "type": "function", "index": "None"}]'
+        )
+
+    if not send_default_pii or not include_prompts:
+        assert "gen_ai.request.messages" not in expected_data
+        assert "gen_ai.response.text" not in expected_data
+        assert "gen_ai.response.tool_calls" not in expected_data
+
+    assert span["data"] == expected_data

From e8717fdff174f629fe9c6afe0c459166283533a9 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Tue, 9 Sep 2025 13:07:12 +0200
Subject: [PATCH 35/43] usage assertions only on newer huggingface_hub

---
 .../integrations/huggingface_hub/test_huggingface_hub.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index db4e4c3754..c2fff8f35b 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -791,13 +791,15 @@ def test_chat_completion_streaming_with_tools(
         "gen_ai.response.finish_reasons": "tool_calls",
         "gen_ai.response.model": "test-model-123",
         "gen_ai.response.streaming": True,
-        "gen_ai.usage.input_tokens": 183,
-        "gen_ai.usage.output_tokens": 14,
-        "gen_ai.usage.total_tokens": 197,
         "thread.id": mock.ANY,
         "thread.name": mock.ANY,
     }
 
+    if HF_VERSION and HF_VERSION >= (0, 26, 0):
+        expected_data["gen_ai.usage.input_tokens"] = 183
+        expected_data["gen_ai.usage.output_tokens"] = 14
+        expected_data["gen_ai.usage.total_tokens"] = 197
+
     if send_default_pii and include_prompts:
         expected_data["gen_ai.request.messages"] = (
             '[{"role": "user", "content": "What is the weather in Paris?"}]'

From c837e14ca6da68701fed1c3a5582e0c2f7c10924 Mon Sep 17 00:00:00 2001
From: Anton Pirker
Date: Tue, 9 Sep 2025 13:19:51 +0200
Subject: [PATCH 36/43] delete old tests

---
 .../old_test_huggingface_hub.py | 185 ------------------
 1 file changed, 185 deletions(-)
 delete mode 100644 tests/integrations/huggingface_hub/old_test_huggingface_hub.py

diff --git a/tests/integrations/huggingface_hub/old_test_huggingface_hub.py b/tests/integrations/huggingface_hub/old_test_huggingface_hub.py
deleted file mode 100644
index a9dc450168..0000000000
--- a/tests/integrations/huggingface_hub/old_test_huggingface_hub.py
+++ /dev/null
@@ -1,185 +0,0 @@
-import itertools
-from unittest import mock
-
-import pytest -from huggingface_hub import ( - InferenceClient, -) -from huggingface_hub.errors import OverloadedError - -from sentry_sdk import start_transaction -from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration - - -def mock_client_post(client, post_mock): - # huggingface-hub==0.28.0 deprecates the `post` method - # so patch `_inner_post` instead - if hasattr(client, "post"): - client.post = post_mock - if hasattr(client, "_inner_post"): - client._inner_post = post_mock - - -@pytest.mark.parametrize( - "send_default_pii, include_prompts, details_arg", - itertools.product([True, False], repeat=3), -) -def test_nonstreaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts, details_arg -): - sentry_init( - integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - traces_sample_rate=1.0, - send_default_pii=send_default_pii, - ) - events = capture_events() - - client = InferenceClient(model="https://") - - if details_arg: - post_mock = mock.Mock( - return_value=b"""[{ - "generated_text": "the model response", - "details": { - "finish_reason": "length", - "generated_tokens": 10, - "prefill": [], - "tokens": [] - } - }]""" - ) - else: - post_mock = mock.Mock( - return_value=b'[{"generated_text": "the model response"}]' - ) - mock_client_post(client, post_mock) - - with start_transaction(name="huggingface_hub tx"): - response = client.text_generation( - prompt="hello", - details=details_arg, - stream=False, - ) - if details_arg: - assert response.generated_text == "the model response" - else: - assert response == "the model response" - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.generate_text" - - if send_default_pii and include_prompts: - assert "hello" in span["data"]["gen_ai.request.messages"] - assert "the model response" in span["data"]["gen_ai.response.text"] - else: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] - - if details_arg: - assert span["data"]["gen_ai.usage.total_tokens"] == 10 - - -@pytest.mark.parametrize( - "send_default_pii, include_prompts, details_arg", - itertools.product([True, False], repeat=3), -) -def test_streaming_chat_completion( - sentry_init, capture_events, send_default_pii, include_prompts, details_arg -): - sentry_init( - integrations=[HuggingfaceHubIntegration(include_prompts=include_prompts)], - traces_sample_rate=1.0, - send_default_pii=send_default_pii, - ) - events = capture_events() - - client = InferenceClient(model="https://") - - post_mock = mock.Mock( - return_value=[ - b"""data:{ - "token":{"id":1, "special": false, "text": "the model "} - }""", - b"""data:{ - "token":{"id":2, "special": false, "text": "response"}, - "details":{"finish_reason": "length", "generated_tokens": 10, "seed": 0} - }""", - ] - ) - mock_client_post(client, post_mock) - - with start_transaction(name="huggingface_hub tx"): - response = list( - client.text_generation( - prompt="hello", - details=details_arg, - stream=True, - ) - ) - assert len(response) == 2 - if details_arg: - assert response[0].token.text + response[1].token.text == "the model response" - else: - assert response[0] + response[1] == "the model response" - - tx = events[0] - assert tx["type"] == "transaction" - span = tx["spans"][0] - assert span["op"] == "gen_ai.generate_text" - - if send_default_pii and include_prompts: - assert "hello" in span["data"]["gen_ai.request.messages"] - assert "the model response" 
in span["data"]["gen_ai.response.text"] - else: - assert "gen_ai.request.messages" not in span["data"] - assert "gen_ai.response.text" not in span["data"] - - if details_arg: - assert span["data"]["gen_ai.usage.total_tokens"] == 10 - - -def test_bad_chat_completion(sentry_init, capture_events): - sentry_init(integrations=[HuggingfaceHubIntegration()], traces_sample_rate=1.0) - events = capture_events() - - client = InferenceClient(model="https://") - post_mock = mock.Mock(side_effect=OverloadedError("The server is overloaded")) - mock_client_post(client, post_mock) - - with pytest.raises(OverloadedError): - client.text_generation(prompt="hello") - - (event,) = events - assert event["level"] == "error" - - -def test_span_origin(sentry_init, capture_events): - sentry_init( - integrations=[HuggingfaceHubIntegration()], - traces_sample_rate=1.0, - ) - events = capture_events() - - client = InferenceClient(model="https://") - post_mock = mock.Mock( - return_value=[ - b"""data:{ - "token":{"id":1, "special": false, "text": "the model "} - }""", - ] - ) - mock_client_post(client, post_mock) - - with start_transaction(name="huggingface_hub tx"): - list( - client.text_generation( - prompt="hello", - stream=True, - ) - ) - - (event,) = events - - assert event["contexts"]["trace"]["origin"] == "manual" - assert event["spans"][0]["origin"] == "auto.ai.huggingface_hub" From 78a31921a681cb564dace06b884751f85d03a5cc Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 13:58:02 +0200 Subject: [PATCH 37/43] cleanup --- sentry_sdk/integrations/huggingface_hub.py | 55 +++++++++++++++++----- 1 file changed, 44 insertions(+), 11 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 413c3c05b4..9f37b6f56e 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -1,7 +1,5 @@ from functools import wraps -from typing import Any, Iterable, Callable - import sentry_sdk from sentry_sdk.ai.monitoring import record_token_usage from sentry_sdk.ai.utils import set_data_normalized @@ -13,6 +11,12 @@ event_from_exception, ) +from typing import Iterable, TYPE_CHECKING + +if TYPE_CHECKING: + from typing import Any, Callable + + try: import huggingface_hub.inference._client @@ -100,6 +104,11 @@ def new_huggingface_task(*args, **kwargs): span.set_data(SPANDATA.GEN_AI_REQUEST_MODEL, model) # Input attributes + if should_send_default_pii() and integration.include_prompts: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False + ) + attribute_mapping = { "tools": SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, "frequency_penalty": SPANDATA.GEN_AI_REQUEST_FREQUENCY_PENALTY, @@ -110,6 +119,7 @@ def new_huggingface_task(*args, **kwargs): "top_k": SPANDATA.GEN_AI_REQUEST_TOP_K, "stream": SPANDATA.GEN_AI_RESPONSE_STREAMING, } + for attribute, span_attribute in attribute_mapping.items(): value = kwargs.get(attribute, None) if value is not None: @@ -118,21 +128,23 @@ def new_huggingface_task(*args, **kwargs): else: set_data_normalized(span, span_attribute, value, unpack=False) + # LLM Execution try: res = f(*args, **kwargs) except Exception as e: + # Error Handling span.set_status("error") _capture_exception(e) span.__exit__(None, None, None) raise e from None + # Output attributes with capture_internal_exceptions(): - # Output attributes - if hasattr(res, "model"): - model = res.model - if model: - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, model) + # Response Model + if 
hasattr(res, "model") and res.model is not None: + span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, res.model) + # Finish Reason finish_reason = None if hasattr(res, "details") and res.details is not None: finish_reason = getattr(res.details, "finish_reason", None) @@ -146,11 +158,9 @@ def new_huggingface_task(*args, **kwargs): if finish_reason: span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason) + # Request Messages if should_send_default_pii() and integration.include_prompts: - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, prompt, unpack=False - ) - + # Response Tool Calls try: tool_calls = res.choices[0].message.tool_calls except Exception: @@ -164,6 +174,7 @@ def new_huggingface_task(*args, **kwargs): unpack=False, ) + # Response Text if isinstance(res, str): if should_send_default_pii() and integration.include_prompts: if res: @@ -172,10 +183,12 @@ def new_huggingface_task(*args, **kwargs): SPANDATA.GEN_AI_RESPONSE_TEXT, res, ) + span.__exit__(None, None, None) return res if isinstance(res, TextGenerationOutput): + # Response Text if should_send_default_pii() and integration.include_prompts: if res.generated_text: set_data_normalized( @@ -183,15 +196,18 @@ def new_huggingface_task(*args, **kwargs): SPANDATA.GEN_AI_RESPONSE_TEXT, res.generated_text, ) + # Usage if res.details is not None and res.details.generated_tokens > 0: record_token_usage( span, total_tokens=res.details.generated_tokens, ) + span.__exit__(None, None, None) return res if isinstance(res, ChatCompletionOutput): + # Response Text if should_send_default_pii() and integration.include_prompts: text_response = "".join( [ @@ -205,6 +221,7 @@ def new_huggingface_task(*args, **kwargs): SPANDATA.GEN_AI_RESPONSE_TEXT, text_response, ) + # Usage if hasattr(res, "usage") and res.usage is not None: record_token_usage( span, @@ -212,6 +229,7 @@ def new_huggingface_task(*args, **kwargs): output_tokens=res.usage.completion_tokens, total_tokens=res.usage.total_tokens, ) + span.__exit__(None, None, None) return res @@ -226,13 +244,18 @@ def new_details_iterator(): with capture_internal_exceptions(): tokens_used = 0 data_buf: list[str] = [] + for chunk in res: if hasattr(chunk, "token") and hasattr(chunk.token, "text"): data_buf.append(chunk.token.text) + + # Usage if hasattr(chunk, "details") and hasattr( chunk.details, "generated_tokens" ): tokens_used = chunk.details.generated_tokens + + # Finish Reason if hasattr(chunk, "details") and hasattr( chunk.details, "finish_reason" ): @@ -243,6 +266,7 @@ def new_details_iterator(): yield chunk + # Response Text if ( len(data_buf) > 0 and should_send_default_pii() @@ -255,6 +279,7 @@ def new_details_iterator(): SPANDATA.GEN_AI_RESPONSE_TEXT, text_response, ) + # Usage if tokens_used > 0: record_token_usage( span, @@ -264,12 +289,14 @@ def new_details_iterator(): span.__exit__(None, None, None) return new_details_iterator() + else: # chat-completion stream output def new_iterator(): # type: () -> Iterable[str] with capture_internal_exceptions(): data_buf: list[str] = [] + for chunk in res: if isinstance(chunk, ChatCompletionStreamOutput): for choice in chunk.choices: @@ -280,6 +307,7 @@ def new_iterator(): ): data_buf.append(choice.delta.content) + # Finish Reason if ( hasattr(choice, "finish_reason") and choice.finish_reason is not None @@ -288,6 +316,8 @@ def new_iterator(): SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, choice.finish_reason, ) + + # Response Tool Calls if ( hasattr(choice, "delta") and hasattr(choice.delta, "tool_calls") @@ -304,11 +334,13 @@ def 
new_iterator(): unpack=False, ) + # Response Model if hasattr(chunk, "model") and chunk.model is not None: span.set_data( SPANDATA.GEN_AI_RESPONSE_MODEL, chunk.model ) + # Usage if hasattr(chunk, "usage") and chunk.usage is not None: record_token_usage( span, @@ -323,6 +355,7 @@ def new_iterator(): yield chunk + # Response Text if ( len(data_buf) > 0 and should_send_default_pii() From a35a84d318b87db3e17a16d09b6164c34a49db5a Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 14:22:41 +0200 Subject: [PATCH 38/43] cleanup --- sentry_sdk/integrations/huggingface_hub.py | 160 +++++++++++---------- 1 file changed, 86 insertions(+), 74 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 9f37b6f56e..536114e0fc 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -23,7 +23,6 @@ from huggingface_hub import ( ChatCompletionOutput, TextGenerationOutput, - ChatCompletionStreamOutput, ) except ImportError: raise DidNotEnable("Huggingface not installed") @@ -242,49 +241,55 @@ def new_huggingface_task(*args, **kwargs): def new_details_iterator(): # type: () -> Iterable[Any] with capture_internal_exceptions(): + finish_reason = None tokens_used = 0 - data_buf: list[str] = [] + response_text_buffer: list[str] = [] for chunk in res: - if hasattr(chunk, "token") and hasattr(chunk.token, "text"): - data_buf.append(chunk.token.text) - - # Usage - if hasattr(chunk, "details") and hasattr( - chunk.details, "generated_tokens" + if ( + hasattr(chunk, "token") + and hasattr(chunk.token, "text") + and chunk.token.text is not None ): - tokens_used = chunk.details.generated_tokens + response_text_buffer.append(chunk.token.text) - # Finish Reason - if hasattr(chunk, "details") and hasattr( - chunk.details, "finish_reason" - ): - span.set_data( - SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, - chunk.details.finish_reason, + details = getattr(chunk, "details", None) + if details is not None: + finish_reason = getattr(details, "finish_reason", None) + + generated_tokens = getattr( + details, "generated_tokens", None ) + if generated_tokens is not None: + tokens_used = generated_tokens yield chunk - # Response Text + if finish_reason: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, + finish_reason, + ) + + if tokens_used > 0: + record_token_usage( + span, + total_tokens=tokens_used, + ) + if ( - len(data_buf) > 0 - and should_send_default_pii() + should_send_default_pii() and integration.include_prompts + and len(response_text_buffer) > 0 ): - text_response = "".join(data_buf) + text_response = "".join(response_text_buffer) if text_response: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, text_response, ) - # Usage - if tokens_used > 0: - record_token_usage( - span, - total_tokens=tokens_used, - ) span.__exit__(None, None, None) @@ -294,81 +299,88 @@ def new_details_iterator(): # chat-completion stream output def new_iterator(): # type: () -> Iterable[str] - with capture_internal_exceptions(): - data_buf: list[str] = [] + finish_reason = None + response_model = None + response_text_buffer: list[str] = [] + tool_calls = None + usage = None + with capture_internal_exceptions(): for chunk in res: - if isinstance(chunk, ChatCompletionStreamOutput): + if hasattr(chunk, "model") and chunk.model is not None: + response_model = chunk.model + + if hasattr(chunk, "usage") and chunk.usage is not None: + usage = chunk.usage + + if isinstance(chunk, str): + if chunk is 
not None: + response_text_buffer.append(chunk) + + if hasattr(chunk, "choices") and chunk.choices is not None: for choice in chunk.choices: if ( hasattr(choice, "delta") and hasattr(choice.delta, "content") and choice.delta.content is not None ): - data_buf.append(choice.delta.content) + response_text_buffer.append( + choice.delta.content + ) - # Finish Reason if ( hasattr(choice, "finish_reason") and choice.finish_reason is not None ): - span.set_data( - SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, - choice.finish_reason, - ) + finish_reason = choice.finish_reason - # Response Tool Calls if ( hasattr(choice, "delta") and hasattr(choice.delta, "tool_calls") and choice.delta.tool_calls is not None ): - if ( - should_send_default_pii() - and integration.include_prompts - ): - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, - choice.delta.tool_calls, - unpack=False, - ) - - # Response Model - if hasattr(chunk, "model") and chunk.model is not None: - span.set_data( - SPANDATA.GEN_AI_RESPONSE_MODEL, chunk.model - ) + tool_calls = choice.delta.tool_calls - # Usage - if hasattr(chunk, "usage") and chunk.usage is not None: - record_token_usage( - span, - input_tokens=chunk.usage.prompt_tokens, - output_tokens=chunk.usage.completion_tokens, - total_tokens=chunk.usage.total_tokens, - ) + yield chunk - elif isinstance(chunk, str): - if chunk is not None: - data_buf.append(chunk) + if response_model is not None: + span.set_data( + SPANDATA.GEN_AI_RESPONSE_MODEL, response_model + ) - yield chunk + if finish_reason is not None: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, + choice.finish_reason, + ) - # Response Text - if ( - len(data_buf) > 0 - and should_send_default_pii() - and integration.include_prompts - ): - text_response = "".join(data_buf) - if text_response: + if should_send_default_pii() and integration.include_prompts: + if tool_calls is not None: set_data_normalized( span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - text_response, + SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS, + tool_calls, + unpack=False, ) + if len(response_text_buffer) > 0: + text_response = "".join(response_text_buffer) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) + + if usage is not None: + record_token_usage( + span, + input_tokens=usage.prompt_tokens, + output_tokens=usage.completion_tokens, + total_tokens=usage.total_tokens, + ) + span.__exit__(None, None, None) return new_iterator() From 6bd766e56d1c52a2aa30387e5c3ead6ff8aa7782 Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 14:29:05 +0200 Subject: [PATCH 39/43] cleanup --- sentry_sdk/integrations/huggingface_hub.py | 51 +++++++++++----------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index 536114e0fc..f46a9ed49b 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -240,11 +240,11 @@ def new_huggingface_task(*args, **kwargs): # text-generation stream output def new_details_iterator(): # type: () -> Iterable[Any] - with capture_internal_exceptions(): - finish_reason = None - tokens_used = 0 - response_text_buffer: list[str] = [] + finish_reason = None + response_text_buffer: list[str] = [] + tokens_used = 0 + with capture_internal_exceptions(): for chunk in res: if ( hasattr(chunk, "token") @@ -253,44 +253,43 @@ def new_details_iterator(): ): response_text_buffer.append(chunk.token.text) - details = 
getattr(chunk, "details", None) - if details is not None: - finish_reason = getattr(details, "finish_reason", None) + if hasattr(chunk, "details") and hasattr( + chunk.details, "finish_reason" + ): + finish_reason = chunk.details.finish_reason - generated_tokens = getattr( - details, "generated_tokens", None - ) - if generated_tokens is not None: - tokens_used = generated_tokens + if ( + hasattr(chunk, "details") + and hasattr(chunk.details, "generated_tokens") + and chunk.details.generated_tokens is not None + ): + tokens_used = chunk.details.generated_tokens yield chunk - if finish_reason: + if finish_reason is not None: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason, ) + if should_send_default_pii() and integration.include_prompts: + if len(response_text_buffer) > 0: + text_response = "".join(response_text_buffer) + if text_response: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_TEXT, + text_response, + ) + if tokens_used > 0: record_token_usage( span, total_tokens=tokens_used, ) - if ( - should_send_default_pii() - and integration.include_prompts - and len(response_text_buffer) > 0 - ): - text_response = "".join(response_text_buffer) - if text_response: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - text_response, - ) - span.__exit__(None, None, None) return new_details_iterator() From e294e83ea6f3bb480093d5f503b0b800bf5c660c Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Tue, 9 Sep 2025 15:08:07 +0200 Subject: [PATCH 40/43] cleanup --- sentry_sdk/integrations/huggingface_hub.py | 159 ++++++++++----------- 1 file changed, 74 insertions(+), 85 deletions(-) diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py index f46a9ed49b..d94b2b2214 100644 --- a/sentry_sdk/integrations/huggingface_hub.py +++ b/sentry_sdk/integrations/huggingface_hub.py @@ -1,3 +1,4 @@ +import inspect from functools import wraps import sentry_sdk @@ -11,19 +12,13 @@ event_from_exception, ) -from typing import Iterable, TYPE_CHECKING +from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Callable - + from typing import Any, Callable, Iterable try: import huggingface_hub.inference._client - - from huggingface_hub import ( - ChatCompletionOutput, - TextGenerationOutput, - ) except ImportError: raise DidNotEnable("Huggingface not installed") @@ -138,33 +133,62 @@ def new_huggingface_task(*args, **kwargs): raise e from None # Output attributes - with capture_internal_exceptions(): - # Response Model - if hasattr(res, "model") and res.model is not None: - span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, res.model) + finish_reason = None + response_model = None + response_text_buffer: list[str] = [] + tokens_used = 0 + tool_calls = None + usage = None - # Finish Reason - finish_reason = None - if hasattr(res, "details") and res.details is not None: - finish_reason = getattr(res.details, "finish_reason", None) + with capture_internal_exceptions(): + if isinstance(res, str) and res is not None: + response_text_buffer.append(res) - if finish_reason is None: - try: - finish_reason = res.choices[0].finish_reason - except Exception: - pass + if hasattr(res, "generated_text") and res.generated_text is not None: + response_text_buffer.append(res.generated_text) - if finish_reason: - span.set_data(SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, finish_reason) + if hasattr(res, "model") and res.model is not None: + response_model = res.model + + if hasattr(res, "details") and hasattr(res.details, 
"finish_reason"): + finish_reason = res.details.finish_reason + + if ( + hasattr(res, "details") + and hasattr(res.details, "generated_tokens") + and res.details.generated_tokens is not None + ): + tokens_used = res.details.generated_tokens + + if hasattr(res, "usage") and res.usage is not None: + usage = res.usage + + if hasattr(res, "choices") and res.choices is not None: + for choice in res.choices: + if hasattr(choice, "finish_reason"): + finish_reason = choice.finish_reason + if hasattr(choice, "message") and hasattr( + choice.message, "tool_calls" + ): + tool_calls = choice.message.tool_calls + if ( + hasattr(choice, "message") + and hasattr(choice.message, "content") + and choice.message.content is not None + ): + response_text_buffer.append(choice.message.content) + + if response_model is not None: + span.set_data(SPANDATA.GEN_AI_RESPONSE_MODEL, response_model) + + if finish_reason is not None: + set_data_normalized( + span, + SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, + finish_reason, + ) - # Request Messages if should_send_default_pii() and integration.include_prompts: - # Response Tool Calls - try: - tool_calls = res.choices[0].message.tool_calls - except Exception: - tool_calls = [] - if tool_calls is not None and len(tool_calls) > 0: set_data_normalized( span, @@ -173,66 +197,31 @@ def new_huggingface_task(*args, **kwargs): unpack=False, ) - # Response Text - if isinstance(res, str): - if should_send_default_pii() and integration.include_prompts: - if res: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - res, - ) - - span.__exit__(None, None, None) - return res - - if isinstance(res, TextGenerationOutput): - # Response Text - if should_send_default_pii() and integration.include_prompts: - if res.generated_text: - set_data_normalized( - span, - SPANDATA.GEN_AI_RESPONSE_TEXT, - res.generated_text, - ) - # Usage - if res.details is not None and res.details.generated_tokens > 0: - record_token_usage( - span, - total_tokens=res.details.generated_tokens, - ) - - span.__exit__(None, None, None) - return res - - if isinstance(res, ChatCompletionOutput): - # Response Text - if should_send_default_pii() and integration.include_prompts: - text_response = "".join( - [ - x.get("message", {}).get("content", None) or "" - for x in res.choices - ] - ) + if len(response_text_buffer) > 0: + text_response = "".join(response_text_buffer) if text_response: set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_TEXT, text_response, ) - # Usage - if hasattr(res, "usage") and res.usage is not None: - record_token_usage( - span, - input_tokens=res.usage.prompt_tokens, - output_tokens=res.usage.completion_tokens, - total_tokens=res.usage.total_tokens, - ) - span.__exit__(None, None, None) - return res - - if not isinstance(res, Iterable): + if usage is not None: + record_token_usage( + span, + input_tokens=usage.prompt_tokens, + output_tokens=usage.completion_tokens, + total_tokens=usage.total_tokens, + ) + elif tokens_used > 0: + record_token_usage( + span, + total_tokens=tokens_used, + ) + + # If the response is not a generator (meaning a streaming response) + # we are done and can return the response + if not inspect.isgenerator(res): span.__exit__(None, None, None) return res @@ -351,11 +340,11 @@ def new_iterator(): set_data_normalized( span, SPANDATA.GEN_AI_RESPONSE_FINISH_REASONS, - choice.finish_reason, + finish_reason, ) if should_send_default_pii() and integration.include_prompts: - if tool_calls is not None: + if tool_calls is not None and len(tool_calls) > 0: 
                        set_data_normalized(
                             span,
                             SPANDATA.GEN_AI_RESPONSE_TOOL_CALLS,

From 96b8f89377d80c6df4e277e3c01acd5de11c48ba Mon Sep 17 00:00:00 2001
From: Anton Pirker <anton.pirker@sentry.io>
Date: Tue, 9 Sep 2025 15:34:44 +0200
Subject: [PATCH 41/43] cleanup

---
 .../huggingface_hub/test_huggingface_hub.py   | 26 +++++++++----------
 1 file changed, 12 insertions(+), 14 deletions(-)

diff --git a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py
index c2fff8f35b..6e1296987f 100644
--- a/tests/integrations/huggingface_hub/test_huggingface_hub.py
+++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py
@@ -569,14 +569,13 @@ def test_chat_completion_streaming(
     client = InferenceClient(model="test-model")
 
     with sentry_sdk.start_transaction(name="test"):
-        response = client.chat_completion(
-            messages=[{"role": "user", "content": "Hello!"}],
-            stream=True,
+        _ = list(
+            client.chat_completion(
+                messages=[{"role": "user", "content": "Hello!"}],
+                stream=True,
+            )
         )
 
-        for x in response:
-            print(x)
-
     (transaction,) = events
     (span,) = transaction["spans"]
 
@@ -767,16 +766,15 @@ def test_chat_completion_streaming_with_tools(
     ]
 
     with sentry_sdk.start_transaction(name="test"):
-        response = client.chat_completion(
-            messages=[{"role": "user", "content": "What is the weather in Paris?"}],
-            stream=True,
-            tools=tools,
-            tool_choice="auto",
+        _ = list(
+            client.chat_completion(
+                messages=[{"role": "user", "content": "What is the weather in Paris?"}],
+                stream=True,
+                tools=tools,
+                tool_choice="auto",
+            )
         )
 
-        for x in response:
-            print(x)
-
     (transaction,) = events
     (span,) = transaction["spans"]

From f5574cf6ad4c0454bf7c3c5be1074183acb4b3d5 Mon Sep 17 00:00:00 2001
From: Anton Pirker <anton.pirker@sentry.io>
Date: Tue, 9 Sep 2025 15:39:40 +0200
Subject: [PATCH 42/43] cleanup

---
 sentry_sdk/integrations/huggingface_hub.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py
index d94b2b2214..3b152cb16a 100644
--- a/sentry_sdk/integrations/huggingface_hub.py
+++ b/sentry_sdk/integrations/huggingface_hub.py
@@ -74,9 +74,8 @@ def new_huggingface_task(*args, **kwargs):
         elif "messages" in kwargs:
             prompt = kwargs["messages"]
         elif len(args) >= 2:
-            kwargs["prompt"] = args[1]
-            prompt = kwargs["prompt"]
-            args = (args[0],) + args[2:]
+            if isinstance(args[1], str):
+                prompt = args[1]
         else:
             # invalid call, don't instrument, let it return error
             return f(*args, **kwargs)

From bd5b15f1df82d557915df3018e153113edc9a8ad Mon Sep 17 00:00:00 2001
From: Anton Pirker <anton.pirker@sentry.io>
Date: Tue, 9 Sep 2025 15:47:28 +0200
Subject: [PATCH 43/43] cleanup

---
 sentry_sdk/integrations/huggingface_hub.py                 | 6 ++++--
 tests/integrations/huggingface_hub/test_huggingface_hub.py | 2 +-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/sentry_sdk/integrations/huggingface_hub.py b/sentry_sdk/integrations/huggingface_hub.py
index 3b152cb16a..cb76ccf507 100644
--- a/sentry_sdk/integrations/huggingface_hub.py
+++ b/sentry_sdk/integrations/huggingface_hub.py
@@ -69,14 +69,16 @@ def new_huggingface_task(*args, **kwargs):
         if integration is None:
             return f(*args, **kwargs)
 
+        prompt = None
         if "prompt" in kwargs:
             prompt = kwargs["prompt"]
         elif "messages" in kwargs:
             prompt = kwargs["messages"]
         elif len(args) >= 2:
-            if isinstance(args[1], str):
+            if isinstance(args[1], str) or isinstance(args[1], list):
                 prompt = args[1]
-        else:
+
+        if prompt is None:
             # invalid call, don't instrument, let it return error
             return f(*args, **kwargs)
 
diff --git 
a/tests/integrations/huggingface_hub/test_huggingface_hub.py b/tests/integrations/huggingface_hub/test_huggingface_hub.py index 6e1296987f..86f9c10109 100644 --- a/tests/integrations/huggingface_hub/test_huggingface_hub.py +++ b/tests/integrations/huggingface_hub/test_huggingface_hub.py @@ -571,7 +571,7 @@ def test_chat_completion_streaming( with sentry_sdk.start_transaction(name="test"): _ = list( client.chat_completion( - messages=[{"role": "user", "content": "Hello!"}], + [{"role": "user", "content": "Hello!"}], stream=True, ) )
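
Taken together, the end state of this series can be exercised with a minimal
sketch like the one below. It mirrors the test setup above; assumptions are
that no DSN is configured (so nothing is actually sent), that "test-model"
and the prompts are placeholders, and that a real run would need a reachable
inference provider, since the tests mock the HTTP layer.

    import sentry_sdk
    from huggingface_hub import InferenceClient
    from sentry_sdk.integrations.huggingface_hub import HuggingfaceHubIntegration

    # send_default_pii together with include_prompts gates whether
    # gen_ai.request.messages / gen_ai.response.text end up on the span.
    sentry_sdk.init(
        # dsn="...",  # set a real DSN to actually send the transaction
        integrations=[HuggingfaceHubIntegration(include_prompts=True)],
        traces_sample_rate=1.0,
        send_default_pii=True,
    )

    client = InferenceClient(model="test-model")  # placeholder model

    with sentry_sdk.start_transaction(name="demo"):
        # Non-streaming call: the span is closed when the call returns.
        client.chat_completion([{"role": "user", "content": "Hello!"}])

        # Streaming call: response text, finish reason, and token usage are
        # only finalized once the generator is fully consumed.
        _ = list(
            client.chat_completion(
                [{"role": "user", "content": "Hello!"}],
                stream=True,
            )
        )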