From 507bf17c3116853115b340edd2c936917b72bebe Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 16 Oct 2025 10:27:43 +0200 Subject: [PATCH 01/12] fix(ai): introduce message truncation for openai --- sentry_sdk/ai/utils.py | 88 ++++++- sentry_sdk/client.py | 26 +++ sentry_sdk/integrations/openai.py | 43 ++-- sentry_sdk/scope.py | 5 + tests/integrations/openai/test_openai.py | 69 +++++- tests/test_ai_monitoring.py | 282 +++++++++++++++++++++++ 6 files changed, 475 insertions(+), 38 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 0c0b937006..525ef82495 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -1,14 +1,24 @@ import json - from typing import TYPE_CHECKING if TYPE_CHECKING: from typing import Any, Callable + from sentry_sdk.tracing import Span +from typing import TYPE_CHECKING + import sentry_sdk from sentry_sdk.utils import logger +if TYPE_CHECKING: + from typing import Any, Dict, List, Optional + +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize + +MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB + class GEN_AI_ALLOWED_MESSAGE_ROLES: SYSTEM = "system" @@ -95,3 +105,79 @@ def get_start_span_function(): current_span is not None and current_span.containing_transaction is not None ) return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction + + +def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): + # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]] + if not messages: + return messages + + truncated_messages = list(messages) + + while len(truncated_messages) > 1: + serialized_json = json.dumps(truncated_messages, separators=(",", ":")) + current_size = len(serialized_json.encode("utf-8")) + + if current_size <= max_bytes: + break + + truncated_messages.pop(0) + + serialized_json = json.dumps(truncated_messages, separators=(",", ":")) + current_size = len(serialized_json.encode("utf-8")) + + if current_size > max_bytes and len(truncated_messages) == 1: + message = truncated_messages[0].copy() + content = message.get("content", "") + + if isinstance(content, str): + max_content_length = max_bytes // 2 + while True: + message["content"] = content[:max_content_length] + test_json = json.dumps([message], separators=(",", ":")) + if len(test_json.encode("utf-8")) <= max_bytes: + break + max_content_length = int(max_content_length * 0.9) + if max_content_length < 100: + message["content"] = "" + break + + truncated_messages = [message] + elif isinstance(content, list): + content_copy = list(content) + while len(content_copy) > 0: + message["content"] = content_copy + test_json = json.dumps([message], separators=(",", ":")) + if len(test_json.encode("utf-8")) <= max_bytes: + break + content_copy = content_copy[:-1] + + if len(content_copy) == 0: + message["content"] = [] + + truncated_messages = [message] + + return truncated_messages + + +def truncate_and_annotate_messages( + messages, span, scope, max_bytes=MAX_GEN_AI_MESSAGE_BYTES +): + # type: (Optional[List[Dict[str, Any]]], Any, Any, int) -> Optional[List[Dict[str, Any]]] + if not messages: + return None + + original_count = len(messages) + truncated_messages = truncate_messages_by_size(messages, max_bytes) + + if not truncated_messages: + return None + + truncated_count = len(truncated_messages) + n_removed = original_count - truncated_count + + if n_removed > 0: + scope._gen_ai_messages_truncated[span.span_id] = n_removed + span.set_data("_gen_ai_messages_original_count", original_count) + + 
return truncated_messages diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index d17f922642..396af2da15 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -606,6 +606,32 @@ def _prepare_event( event["breadcrumbs"] = AnnotatedValue( event.get("breadcrumbs", []), {"len": previous_total_breadcrumbs} ) + + # Annotate truncated gen_ai messages in spans + if scope is not None and scope._gen_ai_messages_truncated: + spans = event.get("spans", []) + if isinstance(spans, AnnotatedValue): + spans = spans.value + + for span in spans: + if isinstance(span, dict): + span_id = span.get("span_id") + if span_id and span_id in scope._gen_ai_messages_truncated: + span_data = span.get("data", {}) + original_count = span_data.pop( + "_gen_ai_messages_original_count", None + ) + if ( + original_count is not None + and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data + ): + span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = ( + AnnotatedValue( + span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES], + {"len": original_count}, + ) + ) + # Postprocess the event here so that annotated types do # generally not surface in before_send if event is not None: diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 19d7717b3c..f2f463bcd7 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -1,10 +1,13 @@ from functools import wraps -from collections.abc import Iterable import sentry_sdk from sentry_sdk import consts from sentry_sdk.ai.monitoring import record_token_usage -from sentry_sdk.ai.utils import set_data_normalized, normalize_message_roles +from sentry_sdk.ai.utils import ( + set_data_normalized, + normalize_message_roles, + truncate_and_annotate_messages, +) from sentry_sdk.consts import SPANDATA from sentry_sdk.integrations import DidNotEnable, Integration from sentry_sdk.scope import should_send_default_pii @@ -18,19 +21,14 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, List, Optional, Callable, AsyncIterator, Iterator + from typing import Any, Iterable, List, Optional, Callable, AsyncIterator, Iterator from sentry_sdk.tracing import Span try: try: - from openai import NotGiven + from openai import NOT_GIVEN except ImportError: - NotGiven = None - - try: - from openai import Omit - except ImportError: - Omit = None + NOT_GIVEN = None from openai.resources.chat.completions import Completions, AsyncCompletions from openai.resources import Embeddings, AsyncEmbeddings @@ -189,9 +187,12 @@ def _set_input_data(span, kwargs, operation, integration): and integration.include_prompts ): normalized_messages = normalize_message_roles(messages) - set_data_normalized( - span, SPANDATA.GEN_AI_REQUEST_MESSAGES, normalized_messages, unpack=False - ) + scope = sentry_sdk.get_current_scope() + messages_data = truncate_and_annotate_messages(normalized_messages, span, scope) + if messages_data is not None: + set_data_normalized( + span, SPANDATA.GEN_AI_REQUEST_MESSAGES, messages_data, unpack=False + ) # Input attributes: Common set_data_normalized(span, SPANDATA.GEN_AI_SYSTEM, "openai") @@ -210,12 +211,12 @@ def _set_input_data(span, kwargs, operation, integration): for key, attribute in kwargs_keys_to_attributes.items(): value = kwargs.get(key) - if value is not None and _is_given(value): + if value is not NOT_GIVEN and value is not None: set_data_normalized(span, attribute, value) # Input attributes: Tools tools = kwargs.get("tools") - if tools is not None and _is_given(tools) and len(tools) > 0: + if tools is not NOT_GIVEN 
and tools is not None and len(tools) > 0: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools) ) @@ -695,15 +696,3 @@ async def _sentry_patched_responses_async(*args, **kwargs): return await _execute_async(f, *args, **kwargs) return _sentry_patched_responses_async - - -def _is_given(obj): - # type: (Any) -> bool - """ - Check for givenness safely across different openai versions. - """ - if NotGiven is not None and isinstance(obj, NotGiven): - return False - if Omit is not None and isinstance(obj, Omit): - return False - return True diff --git a/sentry_sdk/scope.py b/sentry_sdk/scope.py index f9caf7e1d6..5815a65440 100644 --- a/sentry_sdk/scope.py +++ b/sentry_sdk/scope.py @@ -188,6 +188,7 @@ class Scope: "_extras", "_breadcrumbs", "_n_breadcrumbs_truncated", + "_gen_ai_messages_truncated", "_event_processors", "_error_processors", "_should_capture", @@ -213,6 +214,7 @@ def __init__(self, ty=None, client=None): self._name = None # type: Optional[str] self._propagation_context = None # type: Optional[PropagationContext] self._n_breadcrumbs_truncated = 0 # type: int + self._gen_ai_messages_truncated = {} # type: Dict[str, int] self.client = NonRecordingClient() # type: sentry_sdk.client.BaseClient @@ -247,6 +249,7 @@ def __copy__(self): rv._breadcrumbs = copy(self._breadcrumbs) rv._n_breadcrumbs_truncated = self._n_breadcrumbs_truncated + rv._gen_ai_messages_truncated = self._gen_ai_messages_truncated.copy() rv._event_processors = self._event_processors.copy() rv._error_processors = self._error_processors.copy() rv._propagation_context = self._propagation_context @@ -1583,6 +1586,8 @@ def update_from_scope(self, scope): self._n_breadcrumbs_truncated = ( self._n_breadcrumbs_truncated + scope._n_breadcrumbs_truncated ) + if scope._gen_ai_messages_truncated: + self._gen_ai_messages_truncated.update(scope._gen_ai_messages_truncated) if scope._span: self._span = scope._span if scope._attachments: diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 276a1b4886..b89635fa5e 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -1,3 +1,4 @@ +import json import pytest from sentry_sdk.utils import package_version @@ -7,11 +8,6 @@ except ImportError: NOT_GIVEN = None -try: - from openai import omit -except ImportError: - omit = None - from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk @@ -44,6 +40,9 @@ OpenAIIntegration, _calculate_token_usage, ) +from sentry_sdk.ai.utils import MAX_GEN_AI_MESSAGE_BYTES +from sentry_sdk._types import AnnotatedValue +from sentry_sdk.serializer import serialize from unittest import mock # python 3.3 and above @@ -1429,7 +1428,7 @@ async def test_streaming_responses_api_async( ) @pytest.mark.parametrize( "tools", - [[], None, NOT_GIVEN, omit], + [[], None, NOT_GIVEN], ) def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): sentry_init( @@ -1456,6 +1455,7 @@ def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): def test_openai_message_role_mapping(sentry_init, capture_events): """Test that OpenAI integration properly maps message roles like 'ai' to 'assistant'""" + sentry_init( integrations=[OpenAIIntegration(include_prompts=True)], traces_sample_rate=1.0, @@ -1465,7 +1465,6 @@ def 
test_openai_message_role_mapping(sentry_init, capture_events): client = OpenAI(api_key="z") client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) - # Test messages with mixed roles including "ai" that should be mapped to "assistant" test_messages = [ {"role": "system", "content": "You are helpful."}, @@ -1476,11 +1475,9 @@ def test_openai_message_role_mapping(sentry_init, capture_events): with start_transaction(name="openai tx"): client.chat.completions.create(model="test-model", messages=test_messages) - + # Verify that the span was created correctly (event,) = events span = event["spans"][0] - - # Verify that the span was created correctly assert span["op"] == "gen_ai.chat" assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] @@ -1505,3 +1502,55 @@ def test_openai_message_role_mapping(sentry_init, capture_events): # Verify no "ai" roles remain roles = [msg["role"] for msg in stored_messages] assert "ai" not in roles + + +def test_openai_message_truncation(sentry_init, capture_events): + """Test that large messages are truncated properly in OpenAI integration.""" + sentry_init( + integrations=[OpenAIIntegration(include_prompts=True)], + traces_sample_rate=1.0, + send_default_pii=True, + ) + events = capture_events() + + client = OpenAI(api_key="z") + client.chat.completions._post = mock.Mock(return_value=EXAMPLE_CHAT_COMPLETION) + + large_content = ( + "This is a very long message that will exceed our size limits. " * 1000 + ) + large_messages = [ + {"role": "system", "content": "You are a helpful assistant."}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + with start_transaction(name="openai tx"): + client.chat.completions.create( + model="some-model", + messages=large_messages, + ) + + (event,) = events + span = event["spans"][0] + assert SPANDATA.GEN_AI_REQUEST_MESSAGES in span["data"] + + messages_data = span["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_data, str) + + parsed_messages = json.loads(messages_data) + assert isinstance(parsed_messages, list) + assert len(parsed_messages) <= len(large_messages) + + if "_meta" in event and len(parsed_messages) < len(large_messages): + meta_path = event["_meta"] + if ( + "spans" in meta_path + and "0" in meta_path["spans"] + and "data" in meta_path["spans"]["0"] + ): + span_meta = meta_path["spans"]["0"]["data"] + if SPANDATA.GEN_AI_REQUEST_MESSAGES in span_meta: + messages_meta = span_meta[SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert "len" in messages_meta.get("", {}) diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index ee757f82cd..fb32042f84 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -1,7 +1,18 @@ +import json + import pytest import sentry_sdk +from sentry_sdk._types import AnnotatedValue from sentry_sdk.ai.monitoring import ai_track +from sentry_sdk.ai.utils import ( + MAX_GEN_AI_MESSAGE_BYTES, + set_data_normalized, + truncate_and_annotate_messages, + truncate_messages_by_size, +) +from sentry_sdk.serializer import serialize +from sentry_sdk.utils import safe_serialize def test_ai_track(sentry_init, capture_events): @@ -160,3 +171,274 @@ async def async_tool(**kwargs): assert span["description"] == "my async tool" assert span["op"] == "custom.async.operation" + + +@pytest.fixture +def sample_messages(): + """Sample messages similar to what gen_ai integrations would use""" + return [ + {"role": "system", "content": "You are a 
helpful assistant."}, + { + "role": "user", + "content": "What is the difference between a list and a tuple in Python?", + }, + { + "role": "assistant", + "content": "Lists are mutable and use [], tuples are immutable and use ().", + }, + {"role": "user", "content": "Can you give me some examples?"}, + { + "role": "assistant", + "content": "Sure! Here are examples:\n\n```python\n# List\nmy_list = [1, 2, 3]\nmy_list.append(4)\n\n# Tuple\nmy_tuple = (1, 2, 3)\n# my_tuple.append(4) would error\n```", + }, + ] + + +@pytest.fixture +def large_messages(): + """Messages that will definitely exceed size limits""" + large_content = "This is a very long message. " * 100 + return [ + {"role": "system", "content": large_content}, + {"role": "user", "content": large_content}, + {"role": "assistant", "content": large_content}, + {"role": "user", "content": large_content}, + ] + + +class TestTruncateMessagesBySize: + def test_no_truncation_needed(self, sample_messages): + """Test that messages under the limit are not truncated""" + result = truncate_messages_by_size( + sample_messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES + ) + assert len(result) == len(sample_messages) + assert result == sample_messages + + def test_truncation_removes_oldest_first(self, large_messages): + """Test that oldest messages are removed first during truncation""" + small_limit = 3000 + result = truncate_messages_by_size(large_messages, max_bytes=small_limit) + assert len(result) < len(large_messages) + + if result: + assert result[-1] == large_messages[-1] + + def test_empty_messages_list(self): + """Test handling of empty messages list""" + result = truncate_messages_by_size( + [], max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500 + ) + assert result == [] + + def test_progressive_truncation(self, large_messages): + """Test that truncation works progressively with different limits""" + limits = [ + MAX_GEN_AI_MESSAGE_BYTES // 5, + MAX_GEN_AI_MESSAGE_BYTES // 10, + MAX_GEN_AI_MESSAGE_BYTES // 25, + MAX_GEN_AI_MESSAGE_BYTES // 100, + MAX_GEN_AI_MESSAGE_BYTES // 500, + ] + prev_count = len(large_messages) + + for limit in limits: + result = truncate_messages_by_size(large_messages, max_bytes=limit) + current_count = len(result) + + assert current_count <= prev_count + assert current_count >= 1 + prev_count = current_count + + def test_exact_size_boundary(self): + """Test behavior at exact size boundaries""" + messages = [{"role": "user", "content": "test"}] + + serialized = serialize(messages, is_vars=False) + json_str = json.dumps(serialized, separators=(",", ":")) + exact_size = len(json_str.encode("utf-8")) + + result = truncate_messages_by_size(messages, max_bytes=exact_size) + assert len(result) == 1 + + result = truncate_messages_by_size(messages, max_bytes=exact_size - 1) + assert len(result) == 1 + + +class TestTruncateAndAnnotateMessages: + def test_no_truncation_returns_list(self, sample_messages): + class MockSpan: + def __init__(self): + self.span_id = "test_span_id" + self.data = {} + + def set_data(self, key, value): + self.data[key] = value + + class MockScope: + def __init__(self): + self._gen_ai_messages_truncated = {} + + span = MockSpan() + scope = MockScope() + result = truncate_and_annotate_messages(sample_messages, span, scope) + + assert isinstance(result, list) + assert not isinstance(result, AnnotatedValue) + assert len(result) == len(sample_messages) + assert result == sample_messages + assert span.span_id not in scope._gen_ai_messages_truncated + assert "_gen_ai_messages_original_count" not in span.data + + def 
test_truncation_sets_metadata_on_scope(self, large_messages): + class MockSpan: + def __init__(self): + self.span_id = "test_span_id" + self.data = {} + + def set_data(self, key, value): + self.data[key] = value + + class MockScope: + def __init__(self): + self._gen_ai_messages_truncated = {} + + small_limit = 1000 + span = MockSpan() + scope = MockScope() + original_count = len(large_messages) + result = truncate_and_annotate_messages( + large_messages, span, scope, max_bytes=small_limit + ) + + assert isinstance(result, list) + assert not isinstance(result, AnnotatedValue) + assert len(result) < len(large_messages) + n_removed = original_count - len(result) + assert scope._gen_ai_messages_truncated[span.span_id] == n_removed + assert span.data["_gen_ai_messages_original_count"] == original_count + + def test_metadata_contains_original_count(self, large_messages): + class MockSpan: + def __init__(self): + self.span_id = "test_span_id" + self.data = {} + + def set_data(self, key, value): + self.data[key] = value + + class MockScope: + def __init__(self): + self._gen_ai_messages_truncated = {} + + small_limit = 1000 + original_count = len(large_messages) + span = MockSpan() + scope = MockScope() + + result = truncate_and_annotate_messages( + large_messages, span, scope, max_bytes=small_limit + ) + + assert span.data["_gen_ai_messages_original_count"] == original_count + n_removed = original_count - len(result) + assert scope._gen_ai_messages_truncated[span.span_id] == n_removed + + def test_empty_messages_returns_none(self): + class MockSpan: + def __init__(self): + self.span_id = "test_span_id" + self.data = {} + + def set_data(self, key, value): + self.data[key] = value + + class MockScope: + def __init__(self): + self._gen_ai_messages_truncated = {} + + span = MockSpan() + scope = MockScope() + result = truncate_and_annotate_messages([], span, scope) + assert result is None + + result = truncate_and_annotate_messages(None, span, scope) + assert result is None + + def test_truncated_messages_newest_first(self, large_messages): + class MockSpan: + def __init__(self): + self.span_id = "test_span_id" + self.data = {} + + def set_data(self, key, value): + self.data[key] = value + + class MockScope: + def __init__(self): + self._gen_ai_messages_truncated = {} + + small_limit = 3000 + span = MockSpan() + scope = MockScope() + result = truncate_and_annotate_messages( + large_messages, span, scope, max_bytes=small_limit + ) + + assert isinstance(result, list) + assert result[0] == large_messages[-len(result)] + + +class TestClientAnnotation: + def test_client_wraps_truncated_messages_in_annotated_value(self, large_messages): + """Test that client.py properly wraps truncated messages in AnnotatedValue""" + from sentry_sdk._types import AnnotatedValue + from sentry_sdk.consts import SPANDATA + + class MockSpan: + def __init__(self): + self.span_id = "test_span_123" + self.data = {} + + def set_data(self, key, value): + self.data[key] = value + + class MockScope: + def __init__(self): + self._gen_ai_messages_truncated = {} + + small_limit = 3000 + span = MockSpan() + scope = MockScope() + original_count = len(large_messages) + + # Simulate what integrations do + truncated_messages = truncate_and_annotate_messages( + large_messages, span, scope, max_bytes=small_limit + ) + span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, truncated_messages) + + # Verify metadata was set on scope and span + assert span.span_id in scope._gen_ai_messages_truncated + assert scope._gen_ai_messages_truncated[span.span_id] > 
0 + assert "_gen_ai_messages_original_count" in span.data + + # Simulate what client.py does + event = {"spans": [{"span_id": span.span_id, "data": span.data.copy()}]} + + # Mimic client.py logic + for event_span in event["spans"]: + span_data = event_span.get("data", {}) + orig_count = span_data.pop("_gen_ai_messages_original_count", None) + if orig_count is not None and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data: + span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = AnnotatedValue( + safe_serialize(span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES]), + {"len": orig_count}, + ) + + # Verify the annotation happened + messages_value = event["spans"][0]["data"][SPANDATA.GEN_AI_REQUEST_MESSAGES] + assert isinstance(messages_value, AnnotatedValue) + assert messages_value.metadata["len"] == original_count + assert isinstance(messages_value.value, str) + assert "_gen_ai_messages_original_count" not in event["spans"][0]["data"] From 7d80e0478d0cfaaa2a17a9f2b766275a2ef45e27 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 16 Oct 2025 10:31:05 +0200 Subject: [PATCH 02/12] wip --- sentry_sdk/integrations/openai.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index f2f463bcd7..992b65c33b 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -26,9 +26,9 @@ try: try: - from openai import NOT_GIVEN + from openai import NotGiven, Omit except ImportError: - NOT_GIVEN = None + NotGiven = None from openai.resources.chat.completions import Completions, AsyncCompletions from openai.resources import Embeddings, AsyncEmbeddings @@ -211,12 +211,12 @@ def _set_input_data(span, kwargs, operation, integration): for key, attribute in kwargs_keys_to_attributes.items(): value = kwargs.get(key) - if value is not NOT_GIVEN and value is not None: + if value is not None and _is_given(value): set_data_normalized(span, attribute, value) # Input attributes: Tools tools = kwargs.get("tools") - if tools is not NOT_GIVEN and tools is not None and len(tools) > 0: + if tools is not None and _is_given(tools) and len(tools) > 0: set_data_normalized( span, SPANDATA.GEN_AI_REQUEST_AVAILABLE_TOOLS, safe_serialize(tools) ) @@ -696,3 +696,15 @@ async def _sentry_patched_responses_async(*args, **kwargs): return await _execute_async(f, *args, **kwargs) return _sentry_patched_responses_async + + +def _is_given(obj): + # type: (Any) -> bool + """ + Check for givenness safely across different openai versions. 
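+    An object counts as given unless it is an instance of NotGiven or Omit;
+    both classes are imported defensively above, since not every openai
+    release provides them.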
+ """ + if NotGiven is not None and isinstance(obj, NotGiven): + return False + if Omit is not None and isinstance(obj, Omit): + return False + return True From bb5973cf5f1854ad7da2a4b9e85e4af7798e7e1d Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 16 Oct 2025 10:31:50 +0200 Subject: [PATCH 03/12] wip --- sentry_sdk/integrations/openai.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/integrations/openai.py b/sentry_sdk/integrations/openai.py index 992b65c33b..9ff2489765 100644 --- a/sentry_sdk/integrations/openai.py +++ b/sentry_sdk/integrations/openai.py @@ -26,10 +26,15 @@ try: try: - from openai import NotGiven, Omit + from openai import NotGiven except ImportError: NotGiven = None + try: + from openai import Omit + except ImportError: + Omit = None + from openai.resources.chat.completions import Completions, AsyncCompletions from openai.resources import Embeddings, AsyncEmbeddings From 52b7f6ec1429177c78c00da0a44ea5a3e6f369a1 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 16 Oct 2025 10:32:59 +0200 Subject: [PATCH 04/12] wip --- tests/integrations/openai/test_openai.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index b89635fa5e..76125dc6f3 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -7,8 +7,12 @@ from openai import NOT_GIVEN except ImportError: NOT_GIVEN = None +try: + from openai import omit +except ImportError: + omit = None -from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError +from openai import AsyncOpenAI, Omit, OpenAI, AsyncStream, Stream, OpenAIError from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk from openai.types.chat.chat_completion import Choice @@ -1428,7 +1432,7 @@ async def test_streaming_responses_api_async( ) @pytest.mark.parametrize( "tools", - [[], None, NOT_GIVEN], + [[], None, NOT_GIVEN, omit], ) def test_empty_tools_in_chat_completion(sentry_init, capture_events, tools): sentry_init( From b4dcfc751838b394b2b4ed4f46e839503c0af49d Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 16 Oct 2025 10:42:11 +0200 Subject: [PATCH 05/12] remove omit import --- tests/integrations/openai/test_openai.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integrations/openai/test_openai.py b/tests/integrations/openai/test_openai.py index 76125dc6f3..ccef4f336e 100644 --- a/tests/integrations/openai/test_openai.py +++ b/tests/integrations/openai/test_openai.py @@ -12,7 +12,7 @@ except ImportError: omit = None -from openai import AsyncOpenAI, Omit, OpenAI, AsyncStream, Stream, OpenAIError +from openai import AsyncOpenAI, OpenAI, AsyncStream, Stream, OpenAIError from openai.types import CompletionUsage, CreateEmbeddingResponse, Embedding from openai.types.chat import ChatCompletion, ChatCompletionMessage, ChatCompletionChunk from openai.types.chat.chat_completion import Choice From 6d4c89b04a28908f8c62022a779c5ec46efbfdd2 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 16 Oct 2025 11:01:35 +0200 Subject: [PATCH 06/12] fix type issues --- sentry_sdk/ai/utils.py | 3 --- sentry_sdk/client.py | 31 ++++++++++++++----------------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 525ef82495..0cb8af67ec 100644 --- 
a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -14,9 +14,6 @@ if TYPE_CHECKING: from typing import Any, Dict, List, Optional -from sentry_sdk._types import AnnotatedValue -from sentry_sdk.serializer import serialize - MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 396af2da15..be79de4b3f 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -607,30 +607,27 @@ def _prepare_event( event.get("breadcrumbs", []), {"len": previous_total_breadcrumbs} ) - # Annotate truncated gen_ai messages in spans if scope is not None and scope._gen_ai_messages_truncated: - spans = event.get("spans", []) - if isinstance(spans, AnnotatedValue): - spans = spans.value - + spans = [] # type: List[Dict[str, Any]] for span in spans: if isinstance(span, dict): span_id = span.get("span_id") if span_id and span_id in scope._gen_ai_messages_truncated: span_data = span.get("data", {}) - original_count = span_data.pop( - "_gen_ai_messages_original_count", None - ) - if ( - original_count is not None - and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data - ): - span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = ( - AnnotatedValue( - span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES], - {"len": original_count}, - ) + if isinstance(span_data, dict): + original_count = span_data.pop( + "_gen_ai_messages_original_count", None ) + if ( + original_count is not None + and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data + ): + span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = ( + AnnotatedValue( + span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES], + {"len": original_count}, + ) + ) # Postprocess the event here so that annotated types do # generally not surface in before_send From 84bfabb27720a268e1cfe9595e8170d22d1fe4a1 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 16 Oct 2025 11:40:53 +0200 Subject: [PATCH 07/12] wip --- sentry_sdk/ai/utils.py | 1 - sentry_sdk/client.py | 39 ++++++++++++++++--------------------- tests/test_ai_monitoring.py | 31 ++++++++++++++++------------- 3 files changed, 35 insertions(+), 36 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 0cb8af67ec..1dd5cabe07 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -175,6 +175,5 @@ def truncate_and_annotate_messages( if n_removed > 0: scope._gen_ai_messages_truncated[span.span_id] = n_removed - span.set_data("_gen_ai_messages_original_count", original_count) return truncated_messages diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index be79de4b3f..7764e93356 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -598,6 +598,23 @@ def _prepare_event( if event_scrubber: event_scrubber.scrub_event(event) + if scope is not None and scope._gen_ai_messages_truncated: + spans = event.get("spans", []) # type: List[Dict[str, Any]] | AnnotatedValue[List[Dict[str, Any]]] + for span in spans: + span_id = span.get("span_id", None) + span_data = span.get("data", {}) + if ( + span_id + and span_id in scope._gen_ai_messages_truncated + and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data + ): + span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = AnnotatedValue( + span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES], + { + "len": scope._gen_ai_messages_truncated[span_id] + + len(span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES]) + }, + ) if previous_total_spans is not None: event["spans"] = AnnotatedValue( event.get("spans", []), {"len": previous_total_spans} @@ -607,28 +624,6 @@ def _prepare_event( event.get("breadcrumbs", []), {"len": previous_total_breadcrumbs} ) - if scope is 
not None and scope._gen_ai_messages_truncated: - spans = [] # type: List[Dict[str, Any]] - for span in spans: - if isinstance(span, dict): - span_id = span.get("span_id") - if span_id and span_id in scope._gen_ai_messages_truncated: - span_data = span.get("data", {}) - if isinstance(span_data, dict): - original_count = span_data.pop( - "_gen_ai_messages_original_count", None - ) - if ( - original_count is not None - and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data - ): - span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = ( - AnnotatedValue( - span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES], - {"len": original_count}, - ) - ) - # Postprocess the event here so that annotated types do # generally not surface in before_send if event is not None: diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index fb32042f84..2fa677e19d 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -288,7 +288,6 @@ def __init__(self): assert len(result) == len(sample_messages) assert result == sample_messages assert span.span_id not in scope._gen_ai_messages_truncated - assert "_gen_ai_messages_original_count" not in span.data def test_truncation_sets_metadata_on_scope(self, large_messages): class MockSpan: @@ -316,9 +315,8 @@ def __init__(self): assert len(result) < len(large_messages) n_removed = original_count - len(result) assert scope._gen_ai_messages_truncated[span.span_id] == n_removed - assert span.data["_gen_ai_messages_original_count"] == original_count - def test_metadata_contains_original_count(self, large_messages): + def test_scope_tracks_removed_messages(self, large_messages): class MockSpan: def __init__(self): self.span_id = "test_span_id" @@ -340,9 +338,9 @@ def __init__(self): large_messages, span, scope, max_bytes=small_limit ) - assert span.data["_gen_ai_messages_original_count"] == original_count n_removed = original_count - len(result) assert scope._gen_ai_messages_truncated[span.span_id] == n_removed + assert len(result) + n_removed == original_count def test_empty_messages_returns_none(self): class MockSpan: @@ -391,7 +389,7 @@ def __init__(self): class TestClientAnnotation: def test_client_wraps_truncated_messages_in_annotated_value(self, large_messages): - """Test that client.py properly wraps truncated messages in AnnotatedValue""" + """Test that client.py properly wraps truncated messages in AnnotatedValue using scope data""" from sentry_sdk._types import AnnotatedValue from sentry_sdk.consts import SPANDATA @@ -418,22 +416,30 @@ def __init__(self): ) span.set_data(SPANDATA.GEN_AI_REQUEST_MESSAGES, truncated_messages) - # Verify metadata was set on scope and span + # Verify metadata was set on scope assert span.span_id in scope._gen_ai_messages_truncated assert scope._gen_ai_messages_truncated[span.span_id] > 0 - assert "_gen_ai_messages_original_count" in span.data # Simulate what client.py does event = {"spans": [{"span_id": span.span_id, "data": span.data.copy()}]} - # Mimic client.py logic + # Mimic client.py logic - using scope to get the removed count for event_span in event["spans"]: + span_id = event_span.get("span_id") span_data = event_span.get("data", {}) - orig_count = span_data.pop("_gen_ai_messages_original_count", None) - if orig_count is not None and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data: + if ( + span_id + and span_id in scope._gen_ai_messages_truncated + and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data + ): + messages = span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] + n_removed = scope._gen_ai_messages_truncated[span_id] + 
n_remaining = len(messages) if isinstance(messages, list) else 0 + original_count_calculated = n_removed + n_remaining + span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = AnnotatedValue( - safe_serialize(span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES]), - {"len": orig_count}, + safe_serialize(messages), + {"len": original_count_calculated}, ) # Verify the annotation happened @@ -441,4 +447,3 @@ def __init__(self): assert isinstance(messages_value, AnnotatedValue) assert messages_value.metadata["len"] == original_count assert isinstance(messages_value.value, str) - assert "_gen_ai_messages_original_count" not in event["spans"][0]["data"] From 8e45aa3b7ec76534042cc356a2e8d50c107ae37d Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 16 Oct 2025 11:49:17 +0200 Subject: [PATCH 08/12] wip --- sentry_sdk/client.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/sentry_sdk/client.py b/sentry_sdk/client.py index 7764e93356..ffd899b545 100644 --- a/sentry_sdk/client.py +++ b/sentry_sdk/client.py @@ -599,22 +599,23 @@ def _prepare_event( event_scrubber.scrub_event(event) if scope is not None and scope._gen_ai_messages_truncated: - spans = event.get("spans", []) # type: List[Dict[str, Any]] | AnnotatedValue[List[Dict[str, Any]]] - for span in spans: - span_id = span.get("span_id", None) - span_data = span.get("data", {}) - if ( - span_id - and span_id in scope._gen_ai_messages_truncated - and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data - ): - span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = AnnotatedValue( - span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES], - { - "len": scope._gen_ai_messages_truncated[span_id] - + len(span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES]) - }, - ) + spans = event.get("spans", []) # type: List[Dict[str, Any]] | AnnotatedValue + if isinstance(spans, list): + for span in spans: + span_id = span.get("span_id", None) + span_data = span.get("data", {}) + if ( + span_id + and span_id in scope._gen_ai_messages_truncated + and SPANDATA.GEN_AI_REQUEST_MESSAGES in span_data + ): + span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES] = AnnotatedValue( + span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES], + { + "len": scope._gen_ai_messages_truncated[span_id] + + len(span_data[SPANDATA.GEN_AI_REQUEST_MESSAGES]) + }, + ) if previous_total_spans is not None: event["spans"] = AnnotatedValue( event.get("spans", []), {"len": previous_total_spans} From 4fb628911a0ef7526e9140107da140f3d056f074 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 16 Oct 2025 12:45:27 +0200 Subject: [PATCH 09/12] fix types --- sentry_sdk/ai/utils.py | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 1dd5cabe07..ac391df4cb 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -2,18 +2,13 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from typing import Any, Callable + from typing import Any, Callable, Dict, List, Optional from sentry_sdk.tracing import Span -from typing import TYPE_CHECKING - import sentry_sdk from sentry_sdk.utils import logger -if TYPE_CHECKING: - from typing import Any, Dict, List, Optional - MAX_GEN_AI_MESSAGE_BYTES = 20_000 # 20KB From 405a096baa60b7468866b299d0757be9c74cde6c Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 16 Oct 2025 12:51:34 +0200 Subject: [PATCH 10/12] don't truncate single message size yet --- sentry_sdk/ai/utils.py | 36 ------------------------------------ 1 file changed, 36 deletions(-) diff --git a/sentry_sdk/ai/utils.py 
b/sentry_sdk/ai/utils.py index ac391df4cb..356e0d95ac 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -109,46 +109,10 @@ def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): while len(truncated_messages) > 1: serialized_json = json.dumps(truncated_messages, separators=(",", ":")) current_size = len(serialized_json.encode("utf-8")) - if current_size <= max_bytes: break - truncated_messages.pop(0) - serialized_json = json.dumps(truncated_messages, separators=(",", ":")) - current_size = len(serialized_json.encode("utf-8")) - - if current_size > max_bytes and len(truncated_messages) == 1: - message = truncated_messages[0].copy() - content = message.get("content", "") - - if isinstance(content, str): - max_content_length = max_bytes // 2 - while True: - message["content"] = content[:max_content_length] - test_json = json.dumps([message], separators=(",", ":")) - if len(test_json.encode("utf-8")) <= max_bytes: - break - max_content_length = int(max_content_length * 0.9) - if max_content_length < 100: - message["content"] = "" - break - - truncated_messages = [message] - elif isinstance(content, list): - content_copy = list(content) - while len(content_copy) > 0: - message["content"] = content_copy - test_json = json.dumps([message], separators=(",", ":")) - if len(test_json.encode("utf-8")) <= max_bytes: - break - content_copy = content_copy[:-1] - - if len(content_copy) == 0: - message["content"] = [] - - truncated_messages = [message] - return truncated_messages From 12dcf5dd9694e1be1a5b862ac81741f952e395d9 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 16 Oct 2025 14:33:26 +0200 Subject: [PATCH 11/12] incrementally compute cumulative message sizes --- sentry_sdk/ai/utils.py | 56 +++++++++++++++++++++---------------- tests/test_ai_monitoring.py | 47 ++++++++++++++++++++----------- 2 files changed, 62 insertions(+), 41 deletions(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 356e0d95ac..3d9412a6a8 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -1,8 +1,10 @@ import json +from collections import deque from typing import TYPE_CHECKING +from sys import getsizeof if TYPE_CHECKING: - from typing import Any, Callable, Dict, List, Optional + from typing import Any, Callable, Dict, List, Optional, Tuple from sentry_sdk.tracing import Span @@ -99,21 +101,33 @@ def get_start_span_function(): return sentry_sdk.start_span if transaction_exists else sentry_sdk.start_transaction -def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): - # type: (List[Dict[str, Any]], int) -> List[Dict[str, Any]] - if not messages: - return messages +def _find_truncation_index(messages, max_bytes): + # type: (List[Dict[str, Any]], int) -> int + """ + Find the index of the first message that would exceed the max bytes limit. + Compute the individual message sizes, and return the index of the first message from the back + of the list that would exceed the max bytes limit. 
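+    Returns the index of the first message to keep, so that messages[index:]
+    fits within max_bytes; a return value of 0 means no truncation is needed.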
+ """ + running_sum = 0 + for idx in range(len(messages) - 1, -1, -1): + size = len(json.dumps(messages[idx], separators=(",", ":"))) + running_sum += size + if running_sum > max_bytes: + return idx + 1 - truncated_messages = list(messages) + return 0 - while len(truncated_messages) > 1: - serialized_json = json.dumps(truncated_messages, separators=(",", ":")) - current_size = len(serialized_json.encode("utf-8")) - if current_size <= max_bytes: - break - truncated_messages.pop(0) - return truncated_messages +def truncate_messages_by_size(messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES): + # type: (List[Dict[str, Any]], int) -> Tuple[List[Dict[str, Any]], int] + serialized_json = json.dumps(messages, separators=(",", ":")) + current_size = len(serialized_json.encode("utf-8")) + + if current_size <= max_bytes: + return messages, 0 + + truncation_index = _find_truncation_index(messages, max_bytes) + return messages[truncation_index:], truncation_index def truncate_and_annotate_messages( @@ -123,16 +137,10 @@ def truncate_and_annotate_messages( if not messages: return None - original_count = len(messages) - truncated_messages = truncate_messages_by_size(messages, max_bytes) - - if not truncated_messages: - return None - - truncated_count = len(truncated_messages) - n_removed = original_count - truncated_count - - if n_removed > 0: - scope._gen_ai_messages_truncated[span.span_id] = n_removed + truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes) + if removed_count > 0: + scope._gen_ai_messages_truncated[span.span_id] = len(messages) - len( + truncated_messages + ) return truncated_messages diff --git a/tests/test_ai_monitoring.py b/tests/test_ai_monitoring.py index 2fa677e19d..be66860384 100644 --- a/tests/test_ai_monitoring.py +++ b/tests/test_ai_monitoring.py @@ -10,6 +10,7 @@ set_data_normalized, truncate_and_annotate_messages, truncate_messages_by_size, + _find_truncation_index, ) from sentry_sdk.serializer import serialize from sentry_sdk.utils import safe_serialize @@ -209,27 +210,53 @@ def large_messages(): class TestTruncateMessagesBySize: def test_no_truncation_needed(self, sample_messages): """Test that messages under the limit are not truncated""" - result = truncate_messages_by_size( + result, removed_count = truncate_messages_by_size( sample_messages, max_bytes=MAX_GEN_AI_MESSAGE_BYTES ) assert len(result) == len(sample_messages) assert result == sample_messages + assert removed_count == 0 def test_truncation_removes_oldest_first(self, large_messages): """Test that oldest messages are removed first during truncation""" small_limit = 3000 - result = truncate_messages_by_size(large_messages, max_bytes=small_limit) + result, removed_count = truncate_messages_by_size( + large_messages, max_bytes=small_limit + ) assert len(result) < len(large_messages) if result: assert result[-1] == large_messages[-1] + assert removed_count == len(large_messages) - len(result) def test_empty_messages_list(self): """Test handling of empty messages list""" - result = truncate_messages_by_size( + result, removed_count = truncate_messages_by_size( [], max_bytes=MAX_GEN_AI_MESSAGE_BYTES // 500 ) assert result == [] + assert removed_count == 0 + + def test_find_truncation_index( + self, + ): + """Test that the truncation index is found correctly""" + # when represented in JSON, these are each 7 bytes long + messages = ["A" * 5, "B" * 5, "C" * 5, "D" * 5, "E" * 5] + truncation_index = _find_truncation_index(messages, 20) + assert truncation_index == 3 + assert 
messages[truncation_index:] == ["D" * 5, "E" * 5] + + messages = ["A" * 5, "B" * 5, "C" * 5, "D" * 5, "E" * 5] + truncation_index = _find_truncation_index(messages, 40) + assert truncation_index == 0 + assert messages[truncation_index:] == [ + "A" * 5, + "B" * 5, + "C" * 5, + "D" * 5, + "E" * 5, + ] def test_progressive_truncation(self, large_messages): """Test that truncation works progressively with different limits""" @@ -250,20 +277,6 @@ def test_progressive_truncation(self, large_messages): assert current_count >= 1 prev_count = current_count - def test_exact_size_boundary(self): - """Test behavior at exact size boundaries""" - messages = [{"role": "user", "content": "test"}] - - serialized = serialize(messages, is_vars=False) - json_str = json.dumps(serialized, separators=(",", ":")) - exact_size = len(json_str.encode("utf-8")) - - result = truncate_messages_by_size(messages, max_bytes=exact_size) - assert len(result) == 1 - - result = truncate_messages_by_size(messages, max_bytes=exact_size - 1) - assert len(result) == 1 - class TestTruncateAndAnnotateMessages: def test_no_truncation_returns_list(self, sample_messages): From cf9d80949b6bf955923ee246af8832b12397b313 Mon Sep 17 00:00:00 2001 From: Simon Hellmayr Date: Thu, 16 Oct 2025 14:42:08 +0200 Subject: [PATCH 12/12] use utf-8 --- sentry_sdk/ai/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sentry_sdk/ai/utils.py b/sentry_sdk/ai/utils.py index 3d9412a6a8..1fb291bdac 100644 --- a/sentry_sdk/ai/utils.py +++ b/sentry_sdk/ai/utils.py @@ -110,7 +110,7 @@ def _find_truncation_index(messages, max_bytes): """ running_sum = 0 for idx in range(len(messages) - 1, -1, -1): - size = len(json.dumps(messages[idx], separators=(",", ":"))) + size = len(json.dumps(messages[idx], separators=(",", ":")).encode("utf-8")) running_sum += size if running_sum > max_bytes: return idx + 1
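
For reference, a minimal standalone sketch (not part of the patch) of the truncation strategy the series converges on: serialize each message to compact JSON, accumulate UTF-8 byte sizes from newest to oldest, and drop the oldest messages once the byte budget is exceeded. Names are adapted from the final state of sentry_sdk/ai/utils.py; the scope/span bookkeeping around it is omitted.

import json

MAX_GEN_AI_MESSAGE_BYTES = 20_000  # 20KB, as in sentry_sdk/ai/utils.py

def find_truncation_index(messages, max_bytes):
    # Walk from the newest message backwards; the first index at which the
    # running byte total overflows the budget is the cut point, so
    # messages[cut:] is what gets kept. 0 means everything fits.
    running_sum = 0
    for idx in range(len(messages) - 1, -1, -1):
        size = len(json.dumps(messages[idx], separators=(",", ":")).encode("utf-8"))
        running_sum += size
        if running_sum > max_bytes:
            return idx + 1
    return 0

messages = [
    {"role": "system", "content": "You are helpful."},
    {"role": "user", "content": "old question " * 50},
    {"role": "assistant", "content": "old answer " * 50},
    {"role": "user", "content": "newest question"},
]
cut = find_truncation_index(messages, max_bytes=800)
kept = messages[cut:]
# cut (the number of dropped messages) is what the patch records per span_id
# in scope._gen_ai_messages_truncated, so client.py can emit the "len" meta
# as removed + remaining, i.e. the original message count.
print(cut, [m["role"] for m in kept])  # -> 2 ['assistant', 'user']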