Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion sentry_sdk/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@


SENSITIVE_DATA_SUBSTITUTE = "[Filtered]"
BLOB_DATA_SUBSTITUTE = "[Blob substitute]"


class AnnotatedValue:
Expand Down
104 changes: 0 additions & 104 deletions sentry_sdk/ai/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,6 @@
from copy import deepcopy
from typing import TYPE_CHECKING

from sentry_sdk._types import BLOB_DATA_SUBSTITUTE
from sentry_sdk.ai.consts import DATA_URL_BASE64_REGEX

if TYPE_CHECKING:
from typing import Any, Callable, Dict, List, Optional, Tuple
Expand Down Expand Up @@ -198,104 +196,6 @@ def _find_truncation_index(messages: "List[Dict[str, Any]]", max_bytes: int) ->
return 0


def _is_image_type_with_blob_content(item: "Dict[str, Any]") -> bool:
"""
Some content blocks contain an image_url property with base64 content as its value.
This is used to identify those while not leading to unnecessary copying of data when the image URL does not contain base64 content.
"""
if item.get("type") != "image_url":
return False

image_url = item.get("image_url", {}).get("url", "")
data_url_match = DATA_URL_BASE64_REGEX.match(image_url)

return bool(data_url_match)


def redact_blob_message_parts(
messages: "List[Dict[str, Any]]",
) -> "List[Dict[str, Any]]":
"""
Redact blob message parts from the messages by replacing blob content with "[Filtered]".

This function creates a deep copy of messages that contain blob content to avoid
mutating the original message dictionaries. Messages without blob content are
returned as-is to minimize copying overhead.

e.g:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "data:image/jpeg;base64,..."
}
]
}
becomes:
{
"role": "user",
"content": [
{
"text": "How many ponies do you see in the image?",
"type": "text"
},
{
"type": "blob",
"modality": "image",
"mime_type": "image/jpeg",
"content": "[Filtered]"
}
]
}
"""

# First pass: check if any message contains blob content
has_blobs = False
for message in messages:
if not isinstance(message, dict):
continue
content = message.get("content")
if isinstance(content, list):
for item in content:
if isinstance(item, dict) and (
item.get("type") == "blob" or _is_image_type_with_blob_content(item)
):
has_blobs = True
break
if has_blobs:
break

# If no blobs found, return original messages to avoid unnecessary copying
if not has_blobs:
return messages

# Deep copy messages to avoid mutating the original
messages_copy = deepcopy(messages)

# Second pass: redact blob content in the copy
for message in messages_copy:
if not isinstance(message, dict):
continue

content = message.get("content")
if isinstance(content, list):
for item in content:
if isinstance(item, dict):
if item.get("type") == "blob":
item["content"] = BLOB_DATA_SUBSTITUTE
elif _is_image_type_with_blob_content(item):
item["image_url"]["url"] = BLOB_DATA_SUBSTITUTE

return messages_copy


def truncate_messages_by_size(
messages: "List[Dict[str, Any]]",
max_bytes: int = MAX_GEN_AI_MESSAGE_BYTES,
Expand Down Expand Up @@ -341,8 +241,6 @@ def truncate_and_annotate_messages(
if not messages:
return None

messages = redact_blob_message_parts(messages)

truncated_message = _truncate_single_message_content_if_present(
deepcopy(messages[-1]), max_chars=max_single_message_chars
)
Expand All @@ -361,8 +259,6 @@ def truncate_and_annotate_embedding_inputs(
if not messages:
return None

messages = redact_blob_message_parts(messages)

truncated_messages, removed_count = truncate_messages_by_size(messages, max_bytes)
if removed_count > 0:
scope._gen_ai_original_message_count[span.span_id] = len(messages)
Expand Down
Loading
Loading