Skip to content

Commit

Permalink
Do not trim span descriptions. (#1983)
Browse files Browse the repository at this point in the history
- Made sure that span descriptions are never trimmed. (for all op values, not just db spans.)
- Removed the experimental smart_transaction_trimming option
- Also removed some dead code that was never executed because the experimental option defaults to False.
  • Loading branch information
antonpirker committed Apr 3, 2023
1 parent 5d9cd4f commit c4d0384
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 148 deletions.
7 changes: 1 addition & 6 deletions sentry_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -320,12 +320,7 @@ def _prepare_event(
# Postprocess the event here so that annotated types do
# generally not surface in before_send
if event is not None:
event = serialize(
event,
smart_transaction_trimming=self.options["_experiments"].get(
"smart_transaction_trimming"
),
)
event = serialize(event)

before_send = self.options["before_send"]
if before_send is not None and event.get("type") != "transaction":
Expand Down
3 changes: 1 addition & 2 deletions sentry_sdk/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@
{
"max_spans": Optional[int],
"record_sql_params": Optional[bool],
"smart_transaction_trimming": Optional[bool],
# TODO: Remvoe these 2 profiling related experiments
# TODO: Remove these 2 profiling related experiments
"profiles_sample_rate": Optional[float],
"profiler_mode": Optional[ProfilerMode],
},
Expand Down
129 changes: 15 additions & 114 deletions sentry_sdk/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,9 @@
capture_internal_exception,
disable_capture_event,
format_timestamp,
json_dumps,
safe_repr,
strip_string,
)

import sentry_sdk.utils

from sentry_sdk._compat import (
text_type,
PY2,
Expand All @@ -23,12 +19,9 @@
iteritems,
binary_sequence_types,
)

from sentry_sdk._types import TYPE_CHECKING

if TYPE_CHECKING:
from datetime import timedelta

from types import TracebackType

from typing import Any
Expand All @@ -37,7 +30,6 @@
from typing import Dict
from typing import List
from typing import Optional
from typing import Tuple
from typing import Type
from typing import Union

Expand Down Expand Up @@ -120,12 +112,11 @@ def __exit__(
self._ids.pop(id(self._objs.pop()), None)


def serialize(event, smart_transaction_trimming=False, **kwargs):
# type: (Event, bool, **Any) -> Event
def serialize(event, **kwargs):
# type: (Event, **Any) -> Event
memo = Memo()
path = [] # type: List[Segment]
meta_stack = [] # type: List[Dict[str, Any]]
span_description_bytes = [] # type: List[int]

def _annotate(**meta):
# type: (**Any) -> None
Expand Down Expand Up @@ -365,113 +356,23 @@ def _serialize_node_impl(
if not isinstance(obj, string_types):
obj = safe_repr(obj)

# Allow span descriptions to be longer than other strings.
#
# For database auto-instrumented spans, the description contains
# potentially long SQL queries that are most useful when not truncated.
# Because arbitrarily large events may be discarded by the server as a
# protection mechanism, we dynamically limit the description length
# later in _truncate_span_descriptions.
if (
smart_transaction_trimming
and len(path) == 3
and path[0] == "spans"
and path[-1] == "description"
):
span_description_bytes.append(len(obj))
is_span_description = (
len(path) == 3 and path[0] == "spans" and path[-1] == "description"
)
if is_span_description:
return obj
return _flatten_annotated(strip_string(obj))

def _truncate_span_descriptions(serialized_event, event, excess_bytes):
# type: (Event, Event, int) -> None
"""
Modifies serialized_event in-place trying to remove excess_bytes from
span descriptions. The original event is used read-only to access the
span timestamps (represented as RFC3399-formatted strings in
serialized_event).
It uses heuristics to prioritize preserving the description of spans
that might be the most interesting ones in terms of understanding and
optimizing performance.
"""
# When truncating a description, preserve a small prefix.
min_length = 10

def shortest_duration_longest_description_first(args):
# type: (Tuple[int, Span]) -> Tuple[timedelta, int]
i, serialized_span = args
span = event["spans"][i]
now = datetime.utcnow()
start = span.get("start_timestamp") or now
end = span.get("timestamp") or now
duration = end - start
description = serialized_span.get("description") or ""
return (duration, -len(description))

# Note: for simplicity we sort spans by exact duration and description
# length. If ever needed, we could have a more involved heuristic, e.g.
# replacing exact durations with "buckets" and/or looking at other span
# properties.
path.append("spans")
for i, span in sorted(
enumerate(serialized_event.get("spans") or []),
key=shortest_duration_longest_description_first,
):
description = span.get("description") or ""
if len(description) <= min_length:
continue
excess_bytes -= len(description) - min_length
path.extend([i, "description"])
# Note: the last time we call strip_string we could preserve a few
# more bytes up to a total length of MAX_EVENT_BYTES. Since that's
# not strictly required, we leave it out for now for simplicity.
span["description"] = _flatten_annotated(
strip_string(description, max_length=min_length)
)
del path[-2:]
del meta_stack[len(path) + 1 :]

if excess_bytes <= 0:
break
path.pop()
del meta_stack[len(path) + 1 :]
return _flatten_annotated(strip_string(obj))

#
# Start of serialize() function
#
disable_capture_event.set(True)
try:
rv = _serialize_node(event, **kwargs)
if meta_stack and isinstance(rv, dict):
rv["_meta"] = meta_stack[0]

sum_span_description_bytes = sum(span_description_bytes)
if smart_transaction_trimming and sum_span_description_bytes > 0:
span_count = len(event.get("spans") or [])
# This is an upper bound of how many bytes all descriptions would
# consume if the usual string truncation in _serialize_node_impl
# would have taken place, not accounting for the metadata attached
# as event["_meta"].
descriptions_budget_bytes = span_count * sentry_sdk.utils.MAX_STRING_LENGTH

# If by not truncating descriptions we ended up with more bytes than
# per the usual string truncation, check if the event is too large
# and we need to truncate some descriptions.
#
# This is guarded with an if statement to avoid JSON-encoding the
# event unnecessarily.
if sum_span_description_bytes > descriptions_budget_bytes:
original_bytes = len(json_dumps(rv))
excess_bytes = original_bytes - MAX_EVENT_BYTES
if excess_bytes > 0:
# Event is too large, will likely be discarded by the
# server. Trim it down before sending.
_truncate_span_descriptions(rv, event, excess_bytes)

# Span descriptions truncated, set or reset _meta.
#
# We run the same code earlier because we want to account
# for _meta when calculating original_bytes, the number of
# bytes in the JSON-encoded event.
if meta_stack and isinstance(rv, dict):
rv["_meta"] = meta_stack[0]
return rv
serialized_event = _serialize_node(event, **kwargs)
if meta_stack and isinstance(serialized_event, dict):
serialized_event["_meta"] = meta_stack[0]

return serialized_event
finally:
disable_capture_event.set(False)
40 changes: 14 additions & 26 deletions tests/integrations/sqlalchemy/test_sqlalchemy.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@

from sentry_sdk import capture_message, start_transaction, configure_scope
from sentry_sdk.integrations.sqlalchemy import SqlalchemyIntegration
from sentry_sdk.utils import json_dumps, MAX_STRING_LENGTH
from sentry_sdk.serializer import MAX_EVENT_BYTES
from sentry_sdk.utils import json_dumps, MAX_STRING_LENGTH


def test_orm_queries(sentry_init, capture_events):
Expand Down Expand Up @@ -143,7 +143,6 @@ def test_long_sql_query_preserved(sentry_init, capture_events):
sentry_init(
traces_sample_rate=1,
integrations=[SqlalchemyIntegration()],
_experiments={"smart_transaction_trimming": True},
)
events = capture_events()

Expand All @@ -158,11 +157,10 @@ def test_long_sql_query_preserved(sentry_init, capture_events):
assert description.endswith("SELECT 98 UNION SELECT 99")


def test_too_large_event_truncated(sentry_init, capture_events):
def test_large_event_not_truncated(sentry_init, capture_events):
sentry_init(
traces_sample_rate=1,
integrations=[SqlalchemyIntegration()],
_experiments={"smart_transaction_trimming": True},
)
events = capture_events()

Expand All @@ -178,36 +176,26 @@ def processor(event, hint):
engine = create_engine("sqlite:///:memory:")
with start_transaction(name="test"):
with engine.connect() as con:
for _ in range(2000):
for _ in range(1500):
con.execute(" UNION ".join("SELECT {}".format(i) for i in range(100)))

(event,) = events

# Because of attached metadata in the "_meta" key, we may send out a little
# bit more than MAX_EVENT_BYTES.
max_bytes = 1.2 * MAX_EVENT_BYTES
assert len(json_dumps(event)) < max_bytes
assert len(json_dumps(event)) > MAX_EVENT_BYTES

# Some spans are discarded.
assert len(event["spans"]) == 1000

for i, span in enumerate(event["spans"]):
description = span["description"]

assert description.startswith("SELECT ")
if str(i) in event["_meta"]["spans"]:
# Description must have been truncated
assert len(description) == 10
assert description.endswith("...")
else:
# Description was not truncated, check for original length
assert len(description) == 1583
assert description.endswith("SELECT 98 UNION SELECT 99")

# Smoke check the meta info for one of the spans.
assert next(iter(event["_meta"]["spans"].values())) == {
"description": {"": {"len": 1583, "rem": [["!limit", "x", 7, 10]]}}
}
# Span descriptions are not truncated.
description = event["spans"][0]["description"]
assert len(description) == 1583
assert description.startswith("SELECT 0")
assert description.endswith("SELECT 98 UNION SELECT 99")

description = event["spans"][999]["description"]
assert len(description) == 1583
assert description.startswith("SELECT 0")
assert description.endswith("SELECT 98 UNION SELECT 99")

# Smoke check that truncation of other fields has not changed.
assert len(event["message"]) == MAX_STRING_LENGTH
Expand Down

0 comments on commit c4d0384

Please sign in to comment.