From 67e51954a96b186e57cdc88761947b812f54d220 Mon Sep 17 00:00:00 2001 From: Matt Quinn Date: Thu, 20 Nov 2025 12:30:54 -0500 Subject: [PATCH 1/5] feat(span-enrichment): introduce segment name normalization --- pyproject.toml | 1 + src/sentry/features/temporary.py | 2 + .../transaction_clusterer/normalization.py | 116 ++++++++++++++++++ .../consumers/process_segments/message.py | 24 +++- .../test_normalization.py | 89 ++++++++++++++ .../process_segments/test_message.py | 34 +++++ uv.lock | 10 ++ 7 files changed, 275 insertions(+), 1 deletion(-) create mode 100644 src/sentry/ingest/transaction_clusterer/normalization.py create mode 100644 tests/sentry/ingest/transaction_clusterer/test_normalization.py diff --git a/pyproject.toml b/pyproject.toml index fb32b57876ddde..296d70c41a2b7a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ dependencies = [ "rfc3986-validator>=0.1.1", # [end] jsonschema format validators "sentry-arroyo>=2.33.1", + "sentry-conventions>=0.3.0", "sentry-forked-email-reply-parser>=0.5.12.post1", "sentry-kafka-schemas>=2.1.15", "sentry-ophio>=1.1.3", diff --git a/src/sentry/features/temporary.py b/src/sentry/features/temporary.py index ecf62778c71d82..189e3eef191b72 100644 --- a/src/sentry/features/temporary.py +++ b/src/sentry/features/temporary.py @@ -219,6 +219,8 @@ def register_temporary_features(manager: FeatureManager) -> None: manager.add("organizations:more-workflows", OrganizationFeature, FeatureHandlerStrategy.INTERNAL, api_expose=False) # Generate charts using detector/open period payload manager.add("organizations:new-metric-issue-charts", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) + # Normalize segment names during span enrichment + manager.add("organizations:normalize_segment_names_in_span_enrichment", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) # Extract on demand metrics manager.add("organizations:on-demand-metrics-extraction", OrganizationFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=True) # Extract on demand metrics (experimental features) diff --git a/src/sentry/ingest/transaction_clusterer/normalization.py b/src/sentry/ingest/transaction_clusterer/normalization.py new file mode 100644 index 00000000000000..e5e0df1a352168 --- /dev/null +++ b/src/sentry/ingest/transaction_clusterer/normalization.py @@ -0,0 +1,116 @@ +import re +from dataclasses import dataclass + +import orjson +from sentry_conventions.attributes import ATTRIBUTE_NAMES + +from sentry.spans.consumers.process_segments.types import CompatibleSpan + +# Ported from Relay: +# https://github.com/getsentry/relay/blob/aad4b6099d12422e88dd5df49abae11247efdd99/relay-event-normalization/src/regexes.rs#L9 +TRANSACTION_NAME_NORMALIZER_REGEX = re.compile( + r"""(?x) + (?P[^/\\]* + (?a:\b)[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}(?a:\b) + [^/\\]*) | + (?P[^/\\]* + (?a:\b)[0-9a-fA-F]{40}(?a:\b) + [^/\\]*) | + (?P[^/\\]* + (?a:\b)[0-9a-fA-F]{32}(?a:\b) + [^/\\]*) | + (?P[^/\\]* + (?: + (?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]\.[0-9]+([+-][0-2][0-9]:[0-5][0-9]|Z))| + (?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]:[0-5][0-9]([+-][0-2][0-9]:[0-5][0-9]|Z))| + (?:[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-5][0-9]([+-][0-2][0-9]:[0-5][0-9]|Z)) + ) | + (?: + (?a:\b)(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat)(?a:\s)+)? + (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)(?a:\s)+ + (?:[0-9]{1,2})(?a:\s)+ + (?:[0-9]{2}:[0-9]{2}:[0-9]{2})(?a:\s)+ + [0-9]{4} + ) | + (?: + (?a:\b)(?:(Sun|Mon|Tue|Wed|Thu|Fri|Sat),(?a:\s)+)? + (?:0[1-9]|[1-2]?[0-9]|3[01])(?a:\s)+ + (?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)(?a:\s)+ + (?:19[0-9]{2}|[2-9][0-9]{3})(?a:\s)+ + (?:2[0-3]|[0-1][0-9]):([0-5][0-9]) + (?::(60|[0-5][0-9]))?(?a:\s)+ + (?:[-\+][0-9]{2}[0-5][0-9]|(?:UT|GMT|(?:E|C|M|P)(?:ST|DT)|[A-IK-Z])) + ) + [^/\\]*) | + (?P[^/\\]* + (?a:\b)0[xX][0-9a-fA-F]+(?a:\b) + [^/\\]*) | + (?:^|[/\\]) + (?P + (:?[^%/\\]|%[0-9a-fA-F]{2})*[0-9]{2,} + [^/\\]*)""", + re.UNICODE, +) + + +def normalize_segment_name(segment_span: CompatibleSpan): + attributes = segment_span.get("attributes") or {} + segment_name = segment_span.get("name") + if attr := attributes.get(ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME): + if attr["type"] == "string": + segment_name = attr["value"] # type: ignore[assignment] + if segment_name: + _scrub_identifiers(segment_span, segment_name) + + +@dataclass(frozen=True) +class Remark: + ty: str + rule_id: str + range: tuple[int, int] + + +# Ported from Relay: +# https://github.com/getsentry/relay/blob/aad4b6099d12422e88dd5df49abae11247efdd99/relay-event-normalization/src/transactions/processor.rs#L350 +def _scrub_identifiers(segment_span: CompatibleSpan, segment_name: str): + matches = TRANSACTION_NAME_NORMALIZER_REGEX.finditer(segment_name) + remarks = [] + for m in matches: + remarks.extend( + [ + Remark(ty="s", rule_id=group_name, range=(m.start(group_name), m.end(group_name))) + for group_name in m.groupdict().keys() + if m.start(group_name) > -1 + ] + ) + if len(remarks) == 0: + return + + remarks.sort(key=lambda remark: remark.range[1]) + str_parts: list[str] = [] + last_end = 0 + for remark in remarks: + start, end = remark.range + str_parts.append(segment_name[last_end:start]) + str_parts.append("*") + last_end = end + str_parts.append(segment_name[last_end:]) + normalized_segment_name = "".join(str_parts) + + segment_span["name"] = normalized_segment_name + attributes = segment_span.get("attributes") or {} + attributes[ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME] = { + "type": "string", + "value": normalized_segment_name, + } + attributes[ATTRIBUTE_NAMES.SENTRY_SPAN_SOURCE] = { + "type": "string", + "value": "sanitized", + } + attributes[f"sentry._meta.fields.attributes.{ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME}"] = { + "type": "string", + "value": orjson.dumps( + {"meta": {"": {"rem": [[r.rule_id, r.ty, r.range[0], r.range[1]] for r in remarks]}}} + ).decode(), + } + segment_span["attributes"] = attributes diff --git a/src/sentry/spans/consumers/process_segments/message.py b/src/sentry/spans/consumers/process_segments/message.py index bc844761bd8ef1..baf24ab2a37449 100644 --- a/src/sentry/spans/consumers/process_segments/message.py +++ b/src/sentry/spans/consumers/process_segments/message.py @@ -6,12 +6,14 @@ import sentry_sdk from django.core.exceptions import ValidationError +from sentry_conventions.attributes import ATTRIBUTE_NAMES from sentry_kafka_schemas.schema_types.ingest_spans_v1 import SpanEvent -from sentry import options +from sentry import features, options from sentry.constants import DataCategory from sentry.dynamic_sampling.rules.helpers.latest_releases import record_latest_release from sentry.event_manager import INSIGHT_MODULE_TO_PROJECT_FLAG_NAME +from sentry.ingest.transaction_clusterer.normalization import normalize_segment_name from sentry.insights import FilterSpan from sentry.insights import modules as insights_modules from sentry.issue_detection.performance_detection import detect_performance_problems @@ -61,6 +63,7 @@ def process_segment( # If the project does not exist then it might have been deleted during ingestion. return [] + _normalize_segment_name(segment_span, spans, project.organization) _add_segment_name(segment_span, spans) _compute_breakdowns(segment_span, spans, project) _create_models(segment_span, project) @@ -140,6 +143,25 @@ def _enrich_spans( return segment, spans +@metrics.wraps("spans.consumers.process_segments.normalize_segment_name") +def _normalize_segment_name( + segment_span: CompatibleSpan, spans: Sequence[CompatibleSpan], organization: Organization +) -> None: + if not features.has("organizations:normalize_segment_names_in_span_enrichment", organization): + return + + attributes = segment_span.get("attributes") or {} + segment_name = attributes.get(ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME) or segment_span.get("name") + if not segment_name: + return + + source = attributes.get(ATTRIBUTE_NAMES.SENTRY_SPAN_SOURCE) + unknown_if_parameterized = not source + known_to_be_unparameterized = source == "url" + if unknown_if_parameterized or known_to_be_unparameterized: + normalize_segment_name(segment_span) + + @metrics.wraps("spans.consumers.process_segments.add_segment_name") def _add_segment_name(segment: CompatibleSpan, spans: Sequence[CompatibleSpan]) -> None: segment_name = segment.get("name") diff --git a/tests/sentry/ingest/transaction_clusterer/test_normalization.py b/tests/sentry/ingest/transaction_clusterer/test_normalization.py new file mode 100644 index 00000000000000..01c9b9d0cf1962 --- /dev/null +++ b/tests/sentry/ingest/transaction_clusterer/test_normalization.py @@ -0,0 +1,89 @@ +import orjson +from sentry_conventions.attributes import ATTRIBUTE_NAMES + +from sentry.ingest.transaction_clusterer.normalization import normalize_segment_name +from sentry.spans.consumers.process_segments.types import CompatibleSpan + + +def _segment_span(**kwargs) -> CompatibleSpan: + segment_span: CompatibleSpan = { + "organization_id": 1, + "project_id": 1, + "trace_id": "94576097f3a64b68b85a59c7d4e3ee2a", + "span_id": "a49b42af9fb69da0", + "start_timestamp": 1707953018.865, + "end_timestamp": 1707953018.972, + "retention_days": 90, + "received": 1707953019.044972, + "status": "ok", + "exclusive_time": 0.1, + "op": "default", + "sentry_tags": {}, + "name": "default", + } + segment_span.update(**kwargs) # type:ignore[call-arg] + return segment_span + + +# Ported from Relay: +# https://github.com/getsentry/relay/blob/aad4b6099d12422e88dd5df49abae11247efdd99/relay-event-normalization/src/transactions/processor.rs#L789 +def test_identifiers_scrubbed(): + segment_span = _segment_span(name="/foo/2fd4e1c67a2d28fced849ee1bb76e7391b93eb12/user/123/0") + + normalize_segment_name(segment_span) + + assert segment_span["name"] == "/foo/*/user/*/0" + attributes = segment_span.get("attributes") or {} + assert attributes[ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME] == { + "type": "string", + "value": "/foo/*/user/*/0", + } + assert attributes[ATTRIBUTE_NAMES.SENTRY_SPAN_SOURCE] == { + "type": "string", + "value": "sanitized", + } + assert attributes[f"sentry._meta.fields.attributes.{ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME}"] == { + "type": "string", + "value": orjson.dumps( + {"meta": {"": {"rem": [["int", "s", 5, 45], ["int", "s", 51, 54]]}}} + ).decode(), + } + + +def test_name_attribute_takes_precedence_over_name(): + segment_span = _segment_span( + name="/foo/2fd4e1c67a2d28fced849ee1bb76e7391b93eb12/user/123/0", + attributes={ + ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME: { + "type": "string", + "value": "/bar/2fd4e1c67a2d28fced849ee1bb76e7391b93eb12", + } + }, + ) + + normalize_segment_name(segment_span) + + assert segment_span["name"] == "/bar/*" + attributes = segment_span.get("attributes") or {} + assert attributes[ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME] == { + "type": "string", + "value": "/bar/*", + } + assert attributes[ATTRIBUTE_NAMES.SENTRY_SPAN_SOURCE] == { + "type": "string", + "value": "sanitized", + } + assert attributes[f"sentry._meta.fields.attributes.{ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME}"] == { + "type": "string", + "value": orjson.dumps({"meta": {"": {"rem": [["int", "s", 5, 45]]}}}).decode(), + } + + +def test_no_meta_changes_if_no_name_changes(): + segment_span = _segment_span(name="/foo") + + normalize_segment_name(segment_span) + + assert segment_span["name"] == "/foo" + attributes = segment_span.get("attributes") or {} + assert len(attributes) == 0 diff --git a/tests/sentry/spans/consumers/process_segments/test_message.py b/tests/sentry/spans/consumers/process_segments/test_message.py index acc4c8fea9784b..3bcde6a66c5552 100644 --- a/tests/sentry/spans/consumers/process_segments/test_message.py +++ b/tests/sentry/spans/consumers/process_segments/test_message.py @@ -4,12 +4,14 @@ from unittest import mock import pytest +from sentry_conventions.attributes import ATTRIBUTE_NAMES from sentry.issues.grouptype import PerformanceStreamedSpansGroupTypeExperimental from sentry.models.environment import Environment from sentry.models.release import Release from sentry.spans.consumers.process_segments.message import _verify_compatibility, process_segment from sentry.testutils.cases import TestCase +from sentry.testutils.helpers.features import Feature from sentry.testutils.helpers.options import override_options from sentry.testutils.issue_detection.experiments import exclude_experimental_detectors from tests.sentry.spans.consumers.process import build_mock_span @@ -272,6 +274,38 @@ def test_segment_name_propagation_when_name_missing(self): child_attributes = child_span["attributes"] or {} assert child_attributes.get("sentry.segment.name") is None + def test_segment_name_normalization_with_feature(self): + _, segment_span = self.generate_basic_spans() + segment_span["name"] = "/foo/2fd4e1c67a2d28fced849ee1bb76e7391b93eb12/user/123/0" + + with self.feature("organizations:normalize_segment_names_in_span_enrichment"): + processed_spans = process_segment([segment_span]) + + assert processed_spans[0]["name"] == "/foo/*/user/*/0" + + def test_segment_name_normalization_without_feature(self): + _, segment_span = self.generate_basic_spans() + segment_span["name"] = "/foo/2fd4e1c67a2d28fced849ee1bb76e7391b93eb12/user/123/0" + + with Feature({"organizations:normalize_segment_names_in_span_enrichment": False}): + processed_spans = process_segment([segment_span]) + + assert ( + processed_spans[0]["name"] == "/foo/2fd4e1c67a2d28fced849ee1bb76e7391b93eb12/user/123/0" + ) + + def test_segment_name_normalization_checks_source(self): + _, segment_span = self.generate_basic_spans() + segment_span["name"] = "/foo/2fd4e1c67a2d28fced849ee1bb76e7391b93eb12/user/123/0" + segment_span["attributes"][ATTRIBUTE_NAMES.SENTRY_SPAN_SOURCE] = "route" + + with self.feature("organizations:normalize_segment_names_in_span_enrichment"): + processed_spans = process_segment([segment_span]) + + assert ( + processed_spans[0]["name"] == "/foo/2fd4e1c67a2d28fced849ee1bb76e7391b93eb12/user/123/0" + ) + def test_verify_compatibility(): spans: list[dict[str, Any]] = [ diff --git a/uv.lock b/uv.lock index dab2b578d5929a..886b525cdfb845 100644 --- a/uv.lock +++ b/uv.lock @@ -2009,6 +2009,7 @@ dependencies = [ { name = "rfc3339-validator", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "rfc3986-validator", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "sentry-arroyo", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, + { name = "sentry-conventions", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "sentry-forked-email-reply-parser", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "sentry-kafka-schemas", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, { name = "sentry-ophio", marker = "sys_platform == 'darwin' or sys_platform == 'linux'" }, @@ -2175,6 +2176,7 @@ requires-dist = [ { name = "rfc3339-validator", specifier = ">=0.1.2" }, { name = "rfc3986-validator", specifier = ">=0.1.1" }, { name = "sentry-arroyo", specifier = ">=2.33.1" }, + { name = "sentry-conventions", specifier = ">=0.3.0" }, { name = "sentry-forked-email-reply-parser", specifier = ">=0.5.12.post1" }, { name = "sentry-kafka-schemas", specifier = ">=2.1.15" }, { name = "sentry-ophio", specifier = ">=1.1.3" }, @@ -2286,6 +2288,14 @@ wheels = [ { url = "https://pypi.devinfra.sentry.io/wheels/sentry_cli-2.16.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.musllinux_1_2_x86_64.whl", hash = "sha256:9d0541a3cbe96697f354549f2464c24c3250aa189e58d690ca632f434c34e6e8" }, ] +[[package]] +name = "sentry-conventions" +version = "0.3.0" +source = { registry = "https://pypi.devinfra.sentry.io/simple" } +wheels = [ + { url = "https://pypi.devinfra.sentry.io/wheels/sentry_conventions-0.3.0-py3-none-any.whl", hash = "sha256:675e23ab13c690726b24d2d8bd96b043a4710c4f479373fa3b061510a279d1c6" }, +] + [[package]] name = "sentry-covdefaults-disable-branch-coverage" version = "1.0.2" From 008d2c9e94f4e2fe218f6eca38aaba56c8239668 Mon Sep 17 00:00:00 2001 From: Matt Quinn Date: Thu, 20 Nov 2025 12:59:51 -0500 Subject: [PATCH 2/5] fix attribute use --- .../ingest/transaction_clusterer/normalization.py | 10 ++++------ .../spans/consumers/process_segments/message.py | 13 ++++++------- .../consumers/process_segments/test_message.py | 5 ++++- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/src/sentry/ingest/transaction_clusterer/normalization.py b/src/sentry/ingest/transaction_clusterer/normalization.py index e5e0df1a352168..28c8ee24aa08b7 100644 --- a/src/sentry/ingest/transaction_clusterer/normalization.py +++ b/src/sentry/ingest/transaction_clusterer/normalization.py @@ -4,7 +4,7 @@ import orjson from sentry_conventions.attributes import ATTRIBUTE_NAMES -from sentry.spans.consumers.process_segments.types import CompatibleSpan +from sentry.spans.consumers.process_segments.types import CompatibleSpan, attribute_value # Ported from Relay: # https://github.com/getsentry/relay/blob/aad4b6099d12422e88dd5df49abae11247efdd99/relay-event-normalization/src/regexes.rs#L9 @@ -54,11 +54,9 @@ def normalize_segment_name(segment_span: CompatibleSpan): - attributes = segment_span.get("attributes") or {} - segment_name = segment_span.get("name") - if attr := attributes.get(ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME): - if attr["type"] == "string": - segment_name = attr["value"] # type: ignore[assignment] + segment_name = attribute_value( + segment_span, ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME + ) or segment_span.get("name") if segment_name: _scrub_identifiers(segment_span, segment_name) diff --git a/src/sentry/spans/consumers/process_segments/message.py b/src/sentry/spans/consumers/process_segments/message.py index baf24ab2a37449..2d4990d97827a3 100644 --- a/src/sentry/spans/consumers/process_segments/message.py +++ b/src/sentry/spans/consumers/process_segments/message.py @@ -63,7 +63,7 @@ def process_segment( # If the project does not exist then it might have been deleted during ingestion. return [] - _normalize_segment_name(segment_span, spans, project.organization) + _normalize_segment_name(segment_span, project.organization) _add_segment_name(segment_span, spans) _compute_breakdowns(segment_span, spans, project) _create_models(segment_span, project) @@ -144,18 +144,17 @@ def _enrich_spans( @metrics.wraps("spans.consumers.process_segments.normalize_segment_name") -def _normalize_segment_name( - segment_span: CompatibleSpan, spans: Sequence[CompatibleSpan], organization: Organization -) -> None: +def _normalize_segment_name(segment_span: CompatibleSpan, organization: Organization) -> None: if not features.has("organizations:normalize_segment_names_in_span_enrichment", organization): return - attributes = segment_span.get("attributes") or {} - segment_name = attributes.get(ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME) or segment_span.get("name") + segment_name = attribute_value( + segment_span, ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME + ) or segment_span.get("name") if not segment_name: return - source = attributes.get(ATTRIBUTE_NAMES.SENTRY_SPAN_SOURCE) + source = attribute_value(segment_span, ATTRIBUTE_NAMES.SENTRY_SPAN_SOURCE) unknown_if_parameterized = not source known_to_be_unparameterized = source == "url" if unknown_if_parameterized or known_to_be_unparameterized: diff --git a/tests/sentry/spans/consumers/process_segments/test_message.py b/tests/sentry/spans/consumers/process_segments/test_message.py index 3bcde6a66c5552..f34c0593c07d6a 100644 --- a/tests/sentry/spans/consumers/process_segments/test_message.py +++ b/tests/sentry/spans/consumers/process_segments/test_message.py @@ -297,7 +297,10 @@ def test_segment_name_normalization_without_feature(self): def test_segment_name_normalization_checks_source(self): _, segment_span = self.generate_basic_spans() segment_span["name"] = "/foo/2fd4e1c67a2d28fced849ee1bb76e7391b93eb12/user/123/0" - segment_span["attributes"][ATTRIBUTE_NAMES.SENTRY_SPAN_SOURCE] = "route" + segment_span["attributes"][ATTRIBUTE_NAMES.SENTRY_SPAN_SOURCE] = { + "type": "string", + "value": "route", + } with self.feature("organizations:normalize_segment_names_in_span_enrichment"): processed_spans = process_segment([segment_span]) From fce2d21178e8bc96562c0857ccce4a8391b3acd4 Mon Sep 17 00:00:00 2001 From: Matt Quinn Date: Thu, 20 Nov 2025 13:03:09 -0500 Subject: [PATCH 3/5] work around duplicate test file name --- tests/sentry/ingest/transaction_clusterer/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 tests/sentry/ingest/transaction_clusterer/__init__.py diff --git a/tests/sentry/ingest/transaction_clusterer/__init__.py b/tests/sentry/ingest/transaction_clusterer/__init__.py new file mode 100644 index 00000000000000..e69de29bb2d1d6 From 88c229b663159489f4548d285d4c1855774bc9aa Mon Sep 17 00:00:00 2001 From: Matt Quinn Date: Mon, 24 Nov 2025 10:34:52 -0500 Subject: [PATCH 4/5] move remark serialization into class --- src/sentry/ingest/transaction_clusterer/normalization.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/sentry/ingest/transaction_clusterer/normalization.py b/src/sentry/ingest/transaction_clusterer/normalization.py index 28c8ee24aa08b7..07bba43f339964 100644 --- a/src/sentry/ingest/transaction_clusterer/normalization.py +++ b/src/sentry/ingest/transaction_clusterer/normalization.py @@ -67,6 +67,9 @@ class Remark: rule_id: str range: tuple[int, int] + def serialize(self) -> list: + return [self.rule_id, self.ty, self.range[0], self.range[1]] + # Ported from Relay: # https://github.com/getsentry/relay/blob/aad4b6099d12422e88dd5df49abae11247efdd99/relay-event-normalization/src/transactions/processor.rs#L350 @@ -107,8 +110,6 @@ def _scrub_identifiers(segment_span: CompatibleSpan, segment_name: str): } attributes[f"sentry._meta.fields.attributes.{ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME}"] = { "type": "string", - "value": orjson.dumps( - {"meta": {"": {"rem": [[r.rule_id, r.ty, r.range[0], r.range[1]] for r in remarks]}}} - ).decode(), + "value": orjson.dumps({"meta": {"": {"rem": [r.serialize() for r in remarks]}}}).decode(), } segment_span["attributes"] = attributes From ed6acf0494c141821c074d13875f00b843bb7180 Mon Sep 17 00:00:00 2001 From: Matt Quinn Date: Mon, 24 Nov 2025 10:41:46 -0500 Subject: [PATCH 5/5] wrap normalization in safe_execute --- src/sentry/spans/consumers/process_segments/message.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sentry/spans/consumers/process_segments/message.py b/src/sentry/spans/consumers/process_segments/message.py index 2d4990d97827a3..1317224d42f5c6 100644 --- a/src/sentry/spans/consumers/process_segments/message.py +++ b/src/sentry/spans/consumers/process_segments/message.py @@ -37,6 +37,7 @@ from sentry.utils.dates import to_datetime from sentry.utils.outcomes import Outcome, OutcomeAggregator from sentry.utils.projectflags import set_project_flag_and_signal +from sentry.utils.safe import safe_execute logger = logging.getLogger(__name__) @@ -63,7 +64,7 @@ def process_segment( # If the project does not exist then it might have been deleted during ingestion. return [] - _normalize_segment_name(segment_span, project.organization) + safe_execute(_normalize_segment_name, segment_span, project.organization) _add_segment_name(segment_span, spans) _compute_breakdowns(segment_span, spans, project) _create_models(segment_span, project)