Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 22 additions & 2 deletions src/sentry/ingest/transaction_clusterer/datasource/redis.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
"""Write transactions into redis sets"""

import logging
from collections.abc import Callable, Iterator, Mapping
from collections.abc import Callable, Iterator, Mapping, Sequence
from typing import Any

import orjson
import sentry_sdk
from django.conf import settings
from rediscluster import RedisCluster
Expand Down Expand Up @@ -133,6 +134,12 @@ def record_segment_name(project: Project, segment_span: CompatibleSpan) -> None:
project,
segment_name,
)
if in_random_rollout("txnames.bump-lifetime-sample-rate"):
safe_execute(
_bump_rule_lifetime_for_segment,
project,
segment_span,
)


def _should_store_transaction_name(event_data: Mapping[str, Any]) -> str | None:
Expand Down Expand Up @@ -181,9 +188,22 @@ def _should_store_segment_name_inner(


def _bump_rule_lifetime(project: Project, event_data: Mapping[str, Any]) -> None:
applied_rules = event_data.get("_meta", {}).get("transaction", {}).get("", {}).get("rem", {})
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: applied_rules on line 191 defaults to a dictionary ({}) instead of a list ([]), causing a type mismatch with expected Sequence iteration.
Severity: MEDIUM | Confidence: High

🔍 Detailed Analysis

On line 191 of src/sentry/ingest/transaction_clusterer/datasource/redis.py, the applied_rules variable defaults to an empty dictionary ({}) when the rem field is absent from transaction event data. This creates a type mismatch because the code expects applied_rules to be a Sequence (specifically a list) for iteration, as evidenced by test data, type annotations, and the loop logic on lines 210-217. This inconsistency with the segment case (line 200, which defaults to []) means that if a non-empty dictionary is ever provided, the iteration logic will fail.

💡 Suggested Fix

Change the default value for applied_rules on line 191 from {} to [] to align with the expected Sequence type and the segment implementation.

🤖 Prompt for AI Agent
Review the code at the location below. A potential bug has been identified by an AI
agent.
Verify if this is a real issue. If it is, propose a fix; if not, explain why it's not
valid.

Location: src/sentry/ingest/transaction_clusterer/datasource/redis.py#L191

Potential issue: On line 191 of
`src/sentry/ingest/transaction_clusterer/datasource/redis.py`, the `applied_rules`
variable defaults to an empty dictionary (`{}`) when the `rem` field is absent from
transaction event data. This creates a type mismatch because the code expects
`applied_rules` to be a `Sequence` (specifically a list) for iteration, as evidenced by
test data, type annotations, and the loop logic on lines 210-217. This inconsistency
with the segment case (line 200, which defaults to `[]`) means that if a non-empty
dictionary is ever provided, the iteration logic will fail.

Did we get this right? 👍 / 👎 to inform future reviews.
Reference ID: 4996900

_bump_rule_lifetime_inner(project, applied_rules)


def _bump_rule_lifetime_for_segment(project: Project, segment_span: CompatibleSpan) -> None:
meta_str: str = attribute_value(
segment_span, f"sentry._meta.fields.attributes.{ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME}"
)
meta = orjson.loads(meta_str)
applied_rules = meta.get("meta", {}).get("", {}).get("rem", [])
_bump_rule_lifetime_inner(project, applied_rules)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Missing null check before parsing JSON metadata

The _bump_rule_lifetime_for_segment function calls attribute_value which returns None when the requested attribute doesn't exist on the span. The result is immediately passed to orjson.loads(meta_str) without a null check, which will raise a TypeError when meta_str is None. This occurs when a segment span hasn't had any clustering rules applied to it (e.g., new segment name patterns before rules are generated). While safe_execute catches the exception, this causes unnecessary errors rather than gracefully handling the case where no rules were applied.

Fix in Cursor Fix in Web

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think cursor is right, does mypy actually accept meta_str: str here?



def _bump_rule_lifetime_inner(project: Project, applied_rules: Sequence):
from sentry.ingest.transaction_clusterer import rules as clusterer_rules

applied_rules = event_data.get("_meta", {}).get("transaction", {}).get("", {}).get("rem", {})
if not applied_rules:
return

Expand Down
61 changes: 61 additions & 0 deletions tests/sentry/ingest/test_transaction_clusterer.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from unittest import mock

import orjson
import pytest
from sentry_conventions.attributes import ATTRIBUTE_NAMES

Expand Down Expand Up @@ -554,6 +555,66 @@ def test_transaction_clusterer_bumps_rules(_, default_organization) -> None:
assert get_rules(ClustererNamespace.TRANSACTIONS, project1) == {"/user/*/**": 2}


@mock.patch("sentry.ingest.transaction_clusterer.datasource.redis.MAX_SET_SIZE", 10)
@mock.patch("sentry.ingest.transaction_clusterer.tasks.MERGE_THRESHOLD", 5)
@mock.patch(
"sentry.ingest.transaction_clusterer.tasks.cluster_projects.delay",
wraps=cluster_projects, # call immediately
)
@django_db_all
def test_segment_clusterer_bumps_rules(_, default_organization) -> None:
project1 = Project(id=123, name="project1", organization_id=default_organization.id)
project1.save()

with override_options({"txnames.bump-lifetime-sample-rate": 1.0}):
for i in range(10):
_record_sample(
ClustererNamespace.TRANSACTIONS, project1, f"/user/tx-{project1.name}-{i}/settings"
)

with mock.patch("sentry.ingest.transaction_clusterer.rules._now", lambda: 1):
spawn_clusterers()

assert get_rules(ClustererNamespace.TRANSACTIONS, project1) == {"/user/*/**": 1}

with mock.patch("sentry.ingest.transaction_clusterer.rules._now", lambda: 2):
record_segment_name(
project1,
{
"name": "/user/*/settings",
"attributes": {
ATTRIBUTE_NAMES.SENTRY_SPAN_SOURCE: {
"type": "string",
"value": "sanitized",
},
f"sentry._meta.fields.attributes.{ATTRIBUTE_NAMES.SENTRY_SEGMENT_NAME}": {
"type": "string",
"value": orjson.dumps(
{
"meta": {
"": {
"rem": [["int", "s", 0, 0], ["/user/*/**", "s"]],
"val": "/user/tx-project1-pi/settings",
}
}
}
).decode(),
},
},
}, # type: ignore[typeddict-item]
)

# _get_rules fetches from project options, which arent updated yet.
assert get_redis_rules(ClustererNamespace.TRANSACTIONS, project1) == {"/user/*/**": 2}
assert get_rules(ClustererNamespace.TRANSACTIONS, project1) == {"/user/*/**": 1}
# Update rules to update the project option storage.
with mock.patch("sentry.ingest.transaction_clusterer.rules._now", lambda: 3):
assert 0 == update_rules(ClustererNamespace.TRANSACTIONS, project1, [])
# After project options are updated, the last_seen should also be updated.
assert get_redis_rules(ClustererNamespace.TRANSACTIONS, project1) == {"/user/*/**": 2}
assert get_rules(ClustererNamespace.TRANSACTIONS, project1) == {"/user/*/**": 2}


@mock.patch("sentry.ingest.transaction_clusterer.datasource.redis.MAX_SET_SIZE", 3)
@mock.patch("sentry.ingest.transaction_clusterer.tasks.MERGE_THRESHOLD", 2)
@mock.patch(
Expand Down
Loading