Skip to content

Commit 65bad06

Browse files
yuvmenandrewshie-sentry
authored andcommitted
feat(sampling) - Changed times_seen increments on groups to take error sample rate into account (#93003)
As part of error extrapolation/upsampling we need to now make sure the times_seen on groups is calculated correctly on projects where events have a sample rate. Now in event manager for projects in the allowlist we increment times_seen not be 1 but by the inverse of sample_rate, both when creating a new group and when saving a new event.
1 parent 0bb6a44 commit 65bad06

File tree

2 files changed

+228
-2
lines changed

2 files changed

+228
-2
lines changed

src/sentry/event_manager.py

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -749,7 +749,7 @@ def _derive_client_error_sampling_rate(jobs: Sequence[Job], projects: ProjectsMa
749749
)
750750

751751
if client_sample_rate is not None and isinstance(client_sample_rate, (int, float)):
752-
if 0 <= client_sample_rate <= 1:
752+
if 0 < client_sample_rate <= 1:
753753
job["data"]["sample_rate"] = client_sample_rate
754754
else:
755755
logger.warning(
@@ -1471,6 +1471,13 @@ def _create_group(
14711471
group_data["metadata"]["initial_priority"] = priority
14721472
group_creation_kwargs["data"] = group_data
14731473

1474+
# Set initial times_seen
1475+
group_creation_kwargs["times_seen"] = 1
1476+
1477+
# If the project is in the allowlist, use the client sample rate to weight the times_seen
1478+
if project.id in options.get("issues.client_error_sampling.project_allowlist"):
1479+
group_creation_kwargs["times_seen"] = _get_error_weighted_times_seen(event)
1480+
14741481
try:
14751482
with transaction.atomic(router.db_for_write(Group)):
14761483
# This is the 99.999% path. The rest of the function is all to handle a very rare and
@@ -1510,6 +1517,14 @@ def _create_group(
15101517
return group
15111518

15121519

1520+
def _get_error_weighted_times_seen(event: BaseEvent) -> int:
1521+
if event.get_event_type() in ("error", "default"):
1522+
error_sample_rate = event.data.get("sample_rate")
1523+
if error_sample_rate is not None and error_sample_rate > 0:
1524+
return int(1 / error_sample_rate)
1525+
return 1
1526+
1527+
15131528
def _is_stuck_counter_error(err: Exception, project: Project, short_id: int) -> bool:
15141529
"""Decide if this is `UniqueViolation` error on the `Group` table's project and short id values."""
15151530

@@ -1821,7 +1836,11 @@ def _process_existing_aggregate(
18211836

18221837
# We pass `times_seen` separately from all of the other columns so that `buffer_inr` knows to
18231838
# increment rather than overwrite the existing value
1824-
buffer_incr(Group, {"times_seen": 1}, {"id": group.id}, updated_group_values)
1839+
times_seen = 1
1840+
if group.project.id in options.get("issues.client_error_sampling.project_allowlist"):
1841+
times_seen = _get_error_weighted_times_seen(event)
1842+
1843+
buffer_incr(Group, {"times_seen": times_seen}, {"id": group.id}, updated_group_values)
18251844

18261845
return bool(is_regression)
18271846

tests/sentry/event_manager/test_event_manager.py

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2935,6 +2935,213 @@ def test_derive_client_error_sampling_rate_invalid_range(self) -> None:
29352935
# Check that sample_rate was not set due to invalid range
29362936
assert "sample_rate" not in event.data
29372937

2938+
def test_times_seen_new_group_default_behavior(self) -> None:
2939+
"""Test that new groups start with times_seen=1 when no sample rate is provided"""
2940+
manager = EventManager(make_event(message="test message"))
2941+
manager.normalize()
2942+
2943+
with self.tasks():
2944+
event = manager.save(self.project.id)
2945+
2946+
group = event.group
2947+
assert group is not None
2948+
assert group.times_seen == 1
2949+
2950+
def test_times_seen_existing_group_increment(self) -> None:
2951+
"""Test that existing groups have their times_seen incremented"""
2952+
# Create first event to establish the group
2953+
manager1 = EventManager(make_event(message="test message", fingerprint=["group1"]))
2954+
manager1.normalize()
2955+
2956+
with self.tasks():
2957+
event1 = manager1.save(self.project.id)
2958+
2959+
group = event1.group
2960+
assert group is not None
2961+
initial_times_seen = group.times_seen
2962+
assert initial_times_seen == 1
2963+
2964+
# Create second event for the same group
2965+
manager2 = EventManager(make_event(message="test message 2", fingerprint=["group1"]))
2966+
manager2.normalize()
2967+
2968+
with self.tasks():
2969+
event2 = manager2.save(self.project.id)
2970+
2971+
# Should be the same group
2972+
assert event2.group_id == event1.group_id
2973+
2974+
# Refresh group from database to get updated times_seen
2975+
group.refresh_from_db()
2976+
assert group.times_seen == initial_times_seen + 1
2977+
2978+
def test_times_seen_weighted_with_sample_rate_option_enabled(self) -> None:
2979+
"""Test that times_seen is weighted by 1/sample_rate when the project is in the allowlist"""
2980+
2981+
with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}):
2982+
# Create event with a sample rate of 0.5 (50%)
2983+
event_data = make_event(
2984+
message="sampled event", contexts={"error_sampling": {"client_sample_rate": 0.5}}
2985+
)
2986+
2987+
manager = EventManager(event_data)
2988+
manager.normalize()
2989+
2990+
with self.tasks():
2991+
event = manager.save(self.project.id)
2992+
2993+
group = event.group
2994+
assert group is not None
2995+
# With sample rate 0.5, times_seen should be 1/0.5 = 2
2996+
assert group.times_seen == 2
2997+
2998+
def test_times_seen_weighted_with_sample_rate_option_disabled(self) -> None:
2999+
"""Test that times_seen is not weighted when the project is not in the allowlist"""
3000+
3001+
# Create event with a sample rate of 0.5 (50%) but project not in allowlist
3002+
event_data = make_event(
3003+
message="sampled event", contexts={"error_sampling": {"client_sample_rate": 0.5}}
3004+
)
3005+
3006+
manager = EventManager(event_data)
3007+
manager.normalize()
3008+
3009+
with self.tasks():
3010+
event = manager.save(self.project.id)
3011+
3012+
group = event.group
3013+
assert group is not None
3014+
# With the project not in allowlist, times_seen should remain 1 regardless of sample rate
3015+
assert group.times_seen == 1
3016+
3017+
def test_times_seen_weighted_existing_group_with_sample_rate(self) -> None:
3018+
"""Test that existing groups are incremented by weighted amount when project is in allowlist"""
3019+
3020+
# Create first event to establish the group
3021+
manager1 = EventManager(make_event(message="test message", fingerprint=["group1"]))
3022+
manager1.normalize()
3023+
3024+
with self.tasks():
3025+
event1 = manager1.save(self.project.id)
3026+
3027+
group = event1.group
3028+
assert group is not None
3029+
initial_times_seen = group.times_seen
3030+
assert initial_times_seen == 1
3031+
3032+
with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}):
3033+
# Create second event for the same group with sample rate 0.25 (25%)
3034+
event_data = make_event(
3035+
message="test message 2",
3036+
fingerprint=["group1"],
3037+
contexts={"error_sampling": {"client_sample_rate": 0.25}},
3038+
)
3039+
3040+
manager2 = EventManager(event_data)
3041+
manager2.normalize()
3042+
3043+
with self.tasks():
3044+
event2 = manager2.save(self.project.id)
3045+
3046+
# Should be the same group
3047+
assert event2.group_id == event1.group_id
3048+
3049+
# Refresh group from database to get updated times_seen
3050+
group.refresh_from_db()
3051+
# Should be incremented by 1/0.25 = 4
3052+
assert group.times_seen == initial_times_seen + 4
3053+
3054+
def test_times_seen_no_sample_rate_meta(self) -> None:
3055+
"""Test that times_seen defaults to 1 when no sample rate meta exists"""
3056+
with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}):
3057+
# Create event with no error_sampling context
3058+
manager = EventManager(make_event(fingerprint=["no_context"]))
3059+
manager.normalize()
3060+
3061+
with self.tasks():
3062+
event = manager.save(self.project.id)
3063+
assert event.group is not None
3064+
assert event.group.times_seen == 1
3065+
3066+
# Create event with empty error_sampling context
3067+
manager = EventManager(
3068+
make_event(fingerprint=["empty_context"], contexts={"error_sampling": {}})
3069+
)
3070+
manager.normalize()
3071+
3072+
with self.tasks():
3073+
event = manager.save(self.project.id)
3074+
assert event.group is not None
3075+
assert event.group.times_seen == 1
3076+
3077+
# Create event with null client_sample_rate
3078+
manager = EventManager(
3079+
make_event(
3080+
fingerprint=["null_client_sample_rate"],
3081+
contexts={"error_sampling": {"client_sample_rate": None}},
3082+
)
3083+
)
3084+
manager.normalize()
3085+
3086+
with self.tasks():
3087+
event = manager.save(self.project.id)
3088+
assert event.group is not None
3089+
assert event.group.times_seen == 1
3090+
3091+
def test_times_seen_invalid_sample_rate(self) -> None:
3092+
"""Test times_seen calculation with invalid sample rates (null, 0, negative, > 1)"""
3093+
with self.options({"issues.client_error_sampling.project_allowlist": [self.project.id]}):
3094+
# Test null sample rate
3095+
manager = EventManager(make_event(fingerprint=["null_sample_rate"]))
3096+
manager.normalize()
3097+
3098+
with self.tasks():
3099+
event = manager.save(self.project.id)
3100+
assert event.group is not None
3101+
assert event.group.times_seen == 1
3102+
3103+
# Test sample rate of 0 (should result in times_seen = 1)
3104+
manager = EventManager(
3105+
make_event(
3106+
fingerprint=["zero_sample_rate"],
3107+
contexts={"error_sampling": {"client_sample_rate": 0}},
3108+
)
3109+
)
3110+
manager.normalize()
3111+
3112+
with self.tasks():
3113+
event = manager.save(self.project.id)
3114+
assert event.group is not None
3115+
assert event.group.times_seen == 1
3116+
3117+
# Test negative sample rate (should result in times_seen = 1)
3118+
manager = EventManager(
3119+
make_event(
3120+
fingerprint=["negative_sample_rate"],
3121+
contexts={"error_sampling": {"client_sample_rate": -0.5}},
3122+
)
3123+
)
3124+
manager.normalize()
3125+
3126+
with self.tasks():
3127+
event = manager.save(self.project.id)
3128+
assert event.group is not None
3129+
assert event.group.times_seen == 1
3130+
3131+
# Test sample rate > 1 (should result in times_seen = 1)
3132+
manager = EventManager(
3133+
make_event(
3134+
fingerprint=["high_sample_rate"],
3135+
contexts={"error_sampling": {"client_sample_rate": 1.5}},
3136+
)
3137+
)
3138+
manager.normalize()
3139+
3140+
with self.tasks():
3141+
event = manager.save(self.project.id)
3142+
assert event.group is not None
3143+
assert event.group.times_seen == 1
3144+
29383145

29393146
class ReleaseIssueTest(TestCase):
29403147
def setUp(self) -> None:

0 commit comments

Comments
 (0)