From 52fc380a9493e05dca1ded7c91f6b4cc52feec01 Mon Sep 17 00:00:00 2001 From: George Gritsouk <989898+gggritso@users.noreply.github.com> Date: Wed, 6 Nov 2024 15:01:18 -0500 Subject: [PATCH 1/3] Add N sampling --- src/sentry/tagstore/snuba/backend.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/sentry/tagstore/snuba/backend.py b/src/sentry/tagstore/snuba/backend.py index bf9922b42b5b94..ef35ed2915ca1a 100644 --- a/src/sentry/tagstore/snuba/backend.py +++ b/src/sentry/tagstore/snuba/backend.py @@ -11,10 +11,11 @@ from sentry_relay.consts import SPAN_STATUS_CODE_TO_NAME from snuba_sdk import Column, Condition, Direction, Entity, Function, Op, OrderBy, Query, Request -from sentry import analytics +from sentry import analytics, features from sentry.api.utils import default_start_end_dates from sentry.issues.grouptype import GroupCategory from sentry.models.group import Group +from sentry.models.organization import Organization from sentry.models.project import Project from sentry.models.release import Release from sentry.models.releaseenvironment import ReleaseEnvironment @@ -433,6 +434,15 @@ def get_tag_keys_for_projects( max_unsampled_projects = _max_unsampled_projects # We want to disable FINAL in the snuba query to reduce load. optimize_kwargs = {"turbo": True} + + # Add static sample amount to the query. Turbo will sample at 10% by + # default, but organizations with many events still get timeouts. A + # static sample creates more consistent performance. + organization_id = get_organization_id_from_project_ids(projects) + organization = Organization.objects.get_from_cache(id=organization_id) + if features.has("organizations:tag-key-sample-n", organization): + optimize_kwargs["sample"] = 1_000_000 + # If we are fetching less than max_unsampled_projects, then disable # the sampling that turbo enables so that we get more accurate results. 
# We only want sampling when we have a large number of projects, so From 38e3e6d9ef6f09c322a233945b06ff4a7de43ef9 Mon Sep 17 00:00:00 2001 From: George Gritsouk <989898+gggritso@users.noreply.github.com> Date: Thu, 7 Nov 2024 13:28:41 -0500 Subject: [PATCH 2/3] Register option for N value --- src/sentry/options/defaults.py | 6 ++++++ src/sentry/tagstore/snuba/backend.py | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py index 25b6c81b80c5a3..6eac8160297367 100644 --- a/src/sentry/options/defaults.py +++ b/src/sentry/options/defaults.py @@ -2848,3 +2848,9 @@ default=[], flags=FLAG_AUTOMATOR_MODIFIABLE, ) + +register( + "visibility.tag-key-sample-size", + default=1_000_000, + flags=FLAG_AUTOMATOR_MODIFIABLE, +) diff --git a/src/sentry/tagstore/snuba/backend.py b/src/sentry/tagstore/snuba/backend.py index ef35ed2915ca1a..0f13ae3272a148 100644 --- a/src/sentry/tagstore/snuba/backend.py +++ b/src/sentry/tagstore/snuba/backend.py @@ -11,7 +11,7 @@ from sentry_relay.consts import SPAN_STATUS_CODE_TO_NAME from snuba_sdk import Column, Condition, Direction, Entity, Function, Op, OrderBy, Query, Request -from sentry import analytics, features +from sentry import analytics, features, options from sentry.api.utils import default_start_end_dates from sentry.issues.grouptype import GroupCategory from sentry.models.group import Group @@ -441,7 +441,7 @@ def get_tag_keys_for_projects( organization_id = get_organization_id_from_project_ids(projects) organization = Organization.objects.get_from_cache(id=organization_id) if features.has("organizations:tag-key-sample-n", organization): - optimize_kwargs["sample"] = 1_000_000 + optimize_kwargs["sample"] = options.get("visibility.tag-key-sample-size") # If we are fetching less than max_unsampled_projects, then disable # the sampling that turbo enables so that we get more accurate results. 
From 12f13f04d83c6f2f3b51c29f88d93f0313d7f378 Mon Sep 17 00:00:00 2001 From: George Gritsouk <989898+gggritso@users.noreply.github.com> Date: Thu, 7 Nov 2024 13:54:18 -0500 Subject: [PATCH 3/3] Add comment --- src/sentry/options/defaults.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py index dfdf542ce538e3..36e64a9a208f4f 100644 --- a/src/sentry/options/defaults.py +++ b/src/sentry/options/defaults.py @@ -2856,6 +2856,7 @@ flags=FLAG_AUTOMATOR_MODIFIABLE, ) +# Static sample size for the Snuba query that fetches project tag keys register( "visibility.tag-key-sample-size", default=1_000_000,