diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py
index 8970e3685b7b6f..36e64a9a208f4f 100644
--- a/src/sentry/options/defaults.py
+++ b/src/sentry/options/defaults.py
@@ -2855,3 +2855,10 @@
     type=Bool,
     flags=FLAG_AUTOMATOR_MODIFIABLE,
 )
+
+# option for sample size when fetching project tag keys
+register(
+    "visibility.tag-key-sample-size",
+    default=1_000_000,
+    flags=FLAG_AUTOMATOR_MODIFIABLE,
+)
diff --git a/src/sentry/tagstore/snuba/backend.py b/src/sentry/tagstore/snuba/backend.py
index bf9922b42b5b94..0f13ae3272a148 100644
--- a/src/sentry/tagstore/snuba/backend.py
+++ b/src/sentry/tagstore/snuba/backend.py
@@ -11,10 +11,11 @@
 from sentry_relay.consts import SPAN_STATUS_CODE_TO_NAME
 from snuba_sdk import Column, Condition, Direction, Entity, Function, Op, OrderBy, Query, Request
 
-from sentry import analytics
+from sentry import analytics, features, options
 from sentry.api.utils import default_start_end_dates
 from sentry.issues.grouptype import GroupCategory
 from sentry.models.group import Group
+from sentry.models.organization import Organization
 from sentry.models.project import Project
 from sentry.models.release import Release
 from sentry.models.releaseenvironment import ReleaseEnvironment
@@ -433,6 +434,15 @@ def get_tag_keys_for_projects(
         max_unsampled_projects = _max_unsampled_projects
         # We want to disable FINAL in the snuba query to reduce load.
         optimize_kwargs = {"turbo": True}
+
+        # Add static sample amount to the query. Turbo will sample at 10% by
+        # default, but organizations with many events still get timeouts. A
+        # static sample creates more consistent performance.
+        organization_id = get_organization_id_from_project_ids(projects)
+        organization = Organization.objects.get_from_cache(id=organization_id)
+        if features.has("organizations:tag-key-sample-n", organization):
+            optimize_kwargs["sample"] = options.get("visibility.tag-key-sample-size")
+
         # If we are fetching less than max_unsampled_projects, then disable
         # the sampling that turbo enables so that we get more accurate results.
         # We only want sampling when we have a large number of projects, so