getsentry · gggritso · Nov 8, 2024 · Nov 6, 2024 · Nov 7, 2024 · Nov 7, 2024
diff --git a/src/sentry/options/defaults.py b/src/sentry/options/defaults.py
@@ -2855,3 +2855,10 @@
     type=Bool,
     flags=FLAG_AUTOMATOR_MODIFIABLE,
 )
+
+# Sample size used for the project tag keys query when "organizations:tag-key-sample-n" is enabled.
+register(
+    "visibility.tag-key-sample-size",
+    default=1_000_000,
+    flags=FLAG_AUTOMATOR_MODIFIABLE,
+)
@@ -11,10 +11,11 @@
 from sentry_relay.consts import SPAN_STATUS_CODE_TO_NAME
 from snuba_sdk import Column, Condition, Direction, Entity, Function, Op, OrderBy, Query, Request
 
-from sentry import analytics
+from sentry import analytics, features, options
 from sentry.api.utils import default_start_end_dates
 from sentry.issues.grouptype import GroupCategory
 from sentry.models.group import Group
+from sentry.models.organization import Organization
 from sentry.models.project import Project
 from sentry.models.release import Release
 from sentry.models.releaseenvironment import ReleaseEnvironment
@@ -433,6 +434,15 @@ def get_tag_keys_for_projects(
         max_unsampled_projects = _max_unsampled_projects
         # We want to disable FINAL in the snuba query to reduce load.
         optimize_kwargs = {"turbo": True}
+
+        # Apply a fixed sample size to the query. Turbo samples at 10% by
+        # default, but organizations with many events can still hit timeouts.
+        # A fixed sample size gives more consistent performance.
+        organization_id = get_organization_id_from_project_ids(projects)
+        organization = Organization.objects.get_from_cache(id=organization_id)
+        if features.has("organizations:tag-key-sample-n", organization):
+            optimize_kwargs["sample"] = options.get("visibility.tag-key-sample-size")
+
         # If we are fetching less than max_unsampled_projects, then disable
         # the sampling that turbo enables so that we get more accurate results.
         # We only want sampling when we have a large number of projects, so