From 843cb66264d0892d7b324bf2e038f99e88bf00ff Mon Sep 17 00:00:00 2001
From: Pierre Massat <pierre.massat@sentry.io>
Date: Wed, 27 May 2026 17:16:48 -0700
Subject: [PATCH 1/4] feat(allocation-policy): Add per-org overrides to
 BytesScannedRejectingPolicy

Add two new Configuration entries on BytesScannedRejectingPolicy:

- organization_referrer_scan_limit_override, keyed by
  (organization_id, referrer)
- organization_scan_limit_override, keyed by organization_id

Previously the only way to override the scan limit for the
organization branch was the per-referrer override that applied to
every organization, which made it impossible to tune the limit for a
single noisy org without affecting everyone else.

Overrides on the organization branch are now resolved in order of
specificity, with the first one set winning:

  (organization_id, referrer)
    > organization_id
    > (all orgs, referrer)
    > default

The project branch and cross-org behavior are unchanged.

Also fix pre-existing lint/mypy issues in the same files that the
pre-commit hook surfaces once the files are touched (E712 truthiness
asserts, untyped tenant_ids dicts, ResourceIdentifier arg type, and a
misplaced type: ignore exposed by ruff reformatting).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Agent transcript: https://claudescope.sentry.dev/share/xXAC6_vLoFmdoaI4mlEdxiC7CUhp2mohJV2MuJoaNPU
---
 .../bytes_scanned_rejecting_policy.py         |  96 ++++++++------
 .../test_bytes_scanned_rejecting_policy.py    | 120 ++++++++++++++----
 2 files changed, 147 insertions(+), 69 deletions(-)

diff --git a/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py b/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py
index 11a5ecc94c4..708539450af 100644
--- a/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py
+++ b/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py
@@ -38,9 +38,7 @@
 
 
 UNREASONABLY_LARGE_NUMBER_OF_BYTES_SCANNED_PER_QUERY = int(1e12)
-_RATE_LIMITER = RedisSlidingWindowRateLimiter(
-    get_redis_client(RedisClientKey.RATE_LIMITER)
-)
+_RATE_LIMITER = RedisSlidingWindowRateLimiter(get_redis_client(RedisClientKey.RATE_LIMITER))
 DEFAULT_OVERRIDE_LIMIT = -1
 PETABYTE = 10**12
 DEFAULT_BYTES_SCANNED_LIMIT = int(1.28 * PETABYTE)
@@ -64,32 +62,47 @@ class BytesScannedRejectingPolicy(AllocationPolicy):
     WINDOW_GRANULARITY_SECONDS = 60
 
     def _additional_config_definitions(self) -> list[Configuration]:
-        # Overrides are prioritized in order of specificity.
-        # If two overrides applicable available to the request, the one with a smaller value takes precedence
+        # Overrides are checked in order of specificity; the first one set wins.
+        # For organization_id queries:
+        #   (organization_id, referrer) > organization_id > (all orgs, referrer) > default
         return [
             Configuration(
                 "referrer_all_projects_scan_limit_override",
-                f"Specific referrer scan limit in the last {self.WINDOW_SECONDS/ 60} mins, APPLIES TO ALL PROJECTS",
+                f"Specific referrer scan limit in the last {self.WINDOW_SECONDS / 60} mins, APPLIES TO ALL PROJECTS",
                 int,
                 DEFAULT_OVERRIDE_LIMIT,
                 param_types={"referrer": str},
             ),
             Configuration(
                 "referrer_all_organizations_scan_limit_override",
-                f"Specific referrer scan limit in the last {self.WINDOW_SECONDS/ 60} mins, APPLIES TO ALL ORGANIZATIONS",
+                f"Specific referrer scan limit in the last {self.WINDOW_SECONDS / 60} mins, APPLIES TO ALL ORGANIZATIONS",
                 int,
                 DEFAULT_OVERRIDE_LIMIT,
                 param_types={"referrer": str},
             ),
+            Configuration(
+                "organization_referrer_scan_limit_override",
+                f"Specific (organization_id, referrer) scan limit in the last {self.WINDOW_SECONDS / 60} mins",
+                int,
+                DEFAULT_OVERRIDE_LIMIT,
+                param_types={"organization_id": int, "referrer": str},
+            ),
+            Configuration(
+                "organization_scan_limit_override",
+                f"Scan limit for a specific organization_id across any referrer in the last {self.WINDOW_SECONDS / 60} mins",
+                int,
+                DEFAULT_OVERRIDE_LIMIT,
+                param_types={"organization_id": int},
+            ),
             Configuration(
                 "project_referrer_scan_limit",
-                f"DEFAULT: how many bytes can a project scan per referrer in the last {self.WINDOW_SECONDS/ 60} mins before queries start getting rejected",
+                f"DEFAULT: how many bytes can a project scan per referrer in the last {self.WINDOW_SECONDS / 60} mins before queries start getting rejected",
                 int,
                 DEFAULT_BYTES_SCANNED_LIMIT,
             ),
             Configuration(
                 "organization_referrer_scan_limit",
-                f"DEFAULT: how many bytes can an organization scan per referrer in the last {self.WINDOW_SECONDS/ 60} mins before queries start getting rejected. Cross-project queries are limited by organization_id",
+                f"DEFAULT: how many bytes can an organization scan per referrer in the last {self.WINDOW_SECONDS / 60} mins before queries start getting rejected. Cross-project queries are limited by organization_id",
                 int,
                 DEFAULT_BYTES_SCANNED_LIMIT * 2,
             ),
@@ -125,9 +138,7 @@ def _additional_config_definitions(self) -> list[Configuration]:
             ),
         ]
 
-    def _are_tenant_ids_valid(
-        self, tenant_ids: dict[str, str | int]
-    ) -> tuple[bool, str]:
+    def _are_tenant_ids_valid(self, tenant_ids: dict[str, str | int]) -> tuple[bool, str]:
         if self.is_cross_org_query(tenant_ids):
             return True, "cross org query"
         if tenant_ids.get("referrer") is None:
@@ -162,12 +173,24 @@ def __get_scan_limit(
                 return int(self.get_config_value("project_referrer_scan_limit"))
             return int(override)
         elif customer_tenant_key == "organization_id":
-            override = self.get_config_value(
+            org_referrer_override = self.get_config_value(
+                "organization_referrer_scan_limit_override",
+                {"organization_id": customer_tenant_value, "referrer": referrer},
+            )
+            if org_referrer_override != DEFAULT_OVERRIDE_LIMIT:
+                return int(org_referrer_override)
+            org_override = self.get_config_value(
+                "organization_scan_limit_override",
+                {"organization_id": customer_tenant_value},
+            )
+            if org_override != DEFAULT_OVERRIDE_LIMIT:
+                return int(org_override)
+            all_orgs_referrer_override = self.get_config_value(
                 "referrer_all_organizations_scan_limit_override", {"referrer": referrer}
             )
-            if override == DEFAULT_OVERRIDE_LIMIT:
-                return int(self.get_config_value("organization_referrer_scan_limit"))
-            return int(override)
+            if all_orgs_referrer_override != DEFAULT_OVERRIDE_LIMIT:
+                return int(all_orgs_referrer_override)
+            return int(self.get_config_value("organization_referrer_scan_limit"))
         raise InvalidTenantsForAllocationPolicy.from_args(
             {customer_tenant_key: customer_tenant_value, "referrer": referrer},
             self.__class__.__name__,
@@ -212,9 +235,7 @@ def _get_quota_allowance(
                 suggestion=PASS_THROUGH_REFERRERS_SUGGESTION,
             )
 
-        scan_limit = self.__get_scan_limit(
-            customer_tenant_key, customer_tenant_value, referrer
-        )
+        scan_limit = self.__get_scan_limit(customer_tenant_key, customer_tenant_value, referrer)
         throttle_threshold = max(
             1, int(scan_limit // self.get_config_value("bytes_throttle_divider"))
         )
@@ -244,8 +265,7 @@ def _get_quota_allowance(
         if granted_quota.granted <= 0:
             if self.get_config_value("limit_bytes_instead_of_rejecting"):
                 max_bytes_to_read = int(
-                    scan_limit
-                    / self.get_config_value("max_bytes_to_read_scan_limit_divider")
+                    scan_limit / self.get_config_value("max_bytes_to_read_scan_limit_divider")
                 )
                 explanation[
                     "reason"
@@ -259,16 +279,13 @@ def _get_quota_allowance(
 
                 self.metrics.increment(
                     "bytes_scanned_limited",
-                    tags={
-                        "tenant": f"{customer_tenant_key}__{customer_tenant_value}__{referrer}"
-                    },
+                    tags={"tenant": f"{customer_tenant_key}__{customer_tenant_value}__{referrer}"},
                 )
                 return QuotaAllowance(
                     can_run=True,
                     max_threads=max(
                         1,
-                        self.max_threads
-                        // self.get_config_value("threads_throttle_divider"),
+                        self.max_threads // self.get_config_value("threads_throttle_divider"),
                     ),
                     max_bytes_to_read=max_bytes_to_read,
                     explanation=explanation,
@@ -293,9 +310,7 @@ def _get_quota_allowance(
 
                 self.metrics.increment(
                     "bytes_scanned_rejection",
-                    tags={
-                        "tenant": f"{customer_tenant_key}__{customer_tenant_value}__{referrer}"
-                    },
+                    tags={"tenant": f"{customer_tenant_key}__{customer_tenant_value}__{referrer}"},
                 )
                 return QuotaAllowance(
                     can_run=False,
@@ -319,8 +334,7 @@ def _get_quota_allowance(
                 can_run=True,
                 max_threads=max(
                     1,
-                    self.max_threads
-                    // self.get_config_value("threads_throttle_divider"),
+                    self.max_threads // self.get_config_value("threads_throttle_divider"),
                 ),
                 explanation={"reason": "within_limit but throttled"},
                 is_throttled=True,
@@ -349,17 +363,17 @@ def _get_bytes_scanned_in_query(
         if result_or_error.error:
             if (
                 isinstance(result_or_error.error.__cause__, ClickhouseError)
-                and result_or_error.error.__cause__.code
-                == errors.ErrorCodes.TIMEOUT_EXCEEDED
+                and result_or_error.error.__cause__.code == errors.ErrorCodes.TIMEOUT_EXCEEDED
             ):
-                return int(
-                    self.get_config_value(
-                        "clickhouse_timeout_bytes_scanned_penalization"
-                    )
-                )
+                return int(self.get_config_value("clickhouse_timeout_bytes_scanned_penalization"))
             else:
                 return 0
-        progress_bytes_scanned = cast(int, result_or_error.query_result.result.get("profile", {}).get("progress_bytes", None))  # type: ignore
+        progress_bytes_scanned = cast(
+            int,
+            result_or_error.query_result.result.get("profile", {}).get(  # type: ignore[union-attr]
+                "progress_bytes", None
+            ),
+        )
         if isinstance(progress_bytes_scanned, (int, float)):
             self.metrics.increment(
                 "progress_bytes_scanned",
@@ -388,9 +402,7 @@ def _update_quota_balance(
             customer_tenant_key,
             customer_tenant_value,
         ) = self._get_customer_tenant_key_and_value(tenant_ids)
-        scan_limit = self.__get_scan_limit(
-            customer_tenant_key, customer_tenant_value, referrer
-        )
+        scan_limit = self.__get_scan_limit(customer_tenant_key, customer_tenant_value, referrer)
         # we can assume that the requested quota was granted (because it was)
         # we just need to update the quota with however many bytes were consumed
         _RATE_LIMITER.use_quotas(
diff --git a/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py b/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py
index 3f10ee1d252..fca8a084bf0 100644
--- a/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py
+++ b/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py
@@ -4,6 +4,7 @@
 from clickhouse_driver import errors
 
 from snuba.clickhouse.errors import ClickhouseError
+from snuba.configs.configuration import ResourceIdentifier
 from snuba.datasets.storages.storage_key import StorageKey
 from snuba.query.allocation_policies import AllocationPolicy, QueryResultOrError
 from snuba.query.allocation_policies.bytes_scanned_rejecting_policy import (
@@ -22,7 +23,7 @@
 @pytest.fixture(scope="function")
 def policy() -> AllocationPolicy:
     policy = BytesScannedRejectingPolicy(
-        storage_key=StorageKey("errors"),
+        storage_key=ResourceIdentifier(StorageKey("errors")),
         required_tenant_types=["referrer", "organization_id", "project_id"],
         default_config_overrides={},
     )
@@ -34,9 +35,7 @@ def _configure_policy(policy: AllocationPolicy) -> None:
     policy.set_config_value("is_enforced", 1)
     policy.set_config_value("max_threads", MAX_THREAD_NUMBER)
     policy.set_config_value("project_referrer_scan_limit", PROJECT_REFERRER_SCAN_LIMIT)
-    policy.set_config_value(
-        "organization_referrer_scan_limit", ORGANIZATION_REFERRER_SCAN_LIMIT
-    )
+    policy.set_config_value("organization_referrer_scan_limit", ORGANIZATION_REFERRER_SCAN_LIMIT)
 
 
 @pytest.mark.parametrize(
@@ -68,7 +67,7 @@ def _configure_policy(policy: AllocationPolicy) -> None:
 @pytest.mark.redis_db
 def test_consume_quota(
     policy: BytesScannedRejectingPolicy,
-    tenant_ids: dict,
+    tenant_ids: dict[str, str | int],
     bytes_to_scan: int,
     reason: str,
     limit: int,
@@ -97,7 +96,10 @@ def test_consume_quota(
     }.items() <= allowance.explanation.items()
 
     assert reason in str(allowance.explanation["reason"])
-    new_tenant_ids = {**tenant_ids, "referrer": tenant_ids["referrer"] + "abcd"}
+    new_tenant_ids: dict[str, str | int] = {
+        **tenant_ids,
+        "referrer": str(tenant_ids["referrer"]) + "abcd",
+    }
 
     # a different referrer should work fine though
     allowance = policy.get_quota_allowance(
@@ -122,11 +124,7 @@ def test_cross_org_query(policy: BytesScannedRejectingPolicy) -> None:
         QUERY_ID,
         QueryResultOrError(
             query_result=QueryResult(
-                result={
-                    "profile": {
-                        "progress_bytes": ORGANIZATION_REFERRER_SCAN_LIMIT * 100
-                    }
-                },
+                result={"profile": {"progress_bytes": ORGANIZATION_REFERRER_SCAN_LIMIT * 100}},
                 extra={"stats": {}, "sql": "", "experiments": {}},
             ),
             error=None,
@@ -181,6 +179,30 @@ def test_invalid_tenants(
             ),
             id="use overridden scan limit",
         ),
+        pytest.param(
+            {
+                "organization_id": 123,
+                "referrer": "some_referrer",
+            },
+            (
+                "organization_referrer_scan_limit_override",
+                50,
+                {"organization_id": 123, "referrer": "some_referrer"},
+            ),
+            id="per (org, referrer) override",
+        ),
+        pytest.param(
+            {
+                "organization_id": 123,
+                "referrer": "some_referrer",
+            },
+            (
+                "organization_scan_limit_override",
+                75,
+                {"organization_id": 123},
+            ),
+            id="per-org override",
+        ),
     ],
 )
 @pytest.mark.redis_db
@@ -210,6 +232,58 @@ def test_overrides(
     assert allowance.explanation["limit"] == limit
 
 
+@pytest.mark.redis_db
+def test_org_override_precedence(policy: BytesScannedRejectingPolicy) -> None:
+    """(org_id, referrer) > org_id > (all orgs, referrer) > default."""
+    _configure_policy(policy)
+    tenant_ids: dict[str, str | int] = {
+        "organization_id": 123,
+        "referrer": "some_referrer",
+    }
+    policy.set_config_value(
+        "referrer_all_organizations_scan_limit_override",
+        1000,
+        {"referrer": "some_referrer"},
+    )
+    policy.set_config_value(
+        "organization_scan_limit_override",
+        500,
+        {"organization_id": 123},
+    )
+    policy.set_config_value(
+        "organization_referrer_scan_limit_override",
+        100,
+        {"organization_id": 123, "referrer": "some_referrer"},
+    )
+
+    allowance = policy.get_quota_allowance(tenant_ids, QUERY_ID)
+    assert allowance.can_run
+    assert allowance.rejection_threshold == 100
+
+    policy.update_quota_balance(
+        tenant_ids,
+        QUERY_ID,
+        QueryResultOrError(
+            query_result=QueryResult(
+                result={"profile": {"progress_bytes": 100}},
+                extra={"stats": {}, "sql": "", "experiments": {}},
+            ),
+            error=None,
+        ),
+    )
+    allowance = policy.get_quota_allowance(tenant_ids, QUERY_ID)
+    assert not allowance.can_run
+    assert allowance.explanation["limit"] == 100
+
+    # different org still uses the all-orgs referrer override
+    other_org_tenant: dict[str, str | int] = {
+        "organization_id": 456,
+        "referrer": "some_referrer",
+    }
+    allowance = policy.get_quota_allowance(other_org_tenant, QUERY_ID)
+    assert allowance.rejection_threshold == 1000
+
+
 @pytest.mark.redis_db
 def test_penalize_timeout(policy: BytesScannedRejectingPolicy) -> None:
     _configure_policy(policy)
@@ -226,21 +300,15 @@ def test_penalize_timeout(policy: BytesScannedRejectingPolicy) -> None:
 
     assert allowance.can_run
     # regular query exception is thrown, should not affect quota
-    policy.update_quota_balance(
-        tenant_ids, QUERY_ID, QueryResultOrError(None, QueryException())
-    )
+    policy.update_quota_balance(tenant_ids, QUERY_ID, QueryResultOrError(None, QueryException()))
     allowance = policy.get_quota_allowance(tenant_ids, QUERY_ID)
     assert allowance.can_run
 
     # timneout exception is thrown, the penalization is greater than the quota, therefore
     # next query should be rejected
     timeout_exception = QueryException()
-    timeout_exception.__cause__ = ClickhouseError(
-        code=errors.ErrorCodes.TIMEOUT_EXCEEDED
-    )
-    policy.update_quota_balance(
-        tenant_ids, QUERY_ID, QueryResultOrError(None, timeout_exception)
-    )
+    timeout_exception.__cause__ = ClickhouseError(code=errors.ErrorCodes.TIMEOUT_EXCEEDED)
+    policy.update_quota_balance(tenant_ids, QUERY_ID, QueryResultOrError(None, timeout_exception))
 
     allowance = policy.get_quota_allowance(tenant_ids, QUERY_ID)
     assert not allowance.can_run and allowance.max_threads == 0
@@ -250,7 +318,7 @@ def test_penalize_timeout(policy: BytesScannedRejectingPolicy) -> None:
 def test_does_not_throttle_and_then_throttles(
     policy: BytesScannedRejectingPolicy,
 ) -> None:
-    tenant_ids = {
+    tenant_ids: dict[str, str | int] = {
         "project_id": 4505240668733440,
         "referrer": "api.trace-explorer.stats",
     }
@@ -276,7 +344,7 @@ def test_does_not_throttle_and_then_throttles(
     )
 
     allowance = policy.get_quota_allowance(tenant_ids, QUERY_ID)
-    assert allowance.is_throttled == False
+    assert not allowance.is_throttled
     assert allowance.max_threads == MAX_THREAD_NUMBER
 
     policy.update_quota_balance(
@@ -291,7 +359,7 @@ def test_does_not_throttle_and_then_throttles(
         ),
     )
     allowance = policy.get_quota_allowance(tenant_ids, QUERY_ID)
-    assert allowance.is_throttled == True
+    assert allowance.is_throttled
     assert allowance.max_threads == MAX_THREAD_NUMBER // 2
 
 
@@ -299,7 +367,7 @@ def test_does_not_throttle_and_then_throttles(
 def test_limit_bytes_read(
     policy: BytesScannedRejectingPolicy,
 ) -> None:
-    tenant_ids = {
+    tenant_ids: dict[str, str | int] = {
         "project_id": 4505240668733440,
         "referrer": "api.trace-explorer.stats",
     }
@@ -335,6 +403,4 @@ def test_limit_bytes_read(
     allowance = policy.get_quota_allowance(tenant_ids, QUERY_ID)
     assert allowance.is_throttled
     assert allowance.max_threads == MAX_THREAD_NUMBER // threads_throttle_divider
-    assert allowance.max_bytes_to_read == int(
-        scan_limit / max_bytes_to_read_scan_limit_divider
-    )
+    assert allowance.max_bytes_to_read == int(scan_limit / max_bytes_to_read_scan_limit_divider)

From 7ef05bd546f1e410581c7c767cfa99f91d31c916 Mon Sep 17 00:00:00 2001
From: Pierre Massat <pierre.massat@sentry.io>
Date: Thu, 28 May 2026 07:22:46 -0700
Subject: [PATCH 2/4] feat(allocation-policy): Add per-org max_bytes_to_read
 cap

Add two new Configuration entries on BytesScannedRejectingPolicy that
forward a hard max_bytes_to_read value to ClickHouse and bypass the
sliding-window scan limit for the configured organization:

- organization_referrer_max_bytes_to_read, keyed by
  (organization_id, referrer)
- organization_max_bytes_to_read, keyed by organization_id

When either is set the query runs at full threads with the configured
cap and the sliding window is not consulted; ClickHouse aborts the
query if it would scan more than the cap. (org_id, referrer) is more
specific and wins over org_id.

This complements the existing global limit_bytes_instead_of_rejecting
flow, which only caps queries after a tenant exceeds its scan limit.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Agent transcript: https://claudescope.sentry.dev/share/K7ElxY0inzZzY1icw8PnMWaO4jjRKEoV3AthvEUGg7E
---
 .../bytes_scanned_rejecting_policy.py         | 56 ++++++++++++
 .../test_bytes_scanned_rejecting_policy.py    | 86 +++++++++++++++++++
 2 files changed, 142 insertions(+)

diff --git a/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py b/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py
index 708539450af..4f5a3ee36bf 100644
--- a/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py
+++ b/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py
@@ -94,6 +94,20 @@ def _additional_config_definitions(self) -> list[Configuration]:
                 DEFAULT_OVERRIDE_LIMIT,
                 param_types={"organization_id": int},
             ),
+            Configuration(
+                "organization_referrer_max_bytes_to_read",
+                "Per-(organization_id, referrer) hard cap forwarded to clickhouse as max_bytes_to_read. Queries that match are allowed to run with this cap and bypass the sliding-window scan limit",
+                int,
+                DEFAULT_OVERRIDE_LIMIT,
+                param_types={"organization_id": int, "referrer": str},
+            ),
+            Configuration(
+                "organization_max_bytes_to_read",
+                "Per-organization_id hard cap forwarded to clickhouse as max_bytes_to_read across any referrer. Queries that match are allowed to run with this cap and bypass the sliding-window scan limit",
+                int,
+                DEFAULT_OVERRIDE_LIMIT,
+                param_types={"organization_id": int},
+            ),
             Configuration(
                 "project_referrer_scan_limit",
                 f"DEFAULT: how many bytes can a project scan per referrer in the last {self.WINDOW_SECONDS / 60} mins before queries start getting rejected",
@@ -197,6 +211,31 @@ def __get_scan_limit(
             "customer tenant key is neither project_id or organization_id, this should never happen",
         )
 
+    def __get_organization_max_bytes_to_read(
+        self, tenant_ids: dict[str, str | int], referrer: str | int
+    ) -> int | None:
+        """Return a per-org max_bytes_to_read cap if one is configured.
+
+        Precedence: (organization_id, referrer) > organization_id.
+        Returns None when no cap applies.
+        """
+        org_id = tenant_ids.get("organization_id")
+        if org_id is None:
+            return None
+        org_referrer_cap = self.get_config_value(
+            "organization_referrer_max_bytes_to_read",
+            {"organization_id": org_id, "referrer": referrer},
+        )
+        if org_referrer_cap != DEFAULT_OVERRIDE_LIMIT:
+            return int(org_referrer_cap)
+        org_cap = self.get_config_value(
+            "organization_max_bytes_to_read",
+            {"organization_id": org_id},
+        )
+        if org_cap != DEFAULT_OVERRIDE_LIMIT:
+            return int(org_cap)
+        return None
+
     def _get_quota_allowance(
         self, tenant_ids: dict[str, str | int], query_id: str
     ) -> QuotaAllowance:
@@ -235,6 +274,23 @@ def _get_quota_allowance(
                 suggestion=PASS_THROUGH_REFERRERS_SUGGESTION,
             )
 
+        org_cap = self.__get_organization_max_bytes_to_read(tenant_ids, referrer)
+        if org_cap is not None:
+            return QuotaAllowance(
+                can_run=True,
+                max_threads=self.max_threads,
+                max_bytes_to_read=org_cap,
+                explanation={
+                    "reason": f"organization_id {tenant_ids.get('organization_id')} runs with a per-org max_bytes_to_read cap of {org_cap}"
+                },
+                is_throttled=False,
+                throttle_threshold=MAX_THRESHOLD,
+                rejection_threshold=MAX_THRESHOLD,
+                quota_used=0,
+                quota_unit=QUOTA_UNIT,
+                suggestion=NO_SUGGESTION,
+            )
+
         scan_limit = self.__get_scan_limit(customer_tenant_key, customer_tenant_value, referrer)
         throttle_threshold = max(
             1, int(scan_limit // self.get_config_value("bytes_throttle_divider"))
diff --git a/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py b/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py
index fca8a084bf0..8af8c47a18f 100644
--- a/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py
+++ b/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py
@@ -284,6 +284,92 @@ def test_org_override_precedence(policy: BytesScannedRejectingPolicy) -> None:
     assert allowance.rejection_threshold == 1000
 
 
+@pytest.mark.redis_db
+def test_org_max_bytes_to_read_cap(policy: BytesScannedRejectingPolicy) -> None:
+    """Per-org cap sets max_bytes_to_read and bypasses the sliding-window check."""
+    _configure_policy(policy)
+    tenant_ids: dict[str, str | int] = {
+        "organization_id": 123,
+        "referrer": "some_referrer",
+    }
+    policy.set_config_value(
+        "organization_max_bytes_to_read",
+        500,
+        {"organization_id": 123},
+    )
+
+    # Exhaust the quota first; the cap should still let the query through.
+    policy.update_quota_balance(
+        tenant_ids,
+        QUERY_ID,
+        QueryResultOrError(
+            query_result=QueryResult(
+                result={"profile": {"progress_bytes": ORGANIZATION_REFERRER_SCAN_LIMIT * 10}},
+                extra={"stats": {}, "sql": "", "experiments": {}},
+            ),
+            error=None,
+        ),
+    )
+
+    allowance = policy.get_quota_allowance(tenant_ids, QUERY_ID)
+    assert allowance.can_run
+    assert allowance.max_bytes_to_read == 500
+    assert allowance.max_threads == MAX_THREAD_NUMBER
+    assert not allowance.is_throttled
+
+    # An org without a cap configured is still subject to the sliding-window limit.
+    other_org_tenant: dict[str, str | int] = {
+        "organization_id": 456,
+        "referrer": "some_referrer",
+    }
+    policy.update_quota_balance(
+        other_org_tenant,
+        QUERY_ID,
+        QueryResultOrError(
+            query_result=QueryResult(
+                result={"profile": {"progress_bytes": ORGANIZATION_REFERRER_SCAN_LIMIT}},
+                extra={"stats": {}, "sql": "", "experiments": {}},
+            ),
+            error=None,
+        ),
+    )
+    allowance = policy.get_quota_allowance(other_org_tenant, QUERY_ID)
+    assert not allowance.can_run
+
+
+@pytest.mark.redis_db
+def test_org_referrer_cap_beats_org_cap(policy: BytesScannedRejectingPolicy) -> None:
+    """(org_id, referrer) cap is more specific than the org_id cap and wins."""
+    _configure_policy(policy)
+    tenant_ids: dict[str, str | int] = {
+        "organization_id": 123,
+        "referrer": "some_referrer",
+    }
+    policy.set_config_value(
+        "organization_max_bytes_to_read",
+        1000,
+        {"organization_id": 123},
+    )
+    policy.set_config_value(
+        "organization_referrer_max_bytes_to_read",
+        200,
+        {"organization_id": 123, "referrer": "some_referrer"},
+    )
+
+    allowance = policy.get_quota_allowance(tenant_ids, QUERY_ID)
+    assert allowance.can_run
+    assert allowance.max_bytes_to_read == 200
+
+    # A different referrer falls back to the org-wide cap.
+    other_referrer: dict[str, str | int] = {
+        "organization_id": 123,
+        "referrer": "other_referrer",
+    }
+    allowance = policy.get_quota_allowance(other_referrer, QUERY_ID)
+    assert allowance.can_run
+    assert allowance.max_bytes_to_read == 1000
+
+
 @pytest.mark.redis_db
 def test_penalize_timeout(policy: BytesScannedRejectingPolicy) -> None:
     _configure_policy(policy)

From 549c2198eafc0e80acf7e3d9eecc1721f24e7a9e Mon Sep 17 00:00:00 2001
From: Pierre Massat <pierre.massat@sentry.io>
Date: Mon, 1 Jun 2026 16:41:38 -0700
Subject: [PATCH 3/4] fix(allocation-policy): gate per-org max_bytes_to_read on
 org-keyed queries

Sentry queries usually carry both organization_id and project_id, and
the policy resolves those to the project_id branch. The new org cap was
checked before that resolution, so any project query with an
organization_id in tenant_ids would silently pick up the org cap and
bypass the project-level sliding-window limit.

Move the cap check after _get_customer_tenant_key_and_value() and gate
it on customer_tenant_key == "organization_id". Adds a regression test
covering the (org_id + project_id) shape.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Agent transcript: https://claudescope.sentry.dev/share/ZggQ4p_AlWKzjkr6xJZ5kCjchDzv_CTIpf-S8WXChVs
---
 .../bytes_scanned_rejecting_policy.py         | 38 ++++++++--------
 .../test_bytes_scanned_rejecting_policy.py    | 44 +++++++++++++++++++
 2 files changed, 62 insertions(+), 20 deletions(-)

diff --git a/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py b/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py
index 4f5a3ee36bf..b992321d670 100644
--- a/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py
+++ b/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py
@@ -212,16 +212,13 @@ def __get_scan_limit(
         )
 
     def __get_organization_max_bytes_to_read(
-        self, tenant_ids: dict[str, str | int], referrer: str | int
+        self, org_id: str | int, referrer: str | int
     ) -> int | None:
         """Return a per-org max_bytes_to_read cap if one is configured.
 
         Precedence: (organization_id, referrer) > organization_id.
         Returns None when no cap applies.
         """
-        org_id = tenant_ids.get("organization_id")
-        if org_id is None:
-            return None
         org_referrer_cap = self.get_config_value(
             "organization_referrer_max_bytes_to_read",
             {"organization_id": org_id, "referrer": referrer},
@@ -274,22 +271,23 @@ def _get_quota_allowance(
                 suggestion=PASS_THROUGH_REFERRERS_SUGGESTION,
             )
 
-        org_cap = self.__get_organization_max_bytes_to_read(tenant_ids, referrer)
-        if org_cap is not None:
-            return QuotaAllowance(
-                can_run=True,
-                max_threads=self.max_threads,
-                max_bytes_to_read=org_cap,
-                explanation={
-                    "reason": f"organization_id {tenant_ids.get('organization_id')} runs with a per-org max_bytes_to_read cap of {org_cap}"
-                },
-                is_throttled=False,
-                throttle_threshold=MAX_THRESHOLD,
-                rejection_threshold=MAX_THRESHOLD,
-                quota_used=0,
-                quota_unit=QUOTA_UNIT,
-                suggestion=NO_SUGGESTION,
-            )
+        if customer_tenant_key == "organization_id":
+            org_cap = self.__get_organization_max_bytes_to_read(customer_tenant_value, referrer)
+            if org_cap is not None:
+                return QuotaAllowance(
+                    can_run=True,
+                    max_threads=self.max_threads,
+                    max_bytes_to_read=org_cap,
+                    explanation={
+                        "reason": f"organization_id {customer_tenant_value} runs with a per-org max_bytes_to_read cap of {org_cap}"
+                    },
+                    is_throttled=False,
+                    throttle_threshold=MAX_THRESHOLD,
+                    rejection_threshold=MAX_THRESHOLD,
+                    quota_used=0,
+                    quota_unit=QUOTA_UNIT,
+                    suggestion=NO_SUGGESTION,
+                )
 
         scan_limit = self.__get_scan_limit(customer_tenant_key, customer_tenant_value, referrer)
         throttle_threshold = max(
diff --git a/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py b/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py
index 8af8c47a18f..435f5aee042 100644
--- a/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py
+++ b/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py
@@ -370,6 +370,50 @@ def test_org_referrer_cap_beats_org_cap(policy: BytesScannedRejectingPolicy) ->
     assert allowance.max_bytes_to_read == 1000
 
 
+@pytest.mark.redis_db
+def test_org_caps_do_not_apply_to_project_queries(
+    policy: BytesScannedRejectingPolicy,
+) -> None:
+    """An org-level cap must not bypass project-level sliding-window limits.
+
+    Sentry queries usually carry both organization_id and project_id; the
+    policy resolves those to the project_id branch. The org cap should only
+    fire on org-keyed queries.
+    """
+    _configure_policy(policy)
+    tenant_ids: dict[str, str | int] = {
+        "organization_id": 123,
+        "project_id": 12345,
+        "referrer": "some_referrer",
+    }
+    policy.set_config_value(
+        "organization_max_bytes_to_read",
+        500,
+        {"organization_id": 123},
+    )
+    policy.set_config_value(
+        "organization_referrer_max_bytes_to_read",
+        200,
+        {"organization_id": 123, "referrer": "some_referrer"},
+    )
+
+    # Exhaust the project quota; the org cap must not let the query through.
+    policy.update_quota_balance(
+        tenant_ids,
+        QUERY_ID,
+        QueryResultOrError(
+            query_result=QueryResult(
+                result={"profile": {"progress_bytes": PROJECT_REFERRER_SCAN_LIMIT}},
+                extra={"stats": {}, "sql": "", "experiments": {}},
+            ),
+            error=None,
+        ),
+    )
+    allowance = policy.get_quota_allowance(tenant_ids, QUERY_ID)
+    assert not allowance.can_run
+    assert allowance.max_bytes_to_read == 0
+
+
 @pytest.mark.redis_db
 def test_penalize_timeout(policy: BytesScannedRejectingPolicy) -> None:
     _configure_policy(policy)

From 44cc07709c9604117584aa59c410aba9cbb37395 Mon Sep 17 00:00:00 2001
From: Pierre Massat <pierre.massat@sentry.io>
Date: Tue, 2 Jun 2026 11:01:47 -0700
Subject: [PATCH 4/4] fix(allocation-policy): Bypass sliding window when org
 cap is set

`_get_quota_allowance` bypasses the sliding-window scan limit for
org-keyed queries that run under a per-org `max_bytes_to_read` cap, but
`_update_quota_balance` still recorded those queries' bytes_scanned into
the same window. If the cap is later removed, the window has phantom
usage from the capped period and queries get rejected against quota
they never consumed.

Mirror the bypass in `_update_quota_balance` and add a regression test.

Co-Authored-By: Claude <noreply@anthropic.com>

Agent transcript: https://claudescope.sentry.dev/share/tvrt5qSDhk3FhvibKVFT_XWjBOhbIdYwDfgHykOwGVw
---
 .../bytes_scanned_rejecting_policy.py         | 10 ++++
 .../test_bytes_scanned_rejecting_policy.py    | 49 +++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py b/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py
index b992321d670..5d099a1b720 100644
--- a/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py
+++ b/snuba/query/allocation_policies/bytes_scanned_rejecting_policy.py
@@ -456,6 +456,16 @@ def _update_quota_balance(
             customer_tenant_key,
             customer_tenant_value,
         ) = self._get_customer_tenant_key_and_value(tenant_ids)
+        # Mirror the bypass in _get_quota_allowance: org-keyed queries running under a
+        # max_bytes_to_read cap skip the sliding window entirely. Recording usage here
+        # would silently fill the window, so removing the cap later would reject queries
+        # against quota they never actually consumed.
+        if (
+            customer_tenant_key == "organization_id"
+            and self.__get_organization_max_bytes_to_read(customer_tenant_value, referrer)
+            is not None
+        ):
+            return
         scan_limit = self.__get_scan_limit(customer_tenant_key, customer_tenant_value, referrer)
         # we can assume that the requested quota was granted (because it was)
         # we just need to update the quota with however many bytes were consumed
diff --git a/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py b/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py
index 435f5aee042..cc6b16eea20 100644
--- a/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py
+++ b/tests/query/allocation_policies/test_bytes_scanned_rejecting_policy.py
@@ -337,6 +337,55 @@ def test_org_max_bytes_to_read_cap(policy: BytesScannedRejectingPolicy) -> None:
     assert not allowance.can_run
 
 
+@pytest.mark.redis_db
+def test_org_cap_does_not_record_into_sliding_window(
+    policy: BytesScannedRejectingPolicy,
+) -> None:
+    """Capped org queries must not accumulate usage in the sliding window.
+
+    `_get_quota_allowance` bypasses the window when an org cap is set;
+    `_update_quota_balance` must match, otherwise removing the cap later
+    would reject queries against a window that silently filled up while
+    the cap was active.
+    """
+    _configure_policy(policy)
+    tenant_ids: dict[str, str | int] = {
+        "organization_id": 123,
+        "referrer": "some_referrer",
+    }
+    policy.set_config_value(
+        "organization_max_bytes_to_read",
+        500,
+        {"organization_id": 123},
+    )
+
+    # While the cap is set, lots of bytes scanned must not be recorded.
+    for _ in range(5):
+        policy.update_quota_balance(
+            tenant_ids,
+            QUERY_ID,
+            QueryResultOrError(
+                query_result=QueryResult(
+                    result={"profile": {"progress_bytes": ORGANIZATION_REFERRER_SCAN_LIMIT}},
+                    extra={"stats": {}, "sql": "", "experiments": {}},
+                ),
+                error=None,
+            ),
+        )
+
+    # Remove the cap; the sliding window should be empty, so the next query
+    # is allowed under the normal org-referrer limit.
+    policy.set_config_value(
+        "organization_max_bytes_to_read",
+        -1,
+        {"organization_id": 123},
+    )
+    allowance = policy.get_quota_allowance(tenant_ids, QUERY_ID)
+    assert allowance.can_run
+    assert allowance.max_threads == MAX_THREAD_NUMBER
+    assert not allowance.is_throttled
+
+
 @pytest.mark.redis_db
 def test_org_referrer_cap_beats_org_cap(policy: BytesScannedRejectingPolicy) -> None:
     """(org_id, referrer) cap is more specific than the org_id cap and wins."""