Skip to content
Merged
1 change: 0 additions & 1 deletion .github/codeowners-coverage-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2144,7 +2144,6 @@ tests/sentry/models/test_commitfilechange.py
tests/sentry/models/test_dashboard.py
tests/sentry/models/test_debugfile.py
tests/sentry/models/test_deploy.py
tests/sentry/models/test_dynamicsampling.py
tests/sentry/models/test_environment.py
tests/sentry/models/test_eventattachment.py
tests/sentry/models/test_eventerror.py
Expand Down
2 changes: 0 additions & 2 deletions src/sentry/dynamic_sampling/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
RuleType,
get_enabled_user_biases,
get_redis_client_for_ds,
get_rule_hash,
get_supported_biases_ids,
get_user_biases,
)
Expand All @@ -25,7 +24,6 @@
"get_user_biases",
"get_enabled_user_biases",
"get_redis_client_for_ds",
"get_rule_hash",
"record_latest_release",
"RuleType",
"ExtendedBoostedRelease",
Expand Down
21 changes: 2 additions & 19 deletions src/sentry/dynamic_sampling/rules/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
from enum import Enum
from typing import Literal, NotRequired, TypedDict, Union

import orjson
Comment thread
cursor[bot] marked this conversation as resolved.
from django.conf import settings
from redis import StrictRedis

from sentry.models.dynamicsampling import CUSTOM_RULE_START
from sentry.relay.types import RuleCondition
from sentry.utils import redis

CUSTOM_RULE_START = 3000

BOOSTED_RELEASES_LIMIT = 10

LATEST_RELEASES_BOOST_FACTOR = 1.5
Expand Down Expand Up @@ -117,23 +117,6 @@ class DecayingRule(Rule):
PolymorphicRule = Union[Rule, DecayingRule]


def get_rule_hash(rule: PolymorphicRule) -> int:
    """
    Return an integer hash identifying ``rule`` by its id, type and condition.

    Only the ``id``, ``type`` and ``condition`` entries of the rule are taken
    into account; other fields (for example the sample rate) are deliberately
    left out so that they do not influence the hash.

    NOTE(review): the value is the built-in hash of a ``str``. Python salts
    string hashes per process unless PYTHONHASHSEED is fixed, so the result
    should only be compared within a single process - confirm that no caller
    persists or shares it.
    """
    # We want to be explicit in what we use for computing the hash. In addition, we need to remove
    # certain fields like the sampleRate.
    identity = {
        "id": rule["id"],
        "type": rule["type"],
        "condition": rule["condition"],
    }
    # Sorted keys make the serialized form independent of dict insertion order.
    serialized = orjson.dumps(identity, option=orjson.OPT_SORT_KEYS).decode()
    return hash(serialized)


def get_user_biases(user_set_biases: list[ActivatableBias] | None) -> list[ActivatableBias]:
if user_set_biases is None:
return DEFAULT_BIASES
Expand Down
296 changes: 2 additions & 294 deletions src/sentry/models/dynamicsampling.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,12 @@
from __future__ import annotations

import hashlib
from collections.abc import Mapping, Sequence
from datetime import datetime, timedelta
from typing import TYPE_CHECKING, Any

from django.db import models, router, transaction
from django.db.models import F, IntegerField, Max, Q, Subquery, Value
from django.db.models.functions import Coalesce
from django.db import models
from django.db.models import Q
from django.utils import timezone

from sentry.backup.scopes import RelocationScope
from sentry.constants import ObjectStatus
from sentry.db.models import FlexibleForeignKey, Model, cell_silo_model
from sentry.db.models.fields.hybrid_cloud_foreign_key import HybridCloudForeignKey
from sentry.utils import json, metrics

if TYPE_CHECKING:
from sentry.models.organization import Organization
from sentry.models.project import Project

# max number of custom rules that can be created per organization
# (a newly created rule whose assigned rule id exceeds this value is deleted again)
MAX_CUSTOM_RULES = 2000
# offset added to a rule's internal rule_id to obtain the id exposed externally
CUSTOM_RULE_START = 3000
# max number of active custom rules that may apply to a single project
MAX_CUSTOM_RULES_PER_PROJECT = 50
# datetime format with microsecond precision and a literal trailing "Z"
# NOTE(review): presumably used to (de)serialize rule dates - usage is not visible here
CUSTOM_RULE_DATE_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"


class TooManyRules(ValueError):
    """
    Raised when the maximum number of active rules has already been reached
    for an organization.
    """


def get_rule_hash(condition: Any, project_ids: Sequence[int]) -> str:
    """
    Compute the hash of a rule from its condition and the projects it targets.

    Both inputs are first turned into order independent strings, joined with
    a dash, and the SHA-1 hex digest of the UTF-8 encoded result is returned.
    """
    condition_part = to_order_independent_string(condition)
    project_part = to_order_independent_string(list(project_ids))
    combined = f"{condition_part}-{project_part}"
    # the digest has a fixed length, which makes the stored value a bit shorter
    digest = hashlib.sha1(combined.encode("utf-8"))
    return digest.hexdigest()


def to_order_independent_string(val: Any) -> str:
    """
    Convert a value into a string that does not depend on element order.

    * Mappings are rendered as ``key:value-`` pairs in sorted key order.
    * Lists and tuples are rendered as the sorted string forms of their
      items, each followed by ``-``.
    * Any other value is rendered with ``str()``.

    Nested containers are converted recursively. This function performs no
    hashing itself; it only produces the canonical string.

    Note: this ensures the same string is generated for ['x', 'y'] and ['y', 'x'].
    Also the same string is generated for {'x': 1, 'y': 2} and {'y': 2, 'x': 1}.
    """
    if isinstance(val, Mapping):
        return "".join(
            f"{key}:{to_order_independent_string(val[key])}-" for key in sorted(val.keys())
        )
    if isinstance(val, (list, tuple)):
        # Sort the converted items (not the raw ones) so mixed or nested types stay comparable.
        rendered_items = sorted(to_order_independent_string(item) for item in val)
        return "".join(f"{item}-" for item in rendered_items)
    return str(val)


@cell_silo_model
Expand Down Expand Up @@ -114,16 +59,6 @@ class CustomDynamicSamplingRule(Model):
created_by_id = HybridCloudForeignKey("sentry.User", on_delete="CASCADE", null=True, blank=True)
notification_sent = models.BooleanField(null=True, blank=True)

@property
def external_rule_id(self) -> int:
    """
    The rule id as seen by external users, i.e. Relay.

    The id slot allocated for custom rules starts at CUSTOM_RULE_START, so
    the internal rule_id is shifted by that offset.
    """
    offset = CUSTOM_RULE_START
    return self.rule_id + offset

class Meta:
app_label = "sentry"
db_table = "sentry_customdynamicsamplingrule"
Expand All @@ -137,230 +72,3 @@ class Meta:
fields=["condition_hash"], name="condition_hash_idx", condition=Q(is_active=True)
),
]

@staticmethod
def get_rule_for_org(
    condition: Any,
    organization_id: int,
    project_ids: Sequence[int],
) -> CustomDynamicSamplingRule | None:
    """
    Look up an active, not yet expired rule of the organization that matches
    the given condition and projects.

    Returns the rule if one exists, otherwise None.

    Note: at most one active rule is expected per condition and organization.
    That invariant is not checked here; the first match is simply returned.
    """
    matching = CustomDynamicSamplingRule.objects.filter(
        organization_id=organization_id,
        condition_hash=get_rule_hash(condition, project_ids),
        is_active=True,
        end_date__gt=timezone.now(),
    )[:1]

    if not matching:
        return None
    return matching[0]

@staticmethod
def update_or_create(
    condition: Any,
    start: datetime,
    end: datetime,
    project_ids: Sequence[int],
    organization_id: int,
    num_samples: int,
    sample_rate: float,
    query: str,
    created_by_id: int | None = None,
) -> CustomDynamicSamplingRule:
    """
    Extend the matching active rule, or create a new one if none exists.

    If an active rule already exists for the condition, organization and
    projects, its end date, number of samples and sample rate are each raised
    to the larger of the stored and the requested value, and that rule is
    returned. Otherwise a new rule is created, assigned a rule id and linked
    to the given projects.

    :param condition: rule condition; stored JSON-encoded on new rules
    :param start: start date of a newly created rule
    :param end: end date of the rule
    :param project_ids: ids of the targeted projects; empty means org level
    :param organization_id: id of the owning organization
    :param num_samples: number of samples requested
    :param sample_rate: sample rate requested
    :param query: query string stored on a newly created rule
    :param created_by_id: id of the creating user, if any
    :raises TooManyRules: if the per project limit is already reached, or the
        rule id assigned to the new rule exceeds MAX_CUSTOM_RULES
    """
    from sentry.models.organization import Organization
    from sentry.models.project import Project

    with transaction.atomic(router.db_for_write(CustomDynamicSamplingRule)):
        # check if rule already exists for this organization
        existing_rule = CustomDynamicSamplingRule.get_rule_for_org(
            condition, organization_id, project_ids
        )

        if existing_rule is not None:
            # we already have an active rule for this condition and this organization:
            # only widen its limits, the associated projects are left untouched
            existing_rule.end_date = max(end, existing_rule.end_date)
            existing_rule.num_samples = max(num_samples, existing_rule.num_samples)
            existing_rule.sample_rate = max(sample_rate, existing_rule.sample_rate)
            existing_rule.save()
            return existing_rule

        projects = list(Project.objects.get_many_from_cache(project_ids))
        organization = Organization.objects.get_from_cache(id=organization_id)

        if CustomDynamicSamplingRule.per_project_limit_reached(projects, organization):
            raise TooManyRules()

        # create a new rule
        rule_hash = get_rule_hash(condition, project_ids)
        is_org_level = len(project_ids) == 0
        condition_str = json.dumps(condition)
        rule = CustomDynamicSamplingRule.objects.create(
            organization_id=organization_id,
            condition=condition_str,
            sample_rate=sample_rate,
            start_date=start,
            end_date=end,
            num_samples=num_samples,
            condition_hash=rule_hash,
            is_active=True,
            is_org_level=is_org_level,
            query=query,
            notification_sent=False,
            created_by_id=created_by_id,
        )

        # NOTE(review): objects.create() already persists the row, so this save
        # looks redundant - kept to preserve existing behavior.
        rule.save()
        # now try to assign a rule id
        assigned_rule_id = rule.assign_rule_id()
        if assigned_rule_id > MAX_CUSTOM_RULES:
            # we have too many rules, delete this one
            rule.delete()
            raise TooManyRules()

        # set the projects if not org level (the list is empty for org level rules)
        for project in projects:
            CustomDynamicSamplingRuleProject.objects.create(
                custom_dynamic_sampling_rule=rule, project=project
            )
        return rule

def assign_rule_id(self) -> int:
    """
    Give this rule the smallest rule id that is still free among the active,
    unexpired rules of its organization, and return that id.

    Raises ValueError if the object has not been saved yet or already has a
    non-zero rule id.
    """
    if self.id is None:
        raise ValueError("Cannot assign rule id to unsaved object")
    if self.rule_id != 0:
        raise ValueError("Cannot assign rule id to object that already has a rule id")

    current_time = timezone.now()

    active_rules = CustomDynamicSamplingRule.objects.filter(
        organization_id=self.organization.id, end_date__gt=current_time, is_active=True
    )
    taken_ids = active_rules.values_list("rule_id", flat=True)

    # Find the smallest free id: take every "rule_id + 1" candidate, drop the ones
    # that are already taken, and keep the lowest remaining candidate.
    # A fresh rule starts out with rule_id 0, so the very first rule ends up with 1.
    candidates = active_rules.annotate(rule_id_plus_one=F("rule_id") + 1).exclude(
        rule_id_plus_one__in=taken_ids
    )
    first_gap = Subquery(candidates.order_by("rule_id_plus_one").values("rule_id_plus_one")[:1])

    # Used when the subquery yields no row: one past the highest id in use.
    highest_id = active_rules.aggregate(Max("rule_id"))["rule_id__max"] or 0
    past_highest = Value(highest_id + 1, output_field=IntegerField())

    # Write the new rule_id in the database, then reload it onto this instance.
    CustomDynamicSamplingRule.objects.filter(id=self.id).update(
        rule_id=Coalesce(first_gap, past_highest)
    )
    self.refresh_from_db()
    return self.rule_id

@staticmethod
def deactivate_old_rules() -> None:
    """
    Deactivates all expired rules (this is just an optimization to remove old rules from indexes).

    This should be called periodically to clean up old rules (it is not necessary to call it for
    correctness, just for performance).
    """
    CustomDynamicSamplingRule.objects.filter(
        # give it a minute grace period to make sure we don't deactivate rules that are still active
        end_date__lt=timezone.now() - timedelta(minutes=1),
        # rows that are already inactive need no update; skipping them leaves the same end state
        is_active=True,
    ).update(is_active=False)

@staticmethod
def get_project_rules(
    project: Project,
) -> Sequence[CustomDynamicSamplingRule]:
    """
    Return the currently active custom rules that apply to ``project``.

    Rules attached to the project are combined with the org level rules of
    the project's organization. Only rules that are active, have started
    before now and end after now are considered. At most
    MAX_CUSTOM_RULES_PER_PROJECT rules are returned; if more are found the
    overflow metric is incremented.
    """
    now = timezone.now()
    # fetch one more than the limit so an overflow can be detected
    fetch_limit = MAX_CUSTOM_RULES_PER_PROJECT + 1
    in_effect = {"is_active": True, "end_date__gt": now, "start_date__lt": now}

    # org rules (apply to all projects in the org)
    org_level = CustomDynamicSamplingRule.objects.filter(
        is_org_level=True,
        organization=project.organization,
        **in_effect,
    )[:fetch_limit]

    # rules attached to this project
    project_level = CustomDynamicSamplingRule.objects.filter(
        projects__in=[project],
        **in_effect,
    )[:fetch_limit]

    combined = list(project_level.union(org_level)[:fetch_limit])

    if len(combined) > MAX_CUSTOM_RULES_PER_PROJECT:
        metrics.incr("dynamic_sampling.custom_rules.overflow")

    return combined[:MAX_CUSTOM_RULES_PER_PROJECT]

@staticmethod
def deactivate_expired_rules() -> None:
    """
    Marks every active rule whose end date lies in the past as inactive
    """
    expired = CustomDynamicSamplingRule.objects.filter(
        is_active=True,
        end_date__lt=timezone.now(),
    )
    expired.update(is_active=False)

@staticmethod
def num_active_rules_for_project(project: Project) -> int:
    """
    Count the rules currently in effect for the given project.

    The result is the number of active org level rules of the project's
    organization plus the number of active non org level rules attached to
    the project. A rule counts when it has started (start date not after
    now) and has not ended yet.
    """
    now = timezone.now()
    in_effect = {"is_active": True, "end_date__gt": now, "start_date__lte": now}

    org_rule_count = CustomDynamicSamplingRule.objects.filter(
        is_org_level=True,
        organization=project.organization,
        **in_effect,
    ).count()

    project_rule_count = CustomDynamicSamplingRule.objects.filter(
        is_org_level=False,
        projects__in=[project],
        **in_effect,
    ).count()

    return project_rule_count + org_rule_count

@staticmethod
def per_project_limit_reached(projects: Sequence[Project], organization: Organization) -> bool:
    """
    Tell whether any affected project already has the maximum number of rules.

    The affected projects are the given ones, or every active project of the
    organization when no projects are given (org level rule). Returns True as
    soon as one of them has at least MAX_CUSTOM_RULES_PER_PROJECT active rules.
    """
    affected = list(projects)
    if not affected:
        # an org level rule has to be checked against all active projects of the org
        affected = list(organization.project_set.filter(status=ObjectStatus.ACTIVE))

    return any(
        CustomDynamicSamplingRule.num_active_rules_for_project(candidate)
        >= MAX_CUSTOM_RULES_PER_PROJECT
        for candidate in affected
    )
2 changes: 0 additions & 2 deletions src/sentry/organizations/services/organization/impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
from sentry.incidents.models.incident import IncidentActivity
from sentry.models.activity import Activity
from sentry.models.dashboard import Dashboard, DashboardFavoriteUser
from sentry.models.dynamicsampling import CustomDynamicSamplingRule
from sentry.models.groupassignee import GroupAssignee
from sentry.models.groupbookmark import GroupBookmark
from sentry.models.groupsearchview import GroupSearchView
Expand Down Expand Up @@ -581,7 +580,6 @@ def merge_users(self, *, organization_id: int, from_user_id: int, to_user_id: in
Activity,
AlertRule,
AlertRuleActivity,
CustomDynamicSamplingRule,
Comment thread
shellmayr marked this conversation as resolved.
Dashboard,
DashboardFavoriteUser,
GroupAssignee,
Expand Down
Loading
Loading