Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions src/sentry/conf/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -1009,9 +1009,8 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
},
"delete-pending-groups": {
"task": "deletions:sentry.tasks.delete_pending_groups",
# Runs every 2 hours during 9am-5pm Eastern Time (EST: UTC-5)
# 9am, 11am, 1pm, 3pm, 5pm EST = 14:00, 16:00, 18:00, 20:00, 22:00 UTC
"schedule": task_crontab("0", "14,16,18,20,22", "*", "*", "*"),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was done so we could observe during work hours.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When you switch schedules, you may miss an interval in the new schedule depending on when the last run was. But the schedule should align to the new schedule after that.

# Runs every 6 hours (at 00:00, 06:00, 12:00, 18:00 UTC)
"schedule": task_crontab("0", "*/6", "*", "*", "*"),
},
"schedule-weekly-organization-reports-new": {
"task": "reports:sentry.tasks.summaries.weekly_reports.schedule_organizations",
Expand Down
12 changes: 6 additions & 6 deletions src/sentry/tasks/delete_pending_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

BATCH_LIMIT = 1000
MAX_LAST_SEEN_DAYS = 90
MIN_LAST_SEEN_DAYS = 1
MIN_LAST_SEEN_HOURS = 6


@instrumented_task(
Expand All @@ -34,8 +34,8 @@ def delete_pending_groups() -> None:
and schedules deletion tasks for them. Groups are batched by project to ensure
efficient deletion processing.

Only processes groups with last_seen between 24 hours and 90 days ago to avoid
processing very recent groups (safety window) or very old stuck groups.
Only processes groups with last_seen between 6 hours and 90 days ago to avoid
processing very recent groups (safety window) or groups past retention period.
"""
statuses_to_delete = [GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS]

Expand All @@ -48,10 +48,10 @@ def delete_pending_groups() -> None:
logger.info("delete_pending_groups.no_groups_found")
return

# Round to midnight to make the task idempotent throughout the day
now = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)
# Process groups between 6 hours and 90 days old
now = timezone.now()
min_last_seen = now - timedelta(days=MAX_LAST_SEEN_DAYS)
max_last_seen = now - timedelta(days=MIN_LAST_SEEN_DAYS)
max_last_seen = now - timedelta(hours=MIN_LAST_SEEN_HOURS)
# Group by project_id to ensure all groups in a batch belong to the same project
groups_by_project: dict[int, list[int]] = defaultdict(list)
for group_id, project_id, last_seen in groups:
Expand Down
19 changes: 11 additions & 8 deletions tests/sentry/tasks/test_delete_pending_groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,37 +8,40 @@
from sentry.models.group import Group, GroupStatus
from sentry.tasks.delete_pending_groups import (
MAX_LAST_SEEN_DAYS,
MIN_LAST_SEEN_DAYS,
MIN_LAST_SEEN_HOURS,
delete_pending_groups,
)
from sentry.testutils.cases import TestCase
from sentry.types.group import GroupSubStatus


class DeletePendingGroupsTest(TestCase):
def _count_groups_in_deletion_status(self) -> int:
def _count_groups_in_deletion_status_and_valid_date_range(self) -> int:
"""Count groups with deletion statuses in the valid date range."""
return Group.objects.filter(
status__in=[GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS],
last_seen__gte=self._days_ago(MAX_LAST_SEEN_DAYS),
last_seen__lte=self._days_ago(MIN_LAST_SEEN_DAYS),
last_seen__lte=self._hours_ago(MIN_LAST_SEEN_HOURS),
).count()

def _days_ago(self, days: int) -> datetime:
    """Return the current time shifted back by ``days`` whole days."""
    offset = timedelta(days=days)
    return timezone.now() - offset

def _hours_ago(self, hours: int) -> datetime:
    """Return the current time shifted back by ``hours`` whole hours."""
    offset = timedelta(hours=hours)
    return timezone.now() - offset

def test_schedules_only_groups_within_valid_date_range(self) -> None:
"""Test that only groups with last_seen between 6h-90d are scheduled for deletion."""
project = self.create_project()

# Too recent - within 24 hours (should NOT be scheduled)
# Too recent - last seen 4 hours ago, inside the 6-hour safety window (should NOT be scheduled)
too_recent = self.create_group(
project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(0)
project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._hours_ago(4)
)

# Valid range - should be scheduled
valid_group = self.create_group(
project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._hours_ago(7)
)

# Too old - over 90 days (should NOT be scheduled)
Expand All @@ -65,11 +68,11 @@ def test_schedules_only_groups_within_valid_date_range(self) -> None:
assert call_kwargs["object_ids"] == [valid_group.id]
assert call_kwargs["project_id"] == project.id

assert self._count_groups_in_deletion_status() != 0
assert self._count_groups_in_deletion_status_and_valid_date_range() != 0
with self.tasks():
delete_pending_groups()

assert self._count_groups_in_deletion_status() == 0
assert self._count_groups_in_deletion_status_and_valid_date_range() == 0
assert list(Group.objects.all().values_list("id", flat=True).order_by("id")) == [
too_recent.id,
too_old.id,
Expand Down
Loading