From 9c17b920a19b904b3e8c11656e28efadde127bf7 Mon Sep 17 00:00:00 2001 From: "Armen Zambrano G." <44410+armenzg@users.noreply.github.com> Date: Mon, 24 Nov 2025 11:26:43 -0500 Subject: [PATCH] feat(deletions): Change schedule & date ranges This is a follow-up to #103820. A failure to delete a group should be re-processed within 24 hours. Changes included: * Schedule re-processing task every 6 hours * Re-process groups pending deletion that are older than 6 hours rather than midnight from the day before --- src/sentry/conf/server.py | 5 ++--- src/sentry/tasks/delete_pending_groups.py | 12 ++++++------ .../tasks/test_delete_pending_groups.py | 19 +++++++++++-------- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index b681db1328d969..52c061ec997656 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -1009,9 +1009,8 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str: }, "delete-pending-groups": { "task": "deletions:sentry.tasks.delete_pending_groups", - # Runs every 2 hours during 9am-5pm Eastern Time (EST: UTC-5) - # 9am, 11am, 1pm, 3pm, 5pm EST = 14:00, 16:00, 18:00, 20:00, 22:00 UTC - "schedule": task_crontab("0", "14,16,18,20,22", "*", "*", "*"), + # Runs every 6 hours (at 00:00, 06:00, 12:00, 18:00 UTC) + "schedule": task_crontab("0", "*/6", "*", "*", "*"), }, "schedule-weekly-organization-reports-new": { "task": "reports:sentry.tasks.summaries.weekly_reports.schedule_organizations", diff --git a/src/sentry/tasks/delete_pending_groups.py b/src/sentry/tasks/delete_pending_groups.py index 7345506b561f33..dec10f9332a4c2 100644 --- a/src/sentry/tasks/delete_pending_groups.py +++ b/src/sentry/tasks/delete_pending_groups.py @@ -16,7 +16,7 @@ BATCH_LIMIT = 1000 MAX_LAST_SEEN_DAYS = 90 -MIN_LAST_SEEN_DAYS = 1 +MIN_LAST_SEEN_HOURS = 6 @instrumented_task( @@ -34,8 +34,8 @@ def delete_pending_groups() -> None: and schedules deletion tasks for them. Groups are batched by project to ensure efficient deletion processing. - Only processes groups with last_seen between 24 hours and 90 days ago to avoid - processing very recent groups (safety window) or very old stuck groups. + Only processes groups with last_seen between 6 hours and 90 days ago to avoid + processing very recent groups (safety window) or groups past retention period. """ statuses_to_delete = [GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS] @@ -48,10 +48,10 @@ def delete_pending_groups() -> None: logger.info("delete_pending_groups.no_groups_found") return - # Round to midnight to make the task idempotent throughout the day - now = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0) + # Process groups between 6 hours and 90 days old + now = timezone.now() min_last_seen = now - timedelta(days=MAX_LAST_SEEN_DAYS) - max_last_seen = now - timedelta(days=MIN_LAST_SEEN_DAYS) + max_last_seen = now - timedelta(hours=MIN_LAST_SEEN_HOURS) # Group by project_id to ensure all groups in a batch belong to the same project groups_by_project: dict[int, list[int]] = defaultdict(list) for group_id, project_id, last_seen in groups: diff --git a/tests/sentry/tasks/test_delete_pending_groups.py b/tests/sentry/tasks/test_delete_pending_groups.py index afbf4df25955a8..a12b529022b00f 100644 --- a/tests/sentry/tasks/test_delete_pending_groups.py +++ b/tests/sentry/tasks/test_delete_pending_groups.py @@ -8,7 +8,7 @@ from sentry.models.group import Group, GroupStatus from sentry.tasks.delete_pending_groups import ( MAX_LAST_SEEN_DAYS, - MIN_LAST_SEEN_DAYS, + MIN_LAST_SEEN_HOURS, delete_pending_groups, ) from sentry.testutils.cases import TestCase @@ -16,29 +16,32 @@ class DeletePendingGroupsTest(TestCase): - def _count_groups_in_deletion_status(self) -> int: + def _count_groups_in_deletion_status_and_valid_date_range(self) -> int: """Count groups with deletion statuses in the valid date range.""" return Group.objects.filter( status__in=[GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS], last_seen__gte=self._days_ago(MAX_LAST_SEEN_DAYS), - last_seen__lte=self._days_ago(MIN_LAST_SEEN_DAYS), + last_seen__lte=self._hours_ago(MIN_LAST_SEEN_HOURS), ).count() def _days_ago(self, days: int) -> datetime: return timezone.now() - timedelta(days=days) + def _hours_ago(self, hours: int) -> datetime: + return timezone.now() - timedelta(hours=hours) + def test_schedules_only_groups_within_valid_date_range(self) -> None: """Test that only groups with last_seen between 24h-90d are scheduled for deletion.""" project = self.create_project() - # Too recent - within 24 hours (should NOT be scheduled) + # Too recent - within 4 hours (should NOT be scheduled) too_recent = self.create_group( - project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(0) + project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._hours_ago(4) ) # Valid range - should be scheduled valid_group = self.create_group( - project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2) + project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._hours_ago(7) ) # Too old - over 90 days (should NOT be scheduled) @@ -65,11 +68,11 @@ def test_schedules_only_groups_within_valid_date_range(self) -> None: assert call_kwargs["object_ids"] == [valid_group.id] assert call_kwargs["project_id"] == project.id - assert self._count_groups_in_deletion_status() != 0 + assert self._count_groups_in_deletion_status_and_valid_date_range() != 0 with self.tasks(): delete_pending_groups() - assert self._count_groups_in_deletion_status() == 0 + assert self._count_groups_in_deletion_status_and_valid_date_range() == 0 assert list(Group.objects.all().values_list("id", flat=True).order_by("id")) == [ too_recent.id, too_old.id,