From 939c2e818380f7003d1b3780d2528406ac78694f Mon Sep 17 00:00:00 2001 From: "Armen Zambrano G." <44410+armenzg@users.noreply.github.com> Date: Fri, 21 Nov 2025 11:29:21 -0500 Subject: [PATCH 1/2] feat(deletions): Schedule task to delete pending deletions groups Sometimes, groups do not get deleted and end up in limbo. This task will handle groups that are pending deletion before 90 days retention and older than a day. --- src/sentry/conf/server.py | 7 + src/sentry/tasks/delete_pending_groups.py | 91 ++++++++++ .../tasks/test_delete_pending_groups.py | 158 ++++++++++++++++++ 3 files changed, 256 insertions(+) create mode 100644 src/sentry/tasks/delete_pending_groups.py create mode 100644 tests/sentry/tasks/test_delete_pending_groups.py diff --git a/src/sentry/conf/server.py b/src/sentry/conf/server.py index a13f630b140c0f..13186ecef35796 100644 --- a/src/sentry/conf/server.py +++ b/src/sentry/conf/server.py @@ -893,6 +893,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str: "sentry.tasks.collect_project_platforms", "sentry.tasks.commit_context", "sentry.tasks.commits", + "sentry.tasks.delete_pending_groups", "sentry.tasks.delete_seer_grouping_records", "sentry.tasks.digests", "sentry.tasks.email", @@ -1006,6 +1007,12 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str: "task": "deletions:sentry.deletions.tasks.reattempt_deletions", "schedule": task_crontab("0", "*/2", "*", "*", "*"), }, + "delete-pending-groups": { + "task": "deletions:sentry.tasks.delete_pending_groups", + # Runs every 2 hours during 9am-5pm Eastern Time (EST: UTC-5) + # 9am, 11am, 1pm, 3pm, 5pm EST = 14:00, 16:00, 18:00, 20:00, 22:00 UTC + "schedule": task_crontab("0", "14,16,18,20,22", "*", "*", "*"), + }, "schedule-weekly-organization-reports-new": { "task": "reports:sentry.tasks.summaries.weekly_reports.schedule_organizations", "schedule": task_crontab("0", "12", "sat", "*", "*"), diff --git a/src/sentry/tasks/delete_pending_groups.py b/src/sentry/tasks/delete_pending_groups.py new file mode 100644 index 00000000000000..4ddaa816281a64 --- /dev/null +++ b/src/sentry/tasks/delete_pending_groups.py @@ -0,0 +1,91 @@ +import logging +from collections import defaultdict +from datetime import timedelta +from uuid import uuid4 + +from django.utils import timezone + +from sentry.deletions.defaults.group import GROUP_CHUNK_SIZE +from sentry.deletions.tasks.groups import delete_groups_for_project +from sentry.models.group import Group, GroupStatus +from sentry.silo.base import SiloMode +from sentry.tasks.base import instrumented_task +from sentry.taskworker.namespaces import deletion_tasks +from sentry.taskworker.retry import Retry +from sentry.utils import metrics + +logger = logging.getLogger(__name__) + +BATCH_LIMIT = 1000 +MAX_LAST_SEEN_DAYS = 90 +MIN_LAST_SEEN_DAYS = 1 + + +@instrumented_task( + name="sentry.tasks.delete_pending_groups", + namespace=deletion_tasks, + processing_deadline_duration=10 * 60, + retry=Retry(times=3, delay=60), + silo_mode=SiloMode.REGION, +) +def delete_pending_groups() -> None: + """ + Scheduled task that runs daily to clean up groups in pending deletion states. + + This task queries groups with status PENDING_DELETION or DELETION_IN_PROGRESS + and schedules deletion tasks for them. Groups are batched by project to ensure + efficient deletion processing. + + Only processes groups with last_seen between 24 hours and 90 days ago to avoid + processing very recent groups (safety window) or very old stuck groups. + """ + statuses_to_delete = [GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS] + + # XXX: If needed add a partial index with the status and last_seen fields + # This can timeout for lack of an index on the status field + # Not using the last_seen index to avoid the lack of composite index on status and last_seen + groups = Group.objects.filter(status__in=statuses_to_delete).values_list( + "id", "project_id", "last_seen" + )[:BATCH_LIMIT] + + if not groups: + logger.info("delete_pending_groups.no_groups_found") + return + + # Round to midnight to make the task idempotent throughout the day + now = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0) + min_last_seen = now - timedelta(days=MAX_LAST_SEEN_DAYS) + max_last_seen = now - timedelta(days=MIN_LAST_SEEN_DAYS) + # Group by project_id to ensure all groups in a batch belong to the same project + groups_by_project: dict[int, list[int]] = defaultdict(list) + for group_id, project_id, last_seen in groups: + if last_seen >= min_last_seen and last_seen <= max_last_seen: + groups_by_project[project_id].append(group_id) + + total_groups = sum(len(group_ids) for group_ids in groups_by_project.values()) + total_tasks = 0 + + logger.info( + "delete_pending_groups.started", + extra={"total_groups": total_groups, "projects_count": len(groups_by_project)}, + ) + + for project_id, group_ids in groups_by_project.items(): + # Schedule deletion tasks in chunks of GROUP_CHUNK_SIZE + for i in range(0, len(group_ids), GROUP_CHUNK_SIZE): + chunk = group_ids[i : i + GROUP_CHUNK_SIZE] + transaction_id = str(uuid4()) + + delete_groups_for_project.apply_async( + kwargs={ + "project_id": project_id, + "object_ids": chunk, + "transaction_id": transaction_id, + } + ) + total_tasks += 1 + + metrics.incr("delete_pending_groups.groups_scheduled", amount=total_groups, sample_rate=1.0) + metrics.incr("delete_pending_groups.tasks_scheduled", amount=total_tasks, sample_rate=1.0) + + logger.info("delete_pending_groups.completed") diff --git a/tests/sentry/tasks/test_delete_pending_groups.py b/tests/sentry/tasks/test_delete_pending_groups.py new file mode 100644 index 00000000000000..8d00d3422c9a65 --- /dev/null +++ b/tests/sentry/tasks/test_delete_pending_groups.py @@ -0,0 +1,158 @@ +from __future__ import annotations + +from datetime import datetime, timedelta +from unittest.mock import MagicMock, patch + +from django.utils import timezone + +from sentry.models.group import Group, GroupStatus, GroupSubStatus +from sentry.tasks.delete_pending_groups import ( + MAX_LAST_SEEN_DAYS, + MIN_LAST_SEEN_DAYS, + delete_pending_groups, +) +from sentry.testutils.cases import TestCase + + +class DeletePendingGroupsTest(TestCase): + def _count_groups_in_deletion_status(self) -> int: + """Count groups with deletion statuses in the valid date range.""" + return Group.objects.filter( + status__in=[GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS], + last_seen__gte=self._days_ago(MAX_LAST_SEEN_DAYS), + last_seen__lte=self._days_ago(MIN_LAST_SEEN_DAYS), + ).count() + + def _days_ago(self, days: int) -> datetime: + return timezone.now() - timedelta(days=days) + + def test_schedules_only_groups_within_valid_date_range(self) -> None: + """Test that only groups with last_seen between 24h-90d are scheduled for deletion.""" + project = self.create_project() + + # Too recent - within 24 hours (should NOT be scheduled) + too_recent = self.create_group( + project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(0) + ) + + # Valid range - should be scheduled + valid_group = self.create_group( + project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2) + ) + + # Too old - over 90 days (should NOT be scheduled) + too_old = self.create_group( + project=project, status=GroupStatus.DELETION_IN_PROGRESS, last_seen=self._days_ago(91) + ) + + # Wrong status - should NOT be scheduled + wrong_status = self.create_group( + project=project, + status=GroupStatus.UNRESOLVED, + substatus=GroupSubStatus.NEW, + last_seen=self._days_ago(5), + ) + + with patch( + "sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async" + ) as mock_delete_task: + delete_pending_groups() + + # Verify only the valid group was scheduled + mock_delete_task.assert_called_once() + call_kwargs = mock_delete_task.call_args.kwargs["kwargs"] + assert call_kwargs["object_ids"] == [valid_group.id] + assert call_kwargs["project_id"] == project.id + + assert self._count_groups_in_deletion_status() != 0 + with self.tasks(): + delete_pending_groups() + + assert self._count_groups_in_deletion_status() == 0 + assert list(Group.objects.all().values_list("id", flat=True).order_by("id")) == [ + too_recent.id, + too_old.id, + wrong_status.id, + ] + + @patch("sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async") + def test_groups_by_project(self, mock_delete_task: MagicMock) -> None: + """Test that groups are properly grouped by project when scheduling deletion.""" + project1 = self.create_project() + project2 = self.create_project() + + group1 = self.create_group( + project=project1, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2) + ) + group2 = self.create_group( + project=project1, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2) + ) + group3 = self.create_group( + project=project2, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2) + ) + + delete_pending_groups() + + assert mock_delete_task.call_count == 2 + + # Verify both projects got their deletion tasks scheduled + all_calls = mock_delete_task.call_args_list + project_ids = {call.kwargs["kwargs"]["project_id"] for call in all_calls} + assert project_ids == {project1.id, project2.id} + + # Verify correct groups are in each call + for call in all_calls: + call_kwargs = call.kwargs["kwargs"] + if call_kwargs["project_id"] == project1.id: + assert set(call_kwargs["object_ids"]) == {group1.id, group2.id} + elif call_kwargs["project_id"] == project2.id: + assert set(call_kwargs["object_ids"]) == {group3.id} + + @patch("sentry.tasks.delete_pending_groups.GROUP_CHUNK_SIZE", new=10) + @patch("sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async") + @patch("sentry.tasks.delete_pending_groups.metrics.incr") + def test_chunks_large_batches( + self, + mock_metrics_incr: MagicMock, + mock_delete_task: MagicMock, + ) -> None: + """Test that groups are chunked according to GROUP_CHUNK_SIZE when scheduling deletion.""" + GROUP_CHUNK_SIZE = 10 + GROUPS_MORE_THAN_CHUNK_SIZE = 5 + project = self.create_project() + + # Create more groups than GROUP_CHUNK_SIZE (10 in this test) + num_groups = GROUPS_MORE_THAN_CHUNK_SIZE + GROUP_CHUNK_SIZE + for _ in range(num_groups): + self.create_group( + project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2) + ) + + delete_pending_groups() + + # Should be called twice: one chunk of 10 and one of 5 + assert mock_delete_task.call_count == 2 + + # Verify first chunk has GROUP_CHUNK_SIZE groups + first_call_kwargs = mock_delete_task.call_args_list[0].kwargs["kwargs"] + assert len(first_call_kwargs["object_ids"]) == GROUP_CHUNK_SIZE + + # Verify second chunk has remaining groups + second_call_kwargs = mock_delete_task.call_args_list[1].kwargs["kwargs"] + assert len(second_call_kwargs["object_ids"]) == GROUPS_MORE_THAN_CHUNK_SIZE + + # Assert metrics are called with correct totals + incr_calls = mock_metrics_incr.call_args_list + incr_names = [c.args[0] for c in incr_calls] + assert "delete_pending_groups.groups_scheduled" in incr_names + assert "delete_pending_groups.tasks_scheduled" in incr_names + + groups_scheduled_call = next( + c for c in incr_calls if c.args[0] == "delete_pending_groups.groups_scheduled" + ) + assert groups_scheduled_call.kwargs["amount"] == num_groups + + tasks_scheduled_call = next( + c for c in incr_calls if c.args[0] == "delete_pending_groups.tasks_scheduled" + ) + assert tasks_scheduled_call.kwargs["amount"] == 2 From e8a04f8424ead569e6d263fa953a698eb2341f7f Mon Sep 17 00:00:00 2001 From: "Armen Zambrano G." <44410+armenzg@users.noreply.github.com> Date: Fri, 21 Nov 2025 11:38:10 -0500 Subject: [PATCH 2/2] Fix typing --- tests/sentry/tasks/test_delete_pending_groups.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/sentry/tasks/test_delete_pending_groups.py b/tests/sentry/tasks/test_delete_pending_groups.py index 8d00d3422c9a65..773433e15d1895 100644 --- a/tests/sentry/tasks/test_delete_pending_groups.py +++ b/tests/sentry/tasks/test_delete_pending_groups.py @@ -5,13 +5,14 @@ from django.utils import timezone -from sentry.models.group import Group, GroupStatus, GroupSubStatus +from sentry.models.group import Group, GroupStatus from sentry.tasks.delete_pending_groups import ( MAX_LAST_SEEN_DAYS, MIN_LAST_SEEN_DAYS, delete_pending_groups, ) from sentry.testutils.cases import TestCase +from sentry.types.group import GroupSubStatus class DeletePendingGroupsTest(TestCase):