-
-
Notifications
You must be signed in to change notification settings - Fork 4.5k
feat(deletions): Schedule task to delete pending deletions groups #103820
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| @@ -0,0 +1,91 @@ | ||||||||||||||||||||
| import logging | ||||||||||||||||||||
| from collections import defaultdict | ||||||||||||||||||||
| from datetime import timedelta | ||||||||||||||||||||
| from uuid import uuid4 | ||||||||||||||||||||
|
|
||||||||||||||||||||
| from django.utils import timezone | ||||||||||||||||||||
|
|
||||||||||||||||||||
| from sentry.deletions.defaults.group import GROUP_CHUNK_SIZE | ||||||||||||||||||||
| from sentry.deletions.tasks.groups import delete_groups_for_project | ||||||||||||||||||||
| from sentry.models.group import Group, GroupStatus | ||||||||||||||||||||
| from sentry.silo.base import SiloMode | ||||||||||||||||||||
| from sentry.tasks.base import instrumented_task | ||||||||||||||||||||
| from sentry.taskworker.namespaces import deletion_tasks | ||||||||||||||||||||
| from sentry.taskworker.retry import Retry | ||||||||||||||||||||
| from sentry.utils import metrics | ||||||||||||||||||||
|
|
||||||||||||||||||||
| logger = logging.getLogger(__name__) | ||||||||||||||||||||
|
|
||||||||||||||||||||
| BATCH_LIMIT = 1000 | ||||||||||||||||||||
| MAX_LAST_SEEN_DAYS = 90 | ||||||||||||||||||||
| MIN_LAST_SEEN_DAYS = 1 | ||||||||||||||||||||
|
|
||||||||||||||||||||
|
|
||||||||||||||||||||
@instrumented_task(
    name="sentry.tasks.delete_pending_groups",
    namespace=deletion_tasks,
    processing_deadline_duration=10 * 60,
    retry=Retry(times=3, delay=60),
    silo_mode=SiloMode.REGION,
)
def delete_pending_groups() -> None:
    """
    Scheduled task that cleans up groups stuck in pending-deletion states.

    Queries groups whose status is PENDING_DELETION or DELETION_IN_PROGRESS and
    schedules deletion tasks for them. Groups are bucketed by project so every
    scheduled deletion batch touches exactly one project.

    Only groups whose ``last_seen`` falls between MIN_LAST_SEEN_DAYS and
    MAX_LAST_SEEN_DAYS ago are processed: very recent groups are skipped as a
    safety window, and very old stuck groups are left alone.
    """
    pending_statuses = [GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS]

    # XXX: If needed add a partial index with the status and last_seen fields.
    # This can timeout for lack of an index on the status field.
    # Not using the last_seen index to avoid the lack of composite index on status and last_seen.
    candidate_rows = Group.objects.filter(status__in=pending_statuses).values_list(
        "id", "project_id", "last_seen"
    )[:BATCH_LIMIT]

    if not candidate_rows:
        logger.info("delete_pending_groups.no_groups_found")
        return

    # Round to midnight so repeated runs within the same day are idempotent.
    midnight = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)
    window_start = midnight - timedelta(days=MAX_LAST_SEEN_DAYS)
    window_end = midnight - timedelta(days=MIN_LAST_SEEN_DAYS)

    # Bucket by project_id so all groups in a scheduled batch share one project.
    project_to_group_ids: dict[int, list[int]] = defaultdict(list)
    for group_id, project_id, last_seen in candidate_rows:
        if window_start <= last_seen <= window_end:
            project_to_group_ids[project_id].append(group_id)

    total_groups = sum(len(ids) for ids in project_to_group_ids.values())
    total_tasks = 0

    logger.info(
        "delete_pending_groups.started",
        extra={"total_groups": total_groups, "projects_count": len(project_to_group_ids)},
    )

    for project_id, group_ids in project_to_group_ids.items():
        # Schedule deletion tasks in chunks of GROUP_CHUNK_SIZE.
        for start in range(0, len(group_ids), GROUP_CHUNK_SIZE):
            chunk = group_ids[start : start + GROUP_CHUNK_SIZE]
            delete_groups_for_project.apply_async(
                kwargs={
                    "project_id": project_id,
                    "object_ids": chunk,
                    "transaction_id": str(uuid4()),
                }
            )
            total_tasks += 1

    metrics.incr("delete_pending_groups.groups_scheduled", amount=total_groups, sample_rate=1.0)
    metrics.incr("delete_pending_groups.tasks_scheduled", amount=total_tasks, sample_rate=1.0)

    logger.info("delete_pending_groups.completed")
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,159 @@ | ||
| from __future__ import annotations | ||
|
|
||
| from datetime import datetime, timedelta | ||
| from unittest.mock import MagicMock, patch | ||
|
|
||
| from django.utils import timezone | ||
|
|
||
| from sentry.models.group import Group, GroupStatus | ||
| from sentry.tasks.delete_pending_groups import ( | ||
| MAX_LAST_SEEN_DAYS, | ||
| MIN_LAST_SEEN_DAYS, | ||
| delete_pending_groups, | ||
| ) | ||
| from sentry.testutils.cases import TestCase | ||
| from sentry.types.group import GroupSubStatus | ||
|
|
||
|
|
||
class DeletePendingGroupsTest(TestCase):
    def _days_ago(self, days: int) -> datetime:
        """Return an aware datetime *days* days before now."""
        return timezone.now() - timedelta(days=days)

    def _count_groups_in_deletion_status(self) -> int:
        """Count groups in a deletion status whose last_seen is inside the valid window."""
        return Group.objects.filter(
            status__in=[GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS],
            last_seen__gte=self._days_ago(MAX_LAST_SEEN_DAYS),
            last_seen__lte=self._days_ago(MIN_LAST_SEEN_DAYS),
        ).count()

    def test_schedules_only_groups_within_valid_date_range(self) -> None:
        """Only groups with last_seen between 24h and 90d ago are scheduled for deletion."""
        project = self.create_project()

        # Within the 24-hour safety window: must NOT be scheduled.
        too_recent = self.create_group(
            project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(0)
        )
        # Inside the valid window: must be scheduled.
        valid_group = self.create_group(
            project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
        )
        # Older than 90 days: must NOT be scheduled.
        too_old = self.create_group(
            project=project, status=GroupStatus.DELETION_IN_PROGRESS, last_seen=self._days_ago(91)
        )
        # Not in a deletion status: must NOT be scheduled.
        wrong_status = self.create_group(
            project=project,
            status=GroupStatus.UNRESOLVED,
            substatus=GroupSubStatus.NEW,
            last_seen=self._days_ago(5),
        )

        with patch(
            "sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async"
        ) as mock_delete_task:
            delete_pending_groups()

        # Exactly one task, carrying only the valid group for its project.
        mock_delete_task.assert_called_once()
        scheduled_kwargs = mock_delete_task.call_args.kwargs["kwargs"]
        assert scheduled_kwargs["object_ids"] == [valid_group.id]
        assert scheduled_kwargs["project_id"] == project.id

        # Run for real: the valid group is deleted, everything else survives.
        assert self._count_groups_in_deletion_status() != 0
        with self.tasks():
            delete_pending_groups()

        assert self._count_groups_in_deletion_status() == 0
        assert list(Group.objects.all().values_list("id", flat=True).order_by("id")) == [
            too_recent.id,
            too_old.id,
            wrong_status.id,
        ]

    @patch("sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async")
    def test_groups_by_project(self, mock_delete_task: MagicMock) -> None:
        """Groups are bucketed by project when deletion tasks are scheduled."""
        project1 = self.create_project()
        project2 = self.create_project()

        group1 = self.create_group(
            project=project1, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
        )
        group2 = self.create_group(
            project=project1, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
        )
        group3 = self.create_group(
            project=project2, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
        )

        delete_pending_groups()

        # One task per project.
        assert mock_delete_task.call_count == 2

        scheduled_calls = mock_delete_task.call_args_list
        scheduled_project_ids = {c.kwargs["kwargs"]["project_id"] for c in scheduled_calls}
        assert scheduled_project_ids == {project1.id, project2.id}

        # Each task carries exactly the groups belonging to its project.
        for scheduled in scheduled_calls:
            task_kwargs = scheduled.kwargs["kwargs"]
            if task_kwargs["project_id"] == project1.id:
                assert set(task_kwargs["object_ids"]) == {group1.id, group2.id}
            elif task_kwargs["project_id"] == project2.id:
                assert set(task_kwargs["object_ids"]) == {group3.id}

    @patch("sentry.tasks.delete_pending_groups.GROUP_CHUNK_SIZE", new=10)
    @patch("sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async")
    @patch("sentry.tasks.delete_pending_groups.metrics.incr")
    def test_chunks_large_batches(
        self,
        mock_metrics_incr: MagicMock,
        mock_delete_task: MagicMock,
    ) -> None:
        """Groups are split into GROUP_CHUNK_SIZE-sized batches when scheduling deletion."""
        GROUP_CHUNK_SIZE = 10
        GROUPS_MORE_THAN_CHUNK_SIZE = 5
        project = self.create_project()

        # Create more groups than the (patched) chunk size of 10.
        num_groups = GROUPS_MORE_THAN_CHUNK_SIZE + GROUP_CHUNK_SIZE
        for _ in range(num_groups):
            self.create_group(
                project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
            )

        delete_pending_groups()

        # Two tasks: one full chunk of 10 plus one remainder chunk of 5.
        assert mock_delete_task.call_count == 2

        first_task_kwargs = mock_delete_task.call_args_list[0].kwargs["kwargs"]
        assert len(first_task_kwargs["object_ids"]) == GROUP_CHUNK_SIZE

        second_task_kwargs = mock_delete_task.call_args_list[1].kwargs["kwargs"]
        assert len(second_task_kwargs["object_ids"]) == GROUPS_MORE_THAN_CHUNK_SIZE

        # Metrics report the scheduled totals.
        incr_calls = mock_metrics_incr.call_args_list
        incr_names = [c.args[0] for c in incr_calls]
        assert "delete_pending_groups.groups_scheduled" in incr_names
        assert "delete_pending_groups.tasks_scheduled" in incr_names

        groups_scheduled_call = next(
            c for c in incr_calls if c.args[0] == "delete_pending_groups.groups_scheduled"
        )
        assert groups_scheduled_call.kwargs["amount"] == num_groups

        tasks_scheduled_call = next(
            c for c in incr_calls if c.args[0] == "delete_pending_groups.tasks_scheduled"
        )
        assert tasks_scheduled_call.kwargs["amount"] == 2
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The schedule is deliberately unusual for now: I want to see the task run during my working hours. Once it runs without issues, I will switch it to once a day.