Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions src/sentry/conf/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -893,6 +893,7 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
"sentry.tasks.collect_project_platforms",
"sentry.tasks.commit_context",
"sentry.tasks.commits",
"sentry.tasks.delete_pending_groups",
"sentry.tasks.delete_seer_grouping_records",
"sentry.tasks.digests",
"sentry.tasks.email",
Expand Down Expand Up @@ -1006,6 +1007,12 @@ def SOCIAL_AUTH_DEFAULT_USERNAME() -> str:
"task": "deletions:sentry.deletions.tasks.reattempt_deletions",
"schedule": task_crontab("0", "*/2", "*", "*", "*"),
},
"delete-pending-groups": {
"task": "deletions:sentry.tasks.delete_pending_groups",
        # Runs every 2 hours during 9am-5pm Eastern Standard Time (EST: UTC-5):
        # 9am, 11am, 1pm, 3pm, 5pm EST = 14:00, 16:00, 18:00, 20:00, 22:00 UTC.
        # NOTE: crontab schedules are fixed in UTC, so while daylight saving
        # time is in effect (EDT: UTC-4) these runs land at 10am-6pm local.
"schedule": task_crontab("0", "14,16,18,20,22", "*", "*", "*"),
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The schedule now is wonky. I want to see it run during my working hours. Once there are no issues I will switch it to once a day.

},
"schedule-weekly-organization-reports-new": {
"task": "reports:sentry.tasks.summaries.weekly_reports.schedule_organizations",
"schedule": task_crontab("0", "12", "sat", "*", "*"),
Expand Down
91 changes: 91 additions & 0 deletions src/sentry/tasks/delete_pending_groups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import logging
from collections import defaultdict
from datetime import timedelta
from uuid import uuid4

from django.utils import timezone

from sentry.deletions.defaults.group import GROUP_CHUNK_SIZE
from sentry.deletions.tasks.groups import delete_groups_for_project
from sentry.models.group import Group, GroupStatus
from sentry.silo.base import SiloMode
from sentry.tasks.base import instrumented_task
from sentry.taskworker.namespaces import deletion_tasks
from sentry.taskworker.retry import Retry
from sentry.utils import metrics

logger = logging.getLogger(__name__)

# Max number of pending-deletion groups fetched per run; keeps the status-only
# query bounded (see the XXX note in delete_pending_groups).
BATCH_LIMIT = 1000
# Groups are only scheduled for deletion when last_seen falls between
# MIN_LAST_SEEN_DAYS and MAX_LAST_SEEN_DAYS ago: the lower bound is a safety
# window for very recent groups, the upper bound skips very old stuck groups.
MAX_LAST_SEEN_DAYS = 90
MIN_LAST_SEEN_DAYS = 1


@instrumented_task(
    name="sentry.tasks.delete_pending_groups",
    namespace=deletion_tasks,
    processing_deadline_duration=10 * 60,
    retry=Retry(times=3, delay=60),
    silo_mode=SiloMode.REGION,
)
def delete_pending_groups() -> None:
    """
    Scheduled cleanup for groups stuck in a deletion state.

    Fetches up to BATCH_LIMIT groups whose status is PENDING_DELETION or
    DELETION_IN_PROGRESS, buckets them by project, and fans out
    ``delete_groups_for_project`` tasks in chunks of GROUP_CHUNK_SIZE.

    Only groups whose ``last_seen`` falls between MIN_LAST_SEEN_DAYS and
    MAX_LAST_SEEN_DAYS ago are scheduled: very recent groups are left alone
    as a safety window, and very old stuck groups are skipped.
    """
    pending_statuses = [GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS]

    # XXX: If needed add a partial index with the status and last_seen fields
    # This can timeout for lack of an index on the status field
    # Not using the last_seen index to avoid the lack of composite index on status and last_seen
    candidate_rows = Group.objects.filter(status__in=pending_statuses).values_list(
        "id", "project_id", "last_seen"
    )[:BATCH_LIMIT]

    if not candidate_rows:
        logger.info("delete_pending_groups.no_groups_found")
        return

    # Round to midnight so repeated runs within one day see the same window
    # (makes the task idempotent throughout the day).
    midnight = timezone.now().replace(hour=0, minute=0, second=0, microsecond=0)
    oldest_allowed = midnight - timedelta(days=MAX_LAST_SEEN_DAYS)
    newest_allowed = midnight - timedelta(days=MIN_LAST_SEEN_DAYS)

    # Bucket group ids per project so every scheduled deletion task only
    # touches a single project.
    groups_by_project: dict[int, list[int]] = defaultdict(list)
    for group_id, project_id, last_seen in candidate_rows:
        if oldest_allowed <= last_seen <= newest_allowed:
            groups_by_project[project_id].append(group_id)

    total_groups = sum(map(len, groups_by_project.values()))
    total_tasks = 0

    logger.info(
        "delete_pending_groups.started",
        extra={"total_groups": total_groups, "projects_count": len(groups_by_project)},
    )

    for project_id, project_group_ids in groups_by_project.items():
        # Fan out one task per GROUP_CHUNK_SIZE slice of this project's groups.
        for offset in range(0, len(project_group_ids), GROUP_CHUNK_SIZE):
            delete_groups_for_project.apply_async(
                kwargs={
                    "project_id": project_id,
                    "object_ids": project_group_ids[offset : offset + GROUP_CHUNK_SIZE],
                    "transaction_id": str(uuid4()),
                }
            )
            total_tasks += 1

    metrics.incr("delete_pending_groups.groups_scheduled", amount=total_groups, sample_rate=1.0)
    metrics.incr("delete_pending_groups.tasks_scheduled", amount=total_tasks, sample_rate=1.0)

    logger.info("delete_pending_groups.completed")
159 changes: 159 additions & 0 deletions tests/sentry/tasks/test_delete_pending_groups.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,159 @@
from __future__ import annotations

from datetime import datetime, timedelta
from unittest.mock import MagicMock, patch

from django.utils import timezone

from sentry.models.group import Group, GroupStatus
from sentry.tasks.delete_pending_groups import (
MAX_LAST_SEEN_DAYS,
MIN_LAST_SEEN_DAYS,
delete_pending_groups,
)
from sentry.testutils.cases import TestCase
from sentry.types.group import GroupSubStatus


class DeletePendingGroupsTest(TestCase):
    def _days_ago(self, days: int) -> datetime:
        """Return a timezone-aware datetime *days* days in the past."""
        return timezone.now() - timedelta(days=days)

    def _count_groups_in_deletion_status(self) -> int:
        """Count deletion-status groups whose last_seen is inside the task's window."""
        eligible = Group.objects.filter(
            status__in=[GroupStatus.PENDING_DELETION, GroupStatus.DELETION_IN_PROGRESS],
            last_seen__gte=self._days_ago(MAX_LAST_SEEN_DAYS),
            last_seen__lte=self._days_ago(MIN_LAST_SEEN_DAYS),
        )
        return eligible.count()

    def test_schedules_only_groups_within_valid_date_range(self) -> None:
        """Only groups with last_seen between 24h and 90d ago get scheduled."""
        project = self.create_project()

        # Within the 24h safety window -> must be skipped.
        recent_group = self.create_group(
            project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(0)
        )
        # Inside the valid window -> must be scheduled.
        valid_group = self.create_group(
            project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
        )
        # Older than 90 days -> must be skipped.
        stale_group = self.create_group(
            project=project, status=GroupStatus.DELETION_IN_PROGRESS, last_seen=self._days_ago(91)
        )
        # Not a deletion status -> must be skipped.
        unresolved_group = self.create_group(
            project=project,
            status=GroupStatus.UNRESOLVED,
            substatus=GroupSubStatus.NEW,
            last_seen=self._days_ago(5),
        )

        with patch(
            "sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async"
        ) as mock_delete_task:
            delete_pending_groups()

        # Exactly one deletion task, carrying only the valid group.
        mock_delete_task.assert_called_once()
        scheduled = mock_delete_task.call_args.kwargs["kwargs"]
        assert scheduled["project_id"] == project.id
        assert scheduled["object_ids"] == [valid_group.id]

        # Running the task for real removes the valid group and nothing else.
        assert self._count_groups_in_deletion_status() != 0
        with self.tasks():
            delete_pending_groups()

        assert self._count_groups_in_deletion_status() == 0
        surviving_ids = sorted(Group.objects.all().values_list("id", flat=True))
        assert surviving_ids == [recent_group.id, stale_group.id, unresolved_group.id]

    @patch("sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async")
    def test_groups_by_project(self, mock_delete_task: MagicMock) -> None:
        """Groups are batched per project when scheduling deletion tasks."""
        project_a = self.create_project()
        project_b = self.create_project()

        def _pending(project) -> int:
            return self.create_group(
                project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
            ).id

        expected_by_project = {
            project_a.id: {_pending(project_a), _pending(project_a)},
            project_b.id: {_pending(project_b)},
        }

        delete_pending_groups()

        # One task per project, covering both projects.
        assert mock_delete_task.call_count == 2
        calls = mock_delete_task.call_args_list
        assert {c.kwargs["kwargs"]["project_id"] for c in calls} == set(expected_by_project)

        # Each task carries exactly its own project's groups.
        for call in calls:
            payload = call.kwargs["kwargs"]
            assert set(payload["object_ids"]) == expected_by_project[payload["project_id"]]

    @patch("sentry.tasks.delete_pending_groups.GROUP_CHUNK_SIZE", new=10)
    @patch("sentry.tasks.delete_pending_groups.delete_groups_for_project.apply_async")
    @patch("sentry.tasks.delete_pending_groups.metrics.incr")
    def test_chunks_large_batches(
        self,
        mock_metrics_incr: MagicMock,
        mock_delete_task: MagicMock,
    ) -> None:
        """Large batches are split into GROUP_CHUNK_SIZE-sized deletion tasks."""
        chunk_size = 10  # must match the patched GROUP_CHUNK_SIZE above
        remainder = 5
        num_groups = chunk_size + remainder
        project = self.create_project()

        for _ in range(num_groups):
            self.create_group(
                project=project, status=GroupStatus.PENDING_DELETION, last_seen=self._days_ago(2)
            )

        delete_pending_groups()

        # One full chunk followed by one partial chunk.
        assert mock_delete_task.call_count == 2
        chunk_sizes = [
            len(call.kwargs["kwargs"]["object_ids"]) for call in mock_delete_task.call_args_list
        ]
        assert chunk_sizes == [chunk_size, remainder]

        # Metrics report the scheduled group and task totals.
        amounts = {
            call.args[0]: call.kwargs["amount"] for call in mock_metrics_incr.call_args_list
        }
        assert amounts["delete_pending_groups.groups_scheduled"] == num_groups
        assert amounts["delete_pending_groups.tasks_scheduled"] == 2
Loading