fix(deletions): Prevent timeouts when deleting GroupHash records #101720
In the group deletion task (sentry.deletions.defaults.group):
@@ -11,11 +11,13 @@
 from sentry.issues.grouptype import GroupCategory, InvalidGroupTypeError
 from sentry.models.group import Group, GroupStatus
 from sentry.models.grouphash import GroupHash
+from sentry.models.grouphashmetadata import GroupHashMetadata
 from sentry.models.rulefirehistory import RuleFireHistory
 from sentry.notifications.models.notificationmessage import NotificationMessage
 from sentry.services.eventstore.models import Event
 from sentry.snuba.dataset import Dataset
 from sentry.tasks.delete_seer_grouping_records import may_schedule_task_to_delete_hashes_from_seer
+from sentry.utils import metrics
 from sentry.utils.query import RangeQuerySetWrapper

 from ..base import BaseDeletionTask, BaseRelation, ModelDeletionTask, ModelRelation
@@ -256,10 +258,38 @@ def delete_group_hashes(
             logger.warning("Error scheduling task to delete hashes from seer")
         finally:
             hash_ids = [gh[0] for gh in hashes_chunk]
+            if options.get("deletions.group.delete_group_hashes_metadata_first"):
+                # If we delete the grouphash metadata rows first, we do not need to
+                # update their references to other grouphashes. If we delete the
+                # group hashes first, those columns must be updated beforehand.
+                #
+                # To see why, say we have the following relationships:
+                #   gh A -> ghm A -> no reference to another grouphash
+                #   gh B -> ghm B -> gh C
+                #   gh C -> ghm C -> gh A
+                #
+                # Deleting group hashes A, B & C (they all point to the same group) requires:
+                #   * updating columns ghm B & ghm C to point to None
+                #   * deleting the group hash metadata rows
+                #   * deleting the group hashes

Review comment: Before this PR, the current approach is this:

+                #
+                # If we delete the metadata first, we do not need to update the
+                # columns before deleting them.
+                try:
+                    GroupHashMetadata.objects.filter(grouphash_id__in=hash_ids).delete()
+                except Exception:
+                    # XXX: Let's make sure that no issues are caused by this and then remove it
+                    logger.exception("Error deleting group hash metadata")

Review comment: Once I enable the option, I would like to know about any problems this causes while falling back to the original behaviour, rather than completely aborting the process.

             GroupHash.objects.filter(id__in=hash_ids).delete()

             iterations += 1

         if iterations == GROUP_HASH_ITERATIONS:

Review comment: Drive-by metric.

+            metrics.incr("deletions.group_hashes.max_iterations_reached", sample_rate=1.0)
             logger.warning(
                 "Group hashes batch deletion reached the maximum number of iterations. "
                 "Investigate if we need to change the GROUP_HASH_ITERATIONS value."
             )


 def separate_by_group_category(instance_list: Sequence[Group]) -> tuple[list[Group], list[Group]]:
     error_groups = []
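To make the ordering concrete, here is a minimal sketch of the self-referential relationship and of the two deletion orders. The field names (grouphash, seer_matched_grouphash) come from this PR; the model shapes and on_delete behaviours are assumptions, not Sentry's actual definitions.

```python
from django.db import models


class GroupHash(models.Model):
    hash = models.CharField(max_length=32)


class GroupHashMetadata(models.Model):
    # Each metadata row belongs to exactly one grouphash.
    grouphash = models.OneToOneField(GroupHash, on_delete=models.CASCADE)
    # A metadata row may also point at *another* grouphash (its Seer match).
    # Assumed nullable with SET_NULL, since the old path had to NULL these
    # columns out before the hashes could be deleted.
    seer_matched_grouphash = models.ForeignKey(
        GroupHash,
        null=True,
        on_delete=models.SET_NULL,
        related_name="seer_matches",
    )


def delete_hashes_old_order(hash_ids: list[int]) -> None:
    # Django's collector first emits
    #   UPDATE grouphashmetadata SET seer_matched_grouphash_id = NULL WHERE ...
    # for every metadata row pointing at a doomed hash, then cascade-deletes
    # the metadata rows, and only then deletes the hashes. On large batches
    # that extra UPDATE is presumably what drove the timeouts this PR targets.
    GroupHash.objects.filter(id__in=hash_ids).delete()


def delete_hashes_new_order(hash_ids: list[int]) -> None:
    # With the referencing metadata rows gone, there is nothing to NULL out,
    # so deleting the hashes is a plain DELETE.
    GroupHashMetadata.objects.filter(grouphash_id__in=hash_ids).delete()
    GroupHash.objects.filter(id__in=hash_ids).delete()
```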
In the GroupHash model (sentry.models.grouphash):
@@ -53,7 +53,8 @@ def metadata(self) -> GroupHashMetadata | None:
         except AttributeError:
             return None

-    __repr__ = sane_repr("group_id", "hash")
+    __repr__ = sane_repr("group_id", "hash", "metadata")
+    __str__ = __repr__

Review comment: It makes print statements during debugging actually useful.

     def get_associated_fingerprint(self) -> list[str] | None:
         """
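For illustration, roughly what the change buys during debugging (the exact sane_repr formatting below is approximate, not copied from Sentry):

```python
# Before: printing a GroupHash showed only the group and the hash value:
#   <GroupHash at 0x...: group_id=1, hash='c0ffee...'>
# After: the metadata, including any Seer linkage, is visible too:
#   <GroupHash at 0x...: group_id=1, hash='c0ffee...', metadata=<GroupHashMetadata ...>>
print(grouphash)  # any GroupHash instance, e.g. grouphash_a from the tests below
```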
In the option definitions:
@@ -338,7 +338,14 @@
 # Deletions
 register(
     "deletions.group-hashes-batch-size",
-    default=10000,
+    default=100,
     type=Int,
     flags=FLAG_AUTOMATOR_MODIFIABLE,
 )
+register(
+    "deletions.group.delete_group_hashes_metadata_first",
+    default=False,
+    type=Bool,
+    flags=FLAG_AUTOMATOR_MODIFIABLE,
+)

Review comment (on the new option): This will control the new behaviour.
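A sketch of how these two options plausibly drive the batched deletion loop. Only the option names, the GROUP_HASH_ITERATIONS name, and the delete calls come from this diff; the loop shape, the queryset, the constant's value, and the function signature are assumptions.

```python
from sentry import options
from sentry.models.grouphash import GroupHash
from sentry.models.grouphashmetadata import GroupHashMetadata

GROUP_HASH_ITERATIONS = 10_000  # hypothetical value; the real constant lives in the deletion task


def delete_group_hashes_sketch(project_id: int, group_ids: list[int]) -> None:
    batch_size = options.get("deletions.group-hashes-batch-size")  # now 100 rather than 10000
    iterations = 0
    while iterations < GROUP_HASH_ITERATIONS:
        hash_ids = list(
            GroupHash.objects.filter(project_id=project_id, group_id__in=group_ids)
            .values_list("id", flat=True)[:batch_size]
        )
        if not hash_ids:
            break  # nothing left to delete
        if options.get("deletions.group.delete_group_hashes_metadata_first"):
            # New behaviour: drop the referencing metadata rows before the hashes
            GroupHashMetadata.objects.filter(grouphash_id__in=hash_ids).delete()
        GroupHash.objects.filter(id__in=hash_ids).delete()
        iterations += 1
```

Shrinking the batch from 10000 to 100 keeps each DELETE (and any cascading work) small enough to finish within statement timeouts, at the cost of more iterations, which is exactly what the max_iterations_reached metric above watches for.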
In the group deletion tests:
@@ -7,7 +7,7 @@
 from snuba_sdk import Column, Condition, Entity, Function, Op, Query, Request

-from sentry import nodestore
+from sentry import deletions, nodestore
 from sentry.deletions.defaults.group import ErrorEventsDeletionTask
 from sentry.deletions.tasks.groups import delete_groups_for_project
 from sentry.issues.grouptype import FeedbackGroup, GroupCategory

@@ -16,6 +16,7 @@
 from sentry.models.group import Group
 from sentry.models.groupassignee import GroupAssignee
 from sentry.models.grouphash import GroupHash
+from sentry.models.grouphashmetadata import GroupHashMetadata
 from sentry.models.grouphistory import GroupHistory, GroupHistoryStatus
 from sentry.models.groupmeta import GroupMeta
 from sentry.models.groupredirect import GroupRedirect
@@ -254,6 +255,73 @@ def test_invalid_group_type_handling(
             "args": [self.project.id, error_group_hashes, 0]
         }

+    def test_delete_grouphashes_and_metadata(self) -> None:
+        """
+        Test that when deleting group hashes, the group hash metadata is deleted
+        first and the references to the other group hashes are updated.
+        """
+        # This enables checking Seer for similarity; we mock the call to return a specific grouphash
+        self.project.update_option("sentry:similarity_backfill_completed", int(time()))
+
+        # Create two events/grouphashes; event A will be deleted
+        event_a = self.store_event(
+            data={
+                "platform": "python",
+                "stacktrace": {"frames": [{"filename": "error_a.py"}]},
+            },
+            project_id=self.project.id,
+        )
+        grouphash_a = GroupHash.objects.get(group_id=event_a.group_id)
+        assert grouphash_a.metadata is not None
+        assert grouphash_a.metadata.seer_matched_grouphash is None
+        metadata_a_id = grouphash_a.metadata.id
+
+        with mock.patch(
+            "sentry.grouping.ingest.seer.get_seer_similar_issues"
+        ) as mock_get_seer_similar_issues:
+            # This will allow grouphash_b to be matched to grouphash_a by Seer
+            mock_get_seer_similar_issues.return_value = (0.01, grouphash_a)
+
+            # Event B will be kept; a different stacktrace ensures a group hash different from grouphash_a
+            event_b = self.store_event(
+                data={
+                    "platform": "python",
+                    "stacktrace": {"frames": [{"filename": "error_b.py"}]},
+                },
+                project_id=self.project.id,
+            )
+        grouphash_b = GroupHash.objects.get(hash=event_b.get_primary_hash())
+        assert grouphash_b.metadata is not None
+        metadata_b_id = grouphash_b.metadata.id
+
+        # Verify that Seer matched event_b to event_a's hash
+        assert event_a.group_id == event_b.group_id
+        # Make sure it has not changed
+        assert grouphash_a.metadata is not None

Review comment (reviewer): The django ORM record won't change even if the underlying db record was changed. You could add
Review comment (author): I forgot to push that change. I have it locally. I will make it part of my next PR.
Review comment (author): I added this in #101796.
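The truncated suggestion above is presumably Django's refresh_from_db(); something along these lines (illustrative only; whether and where this line landed is covered by PR #101796):

```python
# Re-read the metadata row so the in-memory instance reflects any
# UPDATE ... SET NULL the deletion path may have run in the database.
grouphash_a.metadata.refresh_from_db()
assert grouphash_a.metadata.seer_matched_grouphash is None
```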
+        assert grouphash_a.metadata.seer_matched_grouphash is None
+        assert grouphash_b.metadata is not None
+        assert grouphash_b.metadata.seer_matched_grouphash == grouphash_a

Review comment: This verifies that the column is set to the first group hash. It does not verify that we end up issuing the update statement, but I want to make sure we're exercising the same code path.

+        with self.tasks():
+            # This will delete all groups, group hashes and group hash metadata
+            task = deletions.get(model=Group, query={"id__in": [event_a.group_id]})
+            more = task.chunk()
+            assert not more
+
+        assert not Group.objects.filter(id=event_a.group_id).exists()
+        assert not GroupHash.objects.filter(id=grouphash_a.id).exists()
+        assert not GroupHashMetadata.objects.filter(id=metadata_a_id).exists()
+        assert not GroupHash.objects.filter(id=grouphash_b.id).exists()
+        assert not GroupHashMetadata.objects.filter(id=metadata_b_id).exists()
+
+    def test_delete_grouphashes_and_metadata_but_delete_metadata_first(self) -> None:
+        """
+        Test that when deleting group hashes, the group hash metadata is deleted first
+        (which avoids having to update the references to the other group hashes).
+        """
+        with self.options({"deletions.group.delete_group_hashes_metadata_first": True}):
+            self.test_delete_grouphashes_and_metadata()

Review comment: The two tests are functionally the same.


 class DeleteIssuePlatformTest(TestCase, SnubaTestCase, OccurrenceTestMixin):
     referrer = Referrer.TESTING_TEST.value
Review comment (on delete_group_hashes): This function is called as delete_group_hashes(project_id, group_ids); thus, all group hashes point to the same group.