From 39632bcc5ee2c4075165186d6c9681474480365e Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Tue, 27 Oct 2020 17:43:19 +0100 Subject: [PATCH 01/11] ref(reprocessing): Retain issue data by migrating associated models over --- src/sentry/deletions/defaults/group.py | 65 +++++++++++++++----------- src/sentry/reprocessing2.py | 41 +++++++++++++--- src/sentry/tasks/reprocessing2.py | 3 +- 3 files changed, 72 insertions(+), 37 deletions(-) diff --git a/src/sentry/deletions/defaults/group.py b/src/sentry/deletions/defaults/group.py index d9e6479b06dbd9..d689733444fc26 100644 --- a/src/sentry/deletions/defaults/group.py +++ b/src/sentry/deletions/defaults/group.py @@ -3,12 +3,34 @@ import os from sentry import eventstore, nodestore from sentry.eventstore.models import Event -from sentry.models import EventAttachment, UserReport -from sentry.reprocessing2 import delete_unprocessed_events +from sentry import models from ..base import BaseDeletionTask, BaseRelation, ModelDeletionTask, ModelRelation +GROUP_RELATED_MODELS = ( + # prioritize GroupHash + models.GroupHash, + models.GroupAssignee, + models.GroupCommitResolution, + models.GroupLink, + models.GroupBookmark, + models.GroupMeta, + models.GroupEnvironment, + models.GroupRelease, + models.GroupRedirect, + models.GroupResolution, + models.GroupRuleStatus, + models.GroupSeen, + models.GroupShare, + models.GroupSnooze, + models.GroupEmailThread, + models.GroupSubscription, + models.UserReport, + models.EventAttachment, +) + + class EventDataDeletionTask(BaseDeletionTask): """ Deletes nodestore data, EventAttachment and UserReports for group @@ -53,46 +75,33 @@ def chunk(self): node_ids = [Event.generate_node_id(self.project_id, event.event_id) for event in events] nodestore.delete_multi(node_ids) + from sentry.reprocessing2 import delete_unprocessed_events + delete_unprocessed_events(events) - # Remove EventAttachment and UserReport + # Remove EventAttachment and UserReport *again* as those may not have a + # group ID, therefore there may be dangling ones after "regular" model + # deletion. 
event_ids = [event.event_id for event in events] - EventAttachment.objects.filter(event_id__in=event_ids, project_id=self.project_id).delete() - UserReport.objects.filter(event_id__in=event_ids, project_id=self.project_id).delete() + models.EventAttachment.objects.filter( + event_id__in=event_ids, project_id=self.project_id + ).delete() + models.UserReport.objects.filter( + event_id__in=event_ids, project_id=self.project_id + ).delete() return True class GroupDeletionTask(ModelDeletionTask): def get_child_relations(self, instance): - from sentry import models relations = [] - model_list = ( - # prioritize GroupHash - models.GroupHash, - models.GroupAssignee, - models.GroupCommitResolution, - models.GroupLink, - models.GroupBookmark, - models.GroupMeta, - models.GroupEnvironment, - models.GroupRelease, - models.GroupRedirect, - models.GroupResolution, - models.GroupRuleStatus, - models.GroupSeen, - models.GroupShare, - models.GroupSnooze, - models.GroupEmailThread, - models.GroupSubscription, - models.UserReport, - models.EventAttachment, + relations.extend( + [ModelRelation(m, {"group_id": instance.id}) for m in GROUP_RELATED_MODELS] ) - relations.extend([ModelRelation(m, {"group_id": instance.id}) for m in model_list]) - # Skip EventDataDeletionTask if this is being called from cleanup.py if not os.environ.get("_SENTRY_CLEANUP"): relations.extend( diff --git a/src/sentry/reprocessing2.py b/src/sentry/reprocessing2.py index 83b4e711e27d60..77facd64257a6a 100644 --- a/src/sentry/reprocessing2.py +++ b/src/sentry/reprocessing2.py @@ -9,17 +9,27 @@ from sentry import nodestore, features, eventstore from sentry.attachments import CachedAttachment, attachment_cache -from sentry.models import EventAttachment +from sentry import models from sentry.utils import snuba from sentry.utils.cache import cache_key_for_event from sentry.utils.redis import redis_clusters from sentry.eventstore.processing import event_processing_store +from sentry.deletions.defaults.group import GROUP_RELATED_MODELS logger = logging.getLogger("sentry.reprocessing") _REDIS_SYNC_TTL = 3600 +GROUP_MODELS_TO_MIGRATE = GROUP_RELATED_MODELS + (models.Activity,) + +RESET_GROUP_ATTRS = { + "times_seen": 0, +} + +TRANSFER_GROUP_ATTRS = ("status", "short_id") + + def _generate_unprocessed_event_node_id(project_id, event_id): return hashlib.md5( u"{}:{}:unprocessed".format(project_id, event_id).encode("utf-8") @@ -101,7 +111,7 @@ def reprocess_event(project_id, event_id, start_time): cache_key = event_processing_store.store(data) # Step 2: Copy attachments into attachment cache - queryset = EventAttachment.objects.filter( + queryset = models.EventAttachment.objects.filter( project_id=project_id, event_id=orig_event_id ).select_related("file") @@ -204,14 +214,31 @@ def mark_event_reprocessed(data): def start_group_reprocessing(project_id, group_id, max_events=None): from sentry.models.group import Group, GroupStatus - from sentry.models.grouphash import GroupHash from django.db import transaction with transaction.atomic(): - Group.objects.filter(id=group_id).update(status=GroupStatus.REPROCESSING) - # Remove all grouphashes such that new events get sorted into a - # different group. 
- GroupHash.objects.filter(group_id=group_id).delete() + group = Group.objects.get(id=group_id) + transferred_group_attrs = {} + for attr in TRANSFER_GROUP_ATTRS: + transferred_group_attrs[attr] = getattr(group, attr) + + group.status = GroupStatus.REPROCESSING + group.short_id = None # satisfy unique constraint + group.save() + + group.pk = group.id = None + for attr in TRANSFER_GROUP_ATTRS: + setattr(group, attr, transferred_group_attrs[attr]) + + for attr in RESET_GROUP_ATTRS: + setattr(group, attr, RESET_GROUP_ATTRS[attr]) + + group.save() + new_group = group + del group + + for model in GROUP_MODELS_TO_MIGRATE: + model.objects.filter(group_id=group_id).update(group_id=new_group.id) # Get event counts of issue (for all environments etc). This was copypasted # and simplified from groupserializer. diff --git a/src/sentry/tasks/reprocessing2.py b/src/sentry/tasks/reprocessing2.py index ce3348eb64a8b2..1633732850fda7 100644 --- a/src/sentry/tasks/reprocessing2.py +++ b/src/sentry/tasks/reprocessing2.py @@ -18,10 +18,9 @@ def reprocess_group(project_id, group_id, offset=0, start_time=None, max_events=None): from sentry.reprocessing2 import start_group_reprocessing - start_group_reprocessing(project_id, group_id, max_events=max_events) - if start_time is None: start_time = time.time() + start_group_reprocessing(project_id, group_id, max_events=max_events) if max_events is not None and max_events <= 0: events = [] From 2df67882d0ca24817199a7b611df1ab8000e6342 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Tue, 27 Oct 2020 18:09:15 +0100 Subject: [PATCH 02/11] add extra assertion and take chances with deadlocks again --- tests/sentry/tasks/test_reprocessing2.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/sentry/tasks/test_reprocessing2.py b/tests/sentry/tasks/test_reprocessing2.py index d0c69dcc57dcc1..e61f39c74ade45 100644 --- a/tests/sentry/tasks/test_reprocessing2.py +++ b/tests/sentry/tasks/test_reprocessing2.py @@ -59,7 +59,6 @@ def is_enabled(self, project=None): @pytest.mark.django_db @pytest.mark.snuba -@pytest.mark.skip(reason="Some of these tests deadlock on CI") @pytest.mark.parametrize("change_groups", (True, False), ids=("new_group", "same_group")) def test_basic( task_runner, @@ -138,7 +137,6 @@ def get_event_by_processing_counter(n): @pytest.mark.django_db @pytest.mark.snuba -@pytest.mark.skip(reason="Some of these tests deadlock on CI") def test_concurrent_events_go_into_new_group( default_project, reset_snuba, register_event_preprocessor, process_and_save, burst_task_runner ): @@ -152,6 +150,7 @@ def event_preprocessor(data): event_id = process_and_save({"message": "hello world"}) event = eventstore.get_event_by_id(default_project.id, event_id) + original_short_id = event.group.short_id with burst_task_runner() as burst_reprocess: reprocess_group(default_project.id, event.group_id) @@ -177,10 +176,11 @@ def event_preprocessor(data): assert event2.group_id == event3.group_id assert event.get_hashes() == event2.get_hashes() == event3.get_hashes() + assert event3.group.short_id == original_short_id + @pytest.mark.django_db @pytest.mark.snuba -@pytest.mark.skip(reason="Some of these tests deadlock on CI") def test_max_events( default_project, reset_snuba, From 3c76f09639448f9a5f7aa4a01551bb529c4d1fb8 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Tue, 27 Oct 2020 18:15:06 +0100 Subject: [PATCH 03/11] some more assertions --- tests/sentry/tasks/test_reprocessing2.py | 25 ++++++++++++++++++++++-- 1 file changed, 23 
insertions(+), 2 deletions(-) diff --git a/tests/sentry/tasks/test_reprocessing2.py b/tests/sentry/tasks/test_reprocessing2.py index e61f39c74ade45..310efc02b0686d 100644 --- a/tests/sentry/tasks/test_reprocessing2.py +++ b/tests/sentry/tasks/test_reprocessing2.py @@ -6,6 +6,7 @@ from sentry import eventstore from sentry.models.group import Group +from sentry.models import GroupAssignee from sentry.event_manager import EventManager from sentry.eventstore.processing import event_processing_store from sentry.plugins.base.v2 import Plugin2 @@ -138,8 +139,19 @@ def get_event_by_processing_counter(n): @pytest.mark.django_db @pytest.mark.snuba def test_concurrent_events_go_into_new_group( - default_project, reset_snuba, register_event_preprocessor, process_and_save, burst_task_runner + default_project, + reset_snuba, + register_event_preprocessor, + process_and_save, + burst_task_runner, + default_user, ): + """ + Assert that both unmodified and concurrently inserted events go into "the + new group", i.e. the successor of the reprocessed (old) group that + inherited the group hashes. + """ + @register_event_preprocessor def event_preprocessor(data): extra = data.setdefault("extra", {}) @@ -151,6 +163,12 @@ def event_preprocessor(data): event = eventstore.get_event_by_id(default_project.id, event_id) original_short_id = event.group.short_id + assert original_short_id + original_group_id = event.group.id + + original_assignee = GroupAssignee.objects.create( + group_id=original_group_id, project=default_project, user=default_user + ) with burst_task_runner() as burst_reprocess: reprocess_group(default_project.id, event.group_id) @@ -176,7 +194,10 @@ def event_preprocessor(data): assert event2.group_id == event3.group_id assert event.get_hashes() == event2.get_hashes() == event3.get_hashes() - assert event3.group.short_id == original_short_id + group = event3.group + + assert group.short_id == original_short_id + assert GroupAssignee.objects.get(group=group) == original_assignee @pytest.mark.django_db From 24de9cf3187895e8cc505bcca135caf48571f8f5 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Tue, 27 Oct 2020 18:37:58 +0100 Subject: [PATCH 04/11] add external issue model and add group redirect --- src/sentry/reprocessing2.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/src/sentry/reprocessing2.py b/src/sentry/reprocessing2.py index 77facd64257a6a..bec9350534f68c 100644 --- a/src/sentry/reprocessing2.py +++ b/src/sentry/reprocessing2.py @@ -21,7 +21,7 @@ _REDIS_SYNC_TTL = 3600 -GROUP_MODELS_TO_MIGRATE = GROUP_RELATED_MODELS + (models.Activity,) +GROUP_MODELS_TO_MIGRATE = GROUP_RELATED_MODELS + (models.Activity, models.ExternalIssue,) RESET_GROUP_ATTRS = { "times_seen": 0, @@ -213,16 +213,15 @@ def mark_event_reprocessed(data): def start_group_reprocessing(project_id, group_id, max_events=None): - from sentry.models.group import Group, GroupStatus from django.db import transaction with transaction.atomic(): - group = Group.objects.get(id=group_id) + group = models.Group.objects.get(id=group_id) transferred_group_attrs = {} for attr in TRANSFER_GROUP_ATTRS: transferred_group_attrs[attr] = getattr(group, attr) - group.status = GroupStatus.REPROCESSING + group.status = models.GroupStatus.REPROCESSING group.short_id = None # satisfy unique constraint group.save() @@ -240,6 +239,10 @@ def start_group_reprocessing(project_id, group_id, max_events=None): for model in GROUP_MODELS_TO_MIGRATE: model.objects.filter(group_id=group_id).update(group_id=new_group.id) + 
models.GroupRedirect.objects.create( + organization_id=new_group.organization_id, group_id=new_group.id, previous_group_id=group_id + ) + # Get event counts of issue (for all environments etc). This was copypasted # and simplified from groupserializer. event_count = snuba.aliased_query( From 8f270816a1add2affd7dcf55b5c55205394709d7 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Tue, 27 Oct 2020 18:52:53 +0100 Subject: [PATCH 05/11] add reprocessing activity --- src/sentry/api/endpoints/group_reprocessing.py | 7 ++++++- src/sentry/models/activity.py | 3 +++ src/sentry/reprocessing2.py | 17 ++++++++++++++--- src/sentry/tasks/reprocessing2.py | 4 +++- tests/sentry/tasks/test_reprocessing2.py | 6 ++++-- 5 files changed, 30 insertions(+), 7 deletions(-) diff --git a/src/sentry/api/endpoints/group_reprocessing.py b/src/sentry/api/endpoints/group_reprocessing.py index 4e85b5d8bc9c60..99d85af10f83bc 100644 --- a/src/sentry/api/endpoints/group_reprocessing.py +++ b/src/sentry/api/endpoints/group_reprocessing.py @@ -34,5 +34,10 @@ def post(self, request, group): else: max_events = None - reprocess_group.delay(project_id=group.project_id, group_id=group.id, max_events=max_events) + reprocess_group.delay( + project_id=group.project_id, + group_id=group.id, + max_events=max_events, + acting_user_id=request.user.id, + ) return self.respond(status=200) diff --git a/src/sentry/models/activity.py b/src/sentry/models/activity.py index 380d1f63acc3f2..934a519679c2ee 100644 --- a/src/sentry/models/activity.py +++ b/src/sentry/models/activity.py @@ -40,6 +40,7 @@ class Activity(Model): UNMERGE_SOURCE = 19 UNMERGE_DESTINATION = 20 SET_RESOLVED_IN_PULL_REQUEST = 21 + REPROCESS = 22 TYPE = ( # (TYPE, verb-slug) @@ -64,6 +65,8 @@ class Activity(Model): (NEW_PROCESSING_ISSUES, u"new_processing_issues"), (UNMERGE_SOURCE, u"unmerge_source"), (UNMERGE_DESTINATION, u"unmerge_destination"), + # The user has reprocessed the group, so events may have moved to new groups + (REPROCESS, u"reprocess"), ) project = FlexibleForeignKey("sentry.Project") diff --git a/src/sentry/reprocessing2.py b/src/sentry/reprocessing2.py index bec9350534f68c..743a4b8ae703a2 100644 --- a/src/sentry/reprocessing2.py +++ b/src/sentry/reprocessing2.py @@ -4,6 +4,7 @@ import hashlib import logging import sentry_sdk +import six from django.conf import settings @@ -21,7 +22,7 @@ _REDIS_SYNC_TTL = 3600 -GROUP_MODELS_TO_MIGRATE = GROUP_RELATED_MODELS + (models.Activity, models.ExternalIssue,) +GROUP_MODELS_TO_MIGRATE = GROUP_RELATED_MODELS + (models.Activity,) RESET_GROUP_ATTRS = { "times_seen": 0, @@ -212,7 +213,7 @@ def mark_event_reprocessed(data): _get_sync_redis_client().decr(key) -def start_group_reprocessing(project_id, group_id, max_events=None): +def start_group_reprocessing(project_id, group_id, max_events=None, acting_user_id=None): from django.db import transaction with transaction.atomic(): @@ -240,7 +241,17 @@ def start_group_reprocessing(project_id, group_id, max_events=None): model.objects.filter(group_id=group_id).update(group_id=new_group.id) models.GroupRedirect.objects.create( - organization_id=new_group.organization_id, group_id=new_group.id, previous_group_id=group_id + organization_id=new_group.project.organization_id, + group_id=new_group.id, + previous_group_id=group_id, + ) + + models.Activity.objects.create( + type=models.Activity.REPROCESS, + project=new_group.project, + ident=six.text_type(group_id), + group=new_group, + user_id=acting_user_id, ) # Get event counts of issue (for all environments etc). 
This was copypasted diff --git a/src/sentry/tasks/reprocessing2.py b/src/sentry/tasks/reprocessing2.py index 1633732850fda7..d0d823090eda7f 100644 --- a/src/sentry/tasks/reprocessing2.py +++ b/src/sentry/tasks/reprocessing2.py @@ -15,7 +15,9 @@ time_limit=120, soft_time_limit=110, ) -def reprocess_group(project_id, group_id, offset=0, start_time=None, max_events=None): +def reprocess_group( + project_id, group_id, offset=0, start_time=None, max_events=None, acting_user_id=None +): from sentry.reprocessing2 import start_group_reprocessing if start_time is None: diff --git a/tests/sentry/tasks/test_reprocessing2.py b/tests/sentry/tasks/test_reprocessing2.py index 310efc02b0686d..949fa92444734c 100644 --- a/tests/sentry/tasks/test_reprocessing2.py +++ b/tests/sentry/tasks/test_reprocessing2.py @@ -3,10 +3,10 @@ from time import time import pytest import uuid +import six from sentry import eventstore -from sentry.models.group import Group -from sentry.models import GroupAssignee +from sentry.models import Group, GroupAssignee, Activity from sentry.event_manager import EventManager from sentry.eventstore.processing import event_processing_store from sentry.plugins.base.v2 import Plugin2 @@ -198,6 +198,8 @@ def event_preprocessor(data): assert group.short_id == original_short_id assert GroupAssignee.objects.get(group=group) == original_assignee + activity = Activity.objects.get(group=group, type=Activity.REPROCESS) + assert activity.ident == six.text_type(original_group_id) @pytest.mark.django_db From d96205d2ed22f4057c96760fc9f671b4603d1a8f Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Tue, 27 Oct 2020 19:07:56 +0100 Subject: [PATCH 06/11] fix up reprocessing activity --- .../app/views/organizationGroupDetails/groupActivityItem.jsx | 5 +++++ src/sentry/tasks/reprocessing2.py | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/sentry/static/sentry/app/views/organizationGroupDetails/groupActivityItem.jsx b/src/sentry/static/sentry/app/views/organizationGroupDetails/groupActivityItem.jsx index 56ba85bc42ce0c..97b0701df0aff4 100644 --- a/src/sentry/static/sentry/app/views/organizationGroupDetails/groupActivityItem.jsx +++ b/src/sentry/static/sentry/app/views/organizationGroupDetails/groupActivityItem.jsx @@ -181,6 +181,11 @@ class GroupActivityItem extends React.Component { data.issues.length, author ); + case 'reprocess': + return t( + '%(author)s reprocessed this issue, some events may have moved into new issues', + {author} + ); default: return ''; // should never hit (?) 
} diff --git a/src/sentry/tasks/reprocessing2.py b/src/sentry/tasks/reprocessing2.py index d0d823090eda7f..cd1f0d1b899fb6 100644 --- a/src/sentry/tasks/reprocessing2.py +++ b/src/sentry/tasks/reprocessing2.py @@ -22,7 +22,9 @@ def reprocess_group( if start_time is None: start_time = time.time() - start_group_reprocessing(project_id, group_id, max_events=max_events) + start_group_reprocessing( + project_id, group_id, max_events=max_events, acting_user_id=acting_user_id + ) if max_events is not None and max_events <= 0: events = [] From 877408cd9c50ea03772f3ca3f16183d827570a25 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Tue, 27 Oct 2020 19:48:52 +0100 Subject: [PATCH 07/11] add migration --- migrations_lockfile.txt | 2 +- .../migrations/0117_dummy-activityupdate.py | 38 +++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) create mode 100644 src/sentry/migrations/0117_dummy-activityupdate.py diff --git a/migrations_lockfile.txt b/migrations_lockfile.txt index ca254f58158df7..e277c24579303f 100644 --- a/migrations_lockfile.txt +++ b/migrations_lockfile.txt @@ -10,7 +10,7 @@ auth: 0008_alter_user_username_max_length contenttypes: 0002_remove_content_type_name jira_ac: 0001_initial nodestore: 0001_initial -sentry: 0116_backfill_debug_file_checksum +sentry: 0117_dummy-activityupdate sessions: 0001_initial sites: 0002_alter_domain_unique social_auth: 0001_initial diff --git a/src/sentry/migrations/0117_dummy-activityupdate.py b/src/sentry/migrations/0117_dummy-activityupdate.py new file mode 100644 index 00000000000000..03933f6503fa60 --- /dev/null +++ b/src/sentry/migrations/0117_dummy-activityupdate.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.28 on 2020-10-27 18:48 +from __future__ import unicode_literals + +from django.db import migrations +import sentry.db.models.fields.bounded + + +class Migration(migrations.Migration): + # This flag is used to mark that a migration shouldn't be automatically run in + # production. We set this to True for operations that we think are risky and want + # someone from ops to run manually and monitor. + # General advice is that if in doubt, mark your migration as `is_dangerous`. + # Some things you should always mark as dangerous: + # - Large data migrations. Typically we want these to be run manually by ops so that + # they can be monitored. Since data migrations will now hold a transaction open + # this is even more important. + # - Adding columns to highly active tables, even ones that are NULL. + is_dangerous = False + + # This flag is used to decide whether to run this migration in a transaction or not. + # By default we prefer to run in a transaction, but for migrations where you want + # to `CREATE INDEX CONCURRENTLY` this needs to be set to False. Typically you'll + # want to create an index concurrently when adding one to an existing table. 
+ atomic = True + + + dependencies = [ + ('sentry', '0116_backfill_debug_file_checksum'), + ] + + operations = [ + migrations.AlterField( + model_name='activity', + name='type', + field=sentry.db.models.fields.bounded.BoundedPositiveIntegerField(choices=[(1, 'set_resolved'), (15, 'set_resolved_by_age'), (13, 'set_resolved_in_release'), (16, 'set_resolved_in_commit'), (21, 'set_resolved_in_pull_request'), (2, 'set_unresolved'), (3, 'set_ignored'), (4, 'set_public'), (5, 'set_private'), (6, 'set_regression'), (7, 'create_issue'), (8, 'note'), (9, 'first_seen'), (10, 'release'), (11, 'assigned'), (12, 'unassigned'), (14, 'merge'), (17, 'deploy'), (18, 'new_processing_issues'), (19, 'unmerge_source'), (20, 'unmerge_destination'), (22, 'reprocess')]), + ), + ] From 819ac425fcc2e1acd12ecf338b610c9e362dfd36 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Tue, 27 Oct 2020 20:11:52 +0100 Subject: [PATCH 08/11] literally empty migration --- src/sentry/migrations/0117_dummy-activityupdate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/migrations/0117_dummy-activityupdate.py b/src/sentry/migrations/0117_dummy-activityupdate.py index 03933f6503fa60..90ab6ab16883be 100644 --- a/src/sentry/migrations/0117_dummy-activityupdate.py +++ b/src/sentry/migrations/0117_dummy-activityupdate.py @@ -22,7 +22,7 @@ class Migration(migrations.Migration): # By default we prefer to run in a transaction, but for migrations where you want # to `CREATE INDEX CONCURRENTLY` this needs to be set to False. Typically you'll # want to create an index concurrently when adding one to an existing table. - atomic = True + atomic = False dependencies = [ From 5b1c8c00c653da52f6bc82d8ba3335c69e2b82d4 Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Wed, 28 Oct 2020 12:03:31 +0100 Subject: [PATCH 09/11] apply review feedback --- src/sentry/reprocessing2.py | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) diff --git a/src/sentry/reprocessing2.py b/src/sentry/reprocessing2.py index 743a4b8ae703a2..ed15480c339f87 100644 --- a/src/sentry/reprocessing2.py +++ b/src/sentry/reprocessing2.py @@ -24,12 +24,6 @@ GROUP_MODELS_TO_MIGRATE = GROUP_RELATED_MODELS + (models.Activity,) -RESET_GROUP_ATTRS = { - "times_seen": 0, -} - -TRANSFER_GROUP_ATTRS = ("status", "short_id") - def _generate_unprocessed_event_node_id(project_id, event_id): return hashlib.md5( @@ -218,24 +212,23 @@ def start_group_reprocessing(project_id, group_id, max_events=None, acting_user_ with transaction.atomic(): group = models.Group.objects.get(id=group_id) - transferred_group_attrs = {} - for attr in TRANSFER_GROUP_ATTRS: - transferred_group_attrs[attr] = getattr(group, attr) - + original_status = group.status + original_short_id = group.short_id group.status = models.GroupStatus.REPROCESSING - group.short_id = None # satisfy unique constraint + # satisfy unique constraint of (project_id, short_id) + group.short_id = None group.save() + # Create a duplicate row that has the same attributes by nulling out + # the primary key and saving group.pk = group.id = None - for attr in TRANSFER_GROUP_ATTRS: - setattr(group, attr, transferred_group_attrs[attr]) - - for attr in RESET_GROUP_ATTRS: - setattr(group, attr, RESET_GROUP_ATTRS[attr]) - - group.save() - new_group = group + new_group = group # rename variable just to avoid confusion del group + new_group.status = original_status + new_group.short_id = original_short_id + # this will be incremented by the events that are reprocessed + 
new_group.times_seen = 0 + new_group.save() for model in GROUP_MODELS_TO_MIGRATE: model.objects.filter(group_id=group_id).update(group_id=new_group.id) From 88b1311fdb76d7d027701eeac15f150b54d4b2ea Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Wed, 28 Oct 2020 12:05:12 +0100 Subject: [PATCH 10/11] safeguard access to request.user --- src/sentry/api/endpoints/group_reprocessing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/sentry/api/endpoints/group_reprocessing.py b/src/sentry/api/endpoints/group_reprocessing.py index 99d85af10f83bc..8034bd2f656888 100644 --- a/src/sentry/api/endpoints/group_reprocessing.py +++ b/src/sentry/api/endpoints/group_reprocessing.py @@ -38,6 +38,6 @@ def post(self, request, group): project_id=group.project_id, group_id=group.id, max_events=max_events, - acting_user_id=request.user.id, + acting_user_id=getattr(request.user, "id", None), ) return self.respond(status=200) From 3cbbfda81686959f24bbf1444d76520c39e8522b Mon Sep 17 00:00:00 2001 From: Markus Unterwaditzer Date: Wed, 28 Oct 2020 12:12:17 +0100 Subject: [PATCH 11/11] add extra comment --- src/sentry/reprocessing2.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/sentry/reprocessing2.py b/src/sentry/reprocessing2.py index ed15480c339f87..6d37bc6b217118 100644 --- a/src/sentry/reprocessing2.py +++ b/src/sentry/reprocessing2.py @@ -216,6 +216,8 @@ def start_group_reprocessing(project_id, group_id, max_events=None, acting_user_ original_short_id = group.short_id group.status = models.GroupStatus.REPROCESSING # satisfy unique constraint of (project_id, short_id) + # we manually tested that multiple groups with (project_id=1, + # short_id=null) can exist in postgres group.short_id = None group.save()
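
Net effect of the series on start_group_reprocessing: the function parks the old group under GroupStatus.REPROCESSING, clones its row by nulling the primary key (a standard Django idiom: saving a model instance with pk set to None performs an INSERT of a duplicate row), re-points all group-scoped rows at the clone, and records a GroupRedirect plus a REPROCESS activity. The sketch below is a condensed reconstruction assembled from patches 01, 04, 05, 09 and 11 above, not the verbatim upstream function; the snuba event-count query and max_events handling that follow in the real function are omitted, and the final return value is not shown here.

    from django.db import transaction
    import six

    from sentry import models
    from sentry.deletions.defaults.group import GROUP_RELATED_MODELS

    GROUP_MODELS_TO_MIGRATE = GROUP_RELATED_MODELS + (models.Activity,)


    def start_group_reprocessing(project_id, group_id, max_events=None, acting_user_id=None):
        with transaction.atomic():
            group = models.Group.objects.get(id=group_id)
            original_status = group.status
            original_short_id = group.short_id

            # Park the old group and free its short_id so the clone can reuse it
            # without violating the (project_id, short_id) unique constraint.
            group.status = models.GroupStatus.REPROCESSING
            group.short_id = None
            group.save()

            # Nulling the primary key and saving inserts a duplicate row.
            group.pk = group.id = None
            new_group = group
            new_group.status = original_status
            new_group.short_id = original_short_id
            new_group.times_seen = 0  # incremented again as events are reprocessed
            new_group.save()

            # Move all group-scoped rows (assignees, bookmarks, activity, ...)
            # over to the clone in bulk.
            for model in GROUP_MODELS_TO_MIGRATE:
                model.objects.filter(group_id=group_id).update(group_id=new_group.id)

            # Old permalinks to the reprocessed group keep resolving via the redirect.
            models.GroupRedirect.objects.create(
                organization_id=new_group.project.organization_id,
                group_id=new_group.id,
                previous_group_id=group_id,
            )

            # Surface the reprocessing in the successor group's activity stream.
            models.Activity.objects.create(
                type=models.Activity.REPROCESS,
                project=new_group.project,
                ident=six.text_type(group_id),
                group=new_group,
                user_id=acting_user_id,
            )

        # (the real function continues with the snuba event-count query here)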