diff --git a/migrations_lockfile.txt b/migrations_lockfile.txt
index ca254f58158df7..e277c24579303f 100644
--- a/migrations_lockfile.txt
+++ b/migrations_lockfile.txt
@@ -10,7 +10,7 @@ auth: 0008_alter_user_username_max_length
 contenttypes: 0002_remove_content_type_name
 jira_ac: 0001_initial
 nodestore: 0001_initial
-sentry: 0116_backfill_debug_file_checksum
+sentry: 0117_dummy-activityupdate
 sessions: 0001_initial
 sites: 0002_alter_domain_unique
 social_auth: 0001_initial
diff --git a/src/sentry/api/endpoints/group_reprocessing.py b/src/sentry/api/endpoints/group_reprocessing.py
index 4e85b5d8bc9c60..8034bd2f656888 100644
--- a/src/sentry/api/endpoints/group_reprocessing.py
+++ b/src/sentry/api/endpoints/group_reprocessing.py
@@ -34,5 +34,10 @@ def post(self, request, group):
         else:
             max_events = None
 
-        reprocess_group.delay(project_id=group.project_id, group_id=group.id, max_events=max_events)
+        reprocess_group.delay(
+            project_id=group.project_id,
+            group_id=group.id,
+            max_events=max_events,
+            acting_user_id=getattr(request.user, "id", None),
+        )
         return self.respond(status=200)
diff --git a/src/sentry/deletions/defaults/group.py b/src/sentry/deletions/defaults/group.py
index d9e6479b06dbd9..d689733444fc26 100644
--- a/src/sentry/deletions/defaults/group.py
+++ b/src/sentry/deletions/defaults/group.py
@@ -3,12 +3,34 @@ import os
 
 from sentry import eventstore, nodestore
 from sentry.eventstore.models import Event
-from sentry.models import EventAttachment, UserReport
-from sentry.reprocessing2 import delete_unprocessed_events
+from sentry import models
 
 from ..base import BaseDeletionTask, BaseRelation, ModelDeletionTask, ModelRelation
 
 
+GROUP_RELATED_MODELS = (
+    # prioritize GroupHash
+    models.GroupHash,
+    models.GroupAssignee,
+    models.GroupCommitResolution,
+    models.GroupLink,
+    models.GroupBookmark,
+    models.GroupMeta,
+    models.GroupEnvironment,
+    models.GroupRelease,
+    models.GroupRedirect,
+    models.GroupResolution,
+    models.GroupRuleStatus,
+    models.GroupSeen,
+    models.GroupShare,
+    models.GroupSnooze,
+    models.GroupEmailThread,
+    models.GroupSubscription,
+    models.UserReport,
+    models.EventAttachment,
+)
+
+
 class EventDataDeletionTask(BaseDeletionTask):
     """
     Deletes nodestore data, EventAttachment and UserReports for group
@@ -53,46 +75,37 @@ def chunk(self):
         node_ids = [Event.generate_node_id(self.project_id, event.event_id) for event in events]
         nodestore.delete_multi(node_ids)
 
+        # Imported here rather than at module scope because reprocessing2 now
+        # imports GROUP_RELATED_MODELS from this module.
+        from sentry.reprocessing2 import delete_unprocessed_events
+
         delete_unprocessed_events(events)
 
-        # Remove EventAttachment and UserReport
+        # Remove EventAttachment and UserReport *again* as those may not have a
+        # group ID, therefore there may be dangling ones after "regular" model
+        # deletion.
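+        # For the same reason the deletes below are keyed on
+        # (event_id, project_id) rather than on group_id.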
         event_ids = [event.event_id for event in events]
-        EventAttachment.objects.filter(event_id__in=event_ids, project_id=self.project_id).delete()
-        UserReport.objects.filter(event_id__in=event_ids, project_id=self.project_id).delete()
+        models.EventAttachment.objects.filter(
+            event_id__in=event_ids, project_id=self.project_id
+        ).delete()
+        models.UserReport.objects.filter(
+            event_id__in=event_ids, project_id=self.project_id
+        ).delete()
 
         return True
 
 
 class GroupDeletionTask(ModelDeletionTask):
     def get_child_relations(self, instance):
-        from sentry import models
 
         relations = []
 
-        model_list = (
-            # prioritize GroupHash
-            models.GroupHash,
-            models.GroupAssignee,
-            models.GroupCommitResolution,
-            models.GroupLink,
-            models.GroupBookmark,
-            models.GroupMeta,
-            models.GroupEnvironment,
-            models.GroupRelease,
-            models.GroupRedirect,
-            models.GroupResolution,
-            models.GroupRuleStatus,
-            models.GroupSeen,
-            models.GroupShare,
-            models.GroupSnooze,
-            models.GroupEmailThread,
-            models.GroupSubscription,
-            models.UserReport,
-            models.EventAttachment,
+        relations.extend(
+            [ModelRelation(m, {"group_id": instance.id}) for m in GROUP_RELATED_MODELS]
         )
-        relations.extend([ModelRelation(m, {"group_id": instance.id}) for m in model_list])
-
         # Skip EventDataDeletionTask if this is being called from cleanup.py
         if not os.environ.get("_SENTRY_CLEANUP"):
             relations.extend(
diff --git a/src/sentry/migrations/0117_dummy-activityupdate.py b/src/sentry/migrations/0117_dummy-activityupdate.py
new file mode 100644
index 00000000000000..90ab6ab16883be
--- /dev/null
+++ b/src/sentry/migrations/0117_dummy-activityupdate.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+# Generated by Django 1.11.28 on 2020-10-27 18:48
+from __future__ import unicode_literals
+
+from django.db import migrations
+import sentry.db.models.fields.bounded
+
+
+class Migration(migrations.Migration):
+    # This flag is used to mark that a migration shouldn't be automatically run in
+    # production. We set this to True for operations that we think are risky and want
+    # someone from ops to run manually and monitor.
+    # General advice is that if in doubt, mark your migration as `is_dangerous`.
+    # Some things you should always mark as dangerous:
+    # - Large data migrations. Typically we want these to be run manually by ops so that
+    #   they can be monitored. Since data migrations will now hold a transaction open
+    #   this is even more important.
+    # - Adding columns to highly active tables, even ones that are NULL.
+    is_dangerous = False
+
+    # This flag is used to decide whether to run this migration in a transaction or not.
+    # By default we prefer to run in a transaction, but for migrations where you want
+    # to `CREATE INDEX CONCURRENTLY` this needs to be set to False. Typically you'll
+    # want to create an index concurrently when adding one to an existing table.
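+    # In this case the AlterField below only changes `choices`, which Django
+    # enforces in the application rather than in the database, so the
+    # migration emits no SQL against the activity table.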
+    atomic = False
+
+    dependencies = [
+        ('sentry', '0116_backfill_debug_file_checksum'),
+    ]
+
+    operations = [
+        migrations.AlterField(
+            model_name='activity',
+            name='type',
+            field=sentry.db.models.fields.bounded.BoundedPositiveIntegerField(choices=[(1, 'set_resolved'), (15, 'set_resolved_by_age'), (13, 'set_resolved_in_release'), (16, 'set_resolved_in_commit'), (21, 'set_resolved_in_pull_request'), (2, 'set_unresolved'), (3, 'set_ignored'), (4, 'set_public'), (5, 'set_private'), (6, 'set_regression'), (7, 'create_issue'), (8, 'note'), (9, 'first_seen'), (10, 'release'), (11, 'assigned'), (12, 'unassigned'), (14, 'merge'), (17, 'deploy'), (18, 'new_processing_issues'), (19, 'unmerge_source'), (20, 'unmerge_destination'), (22, 'reprocess')]),
+        ),
+    ]
diff --git a/src/sentry/models/activity.py b/src/sentry/models/activity.py
index 380d1f63acc3f2..934a519679c2ee 100644
--- a/src/sentry/models/activity.py
+++ b/src/sentry/models/activity.py
@@ -40,6 +40,7 @@ class Activity(Model):
     UNMERGE_SOURCE = 19
     UNMERGE_DESTINATION = 20
     SET_RESOLVED_IN_PULL_REQUEST = 21
+    REPROCESS = 22
 
     TYPE = (
         # (TYPE, verb-slug)
@@ -64,6 +65,8 @@ class Activity(Model):
         (NEW_PROCESSING_ISSUES, u"new_processing_issues"),
         (UNMERGE_SOURCE, u"unmerge_source"),
         (UNMERGE_DESTINATION, u"unmerge_destination"),
+        # The user has reprocessed the group, so events may have moved to new groups
+        (REPROCESS, u"reprocess"),
     )
 
     project = FlexibleForeignKey("sentry.Project")
diff --git a/src/sentry/reprocessing2.py b/src/sentry/reprocessing2.py
index 83b4e711e27d60..6d37bc6b217118 100644
--- a/src/sentry/reprocessing2.py
+++ b/src/sentry/reprocessing2.py
@@ -4,22 +4,27 @@
 import hashlib
 import logging
 
 import sentry_sdk
+import six
 
 from django.conf import settings
 
 from sentry import nodestore, features, eventstore
 from sentry.attachments import CachedAttachment, attachment_cache
-from sentry.models import EventAttachment
+from sentry import models
 from sentry.utils import snuba
 from sentry.utils.cache import cache_key_for_event
 from sentry.utils.redis import redis_clusters
 from sentry.eventstore.processing import event_processing_store
+from sentry.deletions.defaults.group import GROUP_RELATED_MODELS
 
 logger = logging.getLogger("sentry.reprocessing")
 
 _REDIS_SYNC_TTL = 3600
 
+GROUP_MODELS_TO_MIGRATE = GROUP_RELATED_MODELS + (models.Activity,)
+
+
 def _generate_unprocessed_event_node_id(project_id, event_id):
     return hashlib.md5(
         u"{}:{}:unprocessed".format(project_id, event_id).encode("utf-8")
@@ -101,7 +106,7 @@ def reprocess_event(project_id, event_id, start_time):
     cache_key = event_processing_store.store(data)
 
     # Step 2: Copy attachments into attachment cache
-    queryset = EventAttachment.objects.filter(
+    queryset = models.EventAttachment.objects.filter(
         project_id=project_id, event_id=orig_event_id
     ).select_related("file")
 
@@ -202,16 +207,51 @@ def mark_event_reprocessed(data):
         _get_sync_redis_client().decr(key)
 
 
-def start_group_reprocessing(project_id, group_id, max_events=None):
-    from sentry.models.group import Group, GroupStatus
-    from sentry.models.grouphash import GroupHash
+def start_group_reprocessing(project_id, group_id, max_events=None, acting_user_id=None):
    from django.db import transaction
 
     with transaction.atomic():
-        Group.objects.filter(id=group_id).update(status=GroupStatus.REPROCESSING)
-        # Remove all grouphashes such that new events get sorted into a
-        # different group.
-        GroupHash.objects.filter(group_id=group_id).delete()
+        # Instead of deleting the group hashes, move them (and every other
+        # group-scoped model) onto a fresh Group row below, so that both
+        # reprocessed and concurrently arriving events land in the successor
+        # group while the old row is kept around for the redirect.
+        group = models.Group.objects.get(id=group_id)
+        original_status = group.status
+        original_short_id = group.short_id
+        group.status = models.GroupStatus.REPROCESSING
+        # satisfy unique constraint of (project_id, short_id)
+        # we manually tested that multiple groups with (project_id=1,
+        # short_id=null) can exist in postgres
+        group.short_id = None
+        group.save()
+
+        # Create a duplicate row that has the same attributes by nulling out
+        # the primary key and saving
+        group.pk = group.id = None
+        new_group = group  # rename variable just to avoid confusion
+        del group
+        new_group.status = original_status
+        new_group.short_id = original_short_id
+        # this will be incremented by the events that are reprocessed
+        new_group.times_seen = 0
+        new_group.save()
+
+        for model in GROUP_MODELS_TO_MIGRATE:
+            model.objects.filter(group_id=group_id).update(group_id=new_group.id)
+
+        models.GroupRedirect.objects.create(
+            organization_id=new_group.project.organization_id,
+            group_id=new_group.id,
+            previous_group_id=group_id,
+        )
+
+        models.Activity.objects.create(
+            type=models.Activity.REPROCESS,
+            project=new_group.project,
+            ident=six.text_type(group_id),
+            group=new_group,
+            user_id=acting_user_id,
+        )
 
     # Get event counts of issue (for all environments etc). This was copypasted
     # and simplified from groupserializer.
diff --git a/src/sentry/static/sentry/app/views/organizationGroupDetails/groupActivityItem.jsx b/src/sentry/static/sentry/app/views/organizationGroupDetails/groupActivityItem.jsx
index 56ba85bc42ce0c..97b0701df0aff4 100644
--- a/src/sentry/static/sentry/app/views/organizationGroupDetails/groupActivityItem.jsx
+++ b/src/sentry/static/sentry/app/views/organizationGroupDetails/groupActivityItem.jsx
@@ -181,6 +181,13 @@ class GroupActivityItem extends React.Component {
           data.issues.length,
           author
         );
+      case 'reprocess':
+        return t(
+          '%(author)s reprocessed this issue, some events may have moved into new issues',
+          {author}
+        );
       default:
         return ''; // should never hit (?)
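+      // Activity types without a renderer above fall back to an empty
+      // string, so an unknown type never breaks the activity feed.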
     }
diff --git a/src/sentry/tasks/reprocessing2.py b/src/sentry/tasks/reprocessing2.py
index ce3348eb64a8b2..cd1f0d1b899fb6 100644
--- a/src/sentry/tasks/reprocessing2.py
+++ b/src/sentry/tasks/reprocessing2.py
@@ -15,13 +15,16 @@
     time_limit=120,
     soft_time_limit=110,
 )
-def reprocess_group(project_id, group_id, offset=0, start_time=None, max_events=None):
+def reprocess_group(
+    project_id, group_id, offset=0, start_time=None, max_events=None, acting_user_id=None
+):
     from sentry.reprocessing2 import start_group_reprocessing
 
-    start_group_reprocessing(project_id, group_id, max_events=max_events)
-
     if start_time is None:
         start_time = time.time()
 
+    start_group_reprocessing(
+        project_id, group_id, max_events=max_events, acting_user_id=acting_user_id
+    )
     if max_events is not None and max_events <= 0:
         events = []
diff --git a/tests/sentry/tasks/test_reprocessing2.py b/tests/sentry/tasks/test_reprocessing2.py
index d0c69dcc57dcc1..949fa92444734c 100644
--- a/tests/sentry/tasks/test_reprocessing2.py
+++ b/tests/sentry/tasks/test_reprocessing2.py
@@ -3,9 +3,10 @@ from time import time
 
 import pytest
 import uuid
+import six
 
 from sentry import eventstore
-from sentry.models.group import Group
+from sentry.models import Group, GroupAssignee, Activity
 from sentry.event_manager import EventManager
 from sentry.eventstore.processing import event_processing_store
 from sentry.plugins.base.v2 import Plugin2
@@ -59,7 +60,6 @@ def is_enabled(self, project=None):
 
 @pytest.mark.django_db
 @pytest.mark.snuba
-@pytest.mark.skip(reason="Some of these tests deadlock on CI")
 @pytest.mark.parametrize("change_groups", (True, False), ids=("new_group", "same_group"))
 def test_basic(
     task_runner,
@@ -138,10 +138,20 @@ def get_event_by_processing_counter(n):
 
 @pytest.mark.django_db
 @pytest.mark.snuba
-@pytest.mark.skip(reason="Some of these tests deadlock on CI")
 def test_concurrent_events_go_into_new_group(
-    default_project, reset_snuba, register_event_preprocessor, process_and_save, burst_task_runner
+    default_project,
+    reset_snuba,
+    register_event_preprocessor,
+    process_and_save,
+    burst_task_runner,
+    default_user,
 ):
+    """
+    Assert that both unmodified and concurrently inserted events go into "the
+    new group", i.e. the successor of the reprocessed (old) group that
+    inherited the group hashes.
+    """
+
     @register_event_preprocessor
     def event_preprocessor(data):
         extra = data.setdefault("extra", {})
@@ -152,6 +162,13 @@ def event_preprocessor(data):
 
     event_id = process_and_save({"message": "hello world"})
     event = eventstore.get_event_by_id(default_project.id, event_id)
+    original_short_id = event.group.short_id
+    assert original_short_id
+    original_group_id = event.group.id
+
+    original_assignee = GroupAssignee.objects.create(
+        group_id=original_group_id, project=default_project, user=default_user
+    )
 
     with burst_task_runner() as burst_reprocess:
         reprocess_group(default_project.id, event.group_id)
@@ -177,10 +194,16 @@ def event_preprocessor(data):
     assert event2.group_id == event3.group_id
     assert event.get_hashes() == event2.get_hashes() == event3.get_hashes()
 
+    group = event3.group
+
+    assert group.short_id == original_short_id
+    assert GroupAssignee.objects.get(group=group) == original_assignee
+    activity = Activity.objects.get(group=group, type=Activity.REPROCESS)
+    assert activity.ident == six.text_type(original_group_id)
+
 
 @pytest.mark.django_db
 @pytest.mark.snuba
-@pytest.mark.skip(reason="Some of these tests deadlock on CI")
 def test_max_events(
     default_project,
     reset_snuba,