diff --git a/src/sentry/event_manager.py b/src/sentry/event_manager.py index 3362dfd1d89e3a..2adda52c3f69b1 100644 --- a/src/sentry/event_manager.py +++ b/src/sentry/event_manager.py @@ -84,6 +84,11 @@ ENABLE_RUST = os.environ.get("SENTRY_USE_RUST_NORMALIZER", "false").lower() in ("1", "true") +def set_tag(data, key, value): + data['tags'] = [(k, v) for k, v in data['tags'] if k != key] + data['tags'].append((key, value)) + + def get_event_metadata_compat(data, fallback_message): """This is a fallback path to getting the event metadata. This is used by some code paths that could potentially deal with old sentry events that @@ -410,6 +415,7 @@ def __init__( self._auth = auth self._key = key self._for_store = for_store + self._normalized = False def process_csp_report(self): """Only called from the CSP report endpoint.""" @@ -462,6 +468,10 @@ def clean(d): self._data = data def normalize(self): + if self._normalized: + raise RuntimeError('Already normalized') + self._normalized = True + if ENABLE_RUST: from semaphore.processing import StoreNormalizer rust_normalizer = StoreNormalizer( @@ -654,8 +664,6 @@ def stringify(f): data.setdefault('fingerprint', None) data.setdefault('logger', DEFAULT_LOGGER_NAME) data.setdefault('platform', None) - data.setdefault('server_name', None) - data.setdefault('site', None) data.setdefault('tags', []) data.setdefault('transaction', None) @@ -716,12 +724,18 @@ def stringify(f): # XXX: This will be trimmed again when inserted into tag values data['transaction'] = trim(data['transaction'], MAX_CULPRIT_LENGTH) + # Move some legacy data into tags + site = data.pop('site', None) + if site is not None: + set_tag(data, 'site', site) + server_name = data.pop('server_name', None) + if server_name is not None: + set_tag(data, 'server_name', server_name) + # Do not add errors unless there are for non store mode if not self._for_store and not data.get('errors'): self._data.pop('errors') - self._data = data - def should_filter(self): ''' returns (result: bool, reason: string or None) @@ -771,16 +785,14 @@ def get_data(self): return self._data def _get_event_instance(self, project_id=None): - data = self._data.copy() - event_id = data.pop('event_id') - platform = data.pop('platform', None) + data = self._data + event_id = data.get('event_id') + platform = data.get('platform') - recorded_timestamp = data.pop('timestamp') + recorded_timestamp = data.get('timestamp') date = datetime.fromtimestamp(recorded_timestamp) date = date.replace(tzinfo=timezone.utc) - - # unused - time_spent = data.pop('time_spent', None) + time_spent = data.get('time_spent') return Event( project_id=project_id or self._project.id, @@ -791,11 +803,30 @@ def _get_event_instance(self, project_id=None): platform=platform ) - def get_search_message(self, data, event_metadata=None, culprit=None): + def get_culprit(self): + """Helper to calculate the default culprit""" + return force_text( + self._data.get('culprit') or + self._data.get('transaction') or + generate_culprit(self._data, platform=self._data['platform']) or + '' + ) + + def get_event_type(self): + """Returns the event type.""" + return eventtypes.get(self._data.get('type', 'default'))(self._data) + + def get_search_message(self, event_metadata=None, culprit=None): """This generates the internal event.message attribute which is used for search purposes. It adds a bunch of data from the metadata and the culprit. """ + if event_metadata is None: + event_metadata = self.get_event_type().get_metadata() + if culprit is None: + culprit = self.get_culprit() + + data = self._data message = '' if 'logentry' in data: @@ -814,7 +845,13 @@ def get_search_message(self, data, event_metadata=None, culprit=None): return trim(message.strip(), settings.SENTRY_MAX_MESSAGE_LENGTH) - def save(self, project_id, raw=False): + def save(self, project_id, raw=False, assume_normalized=False): + # Normalize if needed + if not self._normalized: + if not assume_normalized: + self.normalize() + self._normalized = True + from sentry.tasks.post_process import index_event_tags data = self._data @@ -845,53 +882,50 @@ def save(self, project_id, raw=False): ) return event - # pull out our top-level (non-data attr) kwargs - level = data.pop('level') - transaction_name = data.pop('transaction', None) - culprit = data.pop('culprit', None) - logger_name = data.pop('logger', None) - server_name = data.pop('server_name', None) - site = data.pop('site', None) - checksum = data.pop('checksum', None) - fingerprint = data.pop('fingerprint', None) - release = data.pop('release', None) - dist = data.pop('dist', None) - environment = data.pop('environment', None) - recorded_timestamp = data.get("timestamp") - - # old events had a small chance of having a legacy message - # attribute show up here. In all reality this is being coerced - # into logentry for more than two years at this point (2018). - data.pop('message', None) - + # Pull out the culprit + culprit = self.get_culprit() + + # Pull the toplevel data we're interested in + level = data.get('level') + + # TODO(mitsuhiko): this code path should be gone by July 2018. + # This is going to be fine because no code actually still depends + # on integers here. When we need an integer it will be converted + # into one later. Old workers used to send integers here. + if level is not None and isinstance(level, six.integer_types): + level = LOG_LEVELS[level] + + transaction_name = data.get('transaction') + logger_name = data.get('logger') + checksum = data.get('checksum') + fingerprint = data.get('fingerprint') + release = data.get('release') + dist = data.get('dist') + environment = data.get('environment') + recorded_timestamp = data.get('timestamp') + + # We need to swap out the data with the one internal to the newly + # created event object event = self._get_event_instance(project_id=project_id) + self._data = data = event.data.data + event._project_cache = project date = event.datetime platform = event.platform event_id = event.event_id - data = event.data.data - self._data = None - - culprit = culprit or \ - transaction_name or \ - generate_culprit(data, platform=platform) or \ - '' - culprit = force_text(culprit) if transaction_name: transaction_name = force_text(transaction_name) - # convert this to a dict to ensure we're only storing one value per key - # as most parts of Sentry dont currently play well with multiple values + # Some of the data that are toplevel attributes are duplicated + # into tags (logger, level, environment, transaction). These are + # different from legacy attributes which are normalized into tags + # ahead of time (site, server_name). tags = dict(data.get('tags') or []) - tags['level'] = LOG_LEVELS[level] + tags['level'] = level if logger_name: tags['logger'] = logger_name - if server_name: - tags['server_name'] = server_name - if site: - tags['site'] = site if environment: tags['environment'] = trim(environment, MAX_TAG_VALUE_LENGTH) if transaction_name: @@ -911,6 +945,9 @@ def save(self, project_id, raw=False): if dist and release: dist = release.add_dist(dist, date) + # dont allow a conflicting 'dist' tag + if 'dist' in tags: + del tags['dist'] tags['sentry:dist'] = dist.name else: dist = None @@ -959,7 +996,7 @@ def save(self, project_id, raw=False): else: hashes = [md5_from_hash(h) for h in get_hashes_for_event(event)] - event_type = eventtypes.get(data.get('type', 'default'))(data) + event_type = self.get_event_type() event_metadata = event_type.get_metadata() data['type'] = event_type.key @@ -967,34 +1004,27 @@ def save(self, project_id, raw=False): # index components into ``Event.message`` # See GH-3248 - event.message = self.get_search_message(data, event_metadata, culprit) + event.message = self.get_search_message(event_metadata, culprit) + received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s')) kwargs = { 'platform': platform, - 'message': event.message + 'message': event.message, + 'culprit': culprit, + 'logger': logger_name, + 'level': LOG_LEVELS_MAP.get(level), + 'last_seen': date, + 'first_seen': date, + 'active_at': date, + 'data': { + 'last_received': received_timestamp, + 'type': event_type.key, + # we cache the events metadata on the group to ensure its + # accessible in the stream + 'metadata': event_metadata, + }, } - received_timestamp = event.data.get('received') or float(event.datetime.strftime('%s')) - kwargs.update( - { - 'culprit': culprit, - 'logger': logger_name, - 'level': level, - 'last_seen': date, - 'first_seen': date, - 'active_at': date, - 'data': { - 'last_received': received_timestamp, - 'type': - event_type.key, - # we cache the events metadata on the group to ensure its - # accessible in the stream - 'metadata': - event_metadata, - }, - } - ) - if release: kwargs['first_release'] = release diff --git a/src/sentry/models/event.py b/src/sentry/models/event.py index 6293dfc2fbaff8..f6847c7d6c72cc 100644 --- a/src/sentry/models/event.py +++ b/src/sentry/models/event.py @@ -182,6 +182,10 @@ def get_tag(self, key): return v return None + @property + def release(self): + return self.get_tag('sentry:release') + @property def dist(self): return self.get_tag('sentry:dist') @@ -189,20 +193,27 @@ def dist(self): def as_dict(self): # We use a OrderedDict to keep elements ordered for a potential JSON serializer data = OrderedDict() - data['id'] = self.event_id + data['event_id'] = self.event_id data['project'] = self.project_id - data['release'] = self.get_tag('sentry:release') + data['release'] = self.release data['dist'] = self.dist data['platform'] = self.platform - data['culprit'] = self.group.culprit data['message'] = self.get_legacy_message() data['datetime'] = self.datetime data['time_spent'] = self.time_spent - data['tags'] = self.get_tags() + data['tags'] = [(k.split('sentry:', 1)[-1], v) for (k, v) in self.get_tags()] for k, v in sorted(six.iteritems(self.data)): + if k in data: + continue if k == 'sdk': v = {v_k: v_v for v_k, v_v in six.iteritems(v) if v_k != 'client_ip'} data[k] = v + + # for a long time culprit was not persisted. In those cases put + # the culprit in from the group. + if data.get('culprit') is None: + data['culprit'] = self.group.culprit + return data @property diff --git a/src/sentry/tasks/store.py b/src/sentry/tasks/store.py index 547473d3d8403f..9cc0343e10537a 100644 --- a/src/sentry/tasks/store.py +++ b/src/sentry/tasks/store.py @@ -383,7 +383,7 @@ def save_event(cache_key=None, data=None, start_time=None, event_id=None, try: manager = EventManager(data) - event = manager.save(project_id) + event = manager.save(project_id, assume_normalized=True) # Always load attachments from the cache so we can later prune them. # Only save them if the event-attachments feature is active, though. diff --git a/src/sentry/testutils/fixtures.py b/src/sentry/testutils/fixtures.py index 13cbdf4d98b26f..3ca0f859b3224e 100644 --- a/src/sentry/testutils/fixtures.py +++ b/src/sentry/testutils/fixtures.py @@ -499,6 +499,9 @@ def create_event(self, event_id=None, normalize=True, **kwargs): manager.normalize() kwargs['data'] = manager.get_data() + else: + assert 'message' not in kwargs, 'do not pass message this way' + event = Event(event_id=event_id, **kwargs) EventMapping.objects.create( project_id=event.project.id, diff --git a/tests/sentry/event_manager/test_event_manager.py b/tests/sentry/event_manager/test_event_manager.py index a3a3c61cf1ecb8..a6888e2bb8d4b2 100644 --- a/tests/sentry/event_manager/test_event_manager.py +++ b/tests/sentry/event_manager/test_event_manager.py @@ -109,6 +109,7 @@ def test_saves_event_mapping_when_sampled(self, should_sample): def test_ephemral_interfaces_removed_on_save(self): manager = EventManager(make_event(platform='python')) + manager.normalize() event = manager.save(1) group = event.group @@ -119,12 +120,14 @@ def test_dupe_message_id(self): event_id = 'a' * 32 manager = EventManager(make_event(event_id=event_id)) + manager.normalize() manager.save(1) assert Event.objects.count() == 1 # ensure that calling it again doesn't raise a db error manager = EventManager(make_event(event_id=event_id)) + manager.normalize() manager.save(1) assert Event.objects.count() == 1 @@ -166,11 +169,13 @@ def test_updates_group(self): } def test_updates_group_with_fingerprint(self): + ts = time() - 200 manager = EventManager( make_event( message='foo', event_id='a' * 32, fingerprint=['a' * 32], + timestamp=ts, ) ) with self.tasks(): @@ -181,6 +186,7 @@ def test_updates_group_with_fingerprint(self): message='foo bar', event_id='b' * 32, fingerprint=['a' * 32], + timestamp=ts, ) ) with self.tasks(): @@ -218,13 +224,15 @@ def test_differentiates_with_fingerprint(self): assert event.group_id != event2.group_id def test_unresolves_group(self): + ts = time() - 300 + # N.B. EventManager won't unresolve the group unless the event2 has a # later timestamp than event1. MySQL doesn't support microseconds. manager = EventManager( make_event( event_id='a' * 32, checksum='a' * 32, - timestamp=1403007314, + timestamp=ts, ) ) with self.tasks(): @@ -239,7 +247,7 @@ def test_unresolves_group(self): make_event( event_id='b' * 32, checksum='a' * 32, - timestamp=1403007345, + timestamp=ts + 50, ) ) event2 = manager.save(1) @@ -262,6 +270,7 @@ def test_does_not_unresolve_group(self, plugin_is_regression): ) ) with self.tasks(): + manager.normalize() event = manager.save(1) group = Group.objects.get(id=event.group_id) @@ -276,6 +285,7 @@ def test_does_not_unresolve_group(self, plugin_is_regression): timestamp=1403007315, ) ) + manager.normalize() event2 = manager.save(1) assert event.group_id == event2.group_id @@ -497,12 +507,13 @@ def test_marks_as_unresolved_with_new_release_with_integration( @mock.patch('sentry.models.Group.is_resolved') def test_unresolves_group_with_auto_resolve(self, mock_is_resolved): + ts = time() - 100 mock_is_resolved.return_value = False manager = EventManager( make_event( event_id='a' * 32, checksum='a' * 32, - timestamp=1403007314, + timestamp=ts, ) ) with self.tasks(): @@ -513,7 +524,7 @@ def test_unresolves_group_with_auto_resolve(self, mock_is_resolved): make_event( event_id='b' * 32, checksum='a' * 32, - timestamp=1403007414, + timestamp=ts + 100, ) ) with self.tasks(): @@ -521,7 +532,8 @@ def test_unresolves_group_with_auto_resolve(self, mock_is_resolved): assert event.group_id == event2.group_id group = Group.objects.get(id=event.group.id) - assert group.active_at == event2.datetime != event.datetime + assert group.active_at == event2.datetime + assert group.active_at != event.datetime def test_invalid_transaction(self): dict_input = {'messages': 'foo'} @@ -629,6 +641,7 @@ def test_group_release_with_env(self): manager = EventManager( make_event(release='1.0', environment='prod', event_id='a' * 32) ) + manager.normalize() event = manager.save(1) release = Release.objects.get(version='1.0', projects=event.project_id) @@ -1193,6 +1206,29 @@ def test_should_filter_message(self, mock_is_valid_error_message): assert mock_is_valid_error_message.call_args_list == [ mock.call(self.project, item.formatted) for item in items] + def test_legacy_attributes_moved(self): + event = make_event( + release='my-release', + environment='my-environment', + site='whatever', + server_name='foo.com', + event_id=uuid.uuid1().hex, + ) + manager = EventManager(event) + event = manager.save(1) + + # release and environment stay toplevel + assert event.data['release'] == 'my-release' + assert event.data['environment'] == 'my-environment' + + # site is a legacy attribute that is just a tag + assert event.data.get('site') is None + tags = dict(event.tags) + assert tags['site'] == 'whatever' + assert event.data.get('server_name') is None + tags = dict(event.tags) + assert tags['server_name'] == 'foo.com' + class ReleaseIssueTest(TransactionTestCase): def setUp(self): @@ -1200,13 +1236,13 @@ def setUp(self): self.release = Release.get_or_create(self.project, '1.0') self.environment1 = Environment.get_or_create(self.project, 'prod') self.environment2 = Environment.get_or_create(self.project, 'staging') - self.timestamp = 1403007314 + self.timestamp = float(int(time() - 300)) def make_event(self, **kwargs): result = { 'event_id': 'a' * 32, 'message': 'foo', - 'timestamp': 1403007314.570599, + 'timestamp': self.timestamp + 0.23, 'level': logging.ERROR, 'logger': 'default', 'tags': [], diff --git a/tests/sentry/web/frontend/test_group_event_json.py b/tests/sentry/web/frontend/test_group_event_json.py index 61e1a665b99b93..3b7afe4bdeef00 100644 --- a/tests/sentry/web/frontend/test_group_event_json.py +++ b/tests/sentry/web/frontend/test_group_event_json.py @@ -23,4 +23,4 @@ def test_does_render(self): assert resp.status_code == 200 assert resp['Content-Type'] == 'application/json' data = json.loads(resp.content.decode('utf-8')) - assert data['id'] == self.event.event_id + assert data['event_id'] == self.event.event_id