From a28e74cb8134e67634ca0b422761066ce4c80b3e Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Wed, 24 Jul 2024 16:30:34 -0700 Subject: [PATCH 1/3] stop checking flag in `_project_has_similarity_grouping_enabled` --- src/sentry/grouping/ingest/seer.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/sentry/grouping/ingest/seer.py b/src/sentry/grouping/ingest/seer.py index c86576d6b7294a..4abea076b907d5 100644 --- a/src/sentry/grouping/ingest/seer.py +++ b/src/sentry/grouping/ingest/seer.py @@ -57,9 +57,7 @@ def should_call_seer_for_grouping(event: Event, primary_hashes: CalculatedHashes def _project_has_similarity_grouping_enabled(project: Project) -> bool: - has_either_seer_grouping_feature = features.has( - "projects:similarity-embeddings-metadata", project - ) or features.has("projects:similarity-embeddings-grouping", project) + has_seer_grouping_flag_on = features.has("projects:similarity-embeddings-grouping", project) # TODO: This is a hack to get ingest to turn on for projects as soon as they're backfilled. When # the backfill script completes, we turn on this option, enabling ingest immediately rather than @@ -67,7 +65,7 @@ def _project_has_similarity_grouping_enabled(project: Project) -> bool: # projects have been backfilled, the option (and this check) can go away. has_been_backfilled = project.get_option("sentry:similarity_backfill_completed") - return has_either_seer_grouping_feature or has_been_backfilled + return has_seer_grouping_flag_on or has_been_backfilled # TODO: Here we're including events with hybrid fingerprints (ones which are `{{ default }}` From aa63f729bfa5356f4f3dd1b8677d61c8c9c9eb32 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Wed, 24 Jul 2024 16:31:44 -0700 Subject: [PATCH 2/3] remove feature flag check in `get_seer_similar_issues` --- src/sentry/grouping/ingest/seer.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/sentry/grouping/ingest/seer.py b/src/sentry/grouping/ingest/seer.py index 4abea076b907d5..f8de83a22c0b73 100644 --- a/src/sentry/grouping/ingest/seer.py +++ b/src/sentry/grouping/ingest/seer.py @@ -197,11 +197,7 @@ def get_seer_similar_issues( ) parent_group = ( Group.objects.filter(id=seer_results[0].parent_group_id).first() - if ( - seer_results - and seer_results[0].should_group - and features.has("projects:similarity-embeddings-grouping", event.project) - ) + if seer_results and seer_results[0].should_group else None ) From 5c996983bbbe5853f347cf323105e13c9a4b3484 Mon Sep 17 00:00:00 2001 From: Katie Byers Date: Wed, 24 Jul 2024 16:36:13 -0700 Subject: [PATCH 3/3] fix tests --- .../grouping/test_seer_grouping.py | 72 +------------------ tests/sentry/grouping/ingest/test_seer.py | 59 +++------------ 2 files changed, 10 insertions(+), 121 deletions(-) diff --git a/tests/sentry/event_manager/grouping/test_seer_grouping.py b/tests/sentry/event_manager/grouping/test_seer_grouping.py index bd9212e51ad250..8671da11790e1b 100644 --- a/tests/sentry/event_manager/grouping/test_seer_grouping.py +++ b/tests/sentry/event_manager/grouping/test_seer_grouping.py @@ -54,12 +54,7 @@ def test_obeys_seer_similarity_flags(self): ), ): - with Feature( - { - "projects:similarity-embeddings-metadata": False, - "projects:similarity-embeddings-grouping": False, - } - ): + with Feature({"projects:similarity-embeddings-grouping": False}): new_event = save_new_event({"message": "Adopt don't shop"}, self.project) # We checked whether to make the call, but didn't go through with it @@ -74,12 +69,7 @@ def test_obeys_seer_similarity_flags(self): should_call_seer_spy.reset_mock() get_seer_similar_issues_spy.reset_mock() - with Feature( - { - "projects:similarity-embeddings-metadata": True, - "projects:similarity-embeddings-grouping": False, - } - ): + with Feature({"projects:similarity-embeddings-grouping": True}): new_event = save_new_event({"message": "Maisey is silly"}, self.project) expected_metadata = {**metadata_base, "request_hash": new_event.get_primary_hash()} @@ -87,63 +77,6 @@ def test_obeys_seer_similarity_flags(self): assert should_call_seer_spy.call_count == 1 assert get_seer_similar_issues_spy.call_count == 1 - # Metadata returned and stored - assert get_seer_similar_issues_return_values[0][0] == expected_metadata - assert ( - NonNone(new_event.group).data["metadata"]["seer_similarity"] - == expected_metadata - ) - assert new_event.data["seer_similarity"] == expected_metadata - - # No parent group returned or used (even though `should_group` is True) - assert get_seer_similar_issues_return_values[0][1] is None - assert new_event.group_id != existing_event.group_id - - should_call_seer_spy.reset_mock() - get_seer_similar_issues_spy.reset_mock() - get_seer_similar_issues_return_values.pop() - - with Feature( - { - "projects:similarity-embeddings-metadata": False, - "projects:similarity-embeddings-grouping": True, - } - ): - new_event = save_new_event({"message": "Charlie is goofy"}, self.project) - expected_metadata = {**metadata_base, "request_hash": new_event.get_primary_hash()} - - # We checked whether to make the call, and then made it - assert should_call_seer_spy.call_count == 1 - assert get_seer_similar_issues_spy.call_count == 1 - - # Metadata returned and stored (metadata flag being off doesn't matter because - # grouping flag takes precedence) - assert get_seer_similar_issues_return_values[0][0] == expected_metadata - assert new_event.data["seer_similarity"] == expected_metadata - - # Parent group returned and used - assert get_seer_similar_issues_return_values[0][1] == existing_event.group - assert new_event.group_id == existing_event.group_id - - should_call_seer_spy.reset_mock() - get_seer_similar_issues_spy.reset_mock() - get_seer_similar_issues_return_values.pop() - - with Feature( - { - "projects:similarity-embeddings-metadata": True, - "projects:similarity-embeddings-grouping": True, - } - ): - new_event = save_new_event( - {"message": "Cori and Bodhi are ridiculous"}, self.project - ) - expected_metadata = {**metadata_base, "request_hash": new_event.get_primary_hash()} - - # We checked whether to make the call, and then made it - assert should_call_seer_spy.call_count == 1 - assert get_seer_similar_issues_spy.call_count == 1 - # Metadata returned and stored assert get_seer_similar_issues_return_values[0][0] == expected_metadata assert new_event.data["seer_similarity"] == expected_metadata @@ -208,7 +141,6 @@ def test_stores_seer_results_in_metadata(self, _): "results": [asdict(seer_result_data)], } - assert NonNone(new_event.group).data["metadata"]["seer_similarity"] == expected_metadata assert new_event.data["seer_similarity"] == expected_metadata @with_feature("projects:similarity-embeddings-grouping") diff --git a/tests/sentry/grouping/ingest/test_seer.py b/tests/sentry/grouping/ingest/test_seer.py index 945ed67422d272..9554fa2dc2832f 100644 --- a/tests/sentry/grouping/ingest/test_seer.py +++ b/tests/sentry/grouping/ingest/test_seer.py @@ -46,36 +46,20 @@ def setUp(self): self.primary_hashes = self.event.get_hashes() def test_obeys_feature_enablement_check(self): - for metadata_flag, grouping_flag, backfill_completed_option, expected_result in [ - # TODO: This manual cartesian product business is gross, but thankfully it's temporary - - # the metadata flag is about to go away and the backfill completed option will go away - # once all projects are backfilled. - (False, False, None, False), - (True, False, None, True), - (False, True, None, True), - (True, True, None, True), - (False, False, 11211231, True), - (True, False, 11211231, True), - (False, True, 11211231, True), - (True, True, 11211231, True), + for grouping_flag, backfill_completed_option, expected_result in [ + (False, None, False), + (True, None, True), + (False, 11211231, True), + (True, 11211231, True), ]: - with ( - Feature( - { - "projects:similarity-embeddings-metadata": metadata_flag, - "projects:similarity-embeddings-grouping": grouping_flag, - } - ), - # Having too many cases above makes us hit the project rate limit if we don't patch this - patch("sentry.grouping.ingest.seer._ratelimiting_enabled", return_value=False), - ): + with Feature({"projects:similarity-embeddings-grouping": grouping_flag}): self.project.update_option( "sentry:similarity_backfill_completed", backfill_completed_option ) assert ( should_call_seer_for_grouping(self.event, self.primary_hashes) is expected_result - ), f"Case (metadata {metadata_flag}, grouping {grouping_flag}, backfill completed {backfill_completed_option}) failed." + ), f"Case (grouping {grouping_flag}, backfill completed {backfill_completed_option}) failed." @with_feature("projects:similarity-embeddings-grouping") def test_obeys_content_filter(self): @@ -186,7 +170,7 @@ def setUp(self): ) self.new_event_hashes = CalculatedHashes(["20130809201315042012311220122111"]) - @patch("sentry.grouping.ingest.seer.get_similarity_data_from_seer") + @patch("sentry.grouping.ingest.seer.get_similarity_data_from_seer", return_value=[]) def test_sends_expected_data_to_seer(self, mock_get_similarity_data: MagicMock): new_event = Event( project_id=self.project.id, @@ -234,31 +218,6 @@ def test_sends_expected_data_to_seer(self, mock_get_similarity_data: MagicMock): } ) - @with_feature({"projects:similarity-embeddings-grouping": False}) - def test_returns_metadata_but_no_group_if_seer_grouping_flag_off(self): - seer_result_data = SeerSimilarIssueData( - parent_hash=NonNone(self.existing_event.get_primary_hash()), - parent_group_id=NonNone(self.existing_event.group_id), - stacktrace_distance=0.01, - message_distance=0.05, - should_group=True, - ) - expected_metadata = { - "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION, - "request_hash": self.new_event_hashes.hashes[0], - "results": [asdict(seer_result_data)], - } - - with patch( - "sentry.grouping.ingest.seer.get_similarity_data_from_seer", - return_value=[seer_result_data], - ): - assert get_seer_similar_issues(self.new_event, self.new_event_hashes) == ( - expected_metadata, - None, # No group returned, even though `should_group` is True - ) - - @with_feature("projects:similarity-embeddings-grouping") def test_returns_metadata_and_group_if_sufficiently_close_group_found(self): seer_result_data = SeerSimilarIssueData( parent_hash=NonNone(self.existing_event.get_primary_hash()), @@ -282,7 +241,6 @@ def test_returns_metadata_and_group_if_sufficiently_close_group_found(self): self.existing_event.group, ) - @with_feature("projects:similarity-embeddings-grouping") def test_returns_metadata_but_no_group_if_similar_group_insufficiently_close(self): seer_result_data = SeerSimilarIssueData( parent_hash=NonNone(self.existing_event.get_primary_hash()), @@ -306,7 +264,6 @@ def test_returns_metadata_but_no_group_if_similar_group_insufficiently_close(sel None, ) - @with_feature("projects:similarity-embeddings-grouping") def test_returns_no_group_and_empty_metadata_if_no_similar_group_found(self): expected_metadata = { "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,