diff --git a/src/sentry/features/temporary.py b/src/sentry/features/temporary.py index b2f5df67a3d3ed..d78e43cdc0313c 100644 --- a/src/sentry/features/temporary.py +++ b/src/sentry/features/temporary.py @@ -625,6 +625,8 @@ def register_temporary_features(manager: FeatureManager) -> None: manager.add("projects:similarity-embeddings", ProjectFeature, FeatureHandlerStrategy.INTERNAL, default=False, api_expose=True) manager.add("projects:similarity-indexing", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=False) manager.add("projects:similarity-view", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=True) + # Enable v2 similarity grouping model (part of v2 grouping rollout) + manager.add("projects:similarity-grouping-v2-model", ProjectFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False) # Starfish: extract metrics from the spans manager.add("projects:span-metrics-extraction", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=True) manager.add("projects:span-metrics-extraction-addons", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=False) diff --git a/src/sentry/grouping/ingest/seer.py b/src/sentry/grouping/ingest/seer.py index 2d66a68a2bb578..30191a065f75cc 100644 --- a/src/sentry/grouping/ingest/seer.py +++ b/src/sentry/grouping/ingest/seer.py @@ -6,7 +6,7 @@ from django.conf import settings from django.utils import timezone -from sentry import options +from sentry import features, options from sentry import ratelimits as ratelimiter from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION from sentry.grouping.grouping_info import get_grouping_info_from_variants_legacy @@ -18,7 +18,7 @@ from sentry.models.grouphash import GroupHash from sentry.models.project import Project from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer -from sentry.seer.similarity.types import SimilarIssuesEmbeddingsRequest +from sentry.seer.similarity.types import GroupingVersion, SimilarIssuesEmbeddingsRequest 
from sentry.seer.similarity.utils import ( SEER_INELIGIBLE_EVENT_PLATFORMS, ReferrerOptions, @@ -272,6 +272,11 @@ def get_seer_similar_issues( get_stacktrace_string(get_grouping_info_from_variants_legacy(variants)), ) + # Get model configuration from feature flags + use_v2_model = features.has("projects:similarity-grouping-v2-model", event.project) + model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1 + training_mode = False # TODO: currently hardcoded, follow up PR will add the logic + request_data: SimilarIssuesEmbeddingsRequest = { "event_id": event.event_id, "hash": event_hash, @@ -281,10 +286,16 @@ def get_seer_similar_issues( "k": options.get("seer.similarity.ingest.num_matches_to_request"), "referrer": "ingest", "use_reranking": options.get("seer.similarity.ingest.use_reranking"), + "model": model_version, + "training_mode": training_mode, } event.data.pop("stacktrace_string", None) - seer_request_metric_tags = {"platform": event.platform or "unknown"} + seer_request_metric_tags: dict[str, str | int | bool] = { + "platform": event.platform or "unknown", + "model_version": model_version.value, + "training_mode": training_mode, + } seer_results = get_similarity_data_from_seer( request_data, diff --git a/src/sentry/issues/endpoints/group_similar_issues_embeddings.py b/src/sentry/issues/endpoints/group_similar_issues_embeddings.py index a5ba735503f0fa..2b31df855dbdd6 100644 --- a/src/sentry/issues/endpoints/group_similar_issues_embeddings.py +++ b/src/sentry/issues/endpoints/group_similar_issues_embeddings.py @@ -6,7 +6,7 @@ from rest_framework.request import Request from rest_framework.response import Response -from sentry import analytics, options +from sentry import analytics, features, options from sentry.api.analytics import GroupSimilarIssuesEmbeddingsCountEvent from sentry.api.api_owners import ApiOwner from sentry.api.api_publish_status import ApiPublishStatus @@ -17,7 +17,11 @@ from sentry.models.group import Group from sentry.models.grouphash import 
GroupHash from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer -from sentry.seer.similarity.types import SeerSimilarIssueData, SimilarIssuesEmbeddingsRequest +from sentry.seer.similarity.types import ( + GroupingVersion, + SeerSimilarIssueData, + SimilarIssuesEmbeddingsRequest, +) from sentry.seer.similarity.utils import ( ReferrerOptions, event_content_has_stacktrace, @@ -100,6 +104,11 @@ def get(self, request: Request, group: Group) -> Response: if not stacktrace_string or not latest_event: return Response([]) # No exception, stacktrace or in-app frames, or event + # Get model configuration from feature flags + use_v2_model = features.has("projects:similarity-grouping-v2-model", group.project) + model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1 + training_mode = False # TODO: currently hardcoded, follow up PR will add the logic + similar_issues_params: SimilarIssuesEmbeddingsRequest = { "event_id": latest_event.event_id, "hash": latest_event.get_primary_hash(), @@ -109,6 +118,8 @@ def get(self, request: Request, group: Group) -> Response: "read_only": True, "referrer": "similar_issues", "use_reranking": options.get("seer.similarity.similar_issues.use_reranking"), + "model": model_version, + "training_mode": training_mode, } # Add optional parameters if request.GET.get("k"): diff --git a/src/sentry/seer/similarity/grouping_records.py b/src/sentry/seer/similarity/grouping_records.py index a1dbf187581ded..7c70df0326091c 100644 --- a/src/sentry/seer/similarity/grouping_records.py +++ b/src/sentry/seer/similarity/grouping_records.py @@ -15,7 +15,7 @@ ) from sentry.net.http import connection_from_url from sentry.seer.signed_seer_api import make_signed_seer_api_request -from sentry.seer.similarity.types import RawSeerSimilarIssueData +from sentry.seer.similarity.types import GroupingVersion, RawSeerSimilarIssueData from sentry.utils import json, metrics logger = logging.getLogger(__name__) @@ -36,6 +36,8 @@ class 
CreateGroupingRecordsRequest(TypedDict): data: list[CreateGroupingRecordData] stacktrace_list: list[str] use_reranking: bool | None + model: NotRequired[GroupingVersion] # Model version, defaults to V1 for backward compatibility + training_mode: NotRequired[bool] class BulkCreateGroupingRecordsResponse(TypedDict): @@ -62,6 +64,8 @@ def post_bulk_grouping_records( [len(stacktrace) for stacktrace in grouping_records_request["stacktrace_list"]] ), "use_reranking": grouping_records_request.get("use_reranking"), + "model": grouping_records_request.get("model"), + "training_mode": grouping_records_request.get("training_mode"), } try: diff --git a/src/sentry/seer/similarity/similar_issues.py b/src/sentry/seer/similarity/similar_issues.py index 683a9baccccb4b..ee095cb2b49c78 100644 --- a/src/sentry/seer/similarity/similar_issues.py +++ b/src/sentry/seer/similarity/similar_issues.py @@ -53,7 +53,8 @@ def get_similarity_data_from_seer( logger_extra = { k: v for k, v in similar_issues_request.items() - if k in {"event_id", "project_id", "hash", "referrer", "use_reranking"} + if k + in {"event_id", "project_id", "hash", "referrer", "use_reranking", "model", "training_mode"} } logger.info( "get_seer_similar_issues.request", diff --git a/src/sentry/seer/similarity/types.py b/src/sentry/seer/similarity/types.py index 74cc0282da0c04..e786b0de4f8c56 100644 --- a/src/sentry/seer/similarity/types.py +++ b/src/sentry/seer/similarity/types.py @@ -1,6 +1,7 @@ import logging from collections.abc import Mapping from dataclasses import dataclass +from enum import StrEnum from typing import Any, ClassVar, NotRequired, Self, TypedDict from sentry.models.grouphash import GroupHash @@ -8,6 +9,13 @@ logger = logging.getLogger(__name__) +class GroupingVersion(StrEnum): + """Model version for similarity grouping.""" + + V1 = "v1" + V2 = "v2" + + class IncompleteSeerDataError(Exception): pass @@ -31,6 +39,8 @@ class SimilarIssuesEmbeddingsRequest(TypedDict): event_id: NotRequired[str] referrer: 
NotRequired[str] use_reranking: NotRequired[bool] + model: NotRequired[GroupingVersion] # Model version, defaults to V1 for backward compatibility + training_mode: NotRequired[bool] # whether to just insert embedding without querying class RawSeerSimilarIssueData(TypedDict): diff --git a/src/sentry/tasks/embeddings_grouping/utils.py b/src/sentry/tasks/embeddings_grouping/utils.py index 344f1cca15788f..09b014cca3027e 100644 --- a/src/sentry/tasks/embeddings_grouping/utils.py +++ b/src/sentry/tasks/embeddings_grouping/utils.py @@ -11,7 +11,7 @@ from google.api_core.exceptions import DeadlineExceeded, ServiceUnavailable from snuba_sdk import Column, Condition, Entity, Limit, Op, Query, Request -from sentry import nodestore, options +from sentry import features, nodestore, options from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION from sentry.grouping.grouping_info import get_grouping_info_from_variants_legacy from sentry.grouping.grouptype import ErrorGroupType @@ -25,6 +25,7 @@ post_bulk_grouping_records, ) from sentry.seer.similarity.types import ( + GroupingVersion, IncompleteSeerDataError, SeerSimilarIssueData, SimilarHashMissingGroupError, @@ -491,12 +492,20 @@ def send_group_and_stacktrace_to_seer( f"{BACKFILL_NAME}.send_group_and_stacktrace_to_seer", sample_rate=options.get("seer.similarity.metrics_sample_rate"), ): + # Get model configuration from feature flags + project = Project.objects.get_from_cache(id=project_id) + use_v2_model = features.has("projects:similarity-grouping-v2-model", project) + model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1 + training_mode = False # TODO: currently hardcoded, follow up PR will add the logic + return _make_seer_call( CreateGroupingRecordsRequest( group_id_list=groups_to_backfill_with_no_embedding_has_snuba_row_and_nodestore_row, data=nodestore_results["data"], stacktrace_list=nodestore_results["stacktrace_list"], use_reranking=options.get("similarity.backfill_use_reranking"), + 
model=model_version, + training_mode=training_mode, ), project_id, ) @@ -508,6 +517,12 @@ def send_group_and_stacktrace_to_seer_multithreaded( nodestore_results, project_id, ): + # Get model configuration from feature flags + project = Project.objects.get_from_cache(id=project_id) + use_v2_model = features.has("projects:similarity-grouping-v2-model", project) + model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1 + training_mode = False # TODO: currently hardcoded, follow up PR will add the logic + def process_chunk(chunk_data, chunk_stacktrace): return _make_seer_call( CreateGroupingRecordsRequest( @@ -515,6 +530,8 @@ def process_chunk(chunk_data, chunk_stacktrace): data=chunk_data["data"], stacktrace_list=chunk_stacktrace, use_reranking=options.get("similarity.backfill_use_reranking"), + model=model_version, + training_mode=training_mode, ), project_id, ) diff --git a/tests/sentry/grouping/seer_similarity/test_get_seer_similar_issues.py b/tests/sentry/grouping/seer_similarity/test_get_seer_similar_issues.py index de4b52c2d416a5..83e6168a107bcc 100644 --- a/tests/sentry/grouping/seer_similarity/test_get_seer_similar_issues.py +++ b/tests/sentry/grouping/seer_similarity/test_get_seer_similar_issues.py @@ -10,7 +10,7 @@ from sentry.models.grouphash import GroupHash from sentry.models.grouphashmetadata import GroupHashMetadata from sentry.models.project import Project -from sentry.seer.similarity.types import SeerSimilarIssueData +from sentry.seer.similarity.types import GroupingVersion, SeerSimilarIssueData from sentry.seer.similarity.utils import get_stacktrace_string from sentry.services.eventstore.models import Event from sentry.testutils.cases import TestCase @@ -108,8 +108,15 @@ def test_sends_expected_data_to_seer(self, mock_get_similarity_data: MagicMock) "k": options.get("seer.similarity.ingest.num_matches_to_request"), "referrer": "ingest", "use_reranking": True, + "model": GroupingVersion.V1, + "training_mode": False, + }, + { + 
"platform": "python", + "model_version": "v1", + "training_mode": False, + "hybrid_fingerprint": False, }, - {"platform": "python", "hybrid_fingerprint": False}, ) @patch("sentry.grouping.ingest.seer.metrics.incr") @@ -152,6 +159,8 @@ def test_sends_second_seer_request_when_seer_matches_are_unusable( "project_id": self.project.id, "stacktrace": new_stacktrace_string, "exception_type": "FailedToFetchError", + "model": GroupingVersion.V1, + "training_mode": False, } assert mock_get_similarity_data.call_count == 2 @@ -164,7 +173,12 @@ def test_sends_second_seer_request_when_seer_matches_are_unusable( "referrer": "ingest", "use_reranking": True, }, - {"platform": "python", "hybrid_fingerprint": False}, + { + "platform": "python", + "model_version": "v1", + "training_mode": False, + "hybrid_fingerprint": False, + }, ), # Second call to store the event's data since the match that came back from Seer # wasn't usable @@ -175,7 +189,11 @@ def test_sends_second_seer_request_when_seer_matches_are_unusable( "referrer": "ingest_follow_up", "use_reranking": False, }, - {"platform": "python"}, + { + "platform": "python", + "model_version": "v1", + "training_mode": False, + }, ), ] diff --git a/tests/sentry/grouping/seer_similarity/test_seer.py b/tests/sentry/grouping/seer_similarity/test_seer.py index 1b89440cee322e..13c5612d7f57ac 100644 --- a/tests/sentry/grouping/seer_similarity/test_seer.py +++ b/tests/sentry/grouping/seer_similarity/test_seer.py @@ -4,6 +4,7 @@ from sentry import options from sentry.grouping.ingest.seer import maybe_check_seer_for_matching_grouphash from sentry.models.grouphash import GroupHash +from sentry.seer.similarity.types import GroupingVersion from sentry.seer.similarity.utils import MAX_FRAME_COUNT from sentry.services.eventstore.models import Event from sentry.testutils.cases import TestCase @@ -60,8 +61,15 @@ def test_simple(self, mock_get_similarity_data: MagicMock) -> None: "k": 1, "referrer": "ingest", "use_reranking": True, + "model": 
GroupingVersion.V1, + "training_mode": False, + }, + { + "platform": "python", + "model_version": "v1", + "training_mode": False, + "hybrid_fingerprint": False, }, - {"platform": "python", "hybrid_fingerprint": False}, ) @patch("sentry.grouping.ingest.seer.record_did_call_seer_metric") @@ -182,6 +190,13 @@ def test_too_many_frames_bypassed_platform(self, mock_get_similarity_data: Magic "k": 1, "referrer": "ingest", "use_reranking": True, + "model": GroupingVersion.V1, + "training_mode": False, + }, + { + "platform": "python", + "model_version": "v1", + "training_mode": False, + "hybrid_fingerprint": False, }, - {"platform": "python", "hybrid_fingerprint": False}, ) diff --git a/tests/sentry/issues/endpoints/test_group_similar_issues_embeddings.py b/tests/sentry/issues/endpoints/test_group_similar_issues_embeddings.py index 376e059db05c95..64e3f0b1dc5f9d 100644 --- a/tests/sentry/issues/endpoints/test_group_similar_issues_embeddings.py +++ b/tests/sentry/issues/endpoints/test_group_similar_issues_embeddings.py @@ -179,6 +179,8 @@ def test_simple( "read_only": True, "referrer": "similar_issues", "use_reranking": True, + "model": "v1", + "training_mode": False, "k": 1, } @@ -350,6 +352,8 @@ def test_incomplete_return_data( "read_only": True, "referrer": "similar_issues", "use_reranking": True, + "model": "v1", + "training_mode": False, }, "raw_similar_issue_data": { "should_group": True, @@ -632,6 +636,8 @@ def test_no_optional_params(self, mock_seer_request: mock.MagicMock) -> None: "read_only": True, "referrer": "similar_issues", "use_reranking": True, + "model": "v1", + "training_mode": False, }, ), headers={"content-type": "application/json;charset=utf-8"}, @@ -660,6 +666,8 @@ def test_no_optional_params(self, mock_seer_request: mock.MagicMock) -> None: "read_only": True, "referrer": "similar_issues", "use_reranking": True, + "model": "v1", + "training_mode": False, "k": 1, }, ), @@ -691,6 +699,8 @@ def test_no_optional_params(self, mock_seer_request: 
mock.MagicMock) -> None: "read_only": True, "referrer": "similar_issues", "use_reranking": True, + "model": "v1", + "training_mode": False, }, ), headers={"content-type": "application/json;charset=utf-8"}, diff --git a/tests/sentry/seer/similarity/test_grouping_records.py b/tests/sentry/seer/similarity/test_grouping_records.py index 7a3321a209ff60..59fc1454f73880 100644 --- a/tests/sentry/seer/similarity/test_grouping_records.py +++ b/tests/sentry/seer/similarity/test_grouping_records.py @@ -67,6 +67,8 @@ def test_post_bulk_grouping_records_success( "project_id": 1, "stacktrace_length_sum": 24, "use_reranking": False, + "model": None, + "training_mode": None, }, ) @@ -91,10 +93,12 @@ def test_post_bulk_grouping_records_timeout( extra={ "group_ids": json.dumps(_create_grouping_records_request_params()["group_id_list"]), "project_id": 1, - "reason": "ReadTimeoutError", - "timeout": POST_BULK_GROUPING_RECORDS_TIMEOUT, "stacktrace_length_sum": 24, "use_reranking": False, + "model": None, + "training_mode": None, + "reason": "ReadTimeoutError", + "timeout": POST_BULK_GROUPING_RECORDS_TIMEOUT, }, ) @@ -124,6 +128,8 @@ def test_post_bulk_grouping_records_failure( "reason": "INTERNAL SERVER ERROR", "stacktrace_length_sum": 24, "use_reranking": False, + "model": None, + "training_mode": None, }, ) @@ -169,6 +175,8 @@ def test_post_bulk_grouping_records_use_reranking( "project_id": 1, "stacktrace_length_sum": 24, "use_reranking": True, + "model": None, + "training_mode": None, }, )