Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/sentry/features/temporary.py
Original file line number Diff line number Diff line change
Expand Up @@ -625,6 +625,8 @@ def register_temporary_features(manager: FeatureManager) -> None:
manager.add("projects:similarity-embeddings", ProjectFeature, FeatureHandlerStrategy.INTERNAL, default=False, api_expose=True)
manager.add("projects:similarity-indexing", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=False)
manager.add("projects:similarity-view", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=True)
# Enable v2 similarity grouping model (part of v2 grouping rollout)
manager.add("projects:similarity-grouping-v2-model", ProjectFeature, FeatureHandlerStrategy.FLAGPOLE, api_expose=False)
# Starfish: extract metrics from the spans
manager.add("projects:span-metrics-extraction", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=True)
manager.add("projects:span-metrics-extraction-addons", ProjectFeature, FeatureHandlerStrategy.INTERNAL, api_expose=False)
Expand Down
17 changes: 14 additions & 3 deletions src/sentry/grouping/ingest/seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from django.conf import settings
from django.utils import timezone

from sentry import options
from sentry import features, options
from sentry import ratelimits as ratelimiter
from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
from sentry.grouping.grouping_info import get_grouping_info_from_variants_legacy
Expand All @@ -18,7 +18,7 @@
from sentry.models.grouphash import GroupHash
from sentry.models.project import Project
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
from sentry.seer.similarity.types import SimilarIssuesEmbeddingsRequest
from sentry.seer.similarity.types import GroupingVersion, SimilarIssuesEmbeddingsRequest
from sentry.seer.similarity.utils import (
SEER_INELIGIBLE_EVENT_PLATFORMS,
ReferrerOptions,
Expand Down Expand Up @@ -272,6 +272,11 @@ def get_seer_similar_issues(
get_stacktrace_string(get_grouping_info_from_variants_legacy(variants)),
)

# Get model configuration from feature flags
use_v2_model = features.has("projects:similarity-grouping-v2-model", event.project)
model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1
training_mode = False # TODO: currently hardcoded, follow up PR will add the logic

request_data: SimilarIssuesEmbeddingsRequest = {
"event_id": event.event_id,
"hash": event_hash,
Expand All @@ -281,10 +286,16 @@ def get_seer_similar_issues(
"k": options.get("seer.similarity.ingest.num_matches_to_request"),
"referrer": "ingest",
"use_reranking": options.get("seer.similarity.ingest.use_reranking"),
"model": model_version,
"training_mode": training_mode,
}
event.data.pop("stacktrace_string", None)

seer_request_metric_tags = {"platform": event.platform or "unknown"}
seer_request_metric_tags: dict[str, str | int | bool] = {
"platform": event.platform or "unknown",
"model_version": model_version.value,
"training_mode": training_mode,
}

seer_results = get_similarity_data_from_seer(
request_data,
Expand Down
15 changes: 13 additions & 2 deletions src/sentry/issues/endpoints/group_similar_issues_embeddings.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from rest_framework.request import Request
from rest_framework.response import Response

from sentry import analytics, options
from sentry import analytics, features, options
from sentry.api.analytics import GroupSimilarIssuesEmbeddingsCountEvent
from sentry.api.api_owners import ApiOwner
from sentry.api.api_publish_status import ApiPublishStatus
Expand All @@ -17,7 +17,11 @@
from sentry.models.group import Group
from sentry.models.grouphash import GroupHash
from sentry.seer.similarity.similar_issues import get_similarity_data_from_seer
from sentry.seer.similarity.types import SeerSimilarIssueData, SimilarIssuesEmbeddingsRequest
from sentry.seer.similarity.types import (
GroupingVersion,
SeerSimilarIssueData,
SimilarIssuesEmbeddingsRequest,
)
from sentry.seer.similarity.utils import (
ReferrerOptions,
event_content_has_stacktrace,
Expand Down Expand Up @@ -100,6 +104,11 @@ def get(self, request: Request, group: Group) -> Response:
if not stacktrace_string or not latest_event:
return Response([]) # No exception, stacktrace or in-app frames, or event

# Get model configuration from feature flags
use_v2_model = features.has("projects:similarity-grouping-v2-model", group.project)
model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1
training_mode = False # TODO: currently hardcoded, follow up PR will add the logic

similar_issues_params: SimilarIssuesEmbeddingsRequest = {
"event_id": latest_event.event_id,
"hash": latest_event.get_primary_hash(),
Expand All @@ -109,6 +118,8 @@ def get(self, request: Request, group: Group) -> Response:
"read_only": True,
"referrer": "similar_issues",
"use_reranking": options.get("seer.similarity.similar_issues.use_reranking"),
"model": model_version,
"training_mode": training_mode,
}
# Add optional parameters
if request.GET.get("k"):
Expand Down
6 changes: 5 additions & 1 deletion src/sentry/seer/similarity/grouping_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
)
from sentry.net.http import connection_from_url
from sentry.seer.signed_seer_api import make_signed_seer_api_request
from sentry.seer.similarity.types import RawSeerSimilarIssueData
from sentry.seer.similarity.types import GroupingVersion, RawSeerSimilarIssueData
from sentry.utils import json, metrics

logger = logging.getLogger(__name__)
Expand All @@ -36,6 +36,8 @@ class CreateGroupingRecordsRequest(TypedDict):
data: list[CreateGroupingRecordData]
stacktrace_list: list[str]
use_reranking: bool | None
model: NotRequired[GroupingVersion] # Model version, defaults to V1 for backward compatibility
training_mode: NotRequired[bool]


class BulkCreateGroupingRecordsResponse(TypedDict):
Expand All @@ -62,6 +64,8 @@ def post_bulk_grouping_records(
[len(stacktrace) for stacktrace in grouping_records_request["stacktrace_list"]]
),
"use_reranking": grouping_records_request.get("use_reranking"),
"model": grouping_records_request.get("model"),
"training_mode": grouping_records_request.get("training_mode"),
}

try:
Expand Down
3 changes: 2 additions & 1 deletion src/sentry/seer/similarity/similar_issues.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,8 @@ def get_similarity_data_from_seer(
logger_extra = {
k: v
for k, v in similar_issues_request.items()
if k in {"event_id", "project_id", "hash", "referrer", "use_reranking"}
if k
in {"event_id", "project_id", "hash", "referrer", "use_reranking", "model", "training_mode"}
}
logger.info(
"get_seer_similar_issues.request",
Expand Down
10 changes: 10 additions & 0 deletions src/sentry/seer/similarity/types.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
import logging
from collections.abc import Mapping
from dataclasses import dataclass
from enum import StrEnum
from typing import Any, ClassVar, NotRequired, Self, TypedDict

from sentry.models.grouphash import GroupHash

logger = logging.getLogger(__name__)


class GroupingVersion(StrEnum):
"""Model version for similarity grouping.

Members are ``StrEnum`` values, so each one is also a plain string
("v1" / "v2"). Call sites select ``V2`` when the
``projects:similarity-grouping-v2-model`` feature is enabled for the
project, and ``V1`` otherwise.
"""

V1 = "v1"
V2 = "v2"


# Exception subclass that adds no behavior of its own.
# NOTE(review): presumably signals that data returned by Seer is missing
# required fields — confirm against the raise sites.
class IncompleteSeerDataError(Exception):
pass

Expand All @@ -31,6 +39,8 @@ class SimilarIssuesEmbeddingsRequest(TypedDict):
event_id: NotRequired[str]
referrer: NotRequired[str]
use_reranking: NotRequired[bool]
model: NotRequired[GroupingVersion] # Model version, defaults to V1 for backward compatibility
training_mode: NotRequired[bool] # whether to just insert embedding without querying


class RawSeerSimilarIssueData(TypedDict):
Expand Down
19 changes: 18 additions & 1 deletion src/sentry/tasks/embeddings_grouping/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from google.api_core.exceptions import DeadlineExceeded, ServiceUnavailable
from snuba_sdk import Column, Condition, Entity, Limit, Op, Query, Request

from sentry import nodestore, options
from sentry import features, nodestore, options
from sentry.conf.server import SEER_SIMILARITY_MODEL_VERSION
from sentry.grouping.grouping_info import get_grouping_info_from_variants_legacy
from sentry.grouping.grouptype import ErrorGroupType
Expand All @@ -25,6 +25,7 @@
post_bulk_grouping_records,
)
from sentry.seer.similarity.types import (
GroupingVersion,
IncompleteSeerDataError,
SeerSimilarIssueData,
SimilarHashMissingGroupError,
Expand Down Expand Up @@ -491,12 +492,20 @@ def send_group_and_stacktrace_to_seer(
f"{BACKFILL_NAME}.send_group_and_stacktrace_to_seer",
sample_rate=options.get("seer.similarity.metrics_sample_rate"),
):
# Get model configuration from feature flags
project = Project.objects.get_from_cache(id=project_id)
use_v2_model = features.has("projects:similarity-grouping-v2-model", project)
model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1
training_mode = False # TODO: currently hardcoded, follow up PR will add the logic

return _make_seer_call(
CreateGroupingRecordsRequest(
group_id_list=groups_to_backfill_with_no_embedding_has_snuba_row_and_nodestore_row,
data=nodestore_results["data"],
stacktrace_list=nodestore_results["stacktrace_list"],
use_reranking=options.get("similarity.backfill_use_reranking"),
model=model_version,
training_mode=training_mode,
),
project_id,
)
Expand All @@ -508,13 +517,21 @@ def send_group_and_stacktrace_to_seer_multithreaded(
nodestore_results,
project_id,
):
# Get model configuration from feature flags
project = Project.objects.get_from_cache(id=project_id)
use_v2_model = features.has("projects:similarity-grouping-v2-model", project)
model_version = GroupingVersion.V2 if use_v2_model else GroupingVersion.V1
training_mode = False # TODO: currently hardcoded, follow up PR will add the logic

def process_chunk(chunk_data, chunk_stacktrace):
return _make_seer_call(
CreateGroupingRecordsRequest(
group_id_list=chunk_data["group_ids"],
data=chunk_data["data"],
stacktrace_list=chunk_stacktrace,
use_reranking=options.get("similarity.backfill_use_reranking"),
model=model_version,
training_mode=training_mode,
),
project_id,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from sentry.models.grouphash import GroupHash
from sentry.models.grouphashmetadata import GroupHashMetadata
from sentry.models.project import Project
from sentry.seer.similarity.types import SeerSimilarIssueData
from sentry.seer.similarity.types import GroupingVersion, SeerSimilarIssueData
from sentry.seer.similarity.utils import get_stacktrace_string
from sentry.services.eventstore.models import Event
from sentry.testutils.cases import TestCase
Expand Down Expand Up @@ -108,8 +108,15 @@ def test_sends_expected_data_to_seer(self, mock_get_similarity_data: MagicMock)
"k": options.get("seer.similarity.ingest.num_matches_to_request"),
"referrer": "ingest",
"use_reranking": True,
"model": GroupingVersion.V1,
"training_mode": False,
},
{
"platform": "python",
"model_version": "v1",
"training_mode": False,
"hybrid_fingerprint": False,
},
{"platform": "python", "hybrid_fingerprint": False},
)

@patch("sentry.grouping.ingest.seer.metrics.incr")
Expand Down Expand Up @@ -152,6 +159,8 @@ def test_sends_second_seer_request_when_seer_matches_are_unusable(
"project_id": self.project.id,
"stacktrace": new_stacktrace_string,
"exception_type": "FailedToFetchError",
"model": GroupingVersion.V1,
"training_mode": False,
}

assert mock_get_similarity_data.call_count == 2
Expand All @@ -164,7 +173,12 @@ def test_sends_second_seer_request_when_seer_matches_are_unusable(
"referrer": "ingest",
"use_reranking": True,
},
{"platform": "python", "hybrid_fingerprint": False},
{
"platform": "python",
"model_version": "v1",
"training_mode": False,
"hybrid_fingerprint": False,
},
),
# Second call to store the event's data since the match that came back from Seer
# wasn't usable
Expand All @@ -175,7 +189,11 @@ def test_sends_second_seer_request_when_seer_matches_are_unusable(
"referrer": "ingest_follow_up",
"use_reranking": False,
},
{"platform": "python"},
{
"platform": "python",
"model_version": "v1",
"training_mode": False,
},
),
]

Expand Down
19 changes: 17 additions & 2 deletions tests/sentry/grouping/seer_similarity/test_seer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from sentry import options
from sentry.grouping.ingest.seer import maybe_check_seer_for_matching_grouphash
from sentry.models.grouphash import GroupHash
from sentry.seer.similarity.types import GroupingVersion
from sentry.seer.similarity.utils import MAX_FRAME_COUNT
from sentry.services.eventstore.models import Event
from sentry.testutils.cases import TestCase
Expand Down Expand Up @@ -60,8 +61,15 @@ def test_simple(self, mock_get_similarity_data: MagicMock) -> None:
"k": 1,
"referrer": "ingest",
"use_reranking": True,
"model": GroupingVersion.V1,
"training_mode": False,
},
{
"platform": "python",
"model_version": "v1",
"training_mode": False,
"hybrid_fingerprint": False,
},
{"platform": "python", "hybrid_fingerprint": False},
)

@patch("sentry.grouping.ingest.seer.record_did_call_seer_metric")
Expand Down Expand Up @@ -182,6 +190,13 @@ def test_too_many_frames_bypassed_platform(self, mock_get_similarity_data: Magic
"k": 1,
"referrer": "ingest",
"use_reranking": True,
"model": GroupingVersion.V1,
"training_mode": False,
},
{
"platform": "python",
"model_version": "v1",
"training_mode": False,
"hybrid_fingerprint": False,
},
{"platform": "python", "hybrid_fingerprint": False},
)
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,8 @@ def test_simple(
"read_only": True,
"referrer": "similar_issues",
"use_reranking": True,
"model": "v1",
"training_mode": False,
"k": 1,
}

Expand Down Expand Up @@ -350,6 +352,8 @@ def test_incomplete_return_data(
"read_only": True,
"referrer": "similar_issues",
"use_reranking": True,
"model": "v1",
"training_mode": False,
},
"raw_similar_issue_data": {
"should_group": True,
Expand Down Expand Up @@ -632,6 +636,8 @@ def test_no_optional_params(self, mock_seer_request: mock.MagicMock) -> None:
"read_only": True,
"referrer": "similar_issues",
"use_reranking": True,
"model": "v1",
"training_mode": False,
},
),
headers={"content-type": "application/json;charset=utf-8"},
Expand Down Expand Up @@ -660,6 +666,8 @@ def test_no_optional_params(self, mock_seer_request: mock.MagicMock) -> None:
"read_only": True,
"referrer": "similar_issues",
"use_reranking": True,
"model": "v1",
"training_mode": False,
"k": 1,
},
),
Expand Down Expand Up @@ -691,6 +699,8 @@ def test_no_optional_params(self, mock_seer_request: mock.MagicMock) -> None:
"read_only": True,
"referrer": "similar_issues",
"use_reranking": True,
"model": "v1",
"training_mode": False,
},
),
headers={"content-type": "application/json;charset=utf-8"},
Expand Down
Loading
Loading