Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,21 @@ def post(self, request: Request, project) -> Response:

last_processed_id = None
only_delete = False
enable_ingestion = False

if request.data.get("last_processed_id"):
last_processed_id = int(request.data["last_processed_id"])

if request.data.get("only_delete"):
only_delete = True

if request.data.get("enable_ingestion"):
enable_ingestion = request.data["enable_ingestion"] == "true"

backfill_seer_grouping_records_for_project.delay(
current_project_id=project.id,
last_processed_group_id_input=last_processed_id,
only_delete=only_delete,
enable_ingestion=enable_ingestion,
)
return Response(status=204)
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,8 @@ def backfill_seer_grouping_records_for_project(
last_processed_group_id_input: int | None,
cohort: str | list[int] | None = None,
last_processed_project_index_input: int | None = None,
only_delete=False,
only_delete: bool = False,
enable_ingestion: bool = False,
*args: Any,
**kwargs: Any,
) -> None:
Expand Down Expand Up @@ -105,6 +106,7 @@ def backfill_seer_grouping_records_for_project(
last_processed_project_index=last_processed_project_index_input,
cohort=cohort,
only_delete=only_delete,
enable_ingestion=enable_ingestion,
)
return

Expand All @@ -121,13 +123,14 @@ def backfill_seer_grouping_records_for_project(
last_processed_project_index=last_processed_project_index,
cohort=cohort,
only_delete=only_delete,
enable_ingestion=enable_ingestion,
)
return

batch_size = options.get("embeddings-grouping.seer.backfill-batch-size")

(groups_to_backfill_with_no_embedding, batch_end_id) = get_current_batch_groups_from_postgres(
project, last_processed_group_id, batch_size
project, last_processed_group_id, batch_size, enable_ingestion
)

if len(groups_to_backfill_with_no_embedding) == 0:
Expand All @@ -137,6 +140,7 @@ def backfill_seer_grouping_records_for_project(
redis_client=redis_client,
last_processed_project_index=last_processed_project_index,
cohort=cohort,
enable_ingestion=enable_ingestion,
)
return

Expand All @@ -154,6 +158,7 @@ def backfill_seer_grouping_records_for_project(
redis_client=redis_client,
last_processed_project_index=last_processed_project_index,
cohort=cohort,
enable_ingestion=enable_ingestion,
)
return

Expand All @@ -167,6 +172,7 @@ def backfill_seer_grouping_records_for_project(
redis_client=redis_client,
last_processed_project_index=last_processed_project_index,
cohort=cohort,
enable_ingestion=enable_ingestion,
)
return

Expand Down Expand Up @@ -217,6 +223,7 @@ def backfill_seer_grouping_records_for_project(
redis_client=redis_client,
last_processed_project_index=last_processed_project_index,
cohort=cohort,
enable_ingestion=enable_ingestion,
)


Expand All @@ -228,6 +235,7 @@ def call_next_backfill(
last_processed_project_index: int,
cohort: str | list[int] | None = None,
only_delete: bool = False,
enable_ingestion: bool = False,
):
if last_processed_group_id is not None:
redis_client.set(
Expand All @@ -249,6 +257,7 @@ def call_next_backfill(
cohort,
last_processed_project_index,
only_delete,
enable_ingestion,
],
headers={"sentry-propagate-traces": False},
)
Expand Down Expand Up @@ -295,6 +304,7 @@ def call_next_backfill(
cohort,
last_processed_project_index,
only_delete,
enable_ingestion,
],
headers={"sentry-propagate-traces": False},
)
11 changes: 10 additions & 1 deletion src/sentry/tasks/embeddings_grouping/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,9 @@ def initialize_backfill(


@sentry_sdk.tracing.trace
def get_current_batch_groups_from_postgres(project, last_processed_group_id, batch_size):
def get_current_batch_groups_from_postgres(
project, last_processed_group_id, batch_size, enable_ingestion: bool = False
):
group_id_filter = Q()
if last_processed_group_id is not None:
group_id_filter = Q(id__lt=last_processed_group_id)
Expand Down Expand Up @@ -174,6 +176,13 @@ def get_current_batch_groups_from_postgres(project, last_processed_group_id, bat
"backfill_seer_grouping_records.no_more_groups",
extra={"project_id": project.id},
)
if enable_ingestion:
logger.info(
"backfill_seer_grouping_records.enable_ingestion",
extra={"project_id": project.id},
)
project.update_option("sentry:similarity_backfill_completed", int(time.time()))

return (
groups_to_backfill_batch,
None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ def test_post_success_no_last_processed_id(
current_project_id=self.project.id,
last_processed_group_id_input=None,
only_delete=False,
enable_ingestion=False,
)

@patch(
Expand All @@ -66,6 +67,7 @@ def test_post_success_no_last_processed_id_single_org(
current_project_id=self.project.id,
last_processed_group_id_input=None,
only_delete=False,
enable_ingestion=False,
)

@patch(
Expand All @@ -85,6 +87,7 @@ def test_post_success_last_processed_id(
current_project_id=self.project.id,
last_processed_group_id_input=8,
only_delete=False,
enable_ingestion=False,
)

@patch(
Expand All @@ -106,4 +109,27 @@ def test_post_success_only_delete(
current_project_id=self.project.id,
last_processed_group_id_input=8,
only_delete=True,
enable_ingestion=False,
)

@patch(
    "sentry.api.endpoints.project_backfill_similar_issues_embeddings_records.is_active_superuser",
    return_value=True,
)
@patch(
    "sentry.api.endpoints.project_backfill_similar_issues_embeddings_records.backfill_seer_grouping_records_for_project.delay"
)
@with_feature("projects:similarity-embeddings-backfill")
def test_post_success_enable_ingestion(
    self, mock_backfill_seer_grouping_records, mock_is_active_superuser
):
    """POSTing enable_ingestion="true" forwards enable_ingestion=True to the backfill task."""
    payload = {"last_processed_id": "8", "enable_ingestion": "true"}
    response = self.client.post(self.url, data=payload)

    assert response.status_code == 204, response.content
    # The endpoint parses the string payload: id -> int, "true" -> bool.
    mock_backfill_seer_grouping_records.assert_called_with(
        current_project_id=self.project.id,
        last_processed_group_id_input=8,
        only_delete=False,
        enable_ingestion=True,
    )
52 changes: 52 additions & 0 deletions tests/sentry/tasks/test_backfill_seer_grouping_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -1486,6 +1486,7 @@ def test_backfill_seer_grouping_records_empty_nodestore(
redis_client=ANY,
last_processed_project_index=0,
cohort=None,
enable_ingestion=False,
)

@with_feature("projects:similarity-embeddings-backfill")
Expand Down Expand Up @@ -1534,3 +1535,54 @@ def test_backfill_seer_grouping_records_killswitch_enabled(self, mock_logger):
mock_logger.info.assert_called_with(
"backfill_seer_grouping_records.killswitch_enabled",
)

@with_feature("projects:similarity-embeddings-backfill")
@patch("sentry.tasks.embeddings_grouping.utils.logger")
@patch("sentry.tasks.embeddings_grouping.utils.post_bulk_grouping_records")
def test_backfill_seer_grouping_records_enable_ingestion(
    self, mock_post_bulk_grouping_records, mock_logger
):
    """
    When enable_ingestion is True, the backfill stamps the project option and
    emits the enable-ingestion log line.
    """
    mock_post_bulk_grouping_records.return_value = {"success": True, "groups_with_neighbor": {}}

    with TaskRunner():
        backfill_seer_grouping_records_for_project(self.project.id, None, enable_ingestion=True)

    # Every group in the project gets Seer similarity metadata written back.
    for group in Group.objects.filter(project_id=self.project.id):
        expected_metadata = {
            "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
            "request_hash": self.group_hashes[group.id],
        }
        assert group.data["metadata"].get("seer_similarity") == expected_metadata

    mock_logger.info.assert_called_with(
        "backfill_seer_grouping_records.enable_ingestion",
        extra={"project_id": self.project.id},
    )
    # The completed-timestamp option is only written when ingestion was enabled.
    assert self.project.get_option("sentry:similarity_backfill_completed") is not None

@with_feature("projects:similarity-embeddings-backfill")
@patch("sentry.tasks.embeddings_grouping.utils.logger")
@patch("sentry.tasks.embeddings_grouping.utils.post_bulk_grouping_records")
def test_backfill_seer_grouping_records_no_enable_ingestion(
    self, mock_post_bulk_grouping_records, mock_logger
):
    """
    With the default enable_ingestion=False, the backfill still writes Seer
    metadata onto groups but never sets the project option.
    """
    mock_post_bulk_grouping_records.return_value = {"success": True, "groups_with_neighbor": {}}

    with TaskRunner():
        backfill_seer_grouping_records_for_project(self.project.id, None)

    # Metadata is written regardless of the ingestion flag.
    for group in Group.objects.filter(project_id=self.project.id):
        expected_metadata = {
            "similarity_model_version": SEER_SIMILARITY_MODEL_VERSION,
            "request_hash": self.group_hashes[group.id],
        }
        assert group.data["metadata"].get("seer_similarity") == expected_metadata

    # No ingestion flag -> the completed-timestamp option must remain unset.
    assert self.project.get_option("sentry:similarity_backfill_completed") is None