Skip to content

Commit

Permalink
feat(similarity): Add delete record by hash task (#72767)
Browse files Browse the repository at this point in the history
Add the task that calls the seer delete-record-by-hash endpoint.
Given a list of hashes, it batches them into groups of 100 and issues one call per batch.
  • Loading branch information
jangjodi authored Jun 26, 2024
1 parent 6c95799 commit 34c5f0b
Show file tree
Hide file tree
Showing 3 changed files with 63 additions and 0 deletions.
6 changes: 6 additions & 0 deletions src/sentry/options/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -935,6 +935,12 @@
flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)

# Batch size used by the delete_seer_grouping_records_by_hash task: the
# maximum number of hashes sent per call to the seer delete-record-by-hash
# endpoint before the task re-schedules itself for the next batch.
register(
    "embeddings-grouping.seer.delete-record-batch-size",
    default=100,
    flags=FLAG_ALLOW_EMPTY | FLAG_AUTOMATOR_MODIFIABLE,
)

# ## sentry.killswitches
#
# The following options are documented in sentry.killswitches in more detail
Expand Down
33 changes: 33 additions & 0 deletions src/sentry/tasks/delete_seer_grouping_records_by_hash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from typing import Any

from sentry import options
from sentry.seer.similarity.grouping_records import delete_grouping_records_by_hash
from sentry.silo.base import SiloMode
from sentry.tasks.base import instrumented_task


@instrumented_task(
    name="sentry.tasks.delete_seer_grouping_records_by_hash",
    queue="delete_seer_grouping_records_by_hash",
    max_retries=0,
    # Time limits belong on the task options, not in the function signature:
    # Celery never reads them from function parameters, so declaring them as
    # defaulted kwargs (as before) left them inert.
    soft_time_limit=60 * 15,
    time_limit=60 * (15 + 5),
    silo_mode=SiloMode.REGION,
)
def delete_seer_grouping_records_by_hash(
    project_id: int,
    hashes: list[str],
    last_deleted_index: int = 0,
    *args: Any,
    **kwargs: Any,
) -> None:
    """
    Delete seer grouping records for a list of hashes, one batch per invocation.

    Calls the seer delete-by-hash endpoint with a slice of at most
    `embeddings-grouping.seer.delete-record-batch-size` hashes starting at
    `last_deleted_index`, then re-schedules itself for the next batch until
    the whole list has been processed.

    :param project_id: the project the grouping-record hashes belong to
    :param hashes: full list of hashes to delete
    :param last_deleted_index: index into `hashes` where this batch starts
    """
    batch_size = options.get("embeddings-grouping.seer.delete-record-batch-size")
    len_hashes = len(hashes)
    end_index = min(last_deleted_index + batch_size, len_hashes)
    delete_grouping_records_by_hash(project_id, hashes[last_deleted_index:end_index])
    # Chain the remainder as a fresh task so one invocation never has to
    # process the whole list within a single time limit.
    if end_index < len_hashes:
        delete_seer_grouping_records_by_hash.apply_async(args=[project_id, hashes, end_index])
24 changes: 24 additions & 0 deletions tests/sentry/tasks/test_delete_seer_grouping_records_by_hash.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from unittest.mock import patch

from sentry.tasks.delete_seer_grouping_records_by_hash import delete_seer_grouping_records_by_hash
from sentry.testutils.pytest.fixtures import django_db_all


@django_db_all
@patch("sentry.tasks.delete_seer_grouping_records_by_hash.delete_grouping_records_by_hash")
@patch(
    "sentry.tasks.delete_seer_grouping_records_by_hash.delete_seer_grouping_records_by_hash.apply_async"
)
def test_delete_seer_grouping_records_by_hash_batches(
    mock_delete_seer_grouping_records_by_hash_apply_async, mock_delete_grouping_records_by_hash
):
    """
    Test that when delete_seer_grouping_records_by_hash is called with more hashes
    than the batch size (default 100), it deletes the first batch synchronously and
    spawns another task starting at the end index of that batch.
    """
    mock_delete_grouping_records_by_hash.return_value = True
    project_id, hashes = 1, [str(i) for i in range(101)]
    delete_seer_grouping_records_by_hash(project_id, hashes, 0)
    # The first batch (100 hashes) is deleted in this invocation...
    mock_delete_grouping_records_by_hash.assert_called_once_with(project_id, hashes[:100])
    # ...and a follow-up task is scheduled for the remainder, starting at index 100.
    assert mock_delete_seer_grouping_records_by_hash_apply_async.call_args[1] == {
        "args": [project_id, hashes, 100]
    }

0 comments on commit 34c5f0b

Please sign in to comment.