From 3519835b296e8eb478cb84892a8f8dac80ad70ca Mon Sep 17 00:00:00 2001 From: Yuval Mandelboum Date: Thu, 2 Oct 2025 11:16:07 -0700 Subject: [PATCH 1/3] chore(cleanup): Add File cleanup task Currently only cleans up old orphaned release type Files, which basically had their ReleaseFile deleted but stayed hanging. We are trusting the `type` field here to differentiate - release type File should not be on their own and are 1:1 with ReleaseFile --- src/sentry/deletions/defaults/file.py | 45 ++++++++ tests/sentry/deletions/test_file.py | 157 ++++++++++++++++++++++++++ 2 files changed, 202 insertions(+) create mode 100644 src/sentry/deletions/defaults/file.py create mode 100644 tests/sentry/deletions/test_file.py diff --git a/src/sentry/deletions/defaults/file.py b/src/sentry/deletions/defaults/file.py new file mode 100644 index 00000000000000..44a6671a40e307 --- /dev/null +++ b/src/sentry/deletions/defaults/file.py @@ -0,0 +1,45 @@ +from datetime import datetime, timedelta, timezone + +from django.db.models import Q + +from sentry.deletions.base import BaseRelation, ModelDeletionTask +from sentry.models.files.file import File + + +class FileDeletionTask(ModelDeletionTask[File]): + def get_query_filter(self) -> Q: + """ + Returns a Q object that filters for orphaned release-type Files. + Only targets Files that are: + 1. Of release-related types (release.file, release.artifact-index) + 2. Have no corresponding ReleaseFile entry + 3. Are older than 90 days + """ + from django.db.models import Exists, OuterRef + + from sentry.models.releasefile import ReleaseFile + + cutoff = datetime.now(timezone.utc) - timedelta(days=90) + + # Subquery for checking if ReleaseFile references this File + releasefile_exists = Exists(ReleaseFile.objects.filter(file_id=OuterRef("id"))) + + return Q( + Q( + type__in=["release.file", "release.artifact-index"], + timestamp__lt=cutoff, + ) + & ~releasefile_exists + ) + + def get_child_relations(self, instance: File) -> list[BaseRelation]: + from sentry.models.files.fileblobindex import FileBlobIndex + + # Delete the associated FileBlobIndex records + # The FileBlob cleanup is handled by a separate task + return [ + BaseRelation( + params={"model": FileBlobIndex, "query": {"file_id": instance.id}}, + task=None, # Use BulkModelDeletionTask + ), + ] diff --git a/tests/sentry/deletions/test_file.py b/tests/sentry/deletions/test_file.py new file mode 100644 index 00000000000000..1981a5373b8284 --- /dev/null +++ b/tests/sentry/deletions/test_file.py @@ -0,0 +1,157 @@ +from datetime import timedelta + +from django.utils import timezone + +from sentry.deletions.defaults.file import FileDeletionTask +from sentry.models.files.file import File +from sentry.models.files.fileblobindex import FileBlobIndex +from sentry.models.releasefile import ReleaseFile +from sentry.testutils.cases import TestCase + + +class FileDeletionTaskTest(TestCase): + def test_get_query_filter_orphaned_release_file(self): + """Test that orphaned release.file type Files are selected for deletion""" + project = self.create_project() + self.create_release(project=project) + + # Create an orphaned release.file (no ReleaseFile pointing to it) + old_timestamp = timezone.now() - timedelta(days=91) + orphaned_file = File.objects.create( + name="orphaned.js", + type="release.file", + timestamp=old_timestamp, + ) + + # Get the deletion task and query filter + task = FileDeletionTask( + manager=None, # type: ignore[arg-type] + model=File, + query={}, + ) + query_filter = task.get_query_filter() + + # Apply the filter to get Files that should be deleted + files_to_delete = File.objects.filter(query_filter) + + assert orphaned_file in files_to_delete + + def test_get_query_filter_does_not_select_referenced_file(self): + """Test that Files referenced by ReleaseFile are NOT selected for deletion""" + project = self.create_project() + release = self.create_release(project=project) + + # Create a File and ReleaseFile pointing to it + old_timestamp = timezone.now() - timedelta(days=91) + referenced_file = File.objects.create( + name="referenced.js", + type="release.file", + timestamp=old_timestamp, + ) + ReleaseFile.objects.create( + organization_id=project.organization_id, + release_id=release.id, + file=referenced_file, + name="referenced.js", + ident="abc123", + ) + + # Get the deletion task and query filter + task = FileDeletionTask( + manager=None, # type: ignore[arg-type] + model=File, + query={}, + ) + query_filter = task.get_query_filter() + + # Apply the filter + files_to_delete = File.objects.filter(query_filter) + + assert referenced_file not in files_to_delete + + def test_get_query_filter_does_not_select_recent_files(self): + """Test that recent Files are NOT selected even if orphaned""" + # Create an orphaned file but with recent timestamp + recent_file = File.objects.create( + name="recent.js", + type="release.file", + timestamp=timezone.now() - timedelta(days=30), # Only 30 days old + ) + + # Get the deletion task and query filter + task = FileDeletionTask( + manager=None, # type: ignore[arg-type] + model=File, + query={}, + ) + query_filter = task.get_query_filter() + + # Apply the filter + files_to_delete = File.objects.filter(query_filter) + + assert recent_file not in files_to_delete + + def test_get_query_filter_artifact_index_files(self): + """Test that orphaned release.artifact-index Files are selected""" + old_timestamp = timezone.now() - timedelta(days=91) + orphaned_index = File.objects.create( + name="artifact-index.json", + type="release.artifact-index", + timestamp=old_timestamp, + ) + + task = FileDeletionTask( + manager=None, # type: ignore[arg-type] + model=File, + query={}, + ) + query_filter = task.get_query_filter() + files_to_delete = File.objects.filter(query_filter) + + assert orphaned_index in files_to_delete + + def test_get_query_filter_does_not_select_other_file_types(self): + """Test that non-release file types are NOT selected""" + old_timestamp = timezone.now() - timedelta(days=91) + + # Create files with different types + artifact_bundle_file = File.objects.create( + name="bundle.zip", + type="artifact.bundle", + timestamp=old_timestamp, + ) + debug_file = File.objects.create( + name="debug.sym", + type="debug.file", + timestamp=old_timestamp, + ) + + task = FileDeletionTask( + manager=None, # type: ignore[arg-type] + model=File, + query={}, + ) + query_filter = task.get_query_filter() + files_to_delete = File.objects.filter(query_filter) + + assert artifact_bundle_file not in files_to_delete + assert debug_file not in files_to_delete + + def test_get_child_relations(self): + """Test that FileBlobIndex records are returned as child relations""" + file = File.objects.create( + name="test.js", + type="release.file", + ) + + task = FileDeletionTask( + manager=None, # type: ignore[arg-type] + model=File, + query={}, + ) + child_relations = task.get_child_relations(file) + + # Should have one relation for FileBlobIndex + assert len(child_relations) == 1 + assert child_relations[0].params["model"] == FileBlobIndex + assert child_relations[0].params["query"] == {"file_id": file.id} From a8507052fa60cd9c18ada015334dd9b562d38549 Mon Sep 17 00:00:00 2001 From: Yuval Mandelboum Date: Thu, 2 Oct 2025 12:04:59 -0700 Subject: [PATCH 2/3] typing fix --- tests/sentry/deletions/test_file.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/sentry/deletions/test_file.py b/tests/sentry/deletions/test_file.py index 1981a5373b8284..2036354c548a44 100644 --- a/tests/sentry/deletions/test_file.py +++ b/tests/sentry/deletions/test_file.py @@ -10,7 +10,7 @@ class FileDeletionTaskTest(TestCase): - def test_get_query_filter_orphaned_release_file(self): + def test_get_query_filter_orphaned_release_file(self) -> None: """Test that orphaned release.file type Files are selected for deletion""" project = self.create_project() self.create_release(project=project) @@ -36,7 +36,7 @@ def test_get_query_filter_orphaned_release_file(self): assert orphaned_file in files_to_delete - def test_get_query_filter_does_not_select_referenced_file(self): + def test_get_query_filter_does_not_select_referenced_file(self) -> None: """Test that Files referenced by ReleaseFile are NOT selected for deletion""" project = self.create_project() release = self.create_release(project=project) @@ -69,7 +69,7 @@ def test_get_query_filter_does_not_select_referenced_file(self): assert referenced_file not in files_to_delete - def test_get_query_filter_does_not_select_recent_files(self): + def test_get_query_filter_does_not_select_recent_files(self) -> None: """Test that recent Files are NOT selected even if orphaned""" # Create an orphaned file but with recent timestamp recent_file = File.objects.create( @@ -91,7 +91,7 @@ def test_get_query_filter_does_not_select_recent_files(self): assert recent_file not in files_to_delete - def test_get_query_filter_artifact_index_files(self): + def test_get_query_filter_artifact_index_files(self) -> None: """Test that orphaned release.artifact-index Files are selected""" old_timestamp = timezone.now() - timedelta(days=91) orphaned_index = File.objects.create( @@ -110,7 +110,7 @@ def test_get_query_filter_artifact_index_files(self): assert orphaned_index in files_to_delete - def test_get_query_filter_does_not_select_other_file_types(self): + def test_get_query_filter_does_not_select_other_file_types(self) -> None: """Test that non-release file types are NOT selected""" old_timestamp = timezone.now() - timedelta(days=91) @@ -137,7 +137,7 @@ def test_get_query_filter_does_not_select_other_file_types(self): assert artifact_bundle_file not in files_to_delete assert debug_file not in files_to_delete - def test_get_child_relations(self): + def test_get_child_relations(self) -> None: """Test that FileBlobIndex records are returned as child relations""" file = File.objects.create( name="test.js", From 520a9903f5dbed082436985b3379d651a93eb433 Mon Sep 17 00:00:00 2001 From: Yuval Mandelboum Date: Thu, 2 Oct 2025 14:56:59 -0700 Subject: [PATCH 3/3] fix cutoff time calc --- src/sentry/deletions/defaults/file.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/sentry/deletions/defaults/file.py b/src/sentry/deletions/defaults/file.py index 44a6671a40e307..7a867c511f4974 100644 --- a/src/sentry/deletions/defaults/file.py +++ b/src/sentry/deletions/defaults/file.py @@ -1,6 +1,7 @@ -from datetime import datetime, timedelta, timezone +from datetime import timedelta from django.db.models import Q +from django.utils import timezone from sentry.deletions.base import BaseRelation, ModelDeletionTask from sentry.models.files.file import File @@ -19,7 +20,7 @@ def get_query_filter(self) -> Q: from sentry.models.releasefile import ReleaseFile - cutoff = datetime.now(timezone.utc) - timedelta(days=90) + cutoff = timezone.now() - timedelta(days=90) # Subquery for checking if ReleaseFile references this File releasefile_exists = Exists(ReleaseFile.objects.filter(file_id=OuterRef("id"))) @@ -35,8 +36,6 @@ def get_query_filter(self) -> Q: def get_child_relations(self, instance: File) -> list[BaseRelation]: from sentry.models.files.fileblobindex import FileBlobIndex - # Delete the associated FileBlobIndex records - # The FileBlob cleanup is handled by a separate task return [ BaseRelation( params={"model": FileBlobIndex, "query": {"file_id": instance.id}},