From ae6dd8677778c7efd630872cf809cf750a4babf4 Mon Sep 17 00:00:00 2001
From: "seer-by-sentry[bot]" <157164994+seer-by-sentry[bot]@users.noreply.github.com>
Date: Tue, 11 Nov 2025 18:18:37 +0000
Subject: [PATCH 1/4] fix: Batch commit file change queries to avoid timeouts

---
 src/sentry/utils/committers.py | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/src/sentry/utils/committers.py b/src/sentry/utils/committers.py
index afdb025ba46dc6..f9bbe01af2373c 100644
--- a/src/sentry/utils/committers.py
+++ b/src/sentry/utils/committers.py
@@ -26,6 +26,9 @@
 from sentry.utils.hashlib import hash_values
 
 PATH_SEPARATORS = frozenset(["/", "\\"])
+# Limit the number of commits to batch in a single query to avoid query timeouts
+# from large IN clauses combined with complex LIKE conditions
+COMMIT_BATCH_SIZE = 50
 
 
 def tokenize_path(path: str) -> Iterator[str]:
@@ -96,11 +99,19 @@ def _get_commit_file_changes(
     # build a single query to get all of the commit file that might match the first n frames
     path_query = reduce(operator.or_, (Q(filename__iendswith=path) for path in filenames))
 
-    commit_file_change_matches = CommitFileChange.objects.filter(
-        path_query, commit_id__in=[c.id for c in commits]
-    )
+    # Batch commits to avoid query timeouts from large IN clauses
+    # combined with complex LIKE conditions
+    all_file_changes: list[CommitFileChange] = []
+    commit_ids = [c.id for c in commits]
+
+    for i in range(0, len(commit_ids), COMMIT_BATCH_SIZE):
+        batch_commit_ids = commit_ids[i:i + COMMIT_BATCH_SIZE]
+        commit_file_change_matches = CommitFileChange.objects.filter(
+            path_query, commit_id__in=batch_commit_ids
+        )
+        all_file_changes.extend(list(commit_file_change_matches))
 
-    return list(commit_file_change_matches)
+    return all_file_changes
 
 
 def _match_commits_paths(

From 8b3d8dc951db8b5e012e1cef4bf892aee383dfd8 Mon Sep 17 00:00:00 2001
From: "getsantry[bot]" <66042841+getsantry[bot]@users.noreply.github.com>
Date: Tue, 11 Nov 2025 18:19:39 +0000
Subject: [PATCH 2/4] :hammer_and_wrench: apply pre-commit fixes

---
 src/sentry/utils/committers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sentry/utils/committers.py b/src/sentry/utils/committers.py
index f9bbe01af2373c..c6ca2ad40c9dd4 100644
--- a/src/sentry/utils/committers.py
+++ b/src/sentry/utils/committers.py
@@ -103,9 +103,9 @@ def _get_commit_file_changes(
     # combined with complex LIKE conditions
     all_file_changes: list[CommitFileChange] = []
     commit_ids = [c.id for c in commits]
-    
+
     for i in range(0, len(commit_ids), COMMIT_BATCH_SIZE):
-        batch_commit_ids = commit_ids[i:i + COMMIT_BATCH_SIZE]
+        batch_commit_ids = commit_ids[i : i + COMMIT_BATCH_SIZE]
         commit_file_change_matches = CommitFileChange.objects.filter(
             path_query, commit_id__in=batch_commit_ids
         )

From a7d17ba16f52da29135c00deba78cf9704b08121 Mon Sep 17 00:00:00 2001
From: Nora Shapiro
Date: Tue, 11 Nov 2025 15:40:06 -0800
Subject: [PATCH 3/4] Optimize CommitFileChange query to prevent timeouts

---
 src/sentry/utils/committers.py | 47 +++++++++++++++++++++-------------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/src/sentry/utils/committers.py b/src/sentry/utils/committers.py
index c6ca2ad40c9dd4..cd217de588bb0e 100644
--- a/src/sentry/utils/committers.py
+++ b/src/sentry/utils/committers.py
@@ -4,11 +4,9 @@
 from collections import defaultdict
 from collections.abc import Iterator, Mapping, MutableMapping, Sequence
 from enum import Enum
-from functools import reduce
 from typing import Any, TypedDict
 
 from django.core.cache import cache
-from django.db.models import Q
 
 from sentry.api.serializers import serialize
 from sentry.api.serializers.models.commit import CommitSerializer, get_users_for_commits
@@ -24,6 +22,7 @@ from sentry.users.services.user.service import user_service
 from sentry.utils.event_frames import find_stack_frames, munged_filename_and_frames
 from sentry.utils.hashlib import hash_values
+from sentry.utils.iterators import chunked
 
 PATH_SEPARATORS = frozenset(["/", "\\"])
 # Limit the number of commits to batch in a single query to avoid query timeouts
 # from large IN clauses combined with complex LIKE conditions
@@ -90,28 +89,40 @@ def _get_commits(releases: Sequence[Release]) -> Sequence[Commit]:
 def _get_commit_file_changes(
     commits: Sequence[Commit], path_name_set: set[str]
 ) -> Sequence[CommitFileChange]:
-    # Get distinct file names and bail if there are no files.
-    filenames = {next(tokenize_path(path), None) for path in path_name_set}
-    filenames = {path for path in filenames if path is not None}
+    """
+    Find CommitFileChanges matching file paths in path_name_set.
+    Batches queries and deduplicates results across multiple filename matches.
+    """
+    filenames: set[str] = {
+        path for path in (next(tokenize_path(p), None) for p in path_name_set) if path is not None
+    }
     if not len(filenames):
         return []
+    if not commits:
+        return []
 
-    # build a single query to get all of the commit file that might match the first n frames
-    path_query = reduce(operator.or_, (Q(filename__iendswith=path) for path in filenames))
-
-    # Batch commits to avoid query timeouts from large IN clauses
-    # combined with complex LIKE conditions
-    all_file_changes: list[CommitFileChange] = []
     commit_ids = [c.id for c in commits]
 
-    for i in range(0, len(commit_ids), COMMIT_BATCH_SIZE):
-        batch_commit_ids = commit_ids[i : i + COMMIT_BATCH_SIZE]
-        commit_file_change_matches = CommitFileChange.objects.filter(
-            path_query, commit_id__in=batch_commit_ids
-        )
-        all_file_changes.extend(list(commit_file_change_matches))
+    # Collect unique CommitFileChange IDs
+    matching_ids: set[int] = set()
+
+    # Optimization 1: Batch commit IDs with chunked() to prevent huge IN clauses
+    for commit_batch in chunked(commit_ids, 100):
+        # Optimization 2: Split filename queries to eliminate OR conditions
+        for filename in filenames:
+            # Optimization 3 (Experimental): separate filter calls to hint optimizer to use indexes first
+            matches = CommitFileChange.objects.filter(commit_id__in=commit_batch).filter(
+                filename__iendswith=filename
+            )
+
+            # Collect IDs and deduplicate with set operations
+            matching_ids.update(matches.values_list("id", flat=True))
+
+    if not matching_ids:
+        return []
 
-    return all_file_changes
+    # Single bulk fetch of unique results, ordered by ID
+    return list(CommitFileChange.objects.filter(id__in=matching_ids).order_by("id"))
 
 
 def _match_commits_paths(

From 0081761e81db71adc082765d4f9462d0271be890 Mon Sep 17 00:00:00 2001
From: Nora Shapiro
Date: Tue, 11 Nov 2025 15:55:11 -0800
Subject: [PATCH 4/4] no magic numbers

---
 src/sentry/utils/committers.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/sentry/utils/committers.py b/src/sentry/utils/committers.py
index cd217de588bb0e..1ff7d5347dcada 100644
--- a/src/sentry/utils/committers.py
+++ b/src/sentry/utils/committers.py
@@ -27,7 +27,7 @@
 PATH_SEPARATORS = frozenset(["/", "\\"])
 # Limit the number of commits to batch in a single query to avoid query timeouts
 # from large IN clauses combined with complex LIKE conditions
-COMMIT_BATCH_SIZE = 50
+COMMIT_BATCH_SIZE = 100
 
 
 def tokenize_path(path: str) -> Iterator[str]:
@@ -107,7 +107,7 @@ def _get_commit_file_changes(
     matching_ids: set[int] = set()
 
     # Optimization 1: Batch commit IDs with chunked() to prevent huge IN clauses
-    for commit_batch in chunked(commit_ids, 100):
+    for commit_batch in chunked(commit_ids, COMMIT_BATCH_SIZE):
         # Optimization 2: Split filename queries to eliminate OR conditions
         for filename in filenames:
             # Optimization 3 (Experimental): separate filter calls to hint optimizer to use indexes first
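
For review convenience, below is a sketch of how _get_commit_file_changes should read once all four
patches apply, reassembled from the hunks above; the patches themselves remain authoritative. The
surrounding module is assumed to already provide CommitFileChange, Commit, Sequence, tokenize_path,
and chunked, as the import hunks show.

# Limit the number of commits to batch in a single query to avoid query timeouts
# from large IN clauses combined with complex LIKE conditions
COMMIT_BATCH_SIZE = 100


def _get_commit_file_changes(
    commits: Sequence[Commit], path_name_set: set[str]
) -> Sequence[CommitFileChange]:
    """
    Find CommitFileChanges matching file paths in path_name_set.
    Batches queries and deduplicates results across multiple filename matches.
    """
    filenames: set[str] = {
        path for path in (next(tokenize_path(p), None) for p in path_name_set) if path is not None
    }
    if not len(filenames):
        return []
    if not commits:
        return []

    commit_ids = [c.id for c in commits]

    # Collect unique CommitFileChange IDs
    matching_ids: set[int] = set()

    # One query per (commit batch, filename) pair: batching keeps each IN clause small,
    # and splitting filenames avoids OR-ed LIKE conditions in a single statement.
    for commit_batch in chunked(commit_ids, COMMIT_BATCH_SIZE):
        for filename in filenames:
            matches = CommitFileChange.objects.filter(commit_id__in=commit_batch).filter(
                filename__iendswith=filename
            )
            # Deduplicate across overlapping filename matches via a set of primary keys
            matching_ids.update(matches.values_list("id", flat=True))

    if not matching_ids:
        return []

    # Single bulk fetch of unique results, ordered by ID
    return list(CommitFileChange.objects.filter(id__in=matching_ids).order_by("id"))

The trade-off, per the commit messages and inline comments, is more but smaller queries (one per
commit batch and filename, plus one bulk fetch by ID), each intended to stay clear of the statement
timeout that the original single OR/IN query could hit.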