From 1445c385ec592593bf5bebae8210c7e4b41849ee Mon Sep 17 00:00:00 2001
From: "codeflash-ai[bot]"
 <148906541+codeflash-ai[bot]@users.noreply.github.com>
Date: Fri, 10 Oct 2025 18:29:34 +0000
Subject: [PATCH 1/2] Optimize TestsCache.compute_file_hash

The optimized code achieves a 52% speedup by replacing the traditional file reading approach with a more efficient buffered I/O pattern using `readinto()` and `memoryview`.

**Key optimizations:**

1. **Pre-allocated buffer with `readinto()`**: Instead of `f.read(8192)` which allocates a new bytes object on each iteration, the code uses a single `bytearray(8192)` buffer and reads data directly into it with `f.readinto(mv)`. This eliminates repeated memory allocations.

2. **Memory view for zero-copy slicing**: The `memoryview(buf)` allows efficient slicing (`mv[:n]`) without copying data, reducing memory overhead when updating the hash with partial buffers.

3. **Direct `open()` with unbuffered I/O**: Using `open(path, "rb", buffering=0)` instead of `Path(path).open("rb")` avoids the Path object overhead and disables Python's internal buffering to prevent double-buffering since we're managing our own buffer.

**Performance impact**: The line profiler shows the critical file opening operation dropped from 83.4% to 62.2% of total time, while the new buffer operations (`readinto`, `memoryview`) are very efficient. This optimization is particularly effective for medium to large files where the reduced memory allocation overhead compounds across multiple read operations.

**Best use cases**: This optimization excels when computing hashes for files larger than the 8KB buffer size, where the memory allocation savings become significant, and when called frequently in batch operations.
---
 codeflash/discovery/discover_unit_tests.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py
index 21e1b71f8..3fe74246f 100644
--- a/codeflash/discovery/discover_unit_tests.py
+++ b/codeflash/discovery/discover_unit_tests.py
@@ -160,12 +160,14 @@ def get_function_to_test_map_for_file(
     @staticmethod
     def compute_file_hash(path: str | Path) -> str:
         h = hashlib.sha256(usedforsecurity=False)
-        with Path(path).open("rb") as f:
+        with open(path, "rb", buffering=0) as f:
+            buf = bytearray(8192)
+            mv = memoryview(buf)
             while True:
-                chunk = f.read(8192)
-                if not chunk:
+                n = f.readinto(mv)
+                if n == 0:
                     break
-                h.update(chunk)
+                h.update(mv[:n])
         return h.hexdigest()
 
     def close(self) -> None:

From c3e2ec28500ea11b1d8879115a66a4c31164f2ae Mon Sep 17 00:00:00 2001
From: Kevin Turcios <turcioskevinr@gmail.com>
Date: Fri, 10 Oct 2025 13:23:13 -0700
Subject: [PATCH 2/2] it's a pathy objectey

---
 codeflash/discovery/discover_unit_tests.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/codeflash/discovery/discover_unit_tests.py b/codeflash/discovery/discover_unit_tests.py
index 3fe74246f..46c18b22d 100644
--- a/codeflash/discovery/discover_unit_tests.py
+++ b/codeflash/discovery/discover_unit_tests.py
@@ -158,9 +158,9 @@ def get_function_to_test_map_for_file(
         return result
 
     @staticmethod
-    def compute_file_hash(path: str | Path) -> str:
+    def compute_file_hash(path: Path) -> str:
         h = hashlib.sha256(usedforsecurity=False)
-        with open(path, "rb", buffering=0) as f:
+        with path.open("rb", buffering=0) as f:
             buf = bytearray(8192)
             mv = memoryview(buf)
             while True: