Fix fuzz_repo False Positive With a Refactor to Improve Effectivene…

…ss & Efficiency (#1317) Sorry for the churn on this test; filesystem I/O makes for slow feedback when trying to test locally. --- Anyway, the issues listed below are addressed in this PR by simplifying the test harness implementation. The result being significantly improved fuzzing coverage and slightly improved execution speed. Prior to the changes introduced here, the implementation of `fuzz_repo` had several issues: 1. `repo.stage()` was not called before the first `repo.do_commit()` call 2. When `repo.stage()` was eventually called, the argument it was passed was incorrect (a list of absolute paths instead of relative to the repo dir) causing a `ValueError` that broke the fuzzer runs. This was hidden during the initial local testing because of point 3 below. 3. Inefficient consumption of the fuzzer provided data resulted in the input bytes being exhausted early in the `TestOneInput` execution until the fuzzer was able to generate a corpus large enough to satisfy all of the `Consume*` calls. Other changes: - `EnhancedFuzzedDataProvider.ConsumeRandomString` now accepts an argument to optionally exclude unicode surrogates from the returned string (useful for places where they will always raise an exception like most filesystem operations.) - Adds new fuzzing engine recommended dictionary entries (now that the test is effective enough to generate recommendations.) Closes: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=69054
jelmer · May 16, 2024 · ac01c63 · ac01c63
2 parents 2f7229b + 6d41365
commit ac01c63
Show file tree

Hide file tree

Showing 3 changed files with 26 additions and 19 deletions.
diff --git a/fuzzing/dictionaries/fuzz_repo.dict b/fuzzing/dictionaries/fuzz_repo.dict
@@ -1,2 +1,5 @@
 "{self.size}"
 "win32"
+"{self.extended_flags}"
+"P\\303\\000\\000\\000\\000\\000\\000"
+"\\377\\377\\377\\377"
diff --git a/fuzzing/fuzz-targets/fuzz_repo.py b/fuzzing/fuzz-targets/fuzz_repo.py
@@ -21,16 +21,21 @@ def TestOneInput(data):
         repo.set_description(fdp.ConsumeRandomBytes())
         repo.get_description()
 
-        # Generate a minimal set of files based on fuzz data to minimize I/O operations.
-        file_paths = [
-            os.path.join(temp_dir, f"File{i}")
-            for i in range(min(3, fdp.ConsumeIntInRange(1, 3)))
-        ]
-        for file_path in file_paths:
-            with open(file_path, "wb") as f:
-                f.write(fdp.ConsumeRandomBytes())
+        try:
+            # Generate a minimal set of files based on fuzz data to minimize I/O operations.
+            file_names = [
+                f"File{i}{fdp.ConsumeRandomString(without_surrogates=True)}"
+                for i in range(min(3, fdp.ConsumeIntInRange(1, 3)))
+            ]
+            for file in file_names:
+                with open(os.path.join(temp_dir, file), "wb") as f:
+                    f.write(fdp.ConsumeRandomBytes())
+        except (ValueError, OSError):
+            # Exit early if the fuzzer generates an invalid filename.
+            return -1
 
         try:
+            repo.stage(file_names)
             repo.do_commit(
                 message=fdp.ConsumeRandomBytes(),
                 committer=fdp.ConsumeRandomBytes(),
@@ -45,15 +50,8 @@ def TestOneInput(data):
         except ValueError as e:
             if is_expected_exception(["Unable to handle non-minute offset"], e):
                 return -1
-
-        for file_path in file_paths:
-            with open(file_path, "wb") as f:
-                f.write(fdp.ConsumeRandomBytes())
-
-        repo.stage(file_paths)
-        repo.do_commit(
-            message=fdp.ConsumeRandomBytes(),
-        )
+            else:
+                raise e
 
 
 def main():

diff --git a/fuzzing/fuzz-targets/test_utils.py b/fuzzing/fuzz-targets/test_utils.py
@@ -57,11 +57,12 @@ def ConsumeRandomBytes(self, max_length=None) -> bytes:
 
         return self.ConsumeBytes(self.ConsumeIntInRange(0, max_length))
 
-    def ConsumeRandomString(self, max_length=None) -> str:
+    def ConsumeRandomString(self, max_length=None, without_surrogates=False) -> str:
         """Consume bytes to produce a Unicode string.
 
         Args:
           max_length (int, optional): The maximum length of the string. Defaults to the number of remaining bytes.
+          without_surrogates (bool, optional): If True, never generate surrogate pair characters. Defaults to False.
 
         Returns:
          str: A Unicode string.
@@ -71,7 +72,12 @@ def ConsumeRandomString(self, max_length=None) -> str:
         else:
             max_length = min(max_length, self.remaining_bytes())
 
-        return self.ConsumeUnicode(self.ConsumeIntInRange(0, max_length))
+        count = self.ConsumeIntInRange(0, max_length)
+
+        if without_surrogates:
+            return self.ConsumeUnicodeNoSurrogates(count)
+        else:
+            return self.ConsumeUnicode(count)
 
     def ConsumeRandomInt(self, minimum=0, maximum=1234567890) -> int:
         """Consume bytes to produce an integer.