Fix a potential security exploit in RaggedArray.loads(buffer, ldtype=…

…np.uint64). When parsing binary data with ldtype=np.uint64, if a length is sufficiently close to 2^64 that it causes the parsing buffer pointer (starting from &buffer) to overflow, then arbitrary memory owned by the current process and whose address is < &buffer can be read. Should the overflow map the pointer to a location not owned by the current process, a segfault will occur. Overflow is now caught and raised as a ValueError() (like in any other case where the parsing fails).
bwoodsend · May 22, 2022 · 1a15fad · 1a15fad
1 parent a971948
commit 1a15fad
Show file tree

Hide file tree

Showing 3 changed files with 47 additions and 2 deletions.
diff --git a/rockhopper/src/ragged_array.c b/rockhopper/src/ragged_array.c
@@ -59,8 +59,9 @@ int count_rows(void * raw, int raw_length, int length_power, int big_endian,
 
   int rows = 0;
 
+  void * start = raw;
   void * end = raw + raw_length;
-  while (raw <= end - (1 << length_power)) {
+  while (raw <= end - (1 << length_power) && raw >= start) {
     uint64_t length = read(raw);
     raw += (1 << length_power);
     raw += length * itemsize;

diff --git a/setup.py b/setup.py
@@ -34,7 +34,7 @@
     extras_require={
         "test": [
             'pytest>=3', 'pytest-order', 'coverage', 'pytest-cov',
-            'coverage-conditional-plugin'
+            'coverage-conditional-plugin', 'hypothesis'
         ]
     },
     license="MIT license",

diff --git a/tests/test_io.py b/tests/test_io.py
@@ -7,6 +7,7 @@
 import numpy as np
 import pytest
 from cslug import ptr
+from hypothesis import given, strategies, settings, Verbosity, example
 
 from rockhopper import RaggedArray
 from rockhopper._ragged_array import _2_power, _big_endian
@@ -61,6 +62,49 @@ def test_dump_load(dtype, byteorder):
     assert consumed == len(bin)
 
 
+int_types = [
+    np.uint8, np.uint16, np.uint32, np.uint64,
+    np.int8, np.int16, np.int32, np.int64,
+]
+blob = bytes(range(256)) + bytes(range(256))[::-1]
+
+
+@pytest.mark.parametrize("dtype", int_types)
+@pytest.mark.parametrize("ldtype", int_types)
+def test_loads_pointer_overflow_guard(dtype, ldtype):
+    """Test that the check for pointer overflowing caused by reading a huge row
+    length works."""
+    for i in range(-30, len(blob)):
+        try:
+            RaggedArray.loads(blob[i: i+30], dtype=dtype, ldtype=ldtype)
+        except ValueError:
+            pass
+
+
+@pytest.mark.parametrize("dtype", int_types)
+@pytest.mark.parametrize("ldtype", int_types)
+def test_fuzz_loads(dtype, ldtype):
+    """Scan for possible segfaults.
+
+    All invalid inputs must lead to a ValueError rather than a seg-fault or
+    RaggedArray.loads() could be tricked into reading arbitrary memory addresses
+    by a maliciously constructed invalid data file.
+
+    """
+    @given(strategies.binary())
+    @example(b'\xc0\\\\\xb0\x93\x91\xff\xffpEfe\x167\xee')
+    def fuzz(x):
+        print(x)
+        try:
+            self, _ = RaggedArray.loads(x, dtype=dtype, ldtype=ldtype)
+        except ValueError:
+            pass
+        else:
+            assert self.dumps(ldtype=ldtype).tobytes() == x
+
+    fuzz()
+
+
 def test_dump_byteorder():
     self = RaggedArray.from_nested([[0x0109, 0x0208, 0x0307]], dtype=np.uint16)