Skip to content

Commit

Permalink
ARROW-15360: [Python] Check slice bounds in Buffer.slice()
Browse files Browse the repository at this point in the history
Calling `Buffer.slice()` with invalid arguments could return a Buffer referencing out-of-bounds memory.
With this change, an IndexError is raised instead.

Note that `Buffer.__getitem__()` is unaffected, as it applies Python slice semantics: indices out of bounds are automatically clamped.

Closes #12182 from pitrou/ARROW-15360-buffer-slice

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
pitrou committed Jan 19, 2022
1 parent 09f23ed commit fd580db
Show file tree
Hide file tree
Showing 8 changed files with 46 additions and 30 deletions.
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/array_base.cc
Expand Up @@ -271,7 +271,7 @@ Result<std::shared_ptr<Array>> Array::SliceSafe(int64_t offset, int64_t length)
Result<std::shared_ptr<Array>> Array::SliceSafe(int64_t offset) const {
if (offset < 0) {
// Avoid UBSAN in subtraction below
return Status::Invalid("Negative buffer slice offset");
return Status::IndexError("Negative array slice offset");
}
return SliceSafe(offset, data_->length - offset);
}
Expand Down
16 changes: 8 additions & 8 deletions cpp/src/arrow/array/array_test.cc
Expand Up @@ -150,12 +150,12 @@ TEST_F(TestArray, TestSliceSafe) {
ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(7, 0));
check_data(*sliced, {});

ASSERT_RAISES(Invalid, arr->SliceSafe(8, 0));
ASSERT_RAISES(Invalid, arr->SliceSafe(0, 8));
ASSERT_RAISES(Invalid, arr->SliceSafe(-1, 0));
ASSERT_RAISES(Invalid, arr->SliceSafe(0, -1));
ASSERT_RAISES(Invalid, arr->SliceSafe(6, 2));
ASSERT_RAISES(Invalid, arr->SliceSafe(6, std::numeric_limits<int64_t>::max() - 5));
ASSERT_RAISES(IndexError, arr->SliceSafe(8, 0));
ASSERT_RAISES(IndexError, arr->SliceSafe(0, 8));
ASSERT_RAISES(IndexError, arr->SliceSafe(-1, 0));
ASSERT_RAISES(IndexError, arr->SliceSafe(0, -1));
ASSERT_RAISES(IndexError, arr->SliceSafe(6, 2));
ASSERT_RAISES(IndexError, arr->SliceSafe(6, std::numeric_limits<int64_t>::max() - 5));

ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(0));
check_data(*sliced, original_data);
Expand All @@ -166,8 +166,8 @@ TEST_F(TestArray, TestSliceSafe) {
ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(7));
check_data(*sliced, {});

ASSERT_RAISES(Invalid, arr->SliceSafe(8));
ASSERT_RAISES(Invalid, arr->SliceSafe(-1));
ASSERT_RAISES(IndexError, arr->SliceSafe(8));
ASSERT_RAISES(IndexError, arr->SliceSafe(-1));
}

Status MakeArrayFromValidBytes(const std::vector<uint8_t>& v, MemoryPool* pool,
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/buffer.cc
Expand Up @@ -52,7 +52,7 @@ Status CheckBufferSlice(const Buffer& buffer, int64_t offset, int64_t length) {
Status CheckBufferSlice(const Buffer& buffer, int64_t offset) {
if (ARROW_PREDICT_FALSE(offset < 0)) {
// Avoid UBSAN in subtraction below
return Status::Invalid("Negative buffer slice offset");
return Status::IndexError("Negative buffer slice offset");
}
return CheckBufferSlice(buffer, offset, buffer.size() - offset);
}
Expand Down
16 changes: 8 additions & 8 deletions cpp/src/arrow/buffer_test.cc
Expand Up @@ -492,12 +492,12 @@ TEST(TestBuffer, SliceBufferSafe) {
ASSERT_OK_AND_ASSIGN(sliced, SliceBufferSafe(buf, buf->size(), 0));
AssertBufferEqual(*sliced, "");

ASSERT_RAISES(Invalid, SliceBufferSafe(buf, -1, 0));
ASSERT_RAISES(Invalid, SliceBufferSafe(buf, 0, -1));
ASSERT_RAISES(Invalid, SliceBufferSafe(buf, 0, buf->size() + 1));
ASSERT_RAISES(Invalid, SliceBufferSafe(buf, 2, buf->size() - 1));
ASSERT_RAISES(Invalid, SliceBufferSafe(buf, buf->size() + 1, 0));
ASSERT_RAISES(Invalid,
ASSERT_RAISES(IndexError, SliceBufferSafe(buf, -1, 0));
ASSERT_RAISES(IndexError, SliceBufferSafe(buf, 0, -1));
ASSERT_RAISES(IndexError, SliceBufferSafe(buf, 0, buf->size() + 1));
ASSERT_RAISES(IndexError, SliceBufferSafe(buf, 2, buf->size() - 1));
ASSERT_RAISES(IndexError, SliceBufferSafe(buf, buf->size() + 1, 0));
ASSERT_RAISES(IndexError,
SliceBufferSafe(buf, 3, std::numeric_limits<int64_t>::max() - 2));

ASSERT_OK_AND_ASSIGN(sliced, SliceBufferSafe(buf, 0));
Expand All @@ -507,8 +507,8 @@ TEST(TestBuffer, SliceBufferSafe) {
ASSERT_OK_AND_ASSIGN(sliced, SliceBufferSafe(buf, buf->size()));
AssertBufferEqual(*sliced, "");

ASSERT_RAISES(Invalid, SliceBufferSafe(buf, -1));
ASSERT_RAISES(Invalid, SliceBufferSafe(buf, buf->size() + 1));
ASSERT_RAISES(IndexError, SliceBufferSafe(buf, -1));
ASSERT_RAISES(IndexError, SliceBufferSafe(buf, buf->size() + 1));
}

TEST(TestMutableBuffer, Wrap) {
Expand Down
11 changes: 6 additions & 5 deletions cpp/src/arrow/util/int_util_internal.h
Expand Up @@ -133,18 +133,19 @@ UpcastInt(Integer v) {
static inline Status CheckSliceParams(int64_t object_length, int64_t slice_offset,
int64_t slice_length, const char* object_name) {
if (ARROW_PREDICT_FALSE(slice_offset < 0)) {
return Status::Invalid("Negative ", object_name, " slice offset");
return Status::IndexError("Negative ", object_name, " slice offset");
}
if (ARROW_PREDICT_FALSE(slice_length < 0)) {
return Status::Invalid("Negative ", object_name, " slice length");
return Status::IndexError("Negative ", object_name, " slice length");
}
int64_t offset_plus_length;
if (ARROW_PREDICT_FALSE(
internal::AddWithOverflow(slice_offset, slice_length, &offset_plus_length))) {
return Status::Invalid(object_name, " slice would overflow");
return Status::IndexError(object_name, " slice would overflow");
}
if (ARROW_PREDICT_FALSE(slice_offset + slice_length > object_length)) {
return Status::Invalid(object_name, " slice would exceed ", object_name, " length");
if (ARROW_PREDICT_FALSE(offset_plus_length > object_length)) {
return Status::IndexError(object_name, " slice would exceed ", object_name,
" length");
}
return Status::OK();
}
Expand Down
8 changes: 4 additions & 4 deletions python/pyarrow/includes/libarrow.pxd
Expand Up @@ -309,10 +309,10 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil:
c_string ToHexString()
c_bool Equals(const CBuffer& other)

shared_ptr[CBuffer] SliceBuffer(const shared_ptr[CBuffer]& buffer,
int64_t offset, int64_t length)
shared_ptr[CBuffer] SliceBuffer(const shared_ptr[CBuffer]& buffer,
int64_t offset)
CResult[shared_ptr[CBuffer]] SliceBufferSafe(
const shared_ptr[CBuffer]& buffer, int64_t offset)
CResult[shared_ptr[CBuffer]] SliceBufferSafe(
const shared_ptr[CBuffer]& buffer, int64_t offset, int64_t length)

cdef cppclass CMutableBuffer" arrow::MutableBuffer"(CBuffer):
CMutableBuffer(const uint8_t* data, int64_t size)
Expand Down
6 changes: 3 additions & 3 deletions python/pyarrow/io.pxi
Expand Up @@ -1056,10 +1056,10 @@ cdef class Buffer(_Weakrefable):
raise IndexError('Offset must be non-negative')

if length is None:
result = SliceBuffer(self.buffer, offset)
result = GetResultValue(SliceBufferSafe(self.buffer, offset))
else:
result = SliceBuffer(self.buffer, offset, max(length, 0))

result = GetResultValue(SliceBufferSafe(self.buffer, offset,
length))
return pyarrow_wrap_buffer(result)

def equals(self, Buffer other):
Expand Down
15 changes: 15 additions & 0 deletions python/pyarrow/tests/test_io.py
Expand Up @@ -518,10 +518,25 @@ def test_buffer_slicing():
with pytest.raises(IndexError):
buf.slice(-1)

with pytest.raises(IndexError):
buf.slice(len(buf) + 1)
assert buf[11:].to_pybytes() == b""

# Slice stop exceeds buffer length
with pytest.raises(IndexError):
buf.slice(1, len(buf))
assert buf[1:11].to_pybytes() == buf.to_pybytes()[1:]

# Negative length
with pytest.raises(IndexError):
buf.slice(1, -1)

# Test slice notation
assert buf[2:].equals(buf.slice(2))
assert buf[2:5].equals(buf.slice(2, 3))
assert buf[-5:].equals(buf.slice(len(buf) - 5))
assert buf[-5:-2].equals(buf.slice(len(buf) - 5, 3))

with pytest.raises(IndexError):
buf[::-1]
with pytest.raises(IndexError):
Expand Down

0 comments on commit fd580db

Please sign in to comment.