diff --git a/cpp/src/arrow/array/array_base.cc b/cpp/src/arrow/array/array_base.cc index a3cff7dbb3e0b..c31a7b724833d 100644 --- a/cpp/src/arrow/array/array_base.cc +++ b/cpp/src/arrow/array/array_base.cc @@ -271,7 +271,7 @@ Result> Array::SliceSafe(int64_t offset, int64_t length) Result> Array::SliceSafe(int64_t offset) const { if (offset < 0) { // Avoid UBSAN in subtraction below - return Status::Invalid("Negative buffer slice offset"); + return Status::IndexError("Negative array slice offset"); } return SliceSafe(offset, data_->length - offset); } diff --git a/cpp/src/arrow/array/array_test.cc b/cpp/src/arrow/array/array_test.cc index dc1bd62474753..698ed1e4bed3f 100644 --- a/cpp/src/arrow/array/array_test.cc +++ b/cpp/src/arrow/array/array_test.cc @@ -150,12 +150,12 @@ TEST_F(TestArray, TestSliceSafe) { ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(7, 0)); check_data(*sliced, {}); - ASSERT_RAISES(Invalid, arr->SliceSafe(8, 0)); - ASSERT_RAISES(Invalid, arr->SliceSafe(0, 8)); - ASSERT_RAISES(Invalid, arr->SliceSafe(-1, 0)); - ASSERT_RAISES(Invalid, arr->SliceSafe(0, -1)); - ASSERT_RAISES(Invalid, arr->SliceSafe(6, 2)); - ASSERT_RAISES(Invalid, arr->SliceSafe(6, std::numeric_limits::max() - 5)); + ASSERT_RAISES(IndexError, arr->SliceSafe(8, 0)); + ASSERT_RAISES(IndexError, arr->SliceSafe(0, 8)); + ASSERT_RAISES(IndexError, arr->SliceSafe(-1, 0)); + ASSERT_RAISES(IndexError, arr->SliceSafe(0, -1)); + ASSERT_RAISES(IndexError, arr->SliceSafe(6, 2)); + ASSERT_RAISES(IndexError, arr->SliceSafe(6, std::numeric_limits::max() - 5)); ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(0)); check_data(*sliced, original_data); @@ -166,8 +166,8 @@ TEST_F(TestArray, TestSliceSafe) { ASSERT_OK_AND_ASSIGN(sliced, arr->SliceSafe(7)); check_data(*sliced, {}); - ASSERT_RAISES(Invalid, arr->SliceSafe(8)); - ASSERT_RAISES(Invalid, arr->SliceSafe(-1)); + ASSERT_RAISES(IndexError, arr->SliceSafe(8)); + ASSERT_RAISES(IndexError, arr->SliceSafe(-1)); } Status MakeArrayFromValidBytes(const std::vector& v, MemoryPool* pool, diff --git a/cpp/src/arrow/buffer.cc b/cpp/src/arrow/buffer.cc index e663ff9dca8ed..4d059e319e5ea 100644 --- a/cpp/src/arrow/buffer.cc +++ b/cpp/src/arrow/buffer.cc @@ -52,7 +52,7 @@ Status CheckBufferSlice(const Buffer& buffer, int64_t offset, int64_t length) { Status CheckBufferSlice(const Buffer& buffer, int64_t offset) { if (ARROW_PREDICT_FALSE(offset < 0)) { // Avoid UBSAN in subtraction below - return Status::Invalid("Negative buffer slice offset"); + return Status::IndexError("Negative buffer slice offset"); } return CheckBufferSlice(buffer, offset, buffer.size() - offset); } diff --git a/cpp/src/arrow/buffer_test.cc b/cpp/src/arrow/buffer_test.cc index a82c90be55c7b..b3301884239bb 100644 --- a/cpp/src/arrow/buffer_test.cc +++ b/cpp/src/arrow/buffer_test.cc @@ -492,12 +492,12 @@ TEST(TestBuffer, SliceBufferSafe) { ASSERT_OK_AND_ASSIGN(sliced, SliceBufferSafe(buf, buf->size(), 0)); AssertBufferEqual(*sliced, ""); - ASSERT_RAISES(Invalid, SliceBufferSafe(buf, -1, 0)); - ASSERT_RAISES(Invalid, SliceBufferSafe(buf, 0, -1)); - ASSERT_RAISES(Invalid, SliceBufferSafe(buf, 0, buf->size() + 1)); - ASSERT_RAISES(Invalid, SliceBufferSafe(buf, 2, buf->size() - 1)); - ASSERT_RAISES(Invalid, SliceBufferSafe(buf, buf->size() + 1, 0)); - ASSERT_RAISES(Invalid, + ASSERT_RAISES(IndexError, SliceBufferSafe(buf, -1, 0)); + ASSERT_RAISES(IndexError, SliceBufferSafe(buf, 0, -1)); + ASSERT_RAISES(IndexError, SliceBufferSafe(buf, 0, buf->size() + 1)); + ASSERT_RAISES(IndexError, SliceBufferSafe(buf, 2, buf->size() - 1)); + ASSERT_RAISES(IndexError, SliceBufferSafe(buf, buf->size() + 1, 0)); + ASSERT_RAISES(IndexError, SliceBufferSafe(buf, 3, std::numeric_limits::max() - 2)); ASSERT_OK_AND_ASSIGN(sliced, SliceBufferSafe(buf, 0)); @@ -507,8 +507,8 @@ TEST(TestBuffer, SliceBufferSafe) { ASSERT_OK_AND_ASSIGN(sliced, SliceBufferSafe(buf, buf->size())); AssertBufferEqual(*sliced, ""); - ASSERT_RAISES(Invalid, SliceBufferSafe(buf, -1)); - ASSERT_RAISES(Invalid, SliceBufferSafe(buf, buf->size() + 1)); + ASSERT_RAISES(IndexError, SliceBufferSafe(buf, -1)); + ASSERT_RAISES(IndexError, SliceBufferSafe(buf, buf->size() + 1)); } TEST(TestMutableBuffer, Wrap) { diff --git a/cpp/src/arrow/util/int_util_internal.h b/cpp/src/arrow/util/int_util_internal.h index 4136706629ff0..8a19876f2ba41 100644 --- a/cpp/src/arrow/util/int_util_internal.h +++ b/cpp/src/arrow/util/int_util_internal.h @@ -133,18 +133,19 @@ UpcastInt(Integer v) { static inline Status CheckSliceParams(int64_t object_length, int64_t slice_offset, int64_t slice_length, const char* object_name) { if (ARROW_PREDICT_FALSE(slice_offset < 0)) { - return Status::Invalid("Negative ", object_name, " slice offset"); + return Status::IndexError("Negative ", object_name, " slice offset"); } if (ARROW_PREDICT_FALSE(slice_length < 0)) { - return Status::Invalid("Negative ", object_name, " slice length"); + return Status::IndexError("Negative ", object_name, " slice length"); } int64_t offset_plus_length; if (ARROW_PREDICT_FALSE( internal::AddWithOverflow(slice_offset, slice_length, &offset_plus_length))) { - return Status::Invalid(object_name, " slice would overflow"); + return Status::IndexError(object_name, " slice would overflow"); } - if (ARROW_PREDICT_FALSE(slice_offset + slice_length > object_length)) { - return Status::Invalid(object_name, " slice would exceed ", object_name, " length"); + if (ARROW_PREDICT_FALSE(offset_plus_length > object_length)) { + return Status::IndexError(object_name, " slice would exceed ", object_name, + " length"); } return Status::OK(); } diff --git a/python/pyarrow/includes/libarrow.pxd b/python/pyarrow/includes/libarrow.pxd index 7dbd0caa2a43f..4bf8e56331cc3 100644 --- a/python/pyarrow/includes/libarrow.pxd +++ b/python/pyarrow/includes/libarrow.pxd @@ -309,10 +309,10 @@ cdef extern from "arrow/api.h" namespace "arrow" nogil: c_string ToHexString() c_bool Equals(const CBuffer& other) - shared_ptr[CBuffer] SliceBuffer(const shared_ptr[CBuffer]& buffer, - int64_t offset, int64_t length) - shared_ptr[CBuffer] SliceBuffer(const shared_ptr[CBuffer]& buffer, - int64_t offset) + CResult[shared_ptr[CBuffer]] SliceBufferSafe( + const shared_ptr[CBuffer]& buffer, int64_t offset) + CResult[shared_ptr[CBuffer]] SliceBufferSafe( + const shared_ptr[CBuffer]& buffer, int64_t offset, int64_t length) cdef cppclass CMutableBuffer" arrow::MutableBuffer"(CBuffer): CMutableBuffer(const uint8_t* data, int64_t size) diff --git a/python/pyarrow/io.pxi b/python/pyarrow/io.pxi index f6c2b42193b7a..89f7ec1cfe2d4 100644 --- a/python/pyarrow/io.pxi +++ b/python/pyarrow/io.pxi @@ -1056,10 +1056,10 @@ cdef class Buffer(_Weakrefable): raise IndexError('Offset must be non-negative') if length is None: - result = SliceBuffer(self.buffer, offset) + result = GetResultValue(SliceBufferSafe(self.buffer, offset)) else: - result = SliceBuffer(self.buffer, offset, max(length, 0)) - + result = GetResultValue(SliceBufferSafe(self.buffer, offset, + length)) return pyarrow_wrap_buffer(result) def equals(self, Buffer other): diff --git a/python/pyarrow/tests/test_io.py b/python/pyarrow/tests/test_io.py index 33c87cdf78174..b368410dd0b70 100644 --- a/python/pyarrow/tests/test_io.py +++ b/python/pyarrow/tests/test_io.py @@ -518,10 +518,25 @@ def test_buffer_slicing(): with pytest.raises(IndexError): buf.slice(-1) + with pytest.raises(IndexError): + buf.slice(len(buf) + 1) + assert buf[11:].to_pybytes() == b"" + + # Slice stop exceeds buffer length + with pytest.raises(IndexError): + buf.slice(1, len(buf)) + assert buf[1:11].to_pybytes() == buf.to_pybytes()[1:] + + # Negative length + with pytest.raises(IndexError): + buf.slice(1, -1) + # Test slice notation assert buf[2:].equals(buf.slice(2)) assert buf[2:5].equals(buf.slice(2, 3)) assert buf[-5:].equals(buf.slice(len(buf) - 5)) + assert buf[-5:-2].equals(buf.slice(len(buf) - 5, 3)) + with pytest.raises(IndexError): buf[::-1] with pytest.raises(IndexError):