Skip to content

Commit

Permalink
ARROW-7985: [C++] Fix builder capacity check
Browse files Browse the repository at this point in the history
It should be ok to resize below the current allocated builder capacity, just not below the current populated length.

Closes #6595 from pitrou/ARROW-7985-fix-builder-capacity-check

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Benjamin Kietzman <bengilgit@gmail.com>
  • Loading branch information
pitrou authored and bkietz committed Mar 12, 2020
1 parent 590a168 commit 2849b64
Show file tree
Hide file tree
Showing 12 changed files with 53 additions and 19 deletions.
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/builder_adaptive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ void AdaptiveIntBuilderBase::Reset() {
}

Status AdaptiveIntBuilderBase::Resize(int64_t capacity) {
RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
RETURN_NOT_OK(CheckCapacity(capacity));
capacity = std::max(capacity, kMinBuilderCapacity);

int64_t nbytes = capacity * int_size_;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/builder_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ Status ArrayBuilder::AppendToBitmap(int64_t num_bits, bool value) {
}

Status ArrayBuilder::Resize(int64_t capacity) {
RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
RETURN_NOT_OK(CheckCapacity(capacity));
capacity_ = capacity;
return null_bitmap_builder_.Resize(capacity);
}
Expand Down
13 changes: 8 additions & 5 deletions cpp/src/arrow/array/builder_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,13 +190,16 @@ class ARROW_EXPORT ArrayBuilder {
return Status::OK();
}

static Status CheckCapacity(int64_t new_capacity, int64_t old_capacity) {
if (new_capacity < 0) {
return Status::Invalid("Resize capacity must be positive");
// Check the requested capacity for validity
Status CheckCapacity(int64_t new_capacity) {
if (ARROW_PREDICT_FALSE(new_capacity < 0)) {
return Status::Invalid(
"Resize capacity must be positive (requested: ", new_capacity, ")");
}

if (new_capacity < old_capacity) {
return Status::Invalid("Resize cannot downsize");
if (ARROW_PREDICT_FALSE(new_capacity < length_)) {
return Status::Invalid("Resize cannot downsize (requested: ", new_capacity,
", current length: ", length_, ")");
}

return Status::OK();
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/builder_binary.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ void FixedSizeBinaryBuilder::Reset() {
}

Status FixedSizeBinaryBuilder::Resize(int64_t capacity) {
RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
RETURN_NOT_OK(CheckCapacity(capacity));
RETURN_NOT_OK(byte_builder_.Resize(capacity * byte_width_));
return ArrayBuilder::Resize(capacity);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/builder_binary.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ class BaseBinaryBuilder : public ArrayBuilder {
return Status::CapacityError("BinaryBuilder cannot reserve space for more than ",
memory_limit(), " child elements, got ", capacity);
}
ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));

// One more than requested for offsets
ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/array/builder_dict.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ class DictionaryBuilderBase : public ArrayBuilder {
}

Status Resize(int64_t capacity) override {
ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
capacity = std::max(capacity, kMinBuilderCapacity);
ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
capacity_ = indices_builder_.capacity();
Expand Down Expand Up @@ -356,7 +356,7 @@ class DictionaryBuilderBase<BuilderType, NullType> : public ArrayBuilder {
}

Status Resize(int64_t capacity) override {
ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
capacity = std::max(capacity, kMinBuilderCapacity);

ARROW_RETURN_NOT_OK(indices_builder_.Resize(capacity));
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/builder_nested.cc
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ Status FixedSizeListBuilder::AppendNulls(int64_t length) {
}

Status FixedSizeListBuilder::Resize(int64_t capacity) {
RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
RETURN_NOT_OK(CheckCapacity(capacity));
return ArrayBuilder::Resize(capacity);
}

Expand Down
4 changes: 2 additions & 2 deletions cpp/src/arrow/array/builder_nested.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ class BaseListBuilder : public ArrayBuilder {
return Status::CapacityError("List array cannot reserve space for more than ",
maximum_elements(), " got ", capacity);
}
ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));

// one more then requested for offsets
// One more than requested for offsets
ARROW_RETURN_NOT_OK(offsets_builder_.Resize(capacity + 1));
return ArrayBuilder::Resize(capacity);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/builder_primitive.cc
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ void BooleanBuilder::Reset() {
}

Status BooleanBuilder::Resize(int64_t capacity) {
RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
RETURN_NOT_OK(CheckCapacity(capacity));
capacity = std::max(capacity, kMinBuilderCapacity);
RETURN_NOT_OK(data_builder_.Resize(capacity));
return ArrayBuilder::Resize(capacity);
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/builder_primitive.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ class NumericBuilder : public ArrayBuilder {
void Reset() override { data_builder_.Reset(); }

Status Resize(int64_t capacity) override {
ARROW_RETURN_NOT_OK(CheckCapacity(capacity, capacity_));
ARROW_RETURN_NOT_OK(CheckCapacity(capacity));
capacity = std::max(capacity, kMinBuilderCapacity);
ARROW_RETURN_NOT_OK(data_builder_.Resize(capacity));
return ArrayBuilder::Resize(capacity);
Expand Down
23 changes: 23 additions & 0 deletions cpp/src/arrow/array_list_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -435,6 +435,27 @@ class TestListArray : public TestBuilder {
ASSERT_RAISES(Invalid, ValidateOffsets(2, {0, 7, 4}, values));
}

void TestCornerCases() {
// ARROW-7985
ASSERT_OK(builder_->AppendNull());
Done();
auto expected = ArrayFromJSON(type_, "[null]");
AssertArraysEqual(*result_, *expected);

SetUp();
ASSERT_OK(builder_->Append());
Done();
expected = ArrayFromJSON(type_, "[[]]");
AssertArraysEqual(*result_, *expected);

SetUp();
ASSERT_OK(builder_->AppendNull());
ASSERT_OK(builder_->value_builder()->Reserve(100));
Done();
expected = ArrayFromJSON(type_, "[null]");
AssertArraysEqual(*result_, *expected);
}

protected:
std::shared_ptr<DataType> value_type_;

Expand Down Expand Up @@ -474,6 +495,8 @@ TYPED_TEST(TestListArray, TestFlattenNonEmptyBackingNulls) {

TYPED_TEST(TestListArray, ValidateOffsets) { this->TestValidateOffsets(); }

TYPED_TEST(TestListArray, CornerCases) { this->TestCornerCases(); }

// ----------------------------------------------------------------------
// Map tests

Expand Down
14 changes: 11 additions & 3 deletions cpp/src/arrow/array_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -397,9 +397,17 @@ TEST_F(TestBuilder, TestResizeDownsize) {

ASSERT_OK(builder.Resize(1000));
ASSERT_EQ(1000, builder.capacity());

// Can't downsize.
ASSERT_RAISES(Invalid, builder.Resize(500));
ASSERT_EQ(0, builder.length());
ASSERT_OK(builder.AppendNulls(500));
ASSERT_EQ(1000, builder.capacity());
ASSERT_EQ(500, builder.length());

// Can downsize below current capacity
ASSERT_OK(builder.Resize(500));
// ... but not below current populated length
ASSERT_RAISES(Invalid, builder.Resize(499));
ASSERT_GE(500, builder.capacity());
ASSERT_EQ(500, builder.length());
}

template <typename Attrs>
Expand Down

0 comments on commit 2849b64

Please sign in to comment.