Skip to content

Commit

Permalink
GH-14875: [C++] C Data Interface: check imported buffer for non-null (#…
Browse files Browse the repository at this point in the history
…14814)

The C data interface may expose null data pointers for zero-sized buffers.
Make sure that all buffer pointers remain non-null internally.

Followup to GH-14805

* Closes: #14875

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
pitrou committed Jan 17, 2023
1 parent 627caf3 commit 7319250
Show file tree
Hide file tree
Showing 3 changed files with 167 additions and 16 deletions.
11 changes: 7 additions & 4 deletions cpp/src/arrow/array/validate.cc
Expand Up @@ -459,14 +459,17 @@ struct ValidateArrayImpl {
if (buffer == nullptr) {
continue;
}
int64_t min_buffer_size = -1;
int64_t min_buffer_size = 0;
switch (spec.kind) {
case DataTypeLayout::BITMAP:
min_buffer_size = bit_util::BytesForBits(length_plus_offset);
// If length == 0, buffer size can be 0 regardless of offset
if (data.length > 0) {
min_buffer_size = bit_util::BytesForBits(length_plus_offset);
}
break;
case DataTypeLayout::FIXED_WIDTH:
if (MultiplyWithOverflow(length_plus_offset, spec.byte_width,
&min_buffer_size)) {
if (data.length > 0 && MultiplyWithOverflow(length_plus_offset, spec.byte_width,
&min_buffer_size)) {
return Status::Invalid("Array of type ", type.ToString(),
" has impossibly large length and offset");
}
Expand Down
40 changes: 31 additions & 9 deletions cpp/src/arrow/c/bridge.cc
Expand Up @@ -31,6 +31,7 @@
#include "arrow/c/util_internal.h"
#include "arrow/extension_type.h"
#include "arrow/memory_pool.h"
#include "arrow/memory_pool_internal.h" // for kZeroSizeArea
#include "arrow/record_batch.h"
#include "arrow/result.h"
#include "arrow/stl_allocator.h"
Expand Down Expand Up @@ -60,6 +61,8 @@ using internal::SchemaExportTraits;

using internal::ToChars;

using memory_pool::internal::kZeroSizeArea;

namespace {

Status ExportingNotImplemented(const DataType& type) {
Expand Down Expand Up @@ -1265,7 +1268,8 @@ class ImportedBuffer : public Buffer {
};

struct ArrayImporter {
explicit ArrayImporter(const std::shared_ptr<DataType>& type) : type_(type) {}
explicit ArrayImporter(const std::shared_ptr<DataType>& type)
: type_(type), zero_size_buffer_(std::make_shared<Buffer>(kZeroSizeArea, 0)) {}

Status Import(struct ArrowArray* src) {
if (ArrowArrayIsReleased(src)) {
Expand Down Expand Up @@ -1529,7 +1533,7 @@ struct ArrayImporter {
}

Status ImportNullBitmap(int32_t buffer_id = 0) {
RETURN_NOT_OK(ImportBitsBuffer(buffer_id));
RETURN_NOT_OK(ImportBitsBuffer(buffer_id, /*is_null_bitmap=*/true));
if (data_->null_count > 0 && data_->buffers[buffer_id] == nullptr) {
return Status::Invalid(
"ArrowArray struct has null bitmap buffer but non-zero null_count ",
Expand All @@ -1538,15 +1542,20 @@ struct ArrayImporter {
return Status::OK();
}

Status ImportBitsBuffer(int32_t buffer_id) {
Status ImportBitsBuffer(int32_t buffer_id, bool is_null_bitmap = false) {
// Compute visible size of buffer
int64_t buffer_size = bit_util::BytesForBits(c_struct_->length + c_struct_->offset);
return ImportBuffer(buffer_id, buffer_size);
int64_t buffer_size =
(c_struct_->length > 0)
? bit_util::BytesForBits(c_struct_->length + c_struct_->offset)
: 0;
return ImportBuffer(buffer_id, buffer_size, is_null_bitmap);
}

Status ImportFixedSizeBuffer(int32_t buffer_id, int64_t byte_width) {
// Compute visible size of buffer
int64_t buffer_size = byte_width * (c_struct_->length + c_struct_->offset);
int64_t buffer_size = (c_struct_->length > 0)
? byte_width * (c_struct_->length + c_struct_->offset)
: 0;
return ImportBuffer(buffer_id, buffer_size);
}

Expand All @@ -1563,17 +1572,27 @@ struct ArrayImporter {
int64_t byte_width = 1) {
auto offsets = data_->GetValues<OffsetType>(offsets_buffer_id);
// Compute visible size of buffer
int64_t buffer_size = byte_width * offsets[c_struct_->length];
int64_t buffer_size =
(c_struct_->length > 0) ? byte_width * offsets[c_struct_->length] : 0;
return ImportBuffer(buffer_id, buffer_size);
}

Status ImportBuffer(int32_t buffer_id, int64_t buffer_size) {
Status ImportBuffer(int32_t buffer_id, int64_t buffer_size,
bool is_null_bitmap = false) {
std::shared_ptr<Buffer>* out = &data_->buffers[buffer_id];
auto data = reinterpret_cast<const uint8_t*>(c_struct_->buffers[buffer_id]);
if (data != nullptr) {
*out = std::make_shared<ImportedBuffer>(data, buffer_size, import_);
} else {
} else if (is_null_bitmap) {
out->reset();
} else {
// Ensure that imported buffers are never null (except for the null bitmap)
if (buffer_size != 0) {
return Status::Invalid(
"ArrowArrayStruct contains null data pointer "
"for a buffer with non-zero computed size");
}
*out = zero_size_buffer_;
}
return Status::OK();
}
Expand All @@ -1585,6 +1604,9 @@ struct ArrayImporter {
std::shared_ptr<ImportedArrayData> import_;
std::shared_ptr<ArrayData> data_;
std::vector<ArrayImporter> child_importers_;

// For imported null buffer pointers
std::shared_ptr<Buffer> zero_size_buffer_;
};

} // namespace
Expand Down
132 changes: 129 additions & 3 deletions cpp/src/arrow/c/bridge_test.cc
Expand Up @@ -124,6 +124,24 @@ class ReleaseCallback {
using SchemaReleaseCallback = ReleaseCallback<SchemaExportTraits>;
using ArrayReleaseCallback = ReleaseCallback<ArrayExportTraits>;

// Whether c_struct or any of its descendents have non-null data pointers.
bool HasData(const ArrowArray* c_struct) {
for (int64_t i = 0; i < c_struct->n_buffers; ++i) {
if (c_struct->buffers[i] != nullptr) {
return true;
}
}
if (c_struct->dictionary && HasData(c_struct->dictionary)) {
return true;
}
for (int64_t i = 0; i < c_struct->n_children; ++i) {
if (HasData(c_struct->children[i])) {
return true;
}
}
return false;
}

static const std::vector<std::string> kMetadataKeys1{"key1", "key2"};
static const std::vector<std::string> kMetadataValues1{"", "bar"};

Expand Down Expand Up @@ -1659,6 +1677,8 @@ static const uint8_t bits_buffer1[] = {0xed, 0xed};
static const void* buffers_no_nulls_no_data[1] = {nullptr};
static const void* buffers_nulls_no_data1[1] = {bits_buffer1};

static const void* all_buffers_omitted[3] = {nullptr, nullptr, nullptr};

static const uint8_t data_buffer1[] = {1, 2, 3, 4, 5, 6, 7, 8,
9, 10, 11, 12, 13, 14, 15, 16};
static const uint8_t data_buffer2[] = "abcdefghijklmnopqrstuvwxyz";
Expand Down Expand Up @@ -1724,10 +1744,13 @@ static const uint8_t string_data_buffer1[] = "foobarquuxxyzzy";
static const int32_t string_offsets_buffer1[] = {0, 3, 3, 6, 10, 15};
static const void* string_buffers_no_nulls1[3] = {nullptr, string_offsets_buffer1,
string_data_buffer1};
static const void* string_buffers_omitted[3] = {nullptr, string_offsets_buffer1, nullptr};

static const int64_t large_string_offsets_buffer1[] = {0, 3, 3, 6, 10};
static const void* large_string_buffers_no_nulls1[3] = {
nullptr, large_string_offsets_buffer1, string_data_buffer1};
static const void* large_string_buffers_omitted[3] = {
nullptr, large_string_offsets_buffer1, nullptr};

static const int32_t list_offsets_buffer1[] = {0, 2, 2, 5, 6, 8};
static const void* list_buffers_no_nulls1[2] = {nullptr, list_offsets_buffer1};
Expand Down Expand Up @@ -1901,9 +1924,9 @@ class TestArrayImport : public ::testing::Test {
Reset(); // for further tests

ASSERT_OK(array->ValidateFull());
// Special case: Null array doesn't have any data, so it needn't
// keep the ArrowArray struct alive.
if (type->id() != Type::NA) {
// Special case: arrays without data (such as Null arrays) needn't keep
// the ArrowArray struct alive.
if (HasData(&c_struct_)) {
cb.AssertNotCalled();
}
AssertArraysEqual(*expected, *array, true);
Expand Down Expand Up @@ -1990,6 +2013,10 @@ TEST_F(TestArrayImport, Primitive) {
CheckImport(ArrayFromJSON(boolean(), "[true, null, false]"));
FillPrimitive(3, 1, 0, primitive_buffers_nulls1_8);
CheckImport(ArrayFromJSON(boolean(), "[true, null, false]"));

// Empty array with null data pointers
FillPrimitive(0, 0, 0, all_buffers_omitted);
CheckImport(ArrayFromJSON(int32(), "[]"));
}

TEST_F(TestArrayImport, Temporal) {
Expand Down Expand Up @@ -2070,6 +2097,12 @@ TEST_F(TestArrayImport, PrimitiveWithOffset) {

FillPrimitive(4, 0, 7, primitive_buffers_no_nulls1_8);
CheckImport(ArrayFromJSON(boolean(), "[false, false, true, false]"));

// Empty array with null data pointers
FillPrimitive(0, 0, 2, all_buffers_omitted);
CheckImport(ArrayFromJSON(int32(), "[]"));
FillPrimitive(0, 0, 3, all_buffers_omitted);
CheckImport(ArrayFromJSON(boolean(), "[]"));
}

TEST_F(TestArrayImport, NullWithOffset) {
Expand All @@ -2092,10 +2125,48 @@ TEST_F(TestArrayImport, String) {
FillStringLike(4, 0, 0, large_string_buffers_no_nulls1);
CheckImport(ArrayFromJSON(large_binary(), R"(["foo", "", "bar", "quux"])"));

// Empty array with null data pointers
FillStringLike(0, 0, 0, string_buffers_omitted);
CheckImport(ArrayFromJSON(utf8(), "[]"));
FillStringLike(0, 0, 0, large_string_buffers_omitted);
CheckImport(ArrayFromJSON(large_binary(), "[]"));
}

TEST_F(TestArrayImport, StringWithOffset) {
FillStringLike(3, 0, 1, string_buffers_no_nulls1);
CheckImport(ArrayFromJSON(utf8(), R"(["", "bar", "quux"])"));
FillStringLike(2, 0, 2, large_string_buffers_no_nulls1);
CheckImport(ArrayFromJSON(large_utf8(), R"(["bar", "quux"])"));

// Empty array with null data pointers
FillStringLike(0, 0, 1, string_buffers_omitted);
CheckImport(ArrayFromJSON(utf8(), "[]"));
}

TEST_F(TestArrayImport, FixedSizeBinary) {
FillPrimitive(2, 0, 0, primitive_buffers_no_nulls2);
CheckImport(ArrayFromJSON(fixed_size_binary(3), R"(["abc", "def"])"));
FillPrimitive(2, 0, 0, primitive_buffers_no_nulls3);
CheckImport(ArrayFromJSON(decimal(15, 4), R"(["12345.6789", "98765.4321"])"));

// Empty array with null data pointers
FillPrimitive(0, 0, 0, all_buffers_omitted);
CheckImport(ArrayFromJSON(fixed_size_binary(3), "[]"));
FillPrimitive(0, 0, 0, all_buffers_omitted);
CheckImport(ArrayFromJSON(decimal(15, 4), "[]"));
}

TEST_F(TestArrayImport, FixedSizeBinaryWithOffset) {
FillPrimitive(1, 0, 1, primitive_buffers_no_nulls2);
CheckImport(ArrayFromJSON(fixed_size_binary(3), R"(["def"])"));
FillPrimitive(1, 0, 1, primitive_buffers_no_nulls3);
CheckImport(ArrayFromJSON(decimal(15, 4), R"(["98765.4321"])"));

// Empty array with null data pointers
FillPrimitive(0, 0, 1, all_buffers_omitted);
CheckImport(ArrayFromJSON(fixed_size_binary(3), "[]"));
FillPrimitive(0, 0, 1, all_buffers_omitted);
CheckImport(ArrayFromJSON(decimal(15, 4), "[]"));
}

TEST_F(TestArrayImport, List) {
Expand All @@ -2117,6 +2188,11 @@ TEST_F(TestArrayImport, List) {
FillFixedSizeListLike(3, 0, 0, buffers_no_nulls_no_data);
CheckImport(
ArrayFromJSON(fixed_size_list(int8(), 3), "[[1, 2, 3], [4, 5, 6], [7, 8, 9]]"));

// Empty child array with null data pointers
FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted);
FillFixedSizeListLike(0, 0, 0, buffers_no_nulls_no_data);
CheckImport(ArrayFromJSON(fixed_size_list(int8(), 3), "[]"));
}

TEST_F(TestArrayImport, NestedList) {
Expand Down Expand Up @@ -2205,6 +2281,15 @@ TEST_F(TestArrayImport, SparseUnion) {
FillUnionLike(UnionMode::SPARSE, 4, 0, 0, 2, sparse_union_buffers1_legacy,
/*legacy=*/true);
CheckImport(expected);

// Empty array with null data pointers
expected = ArrayFromJSON(type, "[]");
FillStringLike(AddChild(), 0, 0, 0, string_buffers_omitted);
FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted);
FillUnionLike(UnionMode::SPARSE, 0, 0, 0, 2, all_buffers_omitted, /*legacy=*/false);
FillStringLike(AddChild(), 0, 0, 0, string_buffers_omitted);
FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted);
FillUnionLike(UnionMode::SPARSE, 0, 0, 3, 2, all_buffers_omitted, /*legacy=*/false);
}

TEST_F(TestArrayImport, DenseUnion) {
Expand All @@ -2223,6 +2308,15 @@ TEST_F(TestArrayImport, DenseUnion) {
FillUnionLike(UnionMode::DENSE, 5, 0, 0, 2, dense_union_buffers1_legacy,
/*legacy=*/true);
CheckImport(expected);

// Empty array with null data pointers
expected = ArrayFromJSON(type, "[]");
FillStringLike(AddChild(), 0, 0, 0, string_buffers_omitted);
FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted);
FillUnionLike(UnionMode::DENSE, 0, 0, 0, 2, all_buffers_omitted, /*legacy=*/false);
FillStringLike(AddChild(), 0, 0, 0, string_buffers_omitted);
FillPrimitive(AddChild(), 0, 0, 0, all_buffers_omitted);
FillUnionLike(UnionMode::DENSE, 0, 0, 3, 2, all_buffers_omitted, /*legacy=*/false);
}

TEST_F(TestArrayImport, StructWithOffset) {
Expand Down Expand Up @@ -2359,6 +2453,29 @@ TEST_F(TestArrayImport, PrimitiveError) {
// Zero null bitmap but non-zero null_count
FillPrimitive(3, 1, 0, primitive_buffers_no_nulls1_8);
CheckImportError(int8());

// Null data pointers with non-zero length
FillPrimitive(1, 0, 0, all_buffers_omitted);
CheckImportError(int8());
FillPrimitive(1, 0, 0, all_buffers_omitted);
CheckImportError(boolean());
FillPrimitive(1, 0, 0, all_buffers_omitted);
CheckImportError(fixed_size_binary(3));
}

TEST_F(TestArrayImport, StringError) {
// Bad number of buffers
FillStringLike(4, 0, 0, string_buffers_no_nulls1);
c_struct_.n_buffers = 2;
CheckImportError(utf8());

// Null data pointers with non-zero length
FillStringLike(4, 0, 0, string_buffers_omitted);
CheckImportError(utf8());

// Null offsets pointer
FillStringLike(0, 0, 0, all_buffers_omitted);
CheckImportError(utf8());
}

TEST_F(TestArrayImport, StructError) {
Expand All @@ -2369,6 +2486,13 @@ TEST_F(TestArrayImport, StructError) {
CheckImportError(struct_({field("strs", utf8())}));
}

TEST_F(TestArrayImport, ListError) {
// Null offsets pointer
FillPrimitive(AddChild(), 0, 0, 0, primitive_buffers_no_nulls1_8);
FillListLike(0, 0, 0, all_buffers_omitted);
CheckImportError(list(int8()));
}

TEST_F(TestArrayImport, MapError) {
// Bad number of (struct) children in map child
FillStringLike(AddChild(), 5, 0, 0, string_buffers_no_nulls1);
Expand Down Expand Up @@ -2859,8 +2983,10 @@ TEST_F(TestArrayRoundtrip, UnknownNullCount) {
TEST_F(TestArrayRoundtrip, List) {
TestWithJSON(list(int32()), "[]");
TestWithJSON(list(int32()), "[[4, 5], [6, null], null]");
TestWithJSON(fixed_size_list(int32(), 3), "[[4, 5, 6], null, [7, 8, null]]");

TestWithJSONSliced(list(int32()), "[[4, 5], [6, null], null]");
TestWithJSONSliced(fixed_size_list(int32(), 3), "[[4, 5, 6], null, [7, 8, null]]");
}

TEST_F(TestArrayRoundtrip, Struct) {
Expand Down

0 comments on commit 7319250

Please sign in to comment.