Skip to content

Commit

Permalink
ARROW-7815: [C++] Improve input validation
Browse files Browse the repository at this point in the history
This should fix the following issues:
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=20260
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=20282
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=20307
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=20324
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=20330
https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=20575

Closes #6396 from pitrou/ARROW-7815-fuzz-issues and squashes the following commits:

0efa846 <Antoine Pitrou> Update testing submodule
fd8e865 <Antoine Pitrou> ARROW-7815:  Improve input validation

Authored-by: Antoine Pitrou <antoine@python.org>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
pitrou authored and wesm committed Feb 12, 2020
1 parent 9f0c70c commit 3bc01ec
Show file tree
Hide file tree
Showing 12 changed files with 224 additions and 49 deletions.
24 changes: 12 additions & 12 deletions cpp/src/arrow/array.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1101,16 +1101,16 @@ struct ViewDataImpl {
}
while (true) {
// Skip exhausted layout (might be empty layout)
while (in_buffer_idx >= in_layouts[in_layout_idx].bit_widths.size()) {
while (in_buffer_idx >= in_layouts[in_layout_idx].buffers.size()) {
in_buffer_idx = 0;
++in_layout_idx;
if (in_layout_idx >= in_layouts.size()) {
input_exhausted = true;
return;
}
}
auto bit_width = in_layouts[in_layout_idx].bit_widths[in_buffer_idx];
if (bit_width > 0 || bit_width == DataTypeLayout::kVariableSizeBuffer) {
const auto& in_spec = in_layouts[in_layout_idx].buffers[in_buffer_idx];
if (in_spec.kind != DataTypeLayout::ALWAYS_NULL) {
return;
}
// Skip always-null input buffers
Expand Down Expand Up @@ -1157,19 +1157,19 @@ struct ViewDataImpl {
}

// No type has a purely empty layout
DCHECK_GT(out_layout.bit_widths.size(), 0);
DCHECK_GT(out_layout.buffers.size(), 0);

if (out_layout.bit_widths[0] == 0) {
if (out_layout.buffers[0].kind == DataTypeLayout::ALWAYS_NULL) {
// Assuming null type or equivalent.
DCHECK_EQ(out_layout.bit_widths.size(), 1);
DCHECK_EQ(out_layout.buffers.size(), 1);
*out = ArrayData::Make(out_type, out_length, {nullptr}, out_length);
return Status::OK();
}

std::vector<std::shared_ptr<Buffer>> out_buffers;

// Process null bitmap
DCHECK_EQ(out_layout.bit_widths[0], 1);
DCHECK_EQ(out_layout.buffers[0].kind, DataTypeLayout::BITMAP);
if (in_buffer_idx == 0) {
// Copy input null bitmap
RETURN_NOT_OK(CheckInputAvailable());
Expand All @@ -1191,11 +1191,11 @@ struct ViewDataImpl {
}

// Process other buffers in output layout
for (size_t out_buffer_idx = 1; out_buffer_idx < out_layout.bit_widths.size();
for (size_t out_buffer_idx = 1; out_buffer_idx < out_layout.buffers.size();
++out_buffer_idx) {
auto out_bit_width = out_layout.bit_widths[out_buffer_idx];
const auto& out_spec = out_layout.buffers[out_buffer_idx];
// If always-null buffer is expected, just construct it
if (out_bit_width == DataTypeLayout::kAlwaysNullBuffer) {
if (out_spec.kind == DataTypeLayout::ALWAYS_NULL) {
out_buffers.push_back(nullptr);
continue;
}
Expand All @@ -1211,8 +1211,8 @@ struct ViewDataImpl {
}

RETURN_NOT_OK(CheckInputAvailable());
auto in_bit_width = in_layouts[in_layout_idx].bit_widths[in_buffer_idx];
if (out_bit_width != in_bit_width) {
const auto& in_spec = in_layouts[in_layout_idx].buffers[in_buffer_idx];
if (out_spec != in_spec) {
return InvalidView("incompatible layouts");
}
// Copy input buffer
Expand Down
65 changes: 55 additions & 10 deletions cpp/src/arrow/array/validate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,10 @@ struct ValidateArrayVisitor {
if (array.length() > 0 && !array.values()) {
return Status::Invalid("values is null");
}
if (array.value_length() < 0) {
return Status::Invalid("FixedSizeListArray has negative value length ",
array.value_length());
}
if (array.values()->length() != array.length() * array.value_length()) {
return Status::Invalid(
"Values Length (", array.values()->length(), ") is not equal to the length (",
Expand All @@ -119,6 +123,15 @@ struct ValidateArrayVisitor {
const auto& struct_type = checked_cast<const StructType&>(*array.type());
// Validate fields
for (int i = 0; i < array.num_fields(); ++i) {
// array.field() may crash due to an assertion in ArrayData::Slice(),
// so check invariants before
const auto& field_data = *array.data()->child_data[i];
if (field_data.length < array.offset()) {
return Status::Invalid("Struct child array #", i,
" has length smaller than struct array offset (",
field_data.length, " < ", array.offset(), ")");
}

auto it = array.field(i);
if (it->length() != array.length()) {
return Status::Invalid("Struct child array #", i,
Expand Down Expand Up @@ -146,6 +159,17 @@ struct ValidateArrayVisitor {
const auto& union_type = *array.union_type();
// Validate fields
for (int i = 0; i < array.num_fields(); ++i) {
if (union_type.mode() == UnionMode::SPARSE) {
// array.child() may crash due to an assertion in ArrayData::Slice(),
// so check invariants before
const auto& child_data = *array.data()->child_data[i];
if (child_data.length < array.offset()) {
return Status::Invalid("Sparse union child array #", i,
" has length smaller than union array offset (",
child_data.length, " < ", array.offset(), ")");
}
}

auto it = array.child(i);
if (union_type.mode() == UnionMode::SPARSE && it->length() != array.length()) {
return Status::Invalid("Sparse union child array #", i,
Expand Down Expand Up @@ -213,6 +237,10 @@ struct ValidateArrayVisitor {
if (array.length() > 0) {
const auto first_offset = array.value_offset(0);
const auto last_offset = array.value_offset(array.length());
// This early test avoids undefined behaviour when computing `data_extent`
if (first_offset < 0 || last_offset < 0) {
return Status::Invalid("Negative offsets in list array");
}
const auto data_extent = last_offset - first_offset;
if (data_extent > 0 && !array.values()) {
return Status::Invalid("values is null");
Expand Down Expand Up @@ -269,25 +297,42 @@ Status ValidateArray(const Array& array) {
return Status::Invalid("Array length is negative");
}

if (data.buffers.size() != layout.bit_widths.size()) {
return Status::Invalid("Expected ", layout.bit_widths.size(),
if (data.buffers.size() != layout.buffers.size()) {
return Status::Invalid("Expected ", layout.buffers.size(),
" buffers in array "
"of type ",
type.ToString(), ", got ", data.buffers.size());
}
// This check is required to avoid addition overflow below
if (HasAdditionOverflow(array.length(), array.offset())) {
return Status::Invalid("Array of type ", type.ToString(),
" has impossibly large length and offset");
}
for (int i = 0; i < static_cast<int>(data.buffers.size()); ++i) {
const auto& buffer = data.buffers[i];
const auto bit_width = layout.bit_widths[i];
if (buffer == nullptr || bit_width <= 0) {
const auto& spec = layout.buffers[i];

if (buffer == nullptr) {
continue;
}
if (internal::HasAdditionOverflow(array.length(), array.offset()) ||
internal::HasMultiplyOverflow(array.length() + array.offset(), bit_width)) {
return Status::Invalid("Array of type ", type.ToString(),
" has impossibly large length and offset");
int64_t min_buffer_size = -1;
switch (spec.kind) {
case DataTypeLayout::BITMAP:
min_buffer_size = BitUtil::BytesForBits(array.length() + array.offset());
break;
case DataTypeLayout::FIXED_WIDTH:
if (HasMultiplyOverflow(array.length() + array.offset(), spec.byte_width)) {
return Status::Invalid("Array of type ", type.ToString(),
" has impossibly large length and offset");
}
min_buffer_size = spec.byte_width * (array.length() + array.offset());
break;
case DataTypeLayout::ALWAYS_NULL:
// XXX Should we raise on non-null buffer?
continue;
default:
continue;
}
const auto min_buffer_size =
BitUtil::BytesForBits(bit_width * (array.length() + array.offset()));
if (buffer->size() < min_buffer_size) {
return Status::Invalid("Buffer #", i, " too small in array of type ",
type.ToString(), " and length ", array.length(),
Expand Down
7 changes: 7 additions & 0 deletions cpp/src/arrow/array_list_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -981,4 +981,11 @@ TEST_F(TestFixedSizeListArray, TestBuilderPreserveFieldName) {
ASSERT_EQ("counts", type.value_field()->name());
}

TEST_F(TestFixedSizeListArray, NegativeLength) {
type_ = fixed_size_list(value_type_, -42);
auto values = ArrayFromJSON(value_type_, "[]");
result_ = std::make_shared<FixedSizeListArray>(type_, 0, values);
ASSERT_RAISES(Invalid, result_->ValidateFull());
}

} // namespace arrow
25 changes: 25 additions & 0 deletions cpp/src/arrow/array_struct_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,31 @@ TEST(StructArray, FromFields) {
ASSERT_RAISES(Invalid, res);
}

TEST(StructArray, Validate) {
auto a = ArrayFromJSON(int32(), "[4, 5]");
auto type = struct_({field("a", int32())});
auto children = std::vector<std::shared_ptr<Array>>{a};

auto arr = std::make_shared<StructArray>(type, 2, children);
ASSERT_OK(arr->ValidateFull());
arr = std::make_shared<StructArray>(type, 1, children, nullptr, 0, /*offset=*/1);
ASSERT_OK(arr->ValidateFull());
arr = std::make_shared<StructArray>(type, 0, children, nullptr, 0, /*offset=*/2);
ASSERT_OK(arr->ValidateFull());

// Length + offset < child length, but it's ok
arr = std::make_shared<StructArray>(type, 1, children, nullptr, 0, /*offset=*/0);
ASSERT_OK(arr->ValidateFull());

// Length + offset > child length
arr = std::make_shared<StructArray>(type, 1, children, nullptr, 0, /*offset=*/2);
ASSERT_RAISES(Invalid, arr->ValidateFull());

// Offset > child length
arr = std::make_shared<StructArray>(type, 0, children, nullptr, 0, /*offset=*/3);
ASSERT_RAISES(Invalid, arr->ValidateFull());
}

// ----------------------------------------------------------------------------------
// Struct test
class TestStructBuilder : public TestBuilder {
Expand Down
32 changes: 32 additions & 0 deletions cpp/src/arrow/array_union_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,38 @@ TEST(TestUnionArray, TestSliceEquals) {
CheckUnion(batch->column(2));
}

TEST(TestSparseUnionArray, Validate) {
auto a = ArrayFromJSON(int32(), "[4, 5]");
auto type = union_({field("a", int32())}, UnionMode::SPARSE);
auto children = std::vector<std::shared_ptr<Array>>{a};
auto type_ids_array = ArrayFromJSON(int8(), "[0, 0, 0]");
auto type_ids = type_ids_array->data()->buffers[1];

auto arr = std::make_shared<UnionArray>(type, 2, children, type_ids);
ASSERT_OK(arr->ValidateFull());
arr = std::make_shared<UnionArray>(type, 1, children, type_ids, nullptr, nullptr, 0,
/*offset=*/1);
ASSERT_OK(arr->ValidateFull());
arr = std::make_shared<UnionArray>(type, 0, children, type_ids, nullptr, nullptr, 0,
/*offset=*/2);
ASSERT_OK(arr->ValidateFull());

// Length + offset < child length, but it's ok
arr = std::make_shared<UnionArray>(type, 1, children, type_ids, nullptr, nullptr, 0,
/*offset=*/0);
ASSERT_OK(arr->ValidateFull());

// Length + offset > child length
arr = std::make_shared<UnionArray>(type, 1, children, type_ids, nullptr, nullptr, 0,
/*offset=*/2);
ASSERT_RAISES(Invalid, arr->ValidateFull());

// Offset > child length
arr = std::make_shared<UnionArray>(type, 0, children, type_ids, nullptr, nullptr, 0,
/*offset=*/3);
ASSERT_RAISES(Invalid, arr->ValidateFull());
}

// -------------------------------------------------------------------------
// Tests for MakeDense and MakeSparse

Expand Down
8 changes: 6 additions & 2 deletions cpp/src/arrow/type.cc
Original file line number Diff line number Diff line change
Expand Up @@ -382,9 +382,13 @@ Status UnionType::ValidateParameters(const std::vector<std::shared_ptr<Field>>&

DataTypeLayout UnionType::layout() const {
if (mode_ == UnionMode::SPARSE) {
return {{1, CHAR_BIT, DataTypeLayout::kAlwaysNullBuffer}, false};
return DataTypeLayout({DataTypeLayout::Bitmap(),
DataTypeLayout::FixedWidth(sizeof(uint8_t)),
DataTypeLayout::AlwaysNull()});
} else {
return {{1, CHAR_BIT, sizeof(int32_t) * CHAR_BIT}, false};
return DataTypeLayout({DataTypeLayout::Bitmap(),
DataTypeLayout::FixedWidth(sizeof(uint8_t)),
DataTypeLayout::FixedWidth(sizeof(int32_t))});
}
}

Expand Down
Loading

0 comments on commit 3bc01ec

Please sign in to comment.