Skip to content

Commit

Permalink
ARROW-7740: [C++] Fix StructArray::Flatten corruption
Browse files Browse the repository at this point in the history
Flatten would modify in-place the data_->child_data ArrayData if the currently flattened array is a slice. The parent's child_data values would also be modified (and corrupted with wrong offset/length).

Closes #6792 from fsaintjacques/ARROW-7740

Lead-authored-by: François Saint-Jacques <fsaintjacques@gmail.com>
Co-authored-by: Wes McKinney <wesm+git@apache.org>
Signed-off-by: Wes McKinney <wesm+git@apache.org>
  • Loading branch information
fsaintjacques and wesm committed Apr 1, 2020
1 parent 57c525f commit b9b9ece
Show file tree
Hide file tree
Showing 8 changed files with 96 additions and 3 deletions.
4 changes: 3 additions & 1 deletion cpp/src/arrow/array.cc
Expand Up @@ -796,7 +796,9 @@ Status StructArray::Flatten(MemoryPool* pool, ArrayVector* out) const {
flattened.reserve(data_->child_data.size());
std::shared_ptr<Buffer> null_bitmap = data_->buffers[0];

for (auto& child_data : data_->child_data) {
for (const auto& child_data_ptr : data_->child_data) {
auto child_data = child_data_ptr->Copy();

std::shared_ptr<Buffer> flattened_null_bitmap;
int64_t flattened_null_count = kUnknownNullCount;

Expand Down
19 changes: 19 additions & 0 deletions cpp/src/arrow/array_struct_test.cc
Expand Up @@ -195,6 +195,25 @@ TEST(StructArray, Validate) {
ASSERT_RAISES(Invalid, arr->ValidateFull());
}

/// ARROW-7740: Flattening a slice shouldn't affect the parent array.
TEST(StructArray, FlattenOfSlice) {
auto a = ArrayFromJSON(int32(), "[4, 5]");
auto type = struct_({field("a", int32())});
auto children = std::vector<std::shared_ptr<Array>>{a};

auto arr = std::make_shared<StructArray>(type, 2, children);
ASSERT_OK(arr->ValidateFull());

auto slice = internal::checked_pointer_cast<StructArray>(arr->Slice(0, 1));
ASSERT_OK(slice->ValidateFull());

ArrayVector flattened;
ASSERT_OK(slice->Flatten(default_memory_pool(), &flattened));

ASSERT_OK(slice->ValidateFull());
ASSERT_OK(arr->ValidateFull());
}

// ----------------------------------------------------------------------------------
// Struct test
class TestStructBuilder : public TestBuilder {
Expand Down
8 changes: 8 additions & 0 deletions r/R/arrowExports.R

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions r/R/table.R
Expand Up @@ -163,6 +163,14 @@ Table <- R6Class("Table", inherit = ArrowObject,

Equals = function(other, check_metadata = TRUE, ...) {
inherits(other, "Table") && Table__Equals(self, other, isTRUE(check_metadata))
},

Validate = function() {
Table__Validate(self)
},

ValidateFull = function() {
Table__ValidateFull(self)
}
),

Expand Down
3 changes: 1 addition & 2 deletions r/src/array_to_vector.cpp
Expand Up @@ -575,8 +575,7 @@ class Converter_List : public Converter {
auto values_array = list_array->values();

auto ingest_one = [&](R_xlen_t i) {
auto slice =
values_array->Slice(list_array->value_offset(i), list_array->value_length(i));
auto slice = list_array->value_slice(i);
SET_VECTOR_ELT(data, i + start, Array__as_vector(slice));
};

Expand Down
32 changes: 32 additions & 0 deletions r/src/arrowExports.cpp

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions r/src/table.cpp
Expand Up @@ -97,6 +97,18 @@ bool Table__Equals(const std::shared_ptr<arrow::Table>& lhs,
return lhs->Equals(*rhs.get(), check_metadata);
}

// [[arrow::export]]
bool Table__Validate(const std::shared_ptr<arrow::Table>& table) {
STOP_IF_NOT_OK(table->Validate());
return true;
}

// [[arrow::export]]
bool Table__ValidateFull(const std::shared_ptr<arrow::Table>& table) {
STOP_IF_NOT_OK(table->ValidateFull());
return true;
}

// [[arrow::export]]
std::shared_ptr<arrow::ChunkedArray> Table__GetColumnByName(
const std::shared_ptr<arrow::Table>& table, const std::string& name) {
Expand Down
13 changes: 13 additions & 0 deletions r/tests/testthat/test-json.R
Expand Up @@ -147,3 +147,16 @@ test_that("Can read json file with nested columns (ARROW-5503)", {
)
)
})

test_that("Can read json file with list<struct<T...>> nested columns (ARROW-7740)", {
tf <- tempfile()
on.exit(unlink(tf))
writeLines('
{"a":[{"b":1.0},{"b":2.0}]}
{"a":[{"b":1.0},{"b":2.0}]}
', tf)

one <- tibble::tibble(b = c(1, 2))
expected <- tibble::tibble(a = c(list(one), list(one)))
expect_equivalent(arrow::read_json_arrow(tf), expected)
})

0 comments on commit b9b9ece

Please sign in to comment.