Skip to content

Commit

Permalink
GH-35344: [C++][Format] Implementation of the LIST_VIEW and LARGE_LIS…
Browse files Browse the repository at this point in the history
…T_VIEW array formats (#35345)

### Rationale for this change

Mailing list discussion: https://lists.apache.org/thread/r28rw5n39jwtvn08oljl09d4q2c1ysvb

### What changes are included in this PR?

Initial implementation of the new format in C++.

### Are these changes tested?

Unit tests are written alongside every commit that adds new functionality. More functionality needs to be implemented before the (required) Integration Tests can be written.

### Are there any user-facing changes?

A new array format. It should have no impact for users that don't use it.
* Closes: #35344

Authored-by: Felipe Oliveira Carvalho <felipekde@gmail.com>
Signed-off-by: Antoine Pitrou <antoine@python.org>
  • Loading branch information
felipecrv committed Nov 22, 2023
1 parent 8627921 commit 8cc71ab
Show file tree
Hide file tree
Showing 63 changed files with 4,401 additions and 504 deletions.
2 changes: 2 additions & 0 deletions cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ set(ARROW_SRCS
util/hashing.cc
util/int_util.cc
util/io_util.cc
util/list_util.cc
util/logging.cc
util/key_value_metadata.cc
util/memory.cc
Expand Down Expand Up @@ -790,6 +791,7 @@ add_arrow_test(array_test
array/array_binary_test.cc
array/array_dict_test.cc
array/array_list_test.cc
array/array_list_view_test.cc
array/array_run_end_test.cc
array/array_struct_test.cc
array/array_union_test.cc
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/array/array_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ struct ScalarFromArraySlotImpl {
Status Visit(const MonthDayNanoIntervalArray& a) { return Finish(a.Value(index_)); }

template <typename T>
Status Visit(const BaseListArray<T>& a) {
Status Visit(const VarLengthListLikeArray<T>& a) {
return Finish(a.value_slice(index_));
}

Expand Down
446 changes: 393 additions & 53 deletions cpp/src/arrow/array/array_list_test.cc

Large diffs are not rendered by default.

84 changes: 84 additions & 0 deletions cpp/src/arrow/array/array_list_view_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>

#include "arrow/array/array_nested.h"
#include "arrow/array/util.h"
#include "arrow/pretty_print.h"
#include "arrow/testing/gtest_util.h"
#include "arrow/type_fwd.h"
#include "arrow/util/checked_cast.h"

namespace arrow {

using internal::checked_cast;

// ----------------------------------------------------------------------
// List-view array tests

namespace {

class TestListViewArray : public ::testing::Test {
public:
std::shared_ptr<Array> string_values;
std::shared_ptr<Array> int32_values;
std::shared_ptr<Array> int16_values;

void SetUp() override {
string_values = ArrayFromJSON(utf8(), R"(["Hello", "World", null])");
int32_values = ArrayFromJSON(int32(), "[1, 20, 3]");
int16_values = ArrayFromJSON(int16(), "[10, 2, 30]");
}

static std::shared_ptr<Array> Offsets(std::string_view json) {
return ArrayFromJSON(int32(), json);
}

static std::shared_ptr<Array> Sizes(std::string_view json) {
return ArrayFromJSON(int32(), json);
}
};

} // namespace

// Wrapping the ArrayData of a ListViewArray with MakeArray() must give back an
// equal array that shares the same ArrayData and has ListViewArray as its
// dynamic type.
TEST_F(TestListViewArray, MakeArray) {
  const auto offsets = Offsets("[0, 0, 1, 2]");
  const auto sizes = Sizes("[2, 1, 1, 1]");
  ASSERT_OK_AND_ASSIGN(auto original,
                       ListViewArray::FromArrays(*offsets, *sizes, *string_values));

  const auto data = original->data();
  const auto rebuilt = MakeArray(data);
  ASSERT_ARRAYS_EQUAL(*rebuilt, *original);

  // Pointer identity, not just equality: no ArrayData copy should be made.
  ASSERT_EQ(rebuilt->data(), data);

  // The concrete array class must be ListViewArray.
  const auto as_list_view = std::dynamic_pointer_cast<ListViewArray>(rebuilt);
  ASSERT_NE(as_list_view, NULLPTR);
}

// Build a list-view from separate offsets and sizes arrays where the last
// entry has a null size and an offset (1000) far beyond the 3-element child.
// Construction is expected to succeed and report exactly one null.
// NOTE(review): presumably the out-of-range offset is tolerated because that
// slot is null -- confirm against ListViewArray::FromArrays.
TEST_F(TestListViewArray, FromOffsetsAndSizes) {
  const auto offsets = Offsets("[0, 0, 1, 1000]");
  const auto sizes = Sizes("[2, 1, 1, null]");
  ASSERT_OK_AND_ASSIGN(std::shared_ptr<ListViewArray> list_view,
                       ListViewArray::FromArrays(*offsets, *sizes, *int32_values));

  ASSERT_EQ(list_view->length(), 4);
  // The child array must be passed through unchanged.
  ASSERT_ARRAYS_EQUAL(*list_view->values(), *int32_values);
  ASSERT_EQ(list_view->offset(), 0);

  const auto data = list_view->data();
  ASSERT_EQ(data->GetNullCount(), 1);
  // Three buffers are expected (presumably validity, offsets and sizes).
  ASSERT_EQ(data->buffers.size(), 3);
}

} // namespace arrow

0 comments on commit 8cc71ab

Please sign in to comment.