From e5497e4e18d6bc7496378aaa0ef5128f5923dd4d Mon Sep 17 00:00:00 2001 From: Mike Hancock Date: Wed, 25 Jan 2023 12:25:13 -0500 Subject: [PATCH 01/19] Initial code for rows_to_batches utility function --- cpp/src/arrow/util/CMakeLists.txt | 1 + cpp/src/arrow/util/rows_to_batches.h | 86 ++++++++++++++++++++++ cpp/src/arrow/util/rows_to_batches_test.cc | 69 +++++++++++++++++ 3 files changed, 156 insertions(+) create mode 100644 cpp/src/arrow/util/rows_to_batches.h create mode 100644 cpp/src/arrow/util/rows_to_batches_test.cc diff --git a/cpp/src/arrow/util/CMakeLists.txt b/cpp/src/arrow/util/CMakeLists.txt index 5141e30d0917a..d24935d79f582 100644 --- a/cpp/src/arrow/util/CMakeLists.txt +++ b/cpp/src/arrow/util/CMakeLists.txt @@ -57,6 +57,7 @@ add_arrow_test(utility-test queue_test.cc range_test.cc reflection_test.cc + rows_to_batches_test.cc small_vector_test.cc stl_util_test.cc string_test.cc diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h new file mode 100644 index 0000000000000..e1b7399415e6b --- /dev/null +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -0,0 +1,86 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include "arrow/record_batch.h" +#include "arrow/result.h" +#include "arrow/status.h" +#include "arrow/table_builder.h" +#include "arrow/util/iterator.h" + +#include + +namespace arrow { + +namespace detail { +[[nodiscard]] constexpr inline auto make_default_accessor() { + return [](auto& x) -> Result { return std::ref(x); }; +} + +template +struct is_range : std::false_type {}; + +template +struct is_range())), + decltype(std::end(std::declval()))>> : std::true_type { +}; + +} // namespace detail + +template +typename std::enable_if_t::value, + Result>> +/* Result>> */ rows_to_batches( + const std::shared_ptr& schema, std::reference_wrapper rows, + DataPointConvertor&& data_point_convertor, + RowAccessor&& row_accessor = detail::make_default_accessor()) { + const std::size_t batch_size = 1024; + auto make_next_batch = + [rows_ittr = std::begin(rows.get()), rows_ittr_end = std::end(rows.get()), + schema = schema, row_accessor = std::forward(row_accessor), + data_point_convertor = std::forward( + data_point_convertor)]() mutable -> Result> { + if (rows_ittr == rows_ittr_end) return nullptr; + + ARROW_ASSIGN_OR_RAISE( + auto record_batch_builder, + RecordBatchBuilder::Make(schema, default_memory_pool(), batch_size)); + + for (size_t i = 0; i < batch_size and (rows_ittr != rows_ittr_end); + i++, std::advance(rows_ittr, 1)) { + size_t col_index = 0; + ARROW_ASSIGN_OR_RAISE(auto row, row_accessor(*rows_ittr)); + for (auto& data_point : row.get()) { + ArrayBuilder* array_builder = record_batch_builder->GetField(col_index); + ARROW_RETURN_IF(array_builder == nullptr, + Status::Invalid("array_builder == nullptr")); + + ARROW_RETURN_NOT_OK(data_point_convertor(*array_builder, data_point)); + col_index++; + } + } + + ARROW_ASSIGN_OR_RAISE(auto result, record_batch_builder->Flush()); + return result; + }; + return RecordBatchReader::MakeFromIterator(MakeFunctionIterator(make_next_batch), + schema); +} + +} // namespace arrow diff --git a/cpp/src/arrow/util/rows_to_batches_test.cc b/cpp/src/arrow/util/rows_to_batches_test.cc new file mode 100644 index 0000000000000..b18805037dcb7 --- /dev/null +++ b/cpp/src/arrow/util/rows_to_batches_test.cc @@ -0,0 +1,69 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include + +#include "arrow/array/builder_primitive.h" +#include "arrow/scalar.h" +#include "arrow/table.h" +#include "arrow/util/rows_to_batches.h" + +namespace arrow::util { + +// clang-format off +const auto test_schema = schema( + {field("field_1", int64()), + field("field_2", int64()), + field("field_3", int64())} ); +// clang-format on + +auto int_convertor = [](ArrayBuilder& array_builder, int value) { + return static_cast(array_builder).Append(value); +}; + +TEST(rows_to_batches, basic_usage) { + std::vector> data = {{1, 2, 4}, {5, 6, 7}}; + + auto batches = rows_to_batches(test_schema, std::ref(data), int_convertor).ValueOrDie(); + + auto table = batches->ToTable().ValueOrDie(); + + std::shared_ptr col = table->column(0); + EXPECT_EQ(col->length(), 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, + 1); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, + 5); + + col = table->column(1); + EXPECT_EQ(col->length(), 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, + 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, + 6); + + col = table->column(2); + EXPECT_EQ(col->length(), 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, + 4); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, + 7); +} + +} // namespace arrow::util From 1b85fbbd2bb9ad97b7256bccaefb4271b0709a2c Mon Sep 17 00:00:00 2001 From: Mike Hancock Date: Wed, 25 Jan 2023 13:02:45 -0500 Subject: [PATCH 02/19] Update rows_to_batches to follow style guide --- cpp/src/arrow/util/rows_to_batches.h | 18 +++++++++--------- cpp/src/arrow/util/rows_to_batches_test.cc | 4 ++-- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index e1b7399415e6b..4e030dffff466 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -25,10 +25,10 @@ #include -namespace arrow { +namespace arrow::util { namespace detail { -[[nodiscard]] constexpr inline auto make_default_accessor() { +[[nodiscard]] constexpr inline auto MakeDefaultAccessor() { return [](auto& x) -> Result { return std::ref(x); }; } @@ -43,20 +43,20 @@ struct is_range())), } // namespace detail template + class RowAccessor = decltype(detail::MakeDefaultAccessor())> typename std::enable_if_t::value, Result>> -/* Result>> */ rows_to_batches( +/* Result>> */ RowsToBatches( const std::shared_ptr& schema, std::reference_wrapper rows, DataPointConvertor&& data_point_convertor, - RowAccessor&& row_accessor = detail::make_default_accessor()) { + RowAccessor&& row_accessor = detail::MakeDefaultAccessor()) { const std::size_t batch_size = 1024; auto make_next_batch = [rows_ittr = std::begin(rows.get()), rows_ittr_end = std::end(rows.get()), schema = schema, row_accessor = std::forward(row_accessor), data_point_convertor = std::forward( data_point_convertor)]() mutable -> Result> { - if (rows_ittr == rows_ittr_end) return nullptr; + if (rows_ittr == rows_ittr_end) return NULLPTR; ARROW_ASSIGN_OR_RAISE( auto record_batch_builder, @@ -68,8 +68,8 @@ typename std::enable_if_t::value, ARROW_ASSIGN_OR_RAISE(auto row, row_accessor(*rows_ittr)); for (auto& data_point : row.get()) { ArrayBuilder* array_builder = record_batch_builder->GetField(col_index); - ARROW_RETURN_IF(array_builder == nullptr, - Status::Invalid("array_builder == nullptr")); + ARROW_RETURN_IF(array_builder == NULLPTR, + Status::Invalid("array_builder == NULLPTR")); ARROW_RETURN_NOT_OK(data_point_convertor(*array_builder, data_point)); col_index++; @@ -83,4 +83,4 @@ typename std::enable_if_t::value, schema); } -} // namespace arrow +} // namespace arrow::util diff --git a/cpp/src/arrow/util/rows_to_batches_test.cc b/cpp/src/arrow/util/rows_to_batches_test.cc index b18805037dcb7..e2843883cd6c9 100644 --- a/cpp/src/arrow/util/rows_to_batches_test.cc +++ b/cpp/src/arrow/util/rows_to_batches_test.cc @@ -37,10 +37,10 @@ auto int_convertor = [](ArrayBuilder& array_builder, int value) { return static_cast(array_builder).Append(value); }; -TEST(rows_to_batches, basic_usage) { +TEST(RowsToBatches, basic_usage) { std::vector> data = {{1, 2, 4}, {5, 6, 7}}; - auto batches = rows_to_batches(test_schema, std::ref(data), int_convertor).ValueOrDie(); + auto batches = RowsToBatches(test_schema, std::ref(data), int_convertor).ValueOrDie(); auto table = batches->ToTable().ValueOrDie(); From e213096a794394dfdd8f3274501cb045efe34d99 Mon Sep 17 00:00:00 2001 From: Mike Hancock Date: Wed, 25 Jan 2023 14:38:50 -0500 Subject: [PATCH 03/19] Updates rows_to_batches to follow style guide --- cpp/src/arrow/util/rows_to_batches.h | 59 ++++++++++++++++++++-- cpp/src/arrow/util/rows_to_batches_test.cc | 6 +-- 2 files changed, 57 insertions(+), 8 deletions(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index 4e030dffff466..45374ed953868 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -28,10 +28,15 @@ namespace arrow::util { namespace detail { -[[nodiscard]] constexpr inline auto MakeDefaultAccessor() { + +// Default identity function row accessor. Used to for the common case where the value +// of each row iterated over is it's self also directly iterable. +[[nodiscard]] constexpr inline auto MakeDefaultRowAccessor() { return [](auto& x) -> Result { return std::ref(x); }; } +// Meta-funciton to check if a type `T` is a range (iterable using `std::begin()` / +// `std::end()`). `is_range::value` will be false if `T` is not a valid range. template struct is_range : std::false_type {}; @@ -42,14 +47,58 @@ struct is_range())), } // namespace detail +/// \brief Utility function for converting any row-based structure into an +/// `arrow::RecordBatchReader` (this can be easily converted to an `arrow::Table` using +/// `arrow::RecordBatchReader::ToTable()`). +/// +/// Examples of supported types: +/// - `std::vector>>` +/// - `std::vector` + +/// If `rows` (client’s row-based structure) is not a valid C++ range, the client will +/// need to either make it iterable, or make an adapter/wrapper that is a valid C++ +/// range. + +/// The client must provide a `DataPointConvertor` callable type that will convert the +/// structure’s data points into the corresponding arrow types. + +/// Complex nested rows can be supported by providing a custom `row_accessor` instead +/// of the default. + +/// Example usage: +/// \code{.cpp} +/// auto IntConvertor = [](ArrayBuilder& array_builder, int value) { +/// return static_cast(array_builder).Append(value); +/// }; +/// std::vector> data = {{1, 2, 4}, {5, 6, 7}}; +/// auto batches = RowsToBatches(test_schema, std::ref(data), IntConvertor); +/// \endcode + +/// \param[in] schema - the schema to be used in the `RecordBatchReader` + +/// \param[in] rows - iterable row-based structure that will be converted to arrow +/// batches + +/// \param[in] data_point_convertor - client provided callable type that will convert +/// the structure’s data points into the corresponding arrow types. The convertor must +/// return an error `Status` if an error happens during conversion. + +/// \param[in] row_accessor - In the common case where the value of each row iterated +/// over is it's self also directly iterable, the client can just use the default. +/// the provided callable must take the values of the otter `rows` range and return a +/// `std::reference_wrapper` to the data points in a given row. +/// see: /ref `MakeDefaultRowAccessor` + +/// \return `Result>>` result will be a +/// `std::shared_ptr>` if not errors occurred, else an error status. template -typename std::enable_if_t::value, - Result>> + class RowAccessor = decltype(detail::MakeDefaultRowAccessor())> +[[nodiscard]] typename std::enable_if_t::value, + Result>> /* Result>> */ RowsToBatches( const std::shared_ptr& schema, std::reference_wrapper rows, DataPointConvertor&& data_point_convertor, - RowAccessor&& row_accessor = detail::MakeDefaultAccessor()) { + RowAccessor&& row_accessor = detail::MakeDefaultRowAccessor()) { const std::size_t batch_size = 1024; auto make_next_batch = [rows_ittr = std::begin(rows.get()), rows_ittr_end = std::end(rows.get()), diff --git a/cpp/src/arrow/util/rows_to_batches_test.cc b/cpp/src/arrow/util/rows_to_batches_test.cc index e2843883cd6c9..943aa22a62358 100644 --- a/cpp/src/arrow/util/rows_to_batches_test.cc +++ b/cpp/src/arrow/util/rows_to_batches_test.cc @@ -33,14 +33,14 @@ const auto test_schema = schema( field("field_3", int64())} ); // clang-format on -auto int_convertor = [](ArrayBuilder& array_builder, int value) { +auto IntConvertor = [](ArrayBuilder& array_builder, int value) { return static_cast(array_builder).Append(value); }; -TEST(RowsToBatches, basic_usage) { +TEST(RowsToBatches, BasicUsage) { std::vector> data = {{1, 2, 4}, {5, 6, 7}}; - auto batches = RowsToBatches(test_schema, std::ref(data), int_convertor).ValueOrDie(); + auto batches = RowsToBatches(test_schema, std::ref(data), IntConvertor).ValueOrDie(); auto table = batches->ToTable().ValueOrDie(); From 0ac421600039ce02cb320fb8ee53d417ac76f4a8 Mon Sep 17 00:00:00 2001 From: Mike Hancock Date: Wed, 25 Jan 2023 14:42:02 -0500 Subject: [PATCH 04/19] Updates rows_to_batches to follow style guide --- cpp/src/arrow/util/rows_to_batches.h | 2 +- cpp/src/arrow/util/rows_to_batches_test.cc | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index 45374ed953868..b03594e983d42 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -71,7 +71,7 @@ struct is_range())), /// return static_cast(array_builder).Append(value); /// }; /// std::vector> data = {{1, 2, 4}, {5, 6, 7}}; -/// auto batches = RowsToBatches(test_schema, std::ref(data), IntConvertor); +/// auto batches = RowsToBatches(kTestSchema, std::ref(data), IntConvertor); /// \endcode /// \param[in] schema - the schema to be used in the `RecordBatchReader` diff --git a/cpp/src/arrow/util/rows_to_batches_test.cc b/cpp/src/arrow/util/rows_to_batches_test.cc index 943aa22a62358..0d2a266177087 100644 --- a/cpp/src/arrow/util/rows_to_batches_test.cc +++ b/cpp/src/arrow/util/rows_to_batches_test.cc @@ -27,7 +27,7 @@ namespace arrow::util { // clang-format off -const auto test_schema = schema( +const auto kTestSchema = schema( {field("field_1", int64()), field("field_2", int64()), field("field_3", int64())} ); @@ -40,7 +40,7 @@ auto IntConvertor = [](ArrayBuilder& array_builder, int value) { TEST(RowsToBatches, BasicUsage) { std::vector> data = {{1, 2, 4}, {5, 6, 7}}; - auto batches = RowsToBatches(test_schema, std::ref(data), IntConvertor).ValueOrDie(); + auto batches = RowsToBatches(kTestSchema, std::ref(data), IntConvertor).ValueOrDie(); auto table = batches->ToTable().ValueOrDie(); From f96cc4ec2703ecb35cd323d684f6c8d7a5b8a799 Mon Sep 17 00:00:00 2001 From: Mike Hancock Date: Wed, 25 Jan 2023 16:46:25 -0500 Subject: [PATCH 05/19] Add more unit tests for rows_to_batches --- cpp/src/arrow/util/rows_to_batches_test.cc | 103 +++++++++++++++++++++ 1 file changed, 103 insertions(+) diff --git a/cpp/src/arrow/util/rows_to_batches_test.cc b/cpp/src/arrow/util/rows_to_batches_test.cc index 0d2a266177087..bf8299c4bab26 100644 --- a/cpp/src/arrow/util/rows_to_batches_test.cc +++ b/cpp/src/arrow/util/rows_to_batches_test.cc @@ -20,6 +20,7 @@ #include #include "arrow/array/builder_primitive.h" +#include "arrow/array/builder_binary.h" #include "arrow/scalar.h" #include "arrow/table.h" #include "arrow/util/rows_to_batches.h" @@ -66,4 +67,106 @@ TEST(RowsToBatches, BasicUsage) { 7); } +TEST(RowsToBatches, ConstRange) { + const std::vector> data = {{1, 2, 4}, {5, 6, 7}}; + + auto batches = RowsToBatches(kTestSchema, std::cref(data), IntConvertor).ValueOrDie(); + + auto table = batches->ToTable().ValueOrDie(); + + std::shared_ptr col = table->column(0); + EXPECT_EQ(col->length(), 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, + 1); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, + 5); + + col = table->column(1); + EXPECT_EQ(col->length(), 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, + 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, + 6); + + col = table->column(2); + EXPECT_EQ(col->length(), 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, + 4); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, + 7); +} + +TEST(RowsToBatches, StructAccessor) { + struct TestStruct { + std::vector values; + }; + std::vector data = {TestStruct{{1, 2, 4}}, TestStruct{{5, 6, 7}}}; + + auto accessor = + [](const TestStruct& s) -> Result>> { + return std::cref(s.values); + }; + + auto batches = + RowsToBatches(kTestSchema, std::ref(data), IntConvertor, accessor).ValueOrDie(); + + auto table = batches->ToTable().ValueOrDie(); + + std::shared_ptr col = table->column(0); + EXPECT_EQ(col->length(), 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, + 1); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, + 5); + + col = table->column(1); + EXPECT_EQ(col->length(), 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, + 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, + 6); + + col = table->column(2); + EXPECT_EQ(col->length(), 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, + 4); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, + 7); +} + +TEST(RowsToBatches, Variant) { + auto VariantConvertor = [](ArrayBuilder& array_builder, + const std::variant& value) { + if (std::holds_alternative(value)) + return dynamic_cast(array_builder).Append(std::get(value)); + else + return dynamic_cast(array_builder) + .Append(std::get(value).c_str(), + std::get(value).length()); + }; + + const auto test_schema = schema({field("x", int64()), field("y", utf8())}); + std::vector>> data = {{1, std::string("2")}, + {4, std::string("5")}}; + + auto batches = RowsToBatches(test_schema, std::ref(data), VariantConvertor).ValueOrDie(); + auto table = batches->ToTable().ValueOrDie(); + + std::shared_ptr col = table->column(0); + EXPECT_EQ(col->length(), 2); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, + 1); + EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, + 4); + + col = table->column(1); + EXPECT_EQ(col->length(), 2); + EXPECT_EQ( + std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->view(), + "2"); + EXPECT_EQ( + std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->view(), + "5"); +} + } // namespace arrow::util From 6bf33d2d213af89f62be1d1bc5d40629f0f72015 Mon Sep 17 00:00:00 2001 From: Mike Hancock Date: Wed, 25 Jan 2023 17:35:13 -0500 Subject: [PATCH 06/19] Use ChunkedArrayFromJSON() in rows_to_batches unit tests. --- cpp/src/arrow/util/rows_to_batches_test.cc | 108 +++++---------------- 1 file changed, 22 insertions(+), 86 deletions(-) diff --git a/cpp/src/arrow/util/rows_to_batches_test.cc b/cpp/src/arrow/util/rows_to_batches_test.cc index bf8299c4bab26..fd62bcd1ad5c7 100644 --- a/cpp/src/arrow/util/rows_to_batches_test.cc +++ b/cpp/src/arrow/util/rows_to_batches_test.cc @@ -19,81 +19,46 @@ #include -#include "arrow/array/builder_primitive.h" #include "arrow/array/builder_binary.h" +#include "arrow/array/builder_primitive.h" #include "arrow/scalar.h" #include "arrow/table.h" +#include "arrow/testing/gtest_util.h" #include "arrow/util/rows_to_batches.h" namespace arrow::util { -// clang-format off const auto kTestSchema = schema( - {field("field_1", int64()), - field("field_2", int64()), - field("field_3", int64())} ); -// clang-format on + {field("field_1", int64()), field("field_2", int64()), field("field_3", int64())}); auto IntConvertor = [](ArrayBuilder& array_builder, int value) { return static_cast(array_builder).Append(value); }; +bool CompareJson(const arrow::Table& arrow_table, const std::string& json, + const std::string& field_name) { + const auto col = arrow_table.GetColumnByName(field_name); + return arrow::ChunkedArrayFromJSON(col->type(), {json})->Equals(col); +}; + TEST(RowsToBatches, BasicUsage) { std::vector> data = {{1, 2, 4}, {5, 6, 7}}; - auto batches = RowsToBatches(kTestSchema, std::ref(data), IntConvertor).ValueOrDie(); - auto table = batches->ToTable().ValueOrDie(); - std::shared_ptr col = table->column(0); - EXPECT_EQ(col->length(), 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, - 1); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, - 5); - - col = table->column(1); - EXPECT_EQ(col->length(), 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, - 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, - 6); - - col = table->column(2); - EXPECT_EQ(col->length(), 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, - 4); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, - 7); + EXPECT_TRUE(CompareJson(*table, R"([1, 5])", "field_1")); + EXPECT_TRUE(CompareJson(*table, R"([2, 6])", "field_2")); + EXPECT_TRUE(CompareJson(*table, R"([4, 7])", "field_3")); } TEST(RowsToBatches, ConstRange) { const std::vector> data = {{1, 2, 4}, {5, 6, 7}}; - auto batches = RowsToBatches(kTestSchema, std::cref(data), IntConvertor).ValueOrDie(); - auto table = batches->ToTable().ValueOrDie(); - std::shared_ptr col = table->column(0); - EXPECT_EQ(col->length(), 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, - 1); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, - 5); - - col = table->column(1); - EXPECT_EQ(col->length(), 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, - 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, - 6); - - col = table->column(2); - EXPECT_EQ(col->length(), 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, - 4); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, - 7); + EXPECT_TRUE(CompareJson(*table, R"([1, 5])", "field_1")); + EXPECT_TRUE(CompareJson(*table, R"([2, 6])", "field_2")); + EXPECT_TRUE(CompareJson(*table, R"([4, 7])", "field_3")); } TEST(RowsToBatches, StructAccessor) { @@ -112,26 +77,9 @@ TEST(RowsToBatches, StructAccessor) { auto table = batches->ToTable().ValueOrDie(); - std::shared_ptr col = table->column(0); - EXPECT_EQ(col->length(), 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, - 1); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, - 5); - - col = table->column(1); - EXPECT_EQ(col->length(), 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, - 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, - 6); - - col = table->column(2); - EXPECT_EQ(col->length(), 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, - 4); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, - 7); + EXPECT_TRUE(CompareJson(*table, R"([1, 5])", "field_1")); + EXPECT_TRUE(CompareJson(*table, R"([2, 6])", "field_2")); + EXPECT_TRUE(CompareJson(*table, R"([4, 7])", "field_3")); } TEST(RowsToBatches, Variant) { @@ -149,24 +97,12 @@ TEST(RowsToBatches, Variant) { std::vector>> data = {{1, std::string("2")}, {4, std::string("5")}}; - auto batches = RowsToBatches(test_schema, std::ref(data), VariantConvertor).ValueOrDie(); + auto batches = + RowsToBatches(test_schema, std::ref(data), VariantConvertor).ValueOrDie(); auto table = batches->ToTable().ValueOrDie(); - std::shared_ptr col = table->column(0); - EXPECT_EQ(col->length(), 2); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->value, - 1); - EXPECT_EQ(std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->value, - 4); - - col = table->column(1); - EXPECT_EQ(col->length(), 2); - EXPECT_EQ( - std::dynamic_pointer_cast(col->GetScalar(0).ValueOrDie())->view(), - "2"); - EXPECT_EQ( - std::dynamic_pointer_cast(col->GetScalar(1).ValueOrDie())->view(), - "5"); + EXPECT_TRUE(CompareJson(*table, R"([1, 4])", "x")); + EXPECT_TRUE(CompareJson(*table, R"(["2", "5"])", "y")); } } // namespace arrow::util From d2a650fc1644b8faed8e40064c03e1ba35823ac8 Mon Sep 17 00:00:00 2001 From: Michael Hancock Date: Thu, 9 Feb 2023 10:29:45 -0500 Subject: [PATCH 07/19] Update cpp/src/arrow/util/rows_to_batches.h Co-authored-by: Will Jones --- cpp/src/arrow/util/rows_to_batches.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index b03594e983d42..2d1e1ad913e66 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -98,7 +98,8 @@ template >> */ RowsToBatches( const std::shared_ptr& schema, std::reference_wrapper rows, DataPointConvertor&& data_point_convertor, - RowAccessor&& row_accessor = detail::MakeDefaultRowAccessor()) { + RowAccessor&& row_accessor = detail::MakeDefaultRowAccessor(), + MemoryPool* pool = default_memory_pool()) { const std::size_t batch_size = 1024; auto make_next_batch = [rows_ittr = std::begin(rows.get()), rows_ittr_end = std::end(rows.get()), From c2925ab5b0507ae7c02080789e793c0c9c6ecfa9 Mon Sep 17 00:00:00 2001 From: Michael Hancock Date: Thu, 9 Feb 2023 10:32:19 -0500 Subject: [PATCH 08/19] Update cpp/src/arrow/util/rows_to_batches.h Co-authored-by: Will Jones --- cpp/src/arrow/util/rows_to_batches.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index 2d1e1ad913e66..1f96a7f765828 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -110,7 +110,7 @@ template Date: Thu, 9 Feb 2023 10:37:59 -0500 Subject: [PATCH 09/19] make batch_size an arg with default value --- cpp/src/arrow/util/rows_to_batches.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index 1f96a7f765828..3ff2c33f32c2d 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -99,18 +99,17 @@ template & schema, std::reference_wrapper rows, DataPointConvertor&& data_point_convertor, RowAccessor&& row_accessor = detail::MakeDefaultRowAccessor(), - MemoryPool* pool = default_memory_pool()) { - const std::size_t batch_size = 1024; + MemoryPool* pool = default_memory_pool(), const std::size_t batch_size = 1024) { auto make_next_batch = - [rows_ittr = std::begin(rows.get()), rows_ittr_end = std::end(rows.get()), - schema = schema, row_accessor = std::forward(row_accessor), + [pool = pool, batch_size = batch_size, rows_ittr = std::begin(rows.get()), + rows_ittr_end = std::end(rows.get()), schema = schema, + row_accessor = std::forward(row_accessor), data_point_convertor = std::forward( data_point_convertor)]() mutable -> Result> { if (rows_ittr == rows_ittr_end) return NULLPTR; - ARROW_ASSIGN_OR_RAISE( - auto record_batch_builder, - RecordBatchBuilder::Make(schema, pool, batch_size)); + ARROW_ASSIGN_OR_RAISE(auto record_batch_builder, + RecordBatchBuilder::Make(schema, pool, batch_size)); for (size_t i = 0; i < batch_size and (rows_ittr != rows_ittr_end); i++, std::advance(rows_ittr, 1)) { From 3461b0b5c857a969fd55aa65e06f3ee04a4a8f9d Mon Sep 17 00:00:00 2001 From: Michael Hancock Date: Thu, 9 Feb 2023 10:42:57 -0500 Subject: [PATCH 10/19] Update cpp/src/arrow/util/rows_to_batches.h Co-authored-by: Will Jones --- cpp/src/arrow/util/rows_to_batches.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index 3ff2c33f32c2d..225fd1f30c0f0 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -113,7 +113,7 @@ template GetField(col_index); From 91ebfdccc547be6cadb6f368618c4faa7e829b74 Mon Sep 17 00:00:00 2001 From: Michael Hancock Date: Thu, 9 Feb 2023 10:44:18 -0500 Subject: [PATCH 11/19] Update cpp/src/arrow/util/rows_to_batches.h Co-authored-by: Will Jones --- cpp/src/arrow/util/rows_to_batches.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index 225fd1f30c0f0..a0bce4d9bf9c5 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -85,8 +85,9 @@ struct is_range())), /// \param[in] row_accessor - In the common case where the value of each row iterated /// over is it's self also directly iterable, the client can just use the default. -/// the provided callable must take the values of the otter `rows` range and return a -/// `std::reference_wrapper` to the data points in a given row. +/// The provided callable must take the values of the otter `rows` range and return a +/// `std::reference_wrapper` to the data points in a given row. The data points +/// must be in order of their corresponding fields in the schema. /// see: /ref `MakeDefaultRowAccessor` /// \return `Result>>` result will be a From ba9e7350f787f2e3201ff8a1af2b6f9cb52b1d21 Mon Sep 17 00:00:00 2001 From: Michael Hancock Date: Thu, 9 Feb 2023 10:44:34 -0500 Subject: [PATCH 12/19] Update cpp/src/arrow/util/rows_to_batches.h Co-authored-by: Will Jones --- cpp/src/arrow/util/rows_to_batches.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index a0bce4d9bf9c5..78bc05e18e819 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -112,7 +112,7 @@ template Date: Thu, 9 Feb 2023 10:47:59 -0500 Subject: [PATCH 13/19] fix typo --- cpp/src/arrow/util/rows_to_batches.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index 78bc05e18e819..4dffcb3e0917a 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -85,7 +85,7 @@ struct is_range())), /// \param[in] row_accessor - In the common case where the value of each row iterated /// over is it's self also directly iterable, the client can just use the default. -/// The provided callable must take the values of the otter `rows` range and return a +/// The provided callable must take the values of the `rows` range and return a /// `std::reference_wrapper` to the data points in a given row. The data points /// must be in order of their corresponding fields in the schema. /// see: /ref `MakeDefaultRowAccessor` From cb53bd5356e84d1b5923f6800ed66f01d915fa05 Mon Sep 17 00:00:00 2001 From: Mike Hancock Date: Fri, 10 Feb 2023 11:47:55 -0500 Subject: [PATCH 14/19] Use instead of --- cpp/src/arrow/util/rows_to_batches.h | 22 ++++++++++++++++++---- cpp/src/arrow/util/rows_to_batches_test.cc | 11 +++++------ 2 files changed, 23 insertions(+), 10 deletions(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index 4dffcb3e0917a..640120a42a949 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -47,6 +47,20 @@ struct is_range())), } // namespace detail +/// Delete overload for `const Range&& rows` because the data's lifetime must exceed +/// the lifetime of the function call. `data` will be read when client uses the +/// `RecordBatchReader` +template +[[nodiscard]] typename std::enable_if_t::value, + Result>> +/* Result>> */ RowsToBatches( + const std::shared_ptr& schema, const Range&& rows, + DataPointConvertor&& data_point_convertor, + RowAccessor&& row_accessor = detail::MakeDefaultRowAccessor(), + MemoryPool* pool = default_memory_pool(), + const std::size_t batch_size = 1024) = delete; + /// \brief Utility function for converting any row-based structure into an /// `arrow::RecordBatchReader` (this can be easily converted to an `arrow::Table` using /// `arrow::RecordBatchReader::ToTable()`). @@ -71,7 +85,7 @@ struct is_range())), /// return static_cast(array_builder).Append(value); /// }; /// std::vector> data = {{1, 2, 4}, {5, 6, 7}}; -/// auto batches = RowsToBatches(kTestSchema, std::ref(data), IntConvertor); +/// auto batches = RowsToBatches(kTestSchema, data, IntConvertor); /// \endcode /// \param[in] schema - the schema to be used in the `RecordBatchReader` @@ -97,13 +111,13 @@ template ::value, Result>> /* Result>> */ RowsToBatches( - const std::shared_ptr& schema, std::reference_wrapper rows, + const std::shared_ptr& schema, const Range& rows, DataPointConvertor&& data_point_convertor, RowAccessor&& row_accessor = detail::MakeDefaultRowAccessor(), MemoryPool* pool = default_memory_pool(), const std::size_t batch_size = 1024) { auto make_next_batch = - [pool = pool, batch_size = batch_size, rows_ittr = std::begin(rows.get()), - rows_ittr_end = std::end(rows.get()), schema = schema, + [pool = pool, batch_size = batch_size, rows_ittr = std::begin(rows), + rows_ittr_end = std::end(rows), schema = schema, row_accessor = std::forward(row_accessor), data_point_convertor = std::forward( data_point_convertor)]() mutable -> Result> { diff --git a/cpp/src/arrow/util/rows_to_batches_test.cc b/cpp/src/arrow/util/rows_to_batches_test.cc index fd62bcd1ad5c7..36ab28168f112 100644 --- a/cpp/src/arrow/util/rows_to_batches_test.cc +++ b/cpp/src/arrow/util/rows_to_batches_test.cc @@ -43,7 +43,7 @@ bool CompareJson(const arrow::Table& arrow_table, const std::string& json, TEST(RowsToBatches, BasicUsage) { std::vector> data = {{1, 2, 4}, {5, 6, 7}}; - auto batches = RowsToBatches(kTestSchema, std::ref(data), IntConvertor).ValueOrDie(); + auto batches = RowsToBatches(kTestSchema, data, IntConvertor).ValueOrDie(); auto table = batches->ToTable().ValueOrDie(); EXPECT_TRUE(CompareJson(*table, R"([1, 5])", "field_1")); @@ -53,7 +53,7 @@ TEST(RowsToBatches, BasicUsage) { TEST(RowsToBatches, ConstRange) { const std::vector> data = {{1, 2, 4}, {5, 6, 7}}; - auto batches = RowsToBatches(kTestSchema, std::cref(data), IntConvertor).ValueOrDie(); + auto batches = RowsToBatches(kTestSchema, data, IntConvertor).ValueOrDie(); auto table = batches->ToTable().ValueOrDie(); EXPECT_TRUE(CompareJson(*table, R"([1, 5])", "field_1")); @@ -72,8 +72,7 @@ TEST(RowsToBatches, StructAccessor) { return std::cref(s.values); }; - auto batches = - RowsToBatches(kTestSchema, std::ref(data), IntConvertor, accessor).ValueOrDie(); + auto batches = RowsToBatches(kTestSchema, data, IntConvertor, accessor).ValueOrDie(); auto table = batches->ToTable().ValueOrDie(); @@ -97,8 +96,8 @@ TEST(RowsToBatches, Variant) { std::vector>> data = {{1, std::string("2")}, {4, std::string("5")}}; - auto batches = - RowsToBatches(test_schema, std::ref(data), VariantConvertor).ValueOrDie(); + auto batches = RowsToBatches(test_schema, data, VariantConvertor).ValueOrDie(); + auto table = batches->ToTable().ValueOrDie(); EXPECT_TRUE(CompareJson(*table, R"([1, 4])", "x")); From 33a5787bafa0247ea48ac32b43d061576d818f4b Mon Sep 17 00:00:00 2001 From: Mike Hancock Date: Wed, 15 Feb 2023 12:42:22 -0500 Subject: [PATCH 15/19] Support row accessors that return by value in addition to std::reference_wrapper --- cpp/src/arrow/util/rows_to_batches.h | 13 +++++++++++-- cpp/src/arrow/util/rows_to_batches_test.cc | 13 +++++++++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index 640120a42a949..65504529e6a42 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -129,8 +129,17 @@ template ::value) + return row; + else + return row.get(); + }(); + + for (auto& data_point : row_unwrapped) { ArrayBuilder* array_builder = record_batch_builder->GetField(col_index); ARROW_RETURN_IF(array_builder == NULLPTR, Status::Invalid("array_builder == NULLPTR")); diff --git a/cpp/src/arrow/util/rows_to_batches_test.cc b/cpp/src/arrow/util/rows_to_batches_test.cc index 36ab28168f112..0f63148747fab 100644 --- a/cpp/src/arrow/util/rows_to_batches_test.cc +++ b/cpp/src/arrow/util/rows_to_batches_test.cc @@ -79,6 +79,19 @@ TEST(RowsToBatches, StructAccessor) { EXPECT_TRUE(CompareJson(*table, R"([1, 5])", "field_1")); EXPECT_TRUE(CompareJson(*table, R"([2, 6])", "field_2")); EXPECT_TRUE(CompareJson(*table, R"([4, 7])", "field_3")); + + // Test accessor that returns by value instead of using `std::reference_wrapper` + auto accessor_by_value = [](const TestStruct& s) -> Result> { + return std::set(std::begin(s.values), std::end(s.values)); + }; + auto batches_by_value = + RowsToBatches(kTestSchema, data, IntConvertor, accessor_by_value).ValueOrDie(); + + auto table_by_value = batches_by_value->ToTable().ValueOrDie(); + + EXPECT_TRUE(CompareJson(*table_by_value, R"([1, 5])", "field_1")); + EXPECT_TRUE(CompareJson(*table_by_value, R"([2, 6])", "field_2")); + EXPECT_TRUE(CompareJson(*table_by_value, R"([4, 7])", "field_3")); } TEST(RowsToBatches, Variant) { From 0ca631a40c970a9363e9f46b133e7eed72e287a9 Mon Sep 17 00:00:00 2001 From: Will Jones Date: Thu, 16 Feb 2023 14:49:54 -0800 Subject: [PATCH 16/19] Update cpp/src/arrow/util/rows_to_batches_test.cc --- cpp/src/arrow/util/rows_to_batches_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/rows_to_batches_test.cc b/cpp/src/arrow/util/rows_to_batches_test.cc index 0f63148747fab..35f35c8695d0e 100644 --- a/cpp/src/arrow/util/rows_to_batches_test.cc +++ b/cpp/src/arrow/util/rows_to_batches_test.cc @@ -39,7 +39,7 @@ bool CompareJson(const arrow::Table& arrow_table, const std::string& json, const std::string& field_name) { const auto col = arrow_table.GetColumnByName(field_name); return arrow::ChunkedArrayFromJSON(col->type(), {json})->Equals(col); -}; +} TEST(RowsToBatches, BasicUsage) { std::vector> data = {{1, 2, 4}, {5, 6, 7}}; From ab402c5fc23b24dfe84c85f5ba4b27dc11f5a068 Mon Sep 17 00:00:00 2001 From: Michael Hancock Date: Fri, 17 Feb 2023 13:09:14 -0500 Subject: [PATCH 17/19] Update cpp/src/arrow/util/rows_to_batches.h Co-authored-by: Will Jones --- cpp/src/arrow/util/rows_to_batches.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index 65504529e6a42..1d775f24ecda4 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -35,7 +35,7 @@ namespace detail { return [](auto& x) -> Result { return std::ref(x); }; } -// Meta-funciton to check if a type `T` is a range (iterable using `std::begin()` / +// Meta-function to check if a type `T` is a range (iterable using `std::begin()` / // `std::end()`). `is_range::value` will be false if `T` is not a valid range. template struct is_range : std::false_type {}; From 6f0c006d3b27b378dc4ad17e1b61bfeca1d5bea4 Mon Sep 17 00:00:00 2001 From: Michael Hancock Date: Fri, 17 Feb 2023 13:10:26 -0500 Subject: [PATCH 18/19] Update cpp/src/arrow/util/rows_to_batches_test.cc Co-authored-by: Will Jones --- cpp/src/arrow/util/rows_to_batches_test.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cpp/src/arrow/util/rows_to_batches_test.cc b/cpp/src/arrow/util/rows_to_batches_test.cc index 35f35c8695d0e..52415fcbde8f4 100644 --- a/cpp/src/arrow/util/rows_to_batches_test.cc +++ b/cpp/src/arrow/util/rows_to_batches_test.cc @@ -101,8 +101,7 @@ TEST(RowsToBatches, Variant) { return dynamic_cast(array_builder).Append(std::get(value)); else return dynamic_cast(array_builder) - .Append(std::get(value).c_str(), - std::get(value).length()); + .Append(std::get(value)); }; const auto test_schema = schema({field("x", int64()), field("y", utf8())}); From cf615b7f717da69360888a97dfc0db6269650a9b Mon Sep 17 00:00:00 2001 From: Mike Hancock Date: Tue, 21 Feb 2023 11:08:27 -0500 Subject: [PATCH 19/19] Add documentation for pool and batch_size --- cpp/src/arrow/util/rows_to_batches.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/cpp/src/arrow/util/rows_to_batches.h b/cpp/src/arrow/util/rows_to_batches.h index 1d775f24ecda4..8ad254df200ef 100644 --- a/cpp/src/arrow/util/rows_to_batches.h +++ b/cpp/src/arrow/util/rows_to_batches.h @@ -88,12 +88,12 @@ template >>` result will be a /// `std::shared_ptr>` if not errors occurred, else an error status. template