From a3c2d36cad01616726bee174b738bff896cb0685 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 01:30:09 +0100 Subject: [PATCH 01/33] Add GArrowFixedSizeListArray --- c_glib/arrow-glib/array-builder.cpp | 122 +++++++++ c_glib/arrow-glib/array-builder.h | 26 ++ c_glib/arrow-glib/basic-array.cpp | 3 + c_glib/arrow-glib/composite-array.cpp | 250 ++++++++++++++++++ c_glib/arrow-glib/composite-array.h | 44 +++ c_glib/test/helper/buildable.rb | 12 +- .../test-fixed-size-list-array-builder.rb | 93 +++++++ c_glib/test/test-fixed-size-list-array.rb | 81 ++++++ 8 files changed, 630 insertions(+), 1 deletion(-) create mode 100644 c_glib/test/test-fixed-size-list-array-builder.rb create mode 100644 c_glib/test/test-fixed-size-list-array.rb diff --git a/c_glib/arrow-glib/array-builder.cpp b/c_glib/arrow-glib/array-builder.cpp index 87e22c74352..e8300692fff 100644 --- a/c_glib/arrow-glib/array-builder.cpp +++ b/c_glib/arrow-glib/array-builder.cpp @@ -5674,6 +5674,125 @@ garrow_large_list_array_builder_get_value_builder(GArrowLargeListArrayBuilder *b return priv->value_builder; } +typedef struct GArrowFixedSizeListArrayBuilderPrivate_ +{ + GArrowArrayBuilder *value_builder; +} GArrowFixedSizeListArrayBuilderPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowFixedSizeListArrayBuilder, + garrow_fixed_size_list_array_builder, + GARROW_TYPE_ARRAY_BUILDER) + +#define GARROW_FIXED_SIZE_LIST_ARRAY_BUILDER_GET_PRIVATE(obj) \ + static_cast( \ + garrow_fixed_size_list_array_builder_get_instance_private( \ + GARROW_FIXED_SIZE_LIST_ARRAY_BUILDER(obj))) + +static void +garrow_fixed_size_list_array_builder_dispose(GObject *object) +{ + auto priv = GARROW_FIXED_SIZE_LIST_ARRAY_BUILDER_GET_PRIVATE(object); + + if (priv->value_builder) { + g_object_unref(priv->value_builder); + priv->value_builder = NULL; + } + + G_OBJECT_CLASS(garrow_fixed_size_list_array_builder_parent_class)->dispose(object); +} + +static void +garrow_fixed_size_list_array_builder_init(GArrowFixedSizeListArrayBuilder 
*builder) +{ +} + +static void +garrow_fixed_size_list_array_builder_class_init( + GArrowFixedSizeListArrayBuilderClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_fixed_size_list_array_builder_dispose; +} + +/** + * garrow_fixed_size_list_array_builder_new: + * @data_type: A #GArrowFixedSizeListDataType for value. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: A newly created #GArrowFixedSizeListArrayBuilder. + * + * Since: 23.0.0 + */ +GArrowFixedSizeListArrayBuilder * +garrow_fixed_size_list_array_builder_new(GArrowFixedSizeListDataType *data_type, + GError **error) +{ + if (!GARROW_IS_FIXED_SIZE_LIST_DATA_TYPE(data_type)) { + g_set_error( + error, + GARROW_ERROR, + GARROW_ERROR_INVALID, + "[fixed-size-list-array-builder][new] data type must be fixed-size list data type"); + return NULL; + } + + auto arrow_data_type = garrow_data_type_get_raw(GARROW_DATA_TYPE(data_type)); + auto builder = garrow_array_builder_new(arrow_data_type, + error, + "[fixed-size-list-array-builder][new]"); + return GARROW_FIXED_SIZE_LIST_ARRAY_BUILDER(builder); +} + +/** + * garrow_fixed_size_list_array_builder_append_value: + * @builder: A #GArrowFixedSizeListArrayBuilder. + * @error: (nullable): Return location for a #GError or %NULL. + * + * Returns: %TRUE on success, %FALSE if there was an error. + * + * It appends a new list element. To append a new list element, you + * need to call this function and then append list element values to + * `value_builder`. `value_builder` is the #GArrowArrayBuilder for + * list element values. You can get `value_builder` by + * garrow_fixed_size_list_array_builder_get_value_builder().
+ * + * Since: 23.0.0 + */ +gboolean +garrow_fixed_size_list_array_builder_append_value( + GArrowFixedSizeListArrayBuilder *builder, GError **error) +{ + auto arrow_builder = std::static_pointer_cast( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto status = arrow_builder->Append(); + return garrow_error_check(error, + status, + "[fixed-size-list-array-builder][append-value]"); +} + +/** + * garrow_fixed_size_list_array_builder_get_value_builder: + * @builder: A #GArrowFixedSizeListArrayBuilder. + * + * Returns: (transfer none): The #GArrowArrayBuilder for building list element values. + * + * Since: 23.0.0 + */ +GArrowArrayBuilder * +garrow_fixed_size_list_array_builder_get_value_builder( + GArrowFixedSizeListArrayBuilder *builder) +{ + auto priv = GARROW_FIXED_SIZE_LIST_ARRAY_BUILDER_GET_PRIVATE(builder); + if (!priv->value_builder) { + auto arrow_builder = std::static_pointer_cast( + garrow_array_builder_get_raw(GARROW_ARRAY_BUILDER(builder))); + auto arrow_value_builder = arrow_builder->value_builder(); + priv->value_builder = garrow_array_builder_new_raw(arrow_value_builder); + } + return priv->value_builder; +} + G_DEFINE_TYPE(GArrowStructArrayBuilder, garrow_struct_array_builder, GARROW_TYPE_ARRAY_BUILDER) @@ -6779,6 +6898,9 @@ garrow_array_builder_new_raw(std::shared_ptr *arrow_builder case arrow::Type::type::LARGE_LIST: type = GARROW_TYPE_LARGE_LIST_ARRAY_BUILDER; break; + case arrow::Type::type::FIXED_SIZE_LIST: + type = GARROW_TYPE_FIXED_SIZE_LIST_ARRAY_BUILDER; + break; case arrow::Type::type::STRUCT: type = GARROW_TYPE_STRUCT_ARRAY_BUILDER; break; diff --git a/c_glib/arrow-glib/array-builder.h b/c_glib/arrow-glib/array-builder.h index c15c4115031..a4ccb8d2247 100644 --- a/c_glib/arrow-glib/array-builder.h +++ b/c_glib/arrow-glib/array-builder.h @@ -1635,6 +1635,32 @@ GARROW_AVAILABLE_IN_0_16 GArrowArrayBuilder * garrow_large_list_array_builder_get_value_builder(GArrowLargeListArrayBuilder *builder); +#define 
GARROW_TYPE_FIXED_SIZE_LIST_ARRAY_BUILDER \ + (garrow_fixed_size_list_array_builder_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeListArrayBuilder, + garrow_fixed_size_list_array_builder, + GARROW, + FIXED_SIZE_LIST_ARRAY_BUILDER, + GArrowArrayBuilder) +struct _GArrowFixedSizeListArrayBuilderClass +{ + GArrowArrayBuilderClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowFixedSizeListArrayBuilder * +garrow_fixed_size_list_array_builder_new(GArrowFixedSizeListDataType *data_type, + GError **error); +GARROW_AVAILABLE_IN_23_0 +gboolean +garrow_fixed_size_list_array_builder_append_value( + GArrowFixedSizeListArrayBuilder *builder, GError **error); +GARROW_AVAILABLE_IN_23_0 +GArrowArrayBuilder * +garrow_fixed_size_list_array_builder_get_value_builder( + GArrowFixedSizeListArrayBuilder *builder); + #define GARROW_TYPE_STRUCT_ARRAY_BUILDER (garrow_struct_array_builder_get_type()) GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE(GArrowStructArrayBuilder, diff --git a/c_glib/arrow-glib/basic-array.cpp b/c_glib/arrow-glib/basic-array.cpp index 34b577ea1af..e42d2ed1620 100644 --- a/c_glib/arrow-glib/basic-array.cpp +++ b/c_glib/arrow-glib/basic-array.cpp @@ -4005,6 +4005,9 @@ garrow_array_new_raw_valist(std::shared_ptr *arrow_array, case arrow::Type::type::LARGE_LIST: type = GARROW_TYPE_LARGE_LIST_ARRAY; break; + case arrow::Type::type::FIXED_SIZE_LIST: + type = GARROW_TYPE_FIXED_SIZE_LIST_ARRAY; + break; case arrow::Type::type::STRUCT: type = GARROW_TYPE_STRUCT_ARRAY; break; diff --git a/c_glib/arrow-glib/composite-array.cpp b/c_glib/arrow-glib/composite-array.cpp index d49b3936054..9bc53264b72 100644 --- a/c_glib/arrow-glib/composite-array.cpp +++ b/c_glib/arrow-glib/composite-array.cpp @@ -40,6 +40,11 @@ G_BEGIN_DECLS * It can store zero or more list data. If you don't have Arrow format data, * you need to use #GArrowLargeListArrayBuilder to create a new array. * + * #GArrowFixedSizeListArray is a class for fixed-size list array. 
+ * It can store zero or more list data where each list has the same size. + * If you don't have Arrow format data, you need to use + * #GArrowFixedSizeListArrayBuilder to create a new array. + * * #GArrowStructArray is a class for struct array. It can store zero * or more structs. One struct has one or more fields. If you don't * have Arrow format data, you need to use #GArrowStructArrayBuilder @@ -597,6 +602,251 @@ garrow_large_list_array_get_value_offsets(GArrowLargeListArray *array, gint64 *n return reinterpret_cast(value_offsets); } +typedef struct GArrowFixedSizeListArrayPrivate_ +{ + GArrowArray *raw_values; +} GArrowFixedSizeListArrayPrivate; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowFixedSizeListArray, + garrow_fixed_size_list_array, + GARROW_TYPE_ARRAY) + +#define GARROW_FIXED_SIZE_LIST_ARRAY_GET_PRIVATE(obj) \ + static_cast( \ + garrow_fixed_size_list_array_get_instance_private( \ + GARROW_FIXED_SIZE_LIST_ARRAY(obj))) + +static void +garrow_fixed_size_list_array_dispose(GObject *object) +{ + auto priv = GARROW_FIXED_SIZE_LIST_ARRAY_GET_PRIVATE(object); + + if (priv->raw_values) { + g_object_unref(priv->raw_values); + priv->raw_values = NULL; + } + + G_OBJECT_CLASS(garrow_fixed_size_list_array_parent_class)->dispose(object); +} + +static void +garrow_fixed_size_list_array_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_FIXED_SIZE_LIST_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_RAW_VALUES: + priv->raw_values = GARROW_ARRAY(g_value_dup_object(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_fixed_size_list_array_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_FIXED_SIZE_LIST_ARRAY_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_RAW_VALUES: + g_value_set_object(value, priv->raw_values); + break; + default: + 
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_fixed_size_list_array_init(GArrowFixedSizeListArray *object) +{ +} + +static void +garrow_fixed_size_list_array_class_init(GArrowFixedSizeListArrayClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_fixed_size_list_array_dispose; + gobject_class->set_property = garrow_fixed_size_list_array_set_property; + gobject_class->get_property = garrow_fixed_size_list_array_get_property; + + GParamSpec *spec; + spec = g_param_spec_object( + "raw-values", + "Raw values", + "The raw values", + GARROW_TYPE_ARRAY, + static_cast(G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY)); + g_object_class_install_property(gobject_class, PROP_RAW_VALUES, spec); +} + +/** + * garrow_fixed_size_list_array_new: + * @data_type: The data type of the list. + * @length: The number of elements. + * @values: The values as #GArrowArray. + * @null_bitmap: (nullable): The bitmap that shows null elements. The + * N-th element is null when the N-th bit is 0, not null otherwise. + * If the array has no null elements, the bitmap must be %NULL and + * @n_nulls is 0. + * @n_nulls: The number of null elements. If -1 is specified, the + * number of nulls is computed from @null_bitmap. + * + * Returns: A newly created #GArrowFixedSizeListArray.
+ * + * Since: 23.0.0 + */ +GArrowFixedSizeListArray * +garrow_fixed_size_list_array_new(GArrowDataType *data_type, + gint64 length, + GArrowArray *values, + GArrowBuffer *null_bitmap, + gint64 n_nulls) +{ + const auto arrow_data_type = garrow_data_type_get_raw(data_type); + const auto arrow_values = garrow_array_get_raw(values); + const auto arrow_null_bitmap = garrow_buffer_get_raw(null_bitmap); + auto arrow_fixed_size_list_array = + std::make_shared(arrow_data_type, + length, + arrow_values, + arrow_null_bitmap, + n_nulls); + auto arrow_array = std::static_pointer_cast(arrow_fixed_size_list_array); + return GARROW_FIXED_SIZE_LIST_ARRAY(garrow_array_new_raw(&arrow_array, + "array", + &arrow_array, + "value-data-type", + data_type, + "null-bitmap", + null_bitmap, + "raw-values", + values, + NULL)); +} + +/** + * garrow_fixed_size_list_array_get_value_type: + * @array: A #GArrowFixedSizeListArray. + * + * Returns: (transfer full): The data type of value in each list. + * + * Since: 23.0.0 + */ +GArrowDataType * +garrow_fixed_size_list_array_get_value_type(GArrowFixedSizeListArray *array) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_fixed_size_list_array = + std::static_pointer_cast(arrow_array); + auto arrow_value_type = arrow_fixed_size_list_array->value_type(); + return garrow_data_type_new_raw(&arrow_value_type); +} + +/** + * garrow_fixed_size_list_array_get_value: + * @array: A #GArrowFixedSizeListArray. + * @i: The index of the target value. + * + * Returns: (transfer full): The @i-th list. 
+ * + * Since: 23.0.0 + */ +GArrowArray * +garrow_fixed_size_list_array_get_value(GArrowFixedSizeListArray *array, gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_fixed_size_list_array = + std::static_pointer_cast(arrow_array); + auto arrow_list = arrow_fixed_size_list_array->value_slice(i); + return garrow_array_new_raw(&arrow_list, "array", &arrow_list, "parent", array, NULL); +} + +/** + * garrow_fixed_size_list_array_get_values: + * @array: A #GArrowFixedSizeListArray. + * + * Returns: (transfer full): The array containing the list's values. + * + * Since: 23.0.0 + */ +GArrowArray * +garrow_fixed_size_list_array_get_values(GArrowFixedSizeListArray *array) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_fixed_size_list_array = + std::static_pointer_cast(arrow_array); + auto arrow_values = arrow_fixed_size_list_array->values(); + return garrow_array_new_raw(&arrow_values, + "array", + &arrow_values, + "parent", + array, + NULL); +} + +/** + * garrow_fixed_size_list_array_get_value_offset: + * @array: A #GArrowFixedSizeListArray. + * @i: The index of the offset of the target value. + * + * Returns: The target offset in the array containing the list's values. + * + * Since: 23.0.0 + */ +gint64 +garrow_fixed_size_list_array_get_value_offset(GArrowFixedSizeListArray *array, gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_fixed_size_list_array = + std::static_pointer_cast(arrow_array); + return arrow_fixed_size_list_array->value_offset(i); +} + +/** + * garrow_fixed_size_list_array_get_value_length: + * @array: A #GArrowFixedSizeListArray. + * @i: The index of the target value (unused, as all lists have the same size). + * + * Returns: The fixed size of each list. 
+ * + * Since: 23.0.0 + */ +gint32 +garrow_fixed_size_list_array_get_value_length(GArrowFixedSizeListArray *array, gint64 i) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_fixed_size_list_array = + std::static_pointer_cast(arrow_array); + return arrow_fixed_size_list_array->value_length(i); +} + +/** + * garrow_fixed_size_list_array_get_list_size: + * @array: A #GArrowFixedSizeListArray. + * + * Returns: The fixed size of each list. + * + * Since: 23.0.0 + */ +gint32 +garrow_fixed_size_list_array_get_list_size(GArrowFixedSizeListArray *array) +{ + auto arrow_array = garrow_array_get_raw(GARROW_ARRAY(array)); + auto arrow_fixed_size_list_array = + std::static_pointer_cast(arrow_array); + return arrow_fixed_size_list_array->list_type()->list_size(); +} + typedef struct GArrowStructArrayPrivate_ { GPtrArray *fields; diff --git a/c_glib/arrow-glib/composite-array.h b/c_glib/arrow-glib/composite-array.h index b8ba901363d..117ffdf7079 100644 --- a/c_glib/arrow-glib/composite-array.h +++ b/c_glib/arrow-glib/composite-array.h @@ -110,6 +110,50 @@ GARROW_AVAILABLE_IN_2_0 const gint64 * garrow_large_list_array_get_value_offsets(GArrowLargeListArray *array, gint64 *n_offsets); +#define GARROW_TYPE_FIXED_SIZE_LIST_ARRAY (garrow_fixed_size_list_array_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowFixedSizeListArray, + garrow_fixed_size_list_array, + GARROW, + FIXED_SIZE_LIST_ARRAY, + GArrowArray) +struct _GArrowFixedSizeListArrayClass +{ + GArrowArrayClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowFixedSizeListArray * +garrow_fixed_size_list_array_new(GArrowDataType *data_type, + gint64 length, + GArrowArray *values, + GArrowBuffer *null_bitmap, + gint64 n_nulls); + +GARROW_AVAILABLE_IN_23_0 +GArrowDataType * +garrow_fixed_size_list_array_get_value_type(GArrowFixedSizeListArray *array); + +GARROW_AVAILABLE_IN_23_0 +GArrowArray * +garrow_fixed_size_list_array_get_value(GArrowFixedSizeListArray *array, gint64 i); 
+ +GARROW_AVAILABLE_IN_23_0 +GArrowArray * +garrow_fixed_size_list_array_get_values(GArrowFixedSizeListArray *array); + +GARROW_AVAILABLE_IN_23_0 +gint64 +garrow_fixed_size_list_array_get_value_offset(GArrowFixedSizeListArray *array, gint64 i); + +GARROW_AVAILABLE_IN_23_0 +gint32 +garrow_fixed_size_list_array_get_value_length(GArrowFixedSizeListArray *array, gint64 i); + +GARROW_AVAILABLE_IN_23_0 +gint32 +garrow_fixed_size_list_array_get_list_size(GArrowFixedSizeListArray *array); + #define GARROW_TYPE_STRUCT_ARRAY (garrow_struct_array_get_type()) GARROW_AVAILABLE_IN_ALL G_DECLARE_DERIVABLE_TYPE( diff --git a/c_glib/test/helper/buildable.rb b/c_glib/test/helper/buildable.rb index b0156f9c8e2..e7c4f98d539 100644 --- a/c_glib/test/helper/buildable.rb +++ b/c_glib/test/helper/buildable.rb @@ -172,6 +172,16 @@ def build_large_list_array(value_data_type, values_list, field_name: "value") builder.finish end + def build_fixed_size_list_array(value_data_type, list_size, values_list, field_name: "value") + value_field = Arrow::Field.new(field_name, value_data_type) + data_type = Arrow::FixedSizeListDataType.new(value_field, list_size) + builder = Arrow::FixedSizeListArrayBuilder.new(data_type) + values_list.each do |values| + append_to_builder(builder, values) + end + builder.finish + end + def build_map_array(key_data_type, item_data_type, maps) data_type = Arrow::MapDataType.new(key_data_type, item_data_type) builder = Arrow::MapArrayBuilder.new(data_type) @@ -204,7 +214,7 @@ def append_to_builder(builder, value) append_to_builder(key_builder, k) append_to_builder(item_builder, v) end - when Arrow::ListDataType, Arrow::LargeListDataType + when Arrow::ListDataType, Arrow::LargeListDataType, Arrow::FixedSizeListDataType builder.append_value value_builder = builder.value_builder value.each do |v| diff --git a/c_glib/test/test-fixed-size-list-array-builder.rb b/c_glib/test/test-fixed-size-list-array-builder.rb new file mode 100644 index 00000000000..ae061e974d6 --- /dev/null 
+++ b/c_glib/test/test-fixed-size-list-array-builder.rb @@ -0,0 +1,93 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFixedSizeListArrayBuilder < Test::Unit::TestCase + include Helper::Buildable + include Helper::Omittable + + def setup + require_gi_bindings(3, 1, 9) + end + + def test_new + field = Arrow::Field.new("value", Arrow::Int8DataType.new) + data_type = Arrow::FixedSizeListDataType.new(field, 2) + builder = Arrow::FixedSizeListArrayBuilder.new(data_type) + assert_equal(data_type, builder.value_data_type) + end + + def test_append_value + field = Arrow::Field.new("value", Arrow::Int8DataType.new) + data_type = Arrow::FixedSizeListDataType.new(field, 2) + builder = Arrow::FixedSizeListArrayBuilder.new(data_type) + value_builder = builder.value_builder + + # Append first list: [1, 2] + builder.append_value + value_builder.append_value(1) + value_builder.append_value(2) + + # Append second list: [3, 4] + builder.append_value + value_builder.append_value(3) + value_builder.append_value(4) + + array = builder.finish + assert_equal(2, array.length) + assert_equal([1, 2], array.get_value(0).length.times.collect {|i| + array.get_value(0).get_value(i)}) + assert_equal([3, 4], array.get_value(1).length.times.collect 
{|i| + array.get_value(1).get_value(i)}) + end + + def test_append_null + field = Arrow::Field.new("value", Arrow::Int8DataType.new) + data_type = Arrow::FixedSizeListDataType.new(field, 2) + builder = Arrow::FixedSizeListArrayBuilder.new(data_type) + value_builder = builder.value_builder + + # Append first list: [1, 2] + builder.append_value + value_builder.append_value(1) + value_builder.append_value(2) + + # Append null list + builder.append_null + + # Append third list: [5, 6] + builder.append_value + value_builder.append_value(5) + value_builder.append_value(6) + + array = builder.finish + assert_equal(3, array.length) + assert_equal([1, 2], array.get_value(0).length.times.collect {|i| + array.get_value(0).get_value(i)}) + assert_equal(true, array.null?(1)) + assert_equal([5, 6], array.get_value(2).length.times.collect {|i| + array.get_value(2).get_value(i)}) + end + + def test_value_builder + field = Arrow::Field.new("value", Arrow::Int8DataType.new) + data_type = Arrow::FixedSizeListDataType.new(field, 2) + builder = Arrow::FixedSizeListArrayBuilder.new(data_type) + value_builder = builder.value_builder + assert_equal(Arrow::Int8DataType.new, value_builder.value_data_type) + end +end + diff --git a/c_glib/test/test-fixed-size-list-array.rb b/c_glib/test/test-fixed-size-list-array.rb new file mode 100644 index 00000000000..e390ac8393c --- /dev/null +++ b/c_glib/test/test-fixed-size-list-array.rb @@ -0,0 +1,81 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestFixedSizeListArray < Test::Unit::TestCase + include Helper::Buildable + + def test_new + field = Arrow::Field.new("value", Arrow::Int8DataType.new) + data_type = Arrow::FixedSizeListDataType.new(field, 2) + data = Arrow::Buffer.new([1, 2, 3, 4].pack("c*")) + nulls = Arrow::Buffer.new([0b1111].pack("C*")) + values = Arrow::Int8Array.new(4, data, nulls, 0) + assert_equal(build_fixed_size_list_array(Arrow::Int8DataType.new, 2, + [[1, 2], [3, 4]]), + Arrow::FixedSizeListArray.new(data_type, + 2, + values, + Arrow::Buffer.new([0b11].pack("C*")), + -1)) + end + + def test_value + array = build_fixed_size_list_array(Arrow::Int8DataType.new, 2, + [[1, 2], [3, 4]]) + value = array.get_value(1) + assert_equal([3, 4], + value.length.times.collect {|i| value.get_value(i)}) + end + + def test_value_type + field = Arrow::Field.new("value", Arrow::Int8DataType.new) + data_type = Arrow::FixedSizeListDataType.new(field, 2) + builder = Arrow::FixedSizeListArrayBuilder.new(data_type) + array = builder.finish + assert_equal(Arrow::Int8DataType.new, array.value_type) + end + + def test_values + array = build_fixed_size_list_array(Arrow::Int8DataType.new, 2, + [[1, 2], [3, 4]]) + values = array.values + assert_equal([1, 2, 3, 4], + values.length.times.collect {|i| values.get_value(i)}) + end + + def test_value_offset + array = build_fixed_size_list_array(Arrow::Int8DataType.new, 2, + [[1, 2], [3, 4]]) + assert_equal([0, 2], + array.length.times.collect {|i| array.get_value_offset(i)}) + end + + def test_value_length + array = 
build_fixed_size_list_array(Arrow::Int8DataType.new, 2, + [[1, 2], [3, 4]]) + # All lists have the same fixed size + assert_equal([2, 2], + array.length.times.collect {|i| array.get_value_length(i)}) + end + + def test_list_size + array = build_fixed_size_list_array(Arrow::Int8DataType.new, 2, + [[1, 2], [3, 4]]) + assert_equal(2, array.list_size) + end +end + From 7a1d27b8de1a329cffac9393b1a5639cf7c03b92 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Wed, 3 Dec 2025 18:07:39 +0100 Subject: [PATCH 02/33] Add GArrowAssumeTimezoneOptions --- c_glib/arrow-glib/compute.cpp | 180 ++++++++++++++++++++ c_glib/arrow-glib/compute.h | 51 ++++++ c_glib/arrow-glib/compute.hpp | 6 + c_glib/test/test-assume-timezone-options.rb | 61 +++++++ 4 files changed, 298 insertions(+) create mode 100644 c_glib/test/test-assume-timezone-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 5f494f3bc7b..db5be7a6f1c 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -251,6 +251,9 @@ G_BEGIN_DECLS * #GArrowStructFieldOptions is a class to customize the `struct_field` * function. * + * #GArrowAssumeTimezoneOptions is a class to customize the `assume_timezone` + * function. + * * There are many functions to compute data on an array. 
*/ @@ -6338,6 +6341,156 @@ garrow_struct_field_options_new(void) return GARROW_STRUCT_FIELD_OPTIONS(options); } +enum { + PROP_ASSUME_TIMEZONE_OPTIONS_TIMEZONE = 1, + PROP_ASSUME_TIMEZONE_OPTIONS_AMBIGUOUS, + PROP_ASSUME_TIMEZONE_OPTIONS_NONEXISTENT, +}; + +G_DEFINE_TYPE(GArrowAssumeTimezoneOptions, + garrow_assume_timezone_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_assume_timezone_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_assume_timezone_options_get_raw(GARROW_ASSUME_TIMEZONE_OPTIONS(object)); + + switch (prop_id) { + case PROP_ASSUME_TIMEZONE_OPTIONS_TIMEZONE: + options->timezone = g_value_get_string(value); + break; + case PROP_ASSUME_TIMEZONE_OPTIONS_AMBIGUOUS: + options->ambiguous = static_cast( + g_value_get_enum(value)); + break; + case PROP_ASSUME_TIMEZONE_OPTIONS_NONEXISTENT: + options->nonexistent = + static_cast( + g_value_get_enum(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_assume_timezone_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_assume_timezone_options_get_raw(GARROW_ASSUME_TIMEZONE_OPTIONS(object)); + + switch (prop_id) { + case PROP_ASSUME_TIMEZONE_OPTIONS_TIMEZONE: + g_value_set_string(value, options->timezone.c_str()); + break; + case PROP_ASSUME_TIMEZONE_OPTIONS_AMBIGUOUS: + g_value_set_enum(value, + static_cast(options->ambiguous)); + break; + case PROP_ASSUME_TIMEZONE_OPTIONS_NONEXISTENT: + g_value_set_enum(value, + static_cast(options->nonexistent)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_assume_timezone_options_init(GArrowAssumeTimezoneOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::AssumeTimezoneOptions()); +} + 
+static void +garrow_assume_timezone_options_class_init(GArrowAssumeTimezoneOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_assume_timezone_options_set_property; + gobject_class->get_property = garrow_assume_timezone_options_get_property; + + arrow::compute::AssumeTimezoneOptions options; + + GParamSpec *spec; + /** + * GArrowAssumeTimezoneOptions:timezone: + * + * Timezone to convert timestamps from. + * + * Since: 23.0.0 + */ + spec = g_param_spec_string("timezone", + "Timezone", + "Timezone to convert timestamps from", + options.timezone.c_str(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_ASSUME_TIMEZONE_OPTIONS_TIMEZONE, + spec); + + /** + * GArrowAssumeTimezoneOptions:ambiguous: + * + * How to interpret ambiguous local times (due to DST shifts). + * + * Since: 23.0.0 + */ + spec = g_param_spec_enum("ambiguous", + "Ambiguous", + "How to interpret ambiguous local times (due to DST shifts)", + GARROW_TYPE_ASSUME_TIMEZONE_AMBIGUOUS, + static_cast(options.ambiguous), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_ASSUME_TIMEZONE_OPTIONS_AMBIGUOUS, + spec); + + /** + * GArrowAssumeTimezoneOptions:nonexistent: + * + * How to interpret nonexistent local times (due to DST shifts). + * + * Since: 23.0.0 + */ + spec = + g_param_spec_enum("nonexistent", + "Nonexistent", + "How to interpret nonexistent local times (due to DST shifts)", + GARROW_TYPE_ASSUME_TIMEZONE_NONEXISTENT, + static_cast(options.nonexistent), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_ASSUME_TIMEZONE_OPTIONS_NONEXISTENT, + spec); +} + +/** + * garrow_assume_timezone_options_new: + * + * Returns: A newly created #GArrowAssumeTimezoneOptions. 
+ * + * Since: 23.0.0 + */ +GArrowAssumeTimezoneOptions * +garrow_assume_timezone_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_ASSUME_TIMEZONE_OPTIONS, NULL); + return GARROW_ASSUME_TIMEZONE_OPTIONS(options); +} + G_END_DECLS arrow::Result @@ -6469,6 +6622,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_struct_field_options_new_raw(arrow_struct_field_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "AssumeTimezoneOptions") { + const auto arrow_assume_timezone_options = + static_cast(arrow_options); + auto options = garrow_assume_timezone_options_new_raw(arrow_assume_timezone_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -6987,3 +7145,25 @@ garrow_struct_field_options_get_raw(GArrowStructFieldOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowAssumeTimezoneOptions * +garrow_assume_timezone_options_new_raw( + const arrow::compute::AssumeTimezoneOptions *arrow_options) +{ + return GARROW_ASSUME_TIMEZONE_OPTIONS( + g_object_new(GARROW_TYPE_ASSUME_TIMEZONE_OPTIONS, + "timezone", + arrow_options->timezone.c_str(), + "ambiguous", + static_cast(arrow_options->ambiguous), + "nonexistent", + static_cast(arrow_options->nonexistent), + NULL)); +} + +arrow::compute::AssumeTimezoneOptions * +garrow_assume_timezone_options_get_raw(GArrowAssumeTimezoneOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 0f689d147e3..eb3db5eeb3d 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1122,4 +1122,55 @@ GARROW_AVAILABLE_IN_16_0 GArrowStructFieldOptions * garrow_struct_field_options_new(void); +/** 
+ * GArrowAssumeTimezoneAmbiguous: + * @GARROW_ASSUME_TIMEZONE_AMBIGUOUS_RAISE: Raise an error on ambiguous times. + * @GARROW_ASSUME_TIMEZONE_AMBIGUOUS_EARLIEST: Emit the earliest instant. + * @GARROW_ASSUME_TIMEZONE_AMBIGUOUS_LATEST: Emit the latest instant. + * + * They correspond to the values of + * `arrow::compute::AssumeTimezoneOptions::Ambiguous`. + * + * Since: 23.0.0 + */ +typedef enum { + GARROW_ASSUME_TIMEZONE_AMBIGUOUS_RAISE, + GARROW_ASSUME_TIMEZONE_AMBIGUOUS_EARLIEST, + GARROW_ASSUME_TIMEZONE_AMBIGUOUS_LATEST, +} GArrowAssumeTimezoneAmbiguous; + +/** + * GArrowAssumeTimezoneNonexistent: + * @GARROW_ASSUME_TIMEZONE_NONEXISTENT_RAISE: Raise an error on nonexistent times. + * @GARROW_ASSUME_TIMEZONE_NONEXISTENT_EARLIEST: Emit the instant just before the DST + * shift. + * @GARROW_ASSUME_TIMEZONE_NONEXISTENT_LATEST: Emit the DST shift instant. + * + * They correspond to the values of + * `arrow::compute::AssumeTimezoneOptions::Nonexistent`. + * + * Since: 23.0.0 + */ +typedef enum { + GARROW_ASSUME_TIMEZONE_NONEXISTENT_RAISE, + GARROW_ASSUME_TIMEZONE_NONEXISTENT_EARLIEST, + GARROW_ASSUME_TIMEZONE_NONEXISTENT_LATEST, +} GArrowAssumeTimezoneNonexistent; + +#define GARROW_TYPE_ASSUME_TIMEZONE_OPTIONS (garrow_assume_timezone_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowAssumeTimezoneOptions, + garrow_assume_timezone_options, + GARROW, + ASSUME_TIMEZONE_OPTIONS, + GArrowFunctionOptions) +struct _GArrowAssumeTimezoneOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowAssumeTimezoneOptions * +garrow_assume_timezone_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 0abf62f7d2e..55df617dafa 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -175,3 +175,9 @@ garrow_struct_field_options_new_raw( const arrow::compute::StructFieldOptions *arrow_options); arrow::compute::StructFieldOptions * 
garrow_struct_field_options_get_raw(GArrowStructFieldOptions *options); + +GArrowAssumeTimezoneOptions * +garrow_assume_timezone_options_new_raw( + const arrow::compute::AssumeTimezoneOptions *arrow_options); +arrow::compute::AssumeTimezoneOptions * +garrow_assume_timezone_options_get_raw(GArrowAssumeTimezoneOptions *options); diff --git a/c_glib/test/test-assume-timezone-options.rb b/c_glib/test/test-assume-timezone-options.rb new file mode 100644 index 00000000000..ac2e1b86153 --- /dev/null +++ b/c_glib/test/test-assume-timezone-options.rb @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestAssumeTimezoneOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::AssumeTimezoneOptions.new + end + + def test_timezone_property + assert_equal("UTC", @options.timezone) + @options.timezone = "America/New_York" + assert_equal("America/New_York", @options.timezone) + end + + def test_ambiguous_property + assert_equal(Arrow::AssumeTimezoneAmbiguous::RAISE, @options.ambiguous) + @options.ambiguous = :earliest + assert_equal(Arrow::AssumeTimezoneAmbiguous::EARLIEST, @options.ambiguous) + @options.ambiguous = :latest + assert_equal(Arrow::AssumeTimezoneAmbiguous::LATEST, @options.ambiguous) + end + + def test_nonexistent_property + assert_equal(Arrow::AssumeTimezoneNonexistent::RAISE, @options.nonexistent) + @options.nonexistent = :earliest + assert_equal(Arrow::AssumeTimezoneNonexistent::EARLIEST, @options.nonexistent) + @options.nonexistent = :latest + assert_equal(Arrow::AssumeTimezoneNonexistent::LATEST, @options.nonexistent) + end + + def test_assume_timezone_function + omit("Missing tzdata on Windows") if Gem.win_platform? 
+ args = [ + Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), + ] + @options.timezone = "America/New_York" + @options.ambiguous = :earliest + @options.nonexistent = :earliest + assume_timezone_function = Arrow::Function.find("assume_timezone") + result = assume_timezone_function.execute(args, @options).value + assert_equal(Arrow::TimestampDataType.new(:milli, "America/New_York"), + result.value_data_type) + end +end + From 9a66ed83a1f18cdf0dc5aa38362948e1e7c4d702 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Wed, 3 Dec 2025 23:40:20 +0100 Subject: [PATCH 03/33] Add GArrowCumulativeOptions --- c_glib/arrow-glib/compute.cpp | 203 +++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 16 ++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-cumulative-options.rb | 64 ++++++++ 4 files changed, 288 insertions(+) create mode 100644 c_glib/test/test-cumulative-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index db5be7a6f1c..25eb50bab2a 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -254,6 +254,10 @@ G_BEGIN_DECLS * #GArrowAssumeTimezoneOptions is a class to customize the `assume_timezone` * function. * + * #GArrowCumulativeOptions is a class to customize the cumulative functions + * such as `cumulative_sum`, `cumulative_prod`, `cumulative_max`, and + * `cumulative_min`. + * * There are many functions to compute data on an array. 
*/ @@ -6491,6 +6495,173 @@ garrow_assume_timezone_options_new(void) return GARROW_ASSUME_TIMEZONE_OPTIONS(options); } +typedef struct GArrowCumulativeOptionsPrivate_ +{ + GArrowScalar *start; +} GArrowCumulativeOptionsPrivate; + +enum { + PROP_CUMULATIVE_OPTIONS_START = 1, + PROP_CUMULATIVE_OPTIONS_SKIP_NULLS, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowCumulativeOptions, + garrow_cumulative_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +#define GARROW_CUMULATIVE_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + garrow_cumulative_options_get_instance_private(GARROW_CUMULATIVE_OPTIONS(object))) + +static void +garrow_cumulative_options_dispose(GObject *object) +{ + auto priv = GARROW_CUMULATIVE_OPTIONS_GET_PRIVATE(object); + + if (priv->start) { + g_object_unref(priv->start); + priv->start = nullptr; + } + + G_OBJECT_CLASS(garrow_cumulative_options_parent_class)->dispose(object); +} + +static void +garrow_cumulative_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CUMULATIVE_OPTIONS_GET_PRIVATE(object); + auto options = garrow_cumulative_options_get_raw(GARROW_CUMULATIVE_OPTIONS(object)); + + switch (prop_id) { + case PROP_CUMULATIVE_OPTIONS_START: + { + auto scalar = GARROW_SCALAR(g_value_get_object(value)); + if (priv->start == scalar) { + return; + } + if (priv->start) { + g_object_unref(priv->start); + } + priv->start = scalar; + if (priv->start) { + g_object_ref(priv->start); + options->start = garrow_scalar_get_raw(scalar); + } else { + options->start = std::nullopt; + } + break; + } + case PROP_CUMULATIVE_OPTIONS_SKIP_NULLS: + options->skip_nulls = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_cumulative_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_CUMULATIVE_OPTIONS_GET_PRIVATE(object); + auto options = 
garrow_cumulative_options_get_raw(GARROW_CUMULATIVE_OPTIONS(object)); + + switch (prop_id) { + case PROP_CUMULATIVE_OPTIONS_START: + { + if (priv->start) { + g_value_set_object(value, G_OBJECT(priv->start)); + } else { + g_value_set_object(value, NULL); + } + break; + } + case PROP_CUMULATIVE_OPTIONS_SKIP_NULLS: + g_value_set_boolean(value, options->skip_nulls); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_cumulative_options_init(GArrowCumulativeOptions *object) +{ + auto priv = GARROW_CUMULATIVE_OPTIONS_GET_PRIVATE(object); + priv->start = nullptr; + auto function_options_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + function_options_priv->options = static_cast( + new arrow::compute::CumulativeOptions()); +} + +static void +garrow_cumulative_options_class_init(GArrowCumulativeOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_cumulative_options_dispose; + gobject_class->set_property = garrow_cumulative_options_set_property; + gobject_class->get_property = garrow_cumulative_options_get_property; + + arrow::compute::CumulativeOptions options; + + GParamSpec *spec; + /** + * GArrowCumulativeOptions:start: + * + * Optional starting value for cumulative operation computation. + * + * Since: 23.0.0 + */ + spec = + g_param_spec_object("start", + "Start", + "Optional starting value for cumulative operation computation", + GARROW_TYPE_SCALAR, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_CUMULATIVE_OPTIONS_START, spec); + + /** + * GArrowCumulativeOptions:skip-nulls: + * + * If true, nulls in the input are ignored and produce a corresponding null output. + * When false, the first null encountered is propagated through the remaining output. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean( + "skip-nulls", + "Skip nulls", + "If true, nulls in the input are ignored and produce a corresponding null output. " + "When false, the first null encountered is propagated through the remaining output", + options.skip_nulls, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_CUMULATIVE_OPTIONS_SKIP_NULLS, + spec); +} + +/** + * garrow_cumulative_options_new: + * + * Returns: A newly created #GArrowCumulativeOptions. + * + * Since: 23.0.0 + */ +GArrowCumulativeOptions * +garrow_cumulative_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_CUMULATIVE_OPTIONS, NULL); + return GARROW_CUMULATIVE_OPTIONS(options); +} + G_END_DECLS arrow::Result @@ -6627,6 +6798,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_assume_timezone_options_new_raw(arrow_assume_timezone_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "CumulativeOptions") { + const auto arrow_cumulative_options = + static_cast(arrow_options); + auto options = garrow_cumulative_options_new_raw(arrow_cumulative_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -7167,3 +7343,30 @@ garrow_assume_timezone_options_get_raw(GArrowAssumeTimezoneOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowCumulativeOptions * +garrow_cumulative_options_new_raw(const arrow::compute::CumulativeOptions *arrow_options) +{ + GArrowScalar *start = nullptr; + if (arrow_options->start.has_value()) { + std::shared_ptr arrow_start = arrow_options->start.value(); + start = garrow_scalar_new_raw(&arrow_start); + } + auto options = GARROW_CUMULATIVE_OPTIONS(g_object_new(GARROW_TYPE_CUMULATIVE_OPTIONS, + "start", + start, + 
"skip-nulls", + arrow_options->skip_nulls, + NULL)); + if (start) { + g_object_unref(start); + } + return options; +} + +arrow::compute::CumulativeOptions * +garrow_cumulative_options_get_raw(GArrowCumulativeOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index eb3db5eeb3d..92b52257981 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1173,4 +1173,20 @@ GARROW_AVAILABLE_IN_23_0 GArrowAssumeTimezoneOptions * garrow_assume_timezone_options_new(void); +#define GARROW_TYPE_CUMULATIVE_OPTIONS (garrow_cumulative_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowCumulativeOptions, + garrow_cumulative_options, + GARROW, + CUMULATIVE_OPTIONS, + GArrowFunctionOptions) +struct _GArrowCumulativeOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowCumulativeOptions * +garrow_cumulative_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 55df617dafa..ab5235b9ccd 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -181,3 +181,8 @@ garrow_assume_timezone_options_new_raw( const arrow::compute::AssumeTimezoneOptions *arrow_options); arrow::compute::AssumeTimezoneOptions * garrow_assume_timezone_options_get_raw(GArrowAssumeTimezoneOptions *options); + +GArrowCumulativeOptions * +garrow_cumulative_options_new_raw(const arrow::compute::CumulativeOptions *arrow_options); +arrow::compute::CumulativeOptions * +garrow_cumulative_options_get_raw(GArrowCumulativeOptions *options); diff --git a/c_glib/test/test-cumulative-options.rb b/c_glib/test/test-cumulative-options.rb new file mode 100644 index 00000000000..9b25efd55f4 --- /dev/null +++ b/c_glib/test/test-cumulative-options.rb @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or 
more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestCumulativeOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::CumulativeOptions.new + end + + def test_start_property + assert_nil(@options.start) + start_scalar = Arrow::Int64Scalar.new(10) + @options.start = start_scalar + assert_equal(start_scalar, @options.start) + @options.start = nil + assert_nil(@options.start) + end + + def test_skip_nulls_property + assert do + !@options.skip_nulls? + end + @options.skip_nulls = true + assert do + @options.skip_nulls? 
+ end + end + + def test_cumulative_sum_with_skip_nulls + args = [ + Arrow::ArrayDatum.new(build_int64_array([1, 2, 3, nil, 4, 5])), + ] + @options.skip_nulls = true + cumulative_sum_function = Arrow::Function.find("cumulative_sum") + assert_equal(build_int64_array([1, 3, 6, nil, 10, 15]), + cumulative_sum_function.execute(args, @options).value) + end + + def test_cumulative_sum_with_start + args = [ + Arrow::ArrayDatum.new(build_int64_array([1, 2, 3])), + ] + @options.start = Arrow::Int64Scalar.new(10) + cumulative_sum_function = Arrow::Function.find("cumulative_sum") + assert_equal(build_int64_array([11, 13, 16]), + cumulative_sum_function.execute(args, @options).value) + end +end + From f4c15dae4fdb12798d05dab44abc3e34188baf2d Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 00:13:30 +0100 Subject: [PATCH 04/33] Add GArrowDayOfWeekOptions --- c_glib/arrow-glib/compute.cpp | 146 ++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 16 +++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-day-of-week-options.rb | 63 ++++++++++ 4 files changed, 230 insertions(+) create mode 100644 c_glib/test/test-day-of-week-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 25eb50bab2a..f47021be645 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -258,6 +258,8 @@ G_BEGIN_DECLS * such as `cumulative_sum`, `cumulative_prod`, `cumulative_max`, and * `cumulative_min`. * + * #GArrowDayOfWeekOptions is a class to customize the `day_of_week` function. + * * There are many functions to compute data on an array. 
*/ @@ -6662,6 +6664,127 @@ garrow_cumulative_options_new(void) return GARROW_CUMULATIVE_OPTIONS(options); } +enum { + PROP_DAY_OF_WEEK_OPTIONS_COUNT_FROM_ZERO = 1, + PROP_DAY_OF_WEEK_OPTIONS_WEEK_START, +}; + +G_DEFINE_TYPE(GArrowDayOfWeekOptions, + garrow_day_of_week_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_day_of_week_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_day_of_week_options_get_raw(GARROW_DAY_OF_WEEK_OPTIONS(object)); + + switch (prop_id) { + case PROP_DAY_OF_WEEK_OPTIONS_COUNT_FROM_ZERO: + options->count_from_zero = g_value_get_boolean(value); + break; + case PROP_DAY_OF_WEEK_OPTIONS_WEEK_START: + options->week_start = g_value_get_uint(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_day_of_week_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_day_of_week_options_get_raw(GARROW_DAY_OF_WEEK_OPTIONS(object)); + + switch (prop_id) { + case PROP_DAY_OF_WEEK_OPTIONS_COUNT_FROM_ZERO: + g_value_set_boolean(value, options->count_from_zero); + break; + case PROP_DAY_OF_WEEK_OPTIONS_WEEK_START: + g_value_set_uint(value, options->week_start); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_day_of_week_options_init(GArrowDayOfWeekOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::DayOfWeekOptions()); +} + +static void +garrow_day_of_week_options_class_init(GArrowDayOfWeekOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_day_of_week_options_set_property; + gobject_class->get_property = garrow_day_of_week_options_get_property; + + arrow::compute::DayOfWeekOptions options; + + GParamSpec *spec; + /** + * 
GArrowDayOfWeekOptions:count-from-zero: + * + * Number days from 0 if true and from 1 if false. + * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean("count-from-zero", + "Count from zero", + "Number days from 0 if true and from 1 if false", + options.count_from_zero, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_DAY_OF_WEEK_OPTIONS_COUNT_FROM_ZERO, + spec); + + /** + * GArrowDayOfWeekOptions:week-start: + * + * What day does the week start with (Monday=1, Sunday=7). + * The numbering is unaffected by the count_from_zero parameter. + * + * Since: 23.0.0 + */ + spec = g_param_spec_uint("week-start", + "Week start", + "What day does the week start with (Monday=1, Sunday=7). The " + "numbering is unaffected by the count_from_zero parameter", + 1, + 7, + options.week_start, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_DAY_OF_WEEK_OPTIONS_WEEK_START, + spec); +} + +/** + * garrow_day_of_week_options_new: + * + * Returns: A newly created #GArrowDayOfWeekOptions. 
+ * + * Since: 23.0.0 + */ +GArrowDayOfWeekOptions * +garrow_day_of_week_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_DAY_OF_WEEK_OPTIONS, NULL); + return GARROW_DAY_OF_WEEK_OPTIONS(options); +} + G_END_DECLS arrow::Result @@ -6803,6 +6926,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_cumulative_options_new_raw(arrow_cumulative_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "DayOfWeekOptions") { + const auto arrow_day_of_week_options = + static_cast(arrow_options); + auto options = garrow_day_of_week_options_new_raw(arrow_day_of_week_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -7370,3 +7498,21 @@ garrow_cumulative_options_get_raw(GArrowCumulativeOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowDayOfWeekOptions * +garrow_day_of_week_options_new_raw(const arrow::compute::DayOfWeekOptions *arrow_options) +{ + return GARROW_DAY_OF_WEEK_OPTIONS(g_object_new(GARROW_TYPE_DAY_OF_WEEK_OPTIONS, + "count-from-zero", + arrow_options->count_from_zero, + "week-start", + arrow_options->week_start, + NULL)); +} + +arrow::compute::DayOfWeekOptions * +garrow_day_of_week_options_get_raw(GArrowDayOfWeekOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 92b52257981..4e56058975b 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1189,4 +1189,20 @@ GARROW_AVAILABLE_IN_23_0 GArrowCumulativeOptions * garrow_cumulative_options_new(void); +#define GARROW_TYPE_DAY_OF_WEEK_OPTIONS (garrow_day_of_week_options_get_type()) +GARROW_AVAILABLE_IN_23_0 
+G_DECLARE_DERIVABLE_TYPE(GArrowDayOfWeekOptions, + garrow_day_of_week_options, + GARROW, + DAY_OF_WEEK_OPTIONS, + GArrowFunctionOptions) +struct _GArrowDayOfWeekOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowDayOfWeekOptions * +garrow_day_of_week_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index ab5235b9ccd..074df5a55ab 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -186,3 +186,8 @@ GArrowCumulativeOptions * garrow_cumulative_options_new_raw(const arrow::compute::CumulativeOptions *arrow_options); arrow::compute::CumulativeOptions * garrow_cumulative_options_get_raw(GArrowCumulativeOptions *options); + +GArrowDayOfWeekOptions * +garrow_day_of_week_options_new_raw(const arrow::compute::DayOfWeekOptions *arrow_options); +arrow::compute::DayOfWeekOptions * +garrow_day_of_week_options_get_raw(GArrowDayOfWeekOptions *options); diff --git a/c_glib/test/test-day-of-week-options.rb b/c_glib/test/test-day-of-week-options.rb new file mode 100644 index 00000000000..a3530fe0505 --- /dev/null +++ b/c_glib/test/test-day-of-week-options.rb @@ -0,0 +1,63 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestDayOfWeekOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::DayOfWeekOptions.new + end + + def test_count_from_zero_property + assert do + @options.count_from_zero? + end + @options.count_from_zero = false + assert do + !@options.count_from_zero? + end + end + + def test_week_start_property + assert_equal(1, @options.week_start) + @options.week_start = 7 + assert_equal(7, @options.week_start) + end + + def test_day_of_week_function_with_count_from_zero_false + omit("Missing tzdata on Windows") if Gem.win_platform? + args = [ + Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), + ] + @options.count_from_zero = false + day_of_week_function = Arrow::Function.find("day_of_week") + assert_equal(build_int64_array([6]), + day_of_week_function.execute(args, @options).value) + end + + def test_day_of_week_function_with_week_start + omit("Missing tzdata on Windows") if Gem.win_platform? 
+ args = [ + Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), + ] + @options.week_start = 2 + day_of_week_function = Arrow::Function.find("day_of_week") + assert_equal(build_int64_array([4]), + day_of_week_function.execute(args, @options).value) + end +end + From 8bda36bdb7f08748d173f27938097b3919b74298 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 00:33:38 +0100 Subject: [PATCH 05/33] Add GArrowDictionaryEncodeOptions --- c_glib/arrow-glib/compute.cpp | 130 ++++++++++++++++++ c_glib/arrow-glib/compute.h | 34 +++++ c_glib/arrow-glib/compute.hpp | 6 + c_glib/test/test-dictionary-encode-options.rb | 46 +++++++ 4 files changed, 216 insertions(+) create mode 100644 c_glib/test/test-dictionary-encode-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index f47021be645..ac30a99e0f6 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -260,6 +260,9 @@ G_BEGIN_DECLS * * #GArrowDayOfWeekOptions is a class to customize the `day_of_week` function. * + * #GArrowDictionaryEncodeOptions is a class to customize the `dictionary_encode` + * function. + * * There are many functions to compute data on an array. 
*/ @@ -6785,6 +6788,108 @@ garrow_day_of_week_options_new(void) return GARROW_DAY_OF_WEEK_OPTIONS(options); } +enum { + PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR = 1, +}; + +G_DEFINE_TYPE(GArrowDictionaryEncodeOptions, + garrow_dictionary_encode_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_dictionary_encode_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_dictionary_encode_options_get_raw(GARROW_DICTIONARY_ENCODE_OPTIONS(object)); + + switch (prop_id) { + case PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR: + options->null_encoding_behavior = + static_cast( + g_value_get_enum(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_dictionary_encode_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_dictionary_encode_options_get_raw(GARROW_DICTIONARY_ENCODE_OPTIONS(object)); + + switch (prop_id) { + case PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR: + g_value_set_enum(value, + static_cast( + options->null_encoding_behavior)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_dictionary_encode_options_init(GArrowDictionaryEncodeOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::DictionaryEncodeOptions()); +} + +static void +garrow_dictionary_encode_options_class_init(GArrowDictionaryEncodeOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_dictionary_encode_options_set_property; + gobject_class->get_property = garrow_dictionary_encode_options_get_property; + + arrow::compute::DictionaryEncodeOptions options; + + GParamSpec *spec; + /** + * GArrowDictionaryEncodeOptions:null-encoding-behavior: + * 
+ * How null values will be encoded. + * + * Since: 23.0.0 + */ + spec = g_param_spec_enum("null-encoding-behavior", + "Null encoding behavior", + "How null values will be encoded", + GARROW_TYPE_DICTIONARY_ENCODE_NULL_ENCODING_BEHAVIOR, + static_cast( + options.null_encoding_behavior), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_DICTIONARY_ENCODE_OPTIONS_NULL_ENCODING_BEHAVIOR, + spec); +} + +/** + * garrow_dictionary_encode_options_new: + * + * Returns: A newly created #GArrowDictionaryEncodeOptions. + * + * Since: 23.0.0 + */ +GArrowDictionaryEncodeOptions * +garrow_dictionary_encode_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_DICTIONARY_ENCODE_OPTIONS, NULL); + return GARROW_DICTIONARY_ENCODE_OPTIONS(options); +} + G_END_DECLS arrow::Result @@ -6931,6 +7036,12 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_day_of_week_options_new_raw(arrow_day_of_week_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "DictionaryEncodeOptions") { + const auto arrow_dictionary_encode_options = + static_cast(arrow_options); + auto options = + garrow_dictionary_encode_options_new_raw(arrow_dictionary_encode_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -7516,3 +7627,22 @@ garrow_day_of_week_options_get_raw(GArrowDayOfWeekOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowDictionaryEncodeOptions * +garrow_dictionary_encode_options_new_raw( + const arrow::compute::DictionaryEncodeOptions *arrow_options) +{ + return GARROW_DICTIONARY_ENCODE_OPTIONS( + g_object_new(GARROW_TYPE_DICTIONARY_ENCODE_OPTIONS, + "null-encoding-behavior", + static_cast( + arrow_options->null_encoding_behavior), + NULL)); +} + 
+arrow::compute::DictionaryEncodeOptions * +garrow_dictionary_encode_options_get_raw(GArrowDictionaryEncodeOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 4e56058975b..b58fafd5fea 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1205,4 +1205,38 @@ GARROW_AVAILABLE_IN_23_0 GArrowDayOfWeekOptions * garrow_day_of_week_options_new(void); +/** + * GArrowDictionaryEncodeNullEncodingBehavior: + * @GARROW_DICTIONARY_ENCODE_NULL_ENCODING_ENCODE: The null value will be added to the + * dictionary with a proper index. + * @GARROW_DICTIONARY_ENCODE_NULL_ENCODING_MASK: The null value will be masked in the + * indices array. + * + * They correspond to the values of + * `arrow::compute::DictionaryEncodeOptions::NullEncodingBehavior`. + * + * Since: 23.0.0 + */ +typedef enum { + GARROW_DICTIONARY_ENCODE_NULL_ENCODING_ENCODE, + GARROW_DICTIONARY_ENCODE_NULL_ENCODING_MASK, +} GArrowDictionaryEncodeNullEncodingBehavior; + +#define GARROW_TYPE_DICTIONARY_ENCODE_OPTIONS \ + (garrow_dictionary_encode_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowDictionaryEncodeOptions, + garrow_dictionary_encode_options, + GARROW, + DICTIONARY_ENCODE_OPTIONS, + GArrowFunctionOptions) +struct _GArrowDictionaryEncodeOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowDictionaryEncodeOptions * +garrow_dictionary_encode_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 074df5a55ab..a2d38a4588e 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -191,3 +191,9 @@ GArrowDayOfWeekOptions * garrow_day_of_week_options_new_raw(const arrow::compute::DayOfWeekOptions *arrow_options); arrow::compute::DayOfWeekOptions * garrow_day_of_week_options_get_raw(GArrowDayOfWeekOptions 
*options); + +GArrowDictionaryEncodeOptions * +garrow_dictionary_encode_options_new_raw( + const arrow::compute::DictionaryEncodeOptions *arrow_options); +arrow::compute::DictionaryEncodeOptions * +garrow_dictionary_encode_options_get_raw(GArrowDictionaryEncodeOptions *options); diff --git a/c_glib/test/test-dictionary-encode-options.rb b/c_glib/test/test-dictionary-encode-options.rb new file mode 100644 index 00000000000..6ab6cdd2e41 --- /dev/null +++ b/c_glib/test/test-dictionary-encode-options.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestDictionaryEncodeOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::DictionaryEncodeOptions.new + end + + def test_null_encoding_behavior_property + assert_equal(Arrow::DictionaryEncodeNullEncodingBehavior::MASK, @options.null_encoding_behavior) + @options.null_encoding_behavior = :encode + assert_equal(Arrow::DictionaryEncodeNullEncodingBehavior::ENCODE, + @options.null_encoding_behavior) + end + + def test_dictionary_encode_function_with_encode + args = [ + Arrow::ArrayDatum.new(build_string_array(["a", "b", nil, "a", "b"])), + ] + @options.null_encoding_behavior = :encode + dictionary_encode_function = Arrow::Function.find("dictionary_encode") + result = dictionary_encode_function.execute(args, @options).value + assert_equal(Arrow::DictionaryDataType.new(Arrow::Int32DataType.new, + Arrow::StringDataType.new, + false), + result.value_data_type) + assert_equal(build_int32_array([0, 1, 2, 0, 1]), result.indices) + end +end + From 829a9e42d7884957651de33be45ee8716c1ae06d Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 00:38:59 +0100 Subject: [PATCH 06/33] Add GArrowElementWiseAggregateOptions --- c_glib/arrow-glib/compute.cpp | 126 ++++++++++++++++++ c_glib/arrow-glib/compute.h | 17 +++ c_glib/arrow-glib/compute.hpp | 6 + .../test-element-wise-aggregate-options.rb | 46 +++++++ 4 files changed, 195 insertions(+) create mode 100644 c_glib/test/test-element-wise-aggregate-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index ac30a99e0f6..30d9a3242bc 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -263,6 +263,9 @@ G_BEGIN_DECLS * #GArrowDictionaryEncodeOptions is a class to customize the `dictionary_encode` * function. * + * #GArrowElementWiseAggregateOptions is a class to customize element-wise + * aggregate functions such as `min_element_wise` and `max_element_wise`. + * * There are many functions to compute data on an array. 
*/ @@ -6890,6 +6893,105 @@ garrow_dictionary_encode_options_new(void) return GARROW_DICTIONARY_ENCODE_OPTIONS(options); } +enum { + PROP_ELEMENT_WISE_AGGREGATE_OPTIONS_SKIP_NULLS = 1, +}; + +G_DEFINE_TYPE(GArrowElementWiseAggregateOptions, + garrow_element_wise_aggregate_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_element_wise_aggregate_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_element_wise_aggregate_options_get_raw( + GARROW_ELEMENT_WISE_AGGREGATE_OPTIONS(object)); + + switch (prop_id) { + case PROP_ELEMENT_WISE_AGGREGATE_OPTIONS_SKIP_NULLS: + options->skip_nulls = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_element_wise_aggregate_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_element_wise_aggregate_options_get_raw( + GARROW_ELEMENT_WISE_AGGREGATE_OPTIONS(object)); + + switch (prop_id) { + case PROP_ELEMENT_WISE_AGGREGATE_OPTIONS_SKIP_NULLS: + g_value_set_boolean(value, options->skip_nulls); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_element_wise_aggregate_options_init(GArrowElementWiseAggregateOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::ElementWiseAggregateOptions()); +} + +static void +garrow_element_wise_aggregate_options_class_init( + GArrowElementWiseAggregateOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_element_wise_aggregate_options_set_property; + gobject_class->get_property = garrow_element_wise_aggregate_options_get_property; + + arrow::compute::ElementWiseAggregateOptions options; + + GParamSpec *spec; + /** + * 
GArrowElementWiseAggregateOptions:skip-nulls: + * + * Whether to skip (ignore) nulls in the input. + * If false, any null in the input forces the output to null. + * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean("skip-nulls", + "Skip nulls", + "Whether to skip (ignore) nulls in the input. If false, " + "any null in the input forces the output to null", + options.skip_nulls, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_ELEMENT_WISE_AGGREGATE_OPTIONS_SKIP_NULLS, + spec); +} + +/** + * garrow_element_wise_aggregate_options_new: + * + * Returns: A newly created #GArrowElementWiseAggregateOptions. + * + * Since: 23.0.0 + */ +GArrowElementWiseAggregateOptions * +garrow_element_wise_aggregate_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_ELEMENT_WISE_AGGREGATE_OPTIONS, NULL); + return GARROW_ELEMENT_WISE_AGGREGATE_OPTIONS(options); +} + G_END_DECLS arrow::Result @@ -7042,6 +7144,12 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt auto options = garrow_dictionary_encode_options_new_raw(arrow_dictionary_encode_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "ElementWiseAggregateOptions") { + const auto arrow_element_wise_aggregate_options = + static_cast(arrow_options); + auto options = + garrow_element_wise_aggregate_options_new_raw(arrow_element_wise_aggregate_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -7646,3 +7754,21 @@ garrow_dictionary_encode_options_get_raw(GArrowDictionaryEncodeOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowElementWiseAggregateOptions * +garrow_element_wise_aggregate_options_new_raw( + const arrow::compute::ElementWiseAggregateOptions *arrow_options) +{ + return GARROW_ELEMENT_WISE_AGGREGATE_OPTIONS( + 
g_object_new(GARROW_TYPE_ELEMENT_WISE_AGGREGATE_OPTIONS, + "skip-nulls", + arrow_options->skip_nulls, + NULL)); +} + +arrow::compute::ElementWiseAggregateOptions * +garrow_element_wise_aggregate_options_get_raw(GArrowElementWiseAggregateOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index b58fafd5fea..339c8642fc1 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1239,4 +1239,21 @@ GARROW_AVAILABLE_IN_23_0 GArrowDictionaryEncodeOptions * garrow_dictionary_encode_options_new(void); +#define GARROW_TYPE_ELEMENT_WISE_AGGREGATE_OPTIONS \ + (garrow_element_wise_aggregate_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowElementWiseAggregateOptions, + garrow_element_wise_aggregate_options, + GARROW, + ELEMENT_WISE_AGGREGATE_OPTIONS, + GArrowFunctionOptions) +struct _GArrowElementWiseAggregateOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowElementWiseAggregateOptions * +garrow_element_wise_aggregate_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index a2d38a4588e..492c5a78422 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -197,3 +197,9 @@ garrow_dictionary_encode_options_new_raw( const arrow::compute::DictionaryEncodeOptions *arrow_options); arrow::compute::DictionaryEncodeOptions * garrow_dictionary_encode_options_get_raw(GArrowDictionaryEncodeOptions *options); + +GArrowElementWiseAggregateOptions * +garrow_element_wise_aggregate_options_new_raw( + const arrow::compute::ElementWiseAggregateOptions *arrow_options); +arrow::compute::ElementWiseAggregateOptions * +garrow_element_wise_aggregate_options_get_raw(GArrowElementWiseAggregateOptions *options); diff --git a/c_glib/test/test-element-wise-aggregate-options.rb 
b/c_glib/test/test-element-wise-aggregate-options.rb new file mode 100644 index 00000000000..379dcd3dfb5 --- /dev/null +++ b/c_glib/test/test-element-wise-aggregate-options.rb @@ -0,0 +1,46 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestElementWiseAggregateOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::ElementWiseAggregateOptions.new + end + + def test_skip_nulls_property + assert do + @options.skip_nulls? + end + @options.skip_nulls = false + assert do + !@options.skip_nulls? 
+ end + end + + def test_min_element_wise_function_with_skip_nulls_false + args = [ + Arrow::ArrayDatum.new(build_int64_array([1, nil, 3])), + Arrow::ArrayDatum.new(build_int64_array([4, 1, 5])), + ] + @options.skip_nulls = false + min_element_wise_function = Arrow::Function.find("min_element_wise") + assert_equal(build_int64_array([1, nil, 3]), + min_element_wise_function.execute(args, @options).value) + end +end + From e5e62fedbbd611afb13a3aac91f4c7a048031dad Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 00:47:22 +0100 Subject: [PATCH 07/33] Add GArrowExtractRegexOptions --- c_glib/arrow-glib/compute.cpp | 121 ++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 16 +++ c_glib/arrow-glib/compute.hpp | 6 ++ c_glib/test/test-extract-regex-options.rb | 53 ++++++++++ 4 files changed, 196 insertions(+) create mode 100644 c_glib/test/test-extract-regex-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 30d9a3242bc..35ee6d2e9dd 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -266,6 +266,9 @@ G_BEGIN_DECLS * #GArrowElementWiseAggregateOptions is a class to customize element-wise * aggregate functions such as `min_element_wise` and `max_element_wise`. * + * #GArrowExtractRegexOptions is a class to customize the `extract_regex` + * function. + * * There are many functions to compute data on an array. 
*/ @@ -6992,6 +6995,102 @@ garrow_element_wise_aggregate_options_new(void) return GARROW_ELEMENT_WISE_AGGREGATE_OPTIONS(options); } +enum { + PROP_EXTRACT_REGEX_OPTIONS_PATTERN = 1, +}; + +G_DEFINE_TYPE(GArrowExtractRegexOptions, + garrow_extract_regex_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_extract_regex_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_extract_regex_options_get_raw(GARROW_EXTRACT_REGEX_OPTIONS(object)); + + switch (prop_id) { + case PROP_EXTRACT_REGEX_OPTIONS_PATTERN: + options->pattern = g_value_get_string(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_extract_regex_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_extract_regex_options_get_raw(GARROW_EXTRACT_REGEX_OPTIONS(object)); + + switch (prop_id) { + case PROP_EXTRACT_REGEX_OPTIONS_PATTERN: + g_value_set_string(value, options->pattern.c_str()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_extract_regex_options_init(GArrowExtractRegexOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::ExtractRegexOptions()); +} + +static void +garrow_extract_regex_options_class_init(GArrowExtractRegexOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_extract_regex_options_set_property; + gobject_class->get_property = garrow_extract_regex_options_get_property; + + arrow::compute::ExtractRegexOptions options; + + GParamSpec *spec; + /** + * GArrowExtractRegexOptions:pattern: + * + * Regular expression with named capture fields. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_string("pattern", + "Pattern", + "Regular expression with named capture fields", + options.pattern.c_str(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_EXTRACT_REGEX_OPTIONS_PATTERN, + spec); +} + +/** + * garrow_extract_regex_options_new: + * + * Returns: A newly created #GArrowExtractRegexOptions. + * + * Since: 23.0.0 + */ +GArrowExtractRegexOptions * +garrow_extract_regex_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_EXTRACT_REGEX_OPTIONS, NULL); + return GARROW_EXTRACT_REGEX_OPTIONS(options); +} + G_END_DECLS arrow::Result @@ -7150,6 +7249,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt auto options = garrow_element_wise_aggregate_options_new_raw(arrow_element_wise_aggregate_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "ExtractRegexOptions") { + const auto arrow_extract_regex_options = + static_cast(arrow_options); + auto options = garrow_extract_regex_options_new_raw(arrow_extract_regex_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -7772,3 +7876,20 @@ garrow_element_wise_aggregate_options_get_raw(GArrowElementWiseAggregateOptions return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowExtractRegexOptions * +garrow_extract_regex_options_new_raw( + const arrow::compute::ExtractRegexOptions *arrow_options) +{ + return GARROW_EXTRACT_REGEX_OPTIONS(g_object_new(GARROW_TYPE_EXTRACT_REGEX_OPTIONS, + "pattern", + arrow_options->pattern.c_str(), + NULL)); +} + +arrow::compute::ExtractRegexOptions * +garrow_extract_regex_options_get_raw(GArrowExtractRegexOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h 
b/c_glib/arrow-glib/compute.h index 339c8642fc1..da2afa15034 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1256,4 +1256,20 @@ GARROW_AVAILABLE_IN_23_0 GArrowElementWiseAggregateOptions * garrow_element_wise_aggregate_options_new(void); +#define GARROW_TYPE_EXTRACT_REGEX_OPTIONS (garrow_extract_regex_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowExtractRegexOptions, + garrow_extract_regex_options, + GARROW, + EXTRACT_REGEX_OPTIONS, + GArrowFunctionOptions) +struct _GArrowExtractRegexOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowExtractRegexOptions * +garrow_extract_regex_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 492c5a78422..ff6cc0ebc34 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -203,3 +203,9 @@ garrow_element_wise_aggregate_options_new_raw( const arrow::compute::ElementWiseAggregateOptions *arrow_options); arrow::compute::ElementWiseAggregateOptions * garrow_element_wise_aggregate_options_get_raw(GArrowElementWiseAggregateOptions *options); + +GArrowExtractRegexOptions * +garrow_extract_regex_options_new_raw( + const arrow::compute::ExtractRegexOptions *arrow_options); +arrow::compute::ExtractRegexOptions * +garrow_extract_regex_options_get_raw(GArrowExtractRegexOptions *options); diff --git a/c_glib/test/test-extract-regex-options.rb b/c_glib/test/test-extract-regex-options.rb new file mode 100644 index 00000000000..e2bfb0a6921 --- /dev/null +++ b/c_glib/test/test-extract-regex-options.rb @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestExtractRegexOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::ExtractRegexOptions.new + end + + def test_pattern_property + assert_equal("", @options.pattern) + @options.pattern = "(?P\\d{4})-(?P\\d{2})" + assert_equal("(?P\\d{4})-(?P\\d{2})", @options.pattern) + end + + def test_extract_regex_function + omit("RE2 is not available") unless Arrow::Function.find("extract_regex") + args = [ + Arrow::ArrayDatum.new(build_string_array(["2023-01-15", "2024-12-31"])), + ] + @options.pattern = "(?P\\d{4})-(?P\\d{2})-(?P\\d{2})" + extract_regex_function = Arrow::Function.find("extract_regex") + result = extract_regex_function.execute(args, @options).value + fields = [ + Arrow::Field.new("year", Arrow::StringDataType.new), + Arrow::Field.new("month", Arrow::StringDataType.new), + Arrow::Field.new("day", Arrow::StringDataType.new), + ] + assert_equal(Arrow::StructDataType.new(fields), + result.value_data_type) + assert_equal(build_struct_array(fields, [ + {"year" => "2023", "month" => "01", "day" => "15"}, + {"year" => "2024", "month" => "12", "day" => "31"}, + ]), + result) + end +end + From 2b5753ff61450a6f272c92c8630641c7b93b7446 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 01:09:43 +0100 Subject: [PATCH 08/33] Add GArrowExtractRegexSpanOptions --- c_glib/arrow-glib/compute.cpp | 123 ++++++++++++++++++ c_glib/arrow-glib/compute.h 
| 17 +++ c_glib/arrow-glib/compute.hpp | 6 + .../test/test-extract-regex-span-options.rb | 65 +++++++++ 4 files changed, 211 insertions(+) create mode 100644 c_glib/test/test-extract-regex-span-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 35ee6d2e9dd..68c4024170a 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -269,6 +269,9 @@ G_BEGIN_DECLS * #GArrowExtractRegexOptions is a class to customize the `extract_regex` * function. * + * #GArrowExtractRegexSpanOptions is a class to customize the `extract_regex_span` + * function. + * * There are many functions to compute data on an array. */ @@ -7091,6 +7094,102 @@ garrow_extract_regex_options_new(void) return GARROW_EXTRACT_REGEX_OPTIONS(options); } +enum { + PROP_EXTRACT_REGEX_SPAN_OPTIONS_PATTERN = 1, +}; + +G_DEFINE_TYPE(GArrowExtractRegexSpanOptions, + garrow_extract_regex_span_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_extract_regex_span_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_extract_regex_span_options_get_raw(GARROW_EXTRACT_REGEX_SPAN_OPTIONS(object)); + + switch (prop_id) { + case PROP_EXTRACT_REGEX_SPAN_OPTIONS_PATTERN: + options->pattern = g_value_get_string(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_extract_regex_span_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_extract_regex_span_options_get_raw(GARROW_EXTRACT_REGEX_SPAN_OPTIONS(object)); + + switch (prop_id) { + case PROP_EXTRACT_REGEX_SPAN_OPTIONS_PATTERN: + g_value_set_string(value, options->pattern.c_str()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_extract_regex_span_options_init(GArrowExtractRegexSpanOptions *object) +{ + auto priv 
= GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::ExtractRegexSpanOptions()); +} + +static void +garrow_extract_regex_span_options_class_init(GArrowExtractRegexSpanOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_extract_regex_span_options_set_property; + gobject_class->get_property = garrow_extract_regex_span_options_get_property; + + arrow::compute::ExtractRegexSpanOptions options; + + GParamSpec *spec; + /** + * GArrowExtractRegexSpanOptions:pattern: + * + * Regular expression with named capture fields. + * + * Since: 23.0.0 + */ + spec = g_param_spec_string("pattern", + "Pattern", + "Regular expression with named capture fields", + options.pattern.c_str(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_EXTRACT_REGEX_SPAN_OPTIONS_PATTERN, + spec); +} + +/** + * garrow_extract_regex_span_options_new: + * + * Returns: A newly created #GArrowExtractRegexSpanOptions. 
+ * + * Since: 23.0.0 + */ +GArrowExtractRegexSpanOptions * +garrow_extract_regex_span_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_EXTRACT_REGEX_SPAN_OPTIONS, NULL); + return GARROW_EXTRACT_REGEX_SPAN_OPTIONS(options); +} + G_END_DECLS arrow::Result @@ -7254,6 +7353,12 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_extract_regex_options_new_raw(arrow_extract_regex_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "ExtractRegexSpanOptions") { + const auto arrow_extract_regex_span_options = + static_cast(arrow_options); + auto options = + garrow_extract_regex_span_options_new_raw(arrow_extract_regex_span_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -7893,3 +7998,21 @@ garrow_extract_regex_options_get_raw(GArrowExtractRegexOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowExtractRegexSpanOptions * +garrow_extract_regex_span_options_new_raw( + const arrow::compute::ExtractRegexSpanOptions *arrow_options) +{ + return GARROW_EXTRACT_REGEX_SPAN_OPTIONS( + g_object_new(GARROW_TYPE_EXTRACT_REGEX_SPAN_OPTIONS, + "pattern", + arrow_options->pattern.c_str(), + NULL)); +} + +arrow::compute::ExtractRegexSpanOptions * +garrow_extract_regex_span_options_get_raw(GArrowExtractRegexSpanOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index da2afa15034..aa089080005 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1272,4 +1272,21 @@ GARROW_AVAILABLE_IN_23_0 GArrowExtractRegexOptions * garrow_extract_regex_options_new(void); +#define GARROW_TYPE_EXTRACT_REGEX_SPAN_OPTIONS \ + 
(garrow_extract_regex_span_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowExtractRegexSpanOptions, + garrow_extract_regex_span_options, + GARROW, + EXTRACT_REGEX_SPAN_OPTIONS, + GArrowFunctionOptions) +struct _GArrowExtractRegexSpanOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowExtractRegexSpanOptions * +garrow_extract_regex_span_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index ff6cc0ebc34..650a04052c5 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -209,3 +209,9 @@ garrow_extract_regex_options_new_raw( const arrow::compute::ExtractRegexOptions *arrow_options); arrow::compute::ExtractRegexOptions * garrow_extract_regex_options_get_raw(GArrowExtractRegexOptions *options); + +GArrowExtractRegexSpanOptions * +garrow_extract_regex_span_options_new_raw( + const arrow::compute::ExtractRegexSpanOptions *arrow_options); +arrow::compute::ExtractRegexSpanOptions * +garrow_extract_regex_span_options_get_raw(GArrowExtractRegexSpanOptions *options); diff --git a/c_glib/test/test-extract-regex-span-options.rb b/c_glib/test/test-extract-regex-span-options.rb new file mode 100644 index 00000000000..b5228b90702 --- /dev/null +++ b/c_glib/test/test-extract-regex-span-options.rb @@ -0,0 +1,65 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestExtractRegexSpanOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::ExtractRegexSpanOptions.new + end + + def test_pattern_property + assert_equal("", @options.pattern) + @options.pattern = "(?P\\d{4})-(?P\\d{2})" + assert_equal("(?P\\d{4})-(?P\\d{2})", @options.pattern) + end + + def test_extract_regex_span_function + omit("RE2 is not available") unless Arrow::Function.find("extract_regex_span") + args = [ + Arrow::ArrayDatum.new(build_string_array(["2023-01-15", "2024-12-31"])), + ] + @options.pattern = "(?P\\d{4})-(?P\\d{2})-(?P\\d{2})" + extract_regex_span_function = Arrow::Function.find("extract_regex_span") + result = extract_regex_span_function.execute(args, @options).value + fields = [ + Arrow::Field.new("year", Arrow::FixedSizeListDataType.new(Arrow::Int32DataType.new, 2)), + Arrow::Field.new("month", Arrow::FixedSizeListDataType.new(Arrow::Int32DataType.new, 2)), + Arrow::Field.new("day", Arrow::FixedSizeListDataType.new(Arrow::Int32DataType.new, 2)), + ] + assert_equal(Arrow::StructDataType.new(fields), + result.value_data_type) + # The result contains [index, length] pairs for each capture group + # year: [0, 4] (starts at index 0, length 4) + # month: [5, 2] (starts at index 5, length 2) + # day: [8, 2] (starts at index 8, length 2) + assert_equal(build_struct_array(fields, [ + { + "year" => [0, 4], + "month" => [5, 2], + "day" => [8, 2], + }, + { + "year" => [0, 4], + "month" => [5, 2], + "day" => [8, 2], + }, + ]), + result) + end +end + From b138539ebcdc485abbf0fe70ebe1862db155e4d4 Mon 
Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 01:44:03 +0100 Subject: [PATCH 09/33] Add GArrowJoinOptions --- c_glib/arrow-glib/compute.cpp | 146 +++++++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 32 +++++++ c_glib/arrow-glib/compute.hpp | 5 ++ c_glib/test/test-join-options.rb | 64 ++++++++++++++ 4 files changed, 247 insertions(+) create mode 100644 c_glib/test/test-join-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 68c4024170a..3fc61dbdddf 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -272,6 +272,9 @@ G_BEGIN_DECLS * #GArrowExtractRegexSpanOptions is a class to customize the `extract_regex_span` * function. * + * #GArrowJoinOptions is a class to customize the `binary_join_element_wise` + * function. + * * There are many functions to compute data on an array. */ @@ -7190,6 +7193,125 @@ garrow_extract_regex_span_options_new(void) return GARROW_EXTRACT_REGEX_SPAN_OPTIONS(options); } +enum { + PROP_JOIN_OPTIONS_NULL_HANDLING = 1, + PROP_JOIN_OPTIONS_NULL_REPLACEMENT, +}; + +G_DEFINE_TYPE(GArrowJoinOptions, garrow_join_options, GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_join_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_join_options_get_raw(GARROW_JOIN_OPTIONS(object)); + + switch (prop_id) { + case PROP_JOIN_OPTIONS_NULL_HANDLING: + options->null_handling = + static_cast( + g_value_get_enum(value)); + break; + case PROP_JOIN_OPTIONS_NULL_REPLACEMENT: + options->null_replacement = g_value_get_string(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_join_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_join_options_get_raw(GARROW_JOIN_OPTIONS(object)); + + switch (prop_id) { + case PROP_JOIN_OPTIONS_NULL_HANDLING: + 
g_value_set_enum(value, + static_cast(options->null_handling)); + break; + case PROP_JOIN_OPTIONS_NULL_REPLACEMENT: + g_value_set_string(value, options->null_replacement.c_str()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_join_options_init(GArrowJoinOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = + static_cast(new arrow::compute::JoinOptions()); +} + +static void +garrow_join_options_class_init(GArrowJoinOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_join_options_set_property; + gobject_class->get_property = garrow_join_options_get_property; + + arrow::compute::JoinOptions options; + + GParamSpec *spec; + /** + * GArrowJoinOptions:null-handling: + * + * How to handle null values. (A null separator always results in a null output.) + * + * Since: 23.0.0 + */ + spec = + g_param_spec_enum("null-handling", + "Null handling", + "How to handle null values", + GARROW_TYPE_JOIN_NULL_HANDLING_BEHAVIOR, + static_cast(options.null_handling), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_JOIN_OPTIONS_NULL_HANDLING, spec); + + /** + * GArrowJoinOptions:null-replacement: + * + * Replacement string for null values when null-handling is REPLACE. + * + * Since: 23.0.0 + */ + spec = g_param_spec_string( + "null-replacement", + "Null replacement", + "Replacement string for null values when null-handling is REPLACE", + options.null_replacement.c_str(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_JOIN_OPTIONS_NULL_REPLACEMENT, + spec); +} + +/** + * garrow_join_options_new: + * + * Returns: A newly created #GArrowJoinOptions. 
+ * + * Since: 23.0.0 + */ +GArrowJoinOptions * +garrow_join_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_JOIN_OPTIONS, NULL); + return GARROW_JOIN_OPTIONS(options); +} + G_END_DECLS arrow::Result @@ -7359,6 +7481,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt auto options = garrow_extract_regex_span_options_new_raw(arrow_extract_regex_span_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "JoinOptions") { + const auto arrow_join_options = + static_cast(arrow_options); + auto options = garrow_join_options_new_raw(arrow_join_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -8016,3 +8143,22 @@ garrow_extract_regex_span_options_get_raw(GArrowExtractRegexSpanOptions *options return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowJoinOptions * +garrow_join_options_new_raw(const arrow::compute::JoinOptions *arrow_options) +{ + return GARROW_JOIN_OPTIONS(g_object_new( + GARROW_TYPE_JOIN_OPTIONS, + "null-handling", + static_cast(arrow_options->null_handling), + "null-replacement", + arrow_options->null_replacement.c_str(), + NULL)); +} + +arrow::compute::JoinOptions * +garrow_join_options_get_raw(GArrowJoinOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index aa089080005..c338e9f57eb 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1289,4 +1289,36 @@ GARROW_AVAILABLE_IN_23_0 GArrowExtractRegexSpanOptions * garrow_extract_regex_span_options_new(void); +/** + * GArrowJoinNullHandlingBehavior: + * @GARROW_JOIN_NULL_HANDLING_EMIT_NULL: A null in any input results in a null in the + * output. + * @GARROW_JOIN_NULL_HANDLING_SKIP: Nulls in inputs are skipped. 
+ * @GARROW_JOIN_NULL_HANDLING_REPLACE: Nulls in inputs are replaced with the replacement + * string. + * + * They correspond to the values of + * `arrow::compute::JoinOptions::NullHandlingBehavior`. + * + * Since: 23.0.0 + */ +typedef enum { + GARROW_JOIN_NULL_HANDLING_EMIT_NULL, + GARROW_JOIN_NULL_HANDLING_SKIP, + GARROW_JOIN_NULL_HANDLING_REPLACE, +} GArrowJoinNullHandlingBehavior; + +#define GARROW_TYPE_JOIN_OPTIONS (garrow_join_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowJoinOptions, garrow_join_options, GARROW, JOIN_OPTIONS, GArrowFunctionOptions) +struct _GArrowJoinOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowJoinOptions * +garrow_join_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 650a04052c5..173750bd1fb 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -215,3 +215,8 @@ garrow_extract_regex_span_options_new_raw( const arrow::compute::ExtractRegexSpanOptions *arrow_options); arrow::compute::ExtractRegexSpanOptions * garrow_extract_regex_span_options_get_raw(GArrowExtractRegexSpanOptions *options); + +GArrowJoinOptions * +garrow_join_options_new_raw(const arrow::compute::JoinOptions *arrow_options); +arrow::compute::JoinOptions * +garrow_join_options_get_raw(GArrowJoinOptions *options); diff --git a/c_glib/test/test-join-options.rb b/c_glib/test/test-join-options.rb new file mode 100644 index 00000000000..3303a703ddf --- /dev/null +++ b/c_glib/test/test-join-options.rb @@ -0,0 +1,64 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestJoinOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::JoinOptions.new + end + + def test_null_handling_property + assert_equal(Arrow::JoinNullHandlingBehavior::EMIT_NULL, @options.null_handling) + @options.null_handling = Arrow::JoinNullHandlingBehavior::SKIP + assert_equal(Arrow::JoinNullHandlingBehavior::SKIP, @options.null_handling) + @options.null_handling = Arrow::JoinNullHandlingBehavior::REPLACE + assert_equal(Arrow::JoinNullHandlingBehavior::REPLACE, @options.null_handling) + end + + def test_null_replacement_property + assert_equal("", @options.null_replacement) + @options.null_replacement = "NULL" + assert_equal("NULL", @options.null_replacement) + end + + def test_binary_join_element_wise_function + args = [ + Arrow::ArrayDatum.new(build_string_array(["a", "b", nil])), + Arrow::ArrayDatum.new(build_string_array(["x", "y", "z"])), + Arrow::ScalarDatum.new(Arrow::StringScalar.new(Arrow::Buffer.new("-"))), + ] + binary_join_element_wise_function = Arrow::Function.find("binary_join_element_wise") + + @options.null_handling = Arrow::JoinNullHandlingBehavior::EMIT_NULL + result = binary_join_element_wise_function.execute(args, @options).value + assert_equal(build_string_array(["a-x", "b-y", nil]), + result) + + @options.null_handling = Arrow::JoinNullHandlingBehavior::SKIP + result = binary_join_element_wise_function.execute(args, @options).value + assert_equal(build_string_array(["a-x", "b-y", "z"]), + result) + + @options.null_handling = Arrow::JoinNullHandlingBehavior::REPLACE + 
@options.null_replacement = "NULL" + result = binary_join_element_wise_function.execute(args, @options).value + assert_equal(build_string_array(["a-x", "b-y", "NULL-z"]), + result) + end +end + From 948c2554048f7b49f719171f120f3ac2465272b3 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 02:15:19 +0100 Subject: [PATCH 10/33] Add GArrowListFlattenOptions --- c_glib/arrow-glib/compute.cpp | 120 +++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 16 +++ c_glib/arrow-glib/compute.hpp | 6 ++ c_glib/test/test-list-flatten-options.rb | 58 +++++++++++ 4 files changed, 200 insertions(+) create mode 100644 c_glib/test/test-list-flatten-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 3fc61dbdddf..84a1b04cccf 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -275,6 +275,9 @@ G_BEGIN_DECLS * #GArrowJoinOptions is a class to customize the `binary_join_element_wise` * function. * + * #GArrowListFlattenOptions is a class to customize the `list_flatten` + * function. + * * There are many functions to compute data on an array. 
*/ @@ -7312,6 +7315,101 @@ garrow_join_options_new(void) return GARROW_JOIN_OPTIONS(options); } +enum { + PROP_LIST_FLATTEN_OPTIONS_RECURSIVE = 1, +}; + +G_DEFINE_TYPE(GArrowListFlattenOptions, + garrow_list_flatten_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_list_flatten_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_list_flatten_options_get_raw(GARROW_LIST_FLATTEN_OPTIONS(object)); + + switch (prop_id) { + case PROP_LIST_FLATTEN_OPTIONS_RECURSIVE: + options->recursive = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_list_flatten_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_list_flatten_options_get_raw(GARROW_LIST_FLATTEN_OPTIONS(object)); + + switch (prop_id) { + case PROP_LIST_FLATTEN_OPTIONS_RECURSIVE: + g_value_set_boolean(value, options->recursive); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_list_flatten_options_init(GArrowListFlattenOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::ListFlattenOptions()); +} + +static void +garrow_list_flatten_options_class_init(GArrowListFlattenOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_list_flatten_options_set_property; + gobject_class->get_property = garrow_list_flatten_options_get_property; + + arrow::compute::ListFlattenOptions options; + + GParamSpec *spec; + /** + * GArrowListFlattenOptions:recursive: + * + * If true, the list is flattened recursively until a non-list array is formed. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean( + "recursive", + "Recursive", + "If true, the list is flattened recursively until a non-list array is formed", + options.recursive, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_LIST_FLATTEN_OPTIONS_RECURSIVE, + spec); +} + +/** + * garrow_list_flatten_options_new: + * + * Returns: A newly created #GArrowListFlattenOptions. + * + * Since: 23.0.0 + */ +GArrowListFlattenOptions * +garrow_list_flatten_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_LIST_FLATTEN_OPTIONS, NULL); + return GARROW_LIST_FLATTEN_OPTIONS(options); +} + G_END_DECLS arrow::Result @@ -7486,6 +7584,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_join_options_new_raw(arrow_join_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "ListFlattenOptions") { + const auto arrow_list_flatten_options = + static_cast(arrow_options); + auto options = garrow_list_flatten_options_new_raw(arrow_list_flatten_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -8162,3 +8265,20 @@ garrow_join_options_get_raw(GArrowJoinOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowListFlattenOptions * +garrow_list_flatten_options_new_raw( + const arrow::compute::ListFlattenOptions *arrow_options) +{ + return GARROW_LIST_FLATTEN_OPTIONS(g_object_new(GARROW_TYPE_LIST_FLATTEN_OPTIONS, + "recursive", + arrow_options->recursive, + NULL)); +} + +arrow::compute::ListFlattenOptions * +garrow_list_flatten_options_get_raw(GArrowListFlattenOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h 
index c338e9f57eb..22d8204ab15 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1321,4 +1321,20 @@ GARROW_AVAILABLE_IN_23_0 GArrowJoinOptions * garrow_join_options_new(void); +#define GARROW_TYPE_LIST_FLATTEN_OPTIONS (garrow_list_flatten_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowListFlattenOptions, + garrow_list_flatten_options, + GARROW, + LIST_FLATTEN_OPTIONS, + GArrowFunctionOptions) +struct _GArrowListFlattenOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowListFlattenOptions * +garrow_list_flatten_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 173750bd1fb..1f43cf4ebb7 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -220,3 +220,9 @@ GArrowJoinOptions * garrow_join_options_new_raw(const arrow::compute::JoinOptions *arrow_options); arrow::compute::JoinOptions * garrow_join_options_get_raw(GArrowJoinOptions *options); + +GArrowListFlattenOptions * +garrow_list_flatten_options_new_raw( + const arrow::compute::ListFlattenOptions *arrow_options); +arrow::compute::ListFlattenOptions * +garrow_list_flatten_options_get_raw(GArrowListFlattenOptions *options); diff --git a/c_glib/test/test-list-flatten-options.rb b/c_glib/test/test-list-flatten-options.rb new file mode 100644 index 00000000000..b0bea17734b --- /dev/null +++ b/c_glib/test/test-list-flatten-options.rb @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestListFlattenOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::ListFlattenOptions.new + end + + def test_recursive_property + assert do + !@options.recursive? + end + @options.recursive = true + assert do + @options.recursive? + end + end + + def test_list_flatten_function_recursive + list_data_type = Arrow::ListDataType.new(Arrow::Field.new("value", Arrow::Int8DataType.new)) + nested_list = build_list_array(list_data_type, [[[1, 2], [3]], [[4, 5]]]) + + args = [ + Arrow::ArrayDatum.new(nested_list), + ] + list_flatten_function = Arrow::Function.find("list_flatten") + + @options.recursive = false + result = list_flatten_function.execute(args, @options).value + assert_equal(list_data_type, + result.value_data_type) + assert_equal(build_list_array(Arrow::Int8DataType.new, [[1, 2], [3], [4, 5]]), + result) + + @options.recursive = true + result = list_flatten_function.execute(args, @options).value + assert_equal(Arrow::Int8DataType.new, + result.value_data_type) + assert_equal(build_int8_array([1, 2, 3, 4, 5]), + result) + end +end From 1585f66042dcbe5d67d059e3cbd6191a370329ac Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 09:42:17 +0100 Subject: [PATCH 11/33] Add GArrowListSliceOptions --- c_glib/arrow-glib/compute.cpp | 247 +++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 37 ++++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-list-slice-options.rb | 124 +++++++++++++ 4 files changed, 413 insertions(+) create mode 100644 c_glib/test/test-list-slice-options.rb diff --git 
a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 84a1b04cccf..8b2611a4e88 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -278,6 +278,9 @@ G_BEGIN_DECLS * #GArrowListFlattenOptions is a class to customize the `list_flatten` * function. * + * #GArrowListSliceOptions is a class to customize the `list_slice` + * function. + * * There are many functions to compute data on an array. */ @@ -7410,6 +7413,215 @@ garrow_list_flatten_options_new(void) return GARROW_LIST_FLATTEN_OPTIONS(options); } +enum { + PROP_LIST_SLICE_OPTIONS_START = 1, + PROP_LIST_SLICE_OPTIONS_STOP, + PROP_LIST_SLICE_OPTIONS_STEP, + PROP_LIST_SLICE_OPTIONS_RETURN_FIXED_SIZE_LIST, +}; + +G_DEFINE_TYPE(GArrowListSliceOptions, + garrow_list_slice_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_list_slice_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_list_slice_options_get_raw(GARROW_LIST_SLICE_OPTIONS(object)); + + switch (prop_id) { + case PROP_LIST_SLICE_OPTIONS_START: + options->start = g_value_get_int64(value); + break; + case PROP_LIST_SLICE_OPTIONS_STOP: + { + gint64 stop_value = g_value_get_int64(value); + if (stop_value == -1) { + options->stop = std::nullopt; + } else { + options->stop = stop_value; + } + } + break; + case PROP_LIST_SLICE_OPTIONS_STEP: + options->step = g_value_get_int64(value); + break; + case PROP_LIST_SLICE_OPTIONS_RETURN_FIXED_SIZE_LIST: + { + auto return_fixed_size_list_value = + static_cast(g_value_get_enum(value)); + switch (return_fixed_size_list_value) { + case GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_AUTO: + options->return_fixed_size_list = std::nullopt; + break; + case GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_FALSE: + options->return_fixed_size_list = false; + break; + case GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_TRUE: + options->return_fixed_size_list = true; + break; + default: + options->return_fixed_size_list = 
std::nullopt; + break; + } + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_list_slice_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_list_slice_options_get_raw(GARROW_LIST_SLICE_OPTIONS(object)); + + switch (prop_id) { + case PROP_LIST_SLICE_OPTIONS_START: + g_value_set_int64(value, options->start); + break; + case PROP_LIST_SLICE_OPTIONS_STOP: + if (options->stop.has_value()) { + g_value_set_int64(value, options->stop.value()); + } else { + g_value_set_int64(value, -1); // Sentinel value for "not set" + } + break; + case PROP_LIST_SLICE_OPTIONS_STEP: + g_value_set_int64(value, options->step); + break; + case PROP_LIST_SLICE_OPTIONS_RETURN_FIXED_SIZE_LIST: + if (options->return_fixed_size_list.has_value()) { + if (options->return_fixed_size_list.value()) { + g_value_set_enum(value, GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_TRUE); + } else { + g_value_set_enum(value, GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_FALSE); + } + } else { + // When not set (nullopt), return AUTO (default) + g_value_set_enum(value, GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_AUTO); + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_list_slice_options_init(GArrowListSliceOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::ListSliceOptions()); +} + +static void +garrow_list_slice_options_class_init(GArrowListSliceOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_list_slice_options_set_property; + gobject_class->get_property = garrow_list_slice_options_get_property; + + arrow::compute::ListSliceOptions options; + + GParamSpec *spec; + /** + * GArrowListSliceOptions:start: + * + * The start of list slicing. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("start", + "Start", + "The start of list slicing", + G_MININT64, + G_MAXINT64, + options.start, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_LIST_SLICE_OPTIONS_START, spec); + + /** + * GArrowListSliceOptions:stop: + * + * Optional stop of list slicing. If not set (value is -1), then slice to end. + * + * Since: 23.0.0 + */ + spec = g_param_spec_int64( + "stop", + "Stop", + "Optional stop of list slicing. If not set (value is -1), then slice to end", + -1, // Use -1 as sentinel for "not set" + G_MAXINT64, + -1, // Default to -1 (not set) + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_LIST_SLICE_OPTIONS_STOP, spec); + + /** + * GArrowListSliceOptions:step: + * + * Slicing step. + * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("step", + "Step", + "Slicing step", + G_MININT64, + G_MAXINT64, + options.step, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_LIST_SLICE_OPTIONS_STEP, spec); + + /** + * GArrowListSliceOptions:return-fixed-size-list: + * + * Whether to return a FixedSizeListArray. If + * #GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_TRUE and stop is after a list element's + * length, nulls will be appended to create the requested slice size. If + * #GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_AUTO (default), will return whatever type + * it got in. + * + * Since: 23.0.0 + */ + spec = g_param_spec_enum( + "return-fixed-size-list", + "Return fixed size list", + "Whether to return a FixedSizeListArray. If TRUE and stop is after a list element's " + "length, nulls will be appended to create the requested slice size. 
If AUTO " + "(default), will return whatever type it got in", + GARROW_TYPE_LIST_SLICE_RETURN_FIXED_SIZE_LIST, + GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_AUTO, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_LIST_SLICE_OPTIONS_RETURN_FIXED_SIZE_LIST, + spec); +} + +/** + * garrow_list_slice_options_new: + * + * Returns: A newly created #GArrowListSliceOptions. + * + * Since: 23.0.0 + */ +GArrowListSliceOptions * +garrow_list_slice_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_LIST_SLICE_OPTIONS, NULL); + return GARROW_LIST_SLICE_OPTIONS(options); +} + G_END_DECLS arrow::Result @@ -8282,3 +8494,38 @@ garrow_list_flatten_options_get_raw(GArrowListFlattenOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowListSliceOptions * +garrow_list_slice_options_new_raw(const arrow::compute::ListSliceOptions *arrow_options) +{ + gint64 stop_value = -1; + if (arrow_options->stop.has_value()) { + stop_value = arrow_options->stop.value(); + } + GArrowListSliceReturnFixedSizeList return_fixed_size_list_value = + GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_AUTO; + if (arrow_options->return_fixed_size_list.has_value()) { + if (arrow_options->return_fixed_size_list.value()) { + return_fixed_size_list_value = GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_TRUE; + } else { + return_fixed_size_list_value = GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_FALSE; + } + } + return GARROW_LIST_SLICE_OPTIONS(g_object_new(GARROW_TYPE_LIST_SLICE_OPTIONS, + "start", + arrow_options->start, + "stop", + stop_value, + "step", + arrow_options->step, + "return-fixed-size-list", + return_fixed_size_list_value, + NULL)); +} + +arrow::compute::ListSliceOptions * +garrow_list_slice_options_get_raw(GArrowListSliceOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h 
index 22d8204ab15..bb57cc799a8 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1337,4 +1337,41 @@ GARROW_AVAILABLE_IN_23_0 GArrowListFlattenOptions * garrow_list_flatten_options_new(void); +/** + * GArrowListSliceReturnFixedSizeList: + * @GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_AUTO: Return the same type which was passed + * in (default). + * @GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_FALSE: Return a variable-size list array, + * even if a FixedSizeListArray was passed in. + * @GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_TRUE: Return a FixedSizeListArray. If stop is + * after a list element's length, nulls will be appended to create the requested slice + * size. + * + * They correspond to the values of + * `std::optional<bool>` for `arrow::compute::ListSliceOptions::return_fixed_size_list`. + * + * Since: 23.0.0 + */ +typedef enum { + GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_AUTO, + GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_FALSE, + GARROW_LIST_SLICE_RETURN_FIXED_SIZE_LIST_TRUE, +} GArrowListSliceReturnFixedSizeList; + +#define GARROW_TYPE_LIST_SLICE_OPTIONS (garrow_list_slice_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowListSliceOptions, + garrow_list_slice_options, + GARROW, + LIST_SLICE_OPTIONS, + GArrowFunctionOptions) +struct _GArrowListSliceOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowListSliceOptions * +garrow_list_slice_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 1f43cf4ebb7..0357dabd1f6 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -226,3 +226,8 @@ garrow_list_flatten_options_new_raw( const arrow::compute::ListFlattenOptions *arrow_options); arrow::compute::ListFlattenOptions * garrow_list_flatten_options_get_raw(GArrowListFlattenOptions *options); + +GArrowListSliceOptions * +garrow_list_slice_options_new_raw(const arrow::compute::ListSliceOptions *arrow_options);
+arrow::compute::ListSliceOptions * +garrow_list_slice_options_get_raw(GArrowListSliceOptions *options); diff --git a/c_glib/test/test-list-slice-options.rb b/c_glib/test/test-list-slice-options.rb new file mode 100644 index 00000000000..a25f0f0c71c --- /dev/null +++ b/c_glib/test/test-list-slice-options.rb @@ -0,0 +1,124 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestListSliceOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::ListSliceOptions.new + end + + def test_start_property + assert_equal(0, @options.start) + @options.start = 2 + assert_equal(2, @options.start) + end + + def test_stop_property + # Default is -1 (not set) + assert_equal(-1, @options.stop) + @options.stop = 5 + assert_equal(5, @options.stop) + # Setting to -1 means "not set" + @options.stop = -1 + assert_equal(-1, @options.stop) + end + + def test_step_property + assert_equal(1, @options.step) + @options.step = 2 + assert_equal(2, @options.step) + end + + def test_return_fixed_size_list_property + assert_equal(Arrow::ListSliceReturnFixedSizeList::AUTO, + @options.return_fixed_size_list) + @options.return_fixed_size_list = :true + assert_equal(Arrow::ListSliceReturnFixedSizeList::TRUE, + @options.return_fixed_size_list) + @options.return_fixed_size_list = :false + assert_equal(Arrow::ListSliceReturnFixedSizeList::FALSE, + @options.return_fixed_size_list) + @options.return_fixed_size_list = :auto + assert_equal(Arrow::ListSliceReturnFixedSizeList::AUTO, + @options.return_fixed_size_list) + end + + def test_list_slice_function_with_step + args = [ + Arrow::ArrayDatum.new(build_list_array(Arrow::Int8DataType.new, + [[1, 2, 3, 4, 5], [6, 7, 8, 9]])), + ] + @options.step = 2 + list_slice_function = Arrow::Function.find("list_slice") + result = list_slice_function.execute(args, @options).value + assert_equal(build_list_array(Arrow::Int8DataType.new, [[1, 3, 5], [6, 8]]), + result) + end + + def test_list_slice_function_without_stop + args = [ + Arrow::ArrayDatum.new(build_list_array(Arrow::Int8DataType.new, + [[1, 2, 3], [4, 5]])), + ] + @options.start = 1 + @options.stop = -1 + list_slice_function = Arrow::Function.find("list_slice") + result = list_slice_function.execute(args, @options).value + assert_equal(build_list_array(Arrow::Int8DataType.new, [[2, 3], [5]]), + result) + end + + def 
test_list_slice_function_with_return_fixed_size_list_auto + args = [ + Arrow::ArrayDatum.new(build_fixed_size_list_array(Arrow::Int8DataType.new, 2, + [[1, 2], [3, 4]])), + ] + @options.start = 1 + list_slice_function = Arrow::Function.find("list_slice") + result = list_slice_function.execute(args, @options).value + assert_equal(build_fixed_size_list_array(Arrow::Int8DataType.new, 1, [[2], [4]]), + result) + end + + def test_list_slice_function_with_return_fixed_size_list_true + args = [ + Arrow::ArrayDatum.new(build_list_array(Arrow::Int8DataType.new, + [[1, 2, 3], [4, 5]])), + ] + @options.start = 1 + @options.stop = 3 + @options.return_fixed_size_list = :true + list_slice_function = Arrow::Function.find("list_slice") + result = list_slice_function.execute(args, @options).value + assert_equal(build_fixed_size_list_array(Arrow::Int8DataType.new, 2, [[2, 3], [5, nil]]), + result) + end + + def test_list_slice_function_with_return_fixed_size_list_false + args = [ + Arrow::ArrayDatum.new(build_fixed_size_list_array(Arrow::Int8DataType.new, 2, + [[1, 2], [3, 4]])), + ] + @options.start = 1 + @options.return_fixed_size_list = :false + list_slice_function = Arrow::Function.find("list_slice") + result = list_slice_function.execute(args, @options).value + assert_equal(build_list_array(Arrow::Int8DataType.new, [[2], [4]]), + result) + end +end From f3d799df8e87dcd40e6fdb716f407ac696133eed Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 15:30:24 +0100 Subject: [PATCH 12/33] Add GArrowMakeStructOptions --- c_glib/arrow-glib/compute.cpp | 140 ++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 16 +++ c_glib/arrow-glib/compute.hpp | 6 + c_glib/test/test-make-struct-options.rb | 57 ++++++++++ 4 files changed, 219 insertions(+) create mode 100644 c_glib/test/test-make-struct-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 8b2611a4e88..bd912e3f09e 100644 --- a/c_glib/arrow-glib/compute.cpp +++ 
b/c_glib/arrow-glib/compute.cpp @@ -281,6 +281,9 @@ G_BEGIN_DECLS * #GArrowListSliceOptions is a class to customize the `list_slice` * function. * + * #GArrowMakeStructOptions is a class to customize the `make_struct` + * function. + * * There are many functions to compute data on an array. */ @@ -7622,6 +7625,120 @@ garrow_list_slice_options_new(void) return GARROW_LIST_SLICE_OPTIONS(options); } +enum { + PROP_MAKE_STRUCT_OPTIONS_FIELD_NAMES = 1, +}; + +G_DEFINE_TYPE(GArrowMakeStructOptions, + garrow_make_struct_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_make_struct_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(object))); + + switch (prop_id) { + case PROP_MAKE_STRUCT_OPTIONS_FIELD_NAMES: + { + auto strv = static_cast(g_value_get_boxed(value)); + options->field_names.clear(); + if (strv) { + for (gchar **p = strv; *p; ++p) { + options->field_names.emplace_back(*p); + } + } + // Keep nullability and metadata vectors in sync with names. 
+ options->field_nullability.assign(options->field_names.size(), true); + options->field_metadata.assign(options->field_names.size(), NULLPTR); + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_make_struct_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(object))); + + switch (prop_id) { + case PROP_MAKE_STRUCT_OPTIONS_FIELD_NAMES: + { + const auto &names = options->field_names; + auto strv = static_cast(g_new0(gchar *, names.size() + 1)); + for (gsize i = 0; i < names.size(); ++i) { + strv[i] = g_strdup(names[i].c_str()); + } + g_value_take_boxed(value, strv); + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_make_struct_options_init(GArrowMakeStructOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::MakeStructOptions()); +} + +static void +garrow_make_struct_options_class_init(GArrowMakeStructOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_make_struct_options_set_property; + gobject_class->get_property = garrow_make_struct_options_get_property; + + arrow::compute::MakeStructOptions options; + + GParamSpec *spec; + /** + * GArrowMakeStructOptions:field-names: + * + * Names for wrapped columns. + * + * Since: 23.0.0 + */ + spec = g_param_spec_boxed("field-names", + "Field names", + "Names for wrapped columns", + G_TYPE_STRV, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_MAKE_STRUCT_OPTIONS_FIELD_NAMES, + spec); +} + +/** + * garrow_make_struct_options_new: + * + * Returns: A newly created #GArrowMakeStructOptions. 
+ * + * Since: 23.0.0 + */ +GArrowMakeStructOptions * +garrow_make_struct_options_new(void) +{ + auto options = g_object_new(GARROW_TYPE_MAKE_STRUCT_OPTIONS, NULL); + return GARROW_MAKE_STRUCT_OPTIONS(options); +} + G_END_DECLS arrow::Result @@ -7801,6 +7918,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_list_flatten_options_new_raw(arrow_list_flatten_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "MakeStructOptions") { + const auto arrow_make_struct_options = + static_cast(arrow_options); + auto options = garrow_make_struct_options_new_raw(arrow_make_struct_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -8529,3 +8651,21 @@ garrow_list_slice_options_get_raw(GArrowListSliceOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowMakeStructOptions * +garrow_make_struct_options_new_raw(const arrow::compute::MakeStructOptions *arrow_options) +{ + auto options = + GARROW_MAKE_STRUCT_OPTIONS(g_object_new(GARROW_TYPE_MAKE_STRUCT_OPTIONS, NULL)); + auto arrow_new_options = static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); + *arrow_new_options = *arrow_options; + return options; +} + +arrow::compute::MakeStructOptions * +garrow_make_struct_options_get_raw(GArrowMakeStructOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index bb57cc799a8..c9aefc3dcd0 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1374,4 +1374,20 @@ GARROW_AVAILABLE_IN_23_0 GArrowListSliceOptions * garrow_list_slice_options_new(void); +#define GARROW_TYPE_MAKE_STRUCT_OPTIONS 
(garrow_make_struct_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowMakeStructOptions, + garrow_make_struct_options, + GARROW, + MAKE_STRUCT_OPTIONS, + GArrowFunctionOptions) +struct _GArrowMakeStructOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowMakeStructOptions * +garrow_make_struct_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 0357dabd1f6..e07561dd380 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -231,3 +231,9 @@ GArrowListSliceOptions * garrow_list_slice_options_new_raw(const arrow::compute::ListSliceOptions *arrow_options); arrow::compute::ListSliceOptions * garrow_list_slice_options_get_raw(GArrowListSliceOptions *options); + +GArrowMakeStructOptions * +garrow_make_struct_options_new_raw( + const arrow::compute::MakeStructOptions *arrow_options); +arrow::compute::MakeStructOptions * +garrow_make_struct_options_get_raw(GArrowMakeStructOptions *options); diff --git a/c_glib/test/test-make-struct-options.rb b/c_glib/test/test-make-struct-options.rb new file mode 100644 index 00000000000..82df78895f2 --- /dev/null +++ b/c_glib/test/test-make-struct-options.rb @@ -0,0 +1,57 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestMakeStructOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::MakeStructOptions.new + end + + def test_field_names_property + assert_equal([], @options.field_names) + @options.field_names = ["a", "b", "c"] + assert_equal(["a", "b", "c"], @options.field_names) + end + + def test_make_struct_function + a = build_int8_array([1, 2, 3]) + b = build_boolean_array([true, false, nil]) + args = [ + Arrow::ArrayDatum.new(a), + Arrow::ArrayDatum.new(b), + ] + @options.field_names = ["a", "b"] + make_struct_function = Arrow::Function.find("make_struct") + result = make_struct_function.execute(args, @options).value + + expected = build_struct_array( + [ + Arrow::Field.new("a", Arrow::Int8DataType.new), + Arrow::Field.new("b", Arrow::BooleanDataType.new), + ], + [ + {"a" => 1, "b" => true}, + {"a" => 2, "b" => false}, + {"a" => 3, "b" => nil}, + ] + ) + assert_equal(expected, result) + end +end + + From 9af1cf0031e3bb2a306ca769eb92114a637312cd Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 15:52:08 +0100 Subject: [PATCH 13/33] Add GArrowMapLookupOptions --- c_glib/arrow-glib/compute.cpp | 196 +++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 34 +++++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-map-lookup-options.rb | 77 ++++++++++ 4 files changed, 312 insertions(+) create mode 100644 c_glib/test/test-map-lookup-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index bd912e3f09e..eacb2c1297a 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -284,6 +284,9 @@ G_BEGIN_DECLS * #GArrowMakeStructOptions is a class to customize the `make_struct` * function. * + * #GArrowMapLookupOptions is a class to customize the `map_lookup` + * function. + * * There are many functions to compute data on an array. 
*/ @@ -7739,6 +7742,169 @@ garrow_make_struct_options_new(void) return GARROW_MAKE_STRUCT_OPTIONS(options); } +typedef struct GArrowMapLookupOptionsPrivate_ +{ + GArrowScalar *query_key; +} GArrowMapLookupOptionsPrivate; + +enum { + PROP_MAP_LOOKUP_OPTIONS_QUERY_KEY = 1, + PROP_MAP_LOOKUP_OPTIONS_OCCURRENCE, +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowMapLookupOptions, + garrow_map_lookup_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +#define GARROW_MAP_LOOKUP_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + garrow_map_lookup_options_get_instance_private(GARROW_MAP_LOOKUP_OPTIONS(object))) + +static void +garrow_map_lookup_options_dispose(GObject *object) +{ + auto priv = GARROW_MAP_LOOKUP_OPTIONS_GET_PRIVATE(object); + + if (priv->query_key) { + g_object_unref(priv->query_key); + priv->query_key = NULL; + } + + G_OBJECT_CLASS(garrow_map_lookup_options_parent_class)->dispose(object); +} + +static void +garrow_map_lookup_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto priv = GARROW_MAP_LOOKUP_OPTIONS_GET_PRIVATE(object); + auto options = garrow_map_lookup_options_get_raw(GARROW_MAP_LOOKUP_OPTIONS(object)); + + switch (prop_id) { + case PROP_MAP_LOOKUP_OPTIONS_QUERY_KEY: + { + auto query_key = g_value_get_object(value); + if (priv->query_key != query_key) { + if (priv->query_key) { + g_object_unref(priv->query_key); + } + priv->query_key = GARROW_SCALAR(query_key); + if (priv->query_key) { + g_object_ref(priv->query_key); + options->query_key = garrow_scalar_get_raw(priv->query_key); + } else { + options->query_key = nullptr; + } + } + } + break; + case PROP_MAP_LOOKUP_OPTIONS_OCCURRENCE: + options->occurrence = + static_cast(g_value_get_enum(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_map_lookup_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto priv = 
GARROW_MAP_LOOKUP_OPTIONS_GET_PRIVATE(object); + auto options = garrow_map_lookup_options_get_raw(GARROW_MAP_LOOKUP_OPTIONS(object)); + + switch (prop_id) { + case PROP_MAP_LOOKUP_OPTIONS_QUERY_KEY: + g_value_set_object(value, priv->query_key); + break; + case PROP_MAP_LOOKUP_OPTIONS_OCCURRENCE: + g_value_set_enum(value, static_cast(options->occurrence)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_map_lookup_options_init(GArrowMapLookupOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::MapLookupOptions()); +} + +static void +garrow_map_lookup_options_class_init(GArrowMapLookupOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_map_lookup_options_dispose; + gobject_class->set_property = garrow_map_lookup_options_set_property; + gobject_class->get_property = garrow_map_lookup_options_get_property; + + arrow::compute::MapLookupOptions options; + + GParamSpec *spec; + /** + * GArrowMapLookupOptions:query-key: + * + * The key to lookup in the map. + * + * Since: 23.0.0 + */ + spec = g_param_spec_object("query-key", + "Query key", + "The key to lookup in the map", + GARROW_TYPE_SCALAR, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_MAP_LOOKUP_OPTIONS_QUERY_KEY, spec); + + /** + * GArrowMapLookupOptions:occurrence: + * + * Whether to return the first, last, or all matching values. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_enum("occurrence", + "Occurrence", + "Whether to return the first, last, or all matching values", + GARROW_TYPE_MAP_LOOKUP_OCCURRENCE, + static_cast(options.occurrence), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_MAP_LOOKUP_OPTIONS_OCCURRENCE, + spec); +} + +/** + * garrow_map_lookup_options_new: + * @query_key: (nullable): A #GArrowScalar to be looked up. + * @occurrence: A #GArrowMapLookupOccurrence. + * + * Returns: A newly created #GArrowMapLookupOptions. + * + * Since: 23.0.0 + */ +GArrowMapLookupOptions * +garrow_map_lookup_options_new(GArrowScalar *query_key, + GArrowMapLookupOccurrence occurrence) +{ + return GARROW_MAP_LOOKUP_OPTIONS(g_object_new(GARROW_TYPE_MAP_LOOKUP_OPTIONS, + "query-key", + query_key, + "occurrence", + occurrence, + NULL)); +} + G_END_DECLS arrow::Result @@ -7923,6 +8089,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_make_struct_options_new_raw(arrow_make_struct_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "MapLookupOptions") { + const auto arrow_map_lookup_options = + static_cast(arrow_options); + auto options = garrow_map_lookup_options_new_raw(arrow_map_lookup_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -8669,3 +8840,28 @@ garrow_make_struct_options_get_raw(GArrowMakeStructOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowMapLookupOptions * +garrow_map_lookup_options_new_raw(const arrow::compute::MapLookupOptions *arrow_options) +{ + GArrowScalar *query_key = nullptr; + if (arrow_options->query_key) { + auto arrow_query_key = arrow_options->query_key; + query_key = garrow_scalar_new_raw(&arrow_query_key); + } + 
GArrowMapLookupOccurrence occurrence = + static_cast(arrow_options->occurrence); + return GARROW_MAP_LOOKUP_OPTIONS(g_object_new(GARROW_TYPE_MAP_LOOKUP_OPTIONS, + "query-key", + query_key, + "occurrence", + occurrence, + NULL)); +} + +arrow::compute::MapLookupOptions * +garrow_map_lookup_options_get_raw(GArrowMapLookupOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index c9aefc3dcd0..acee68c11ae 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1390,4 +1390,38 @@ GARROW_AVAILABLE_IN_23_0 GArrowMakeStructOptions * garrow_make_struct_options_new(void); +/** + * GArrowMapLookupOccurrence: + * @GARROW_MAP_LOOKUP_OCCURRENCE_FIRST: Return the first matching value. + * @GARROW_MAP_LOOKUP_OCCURRENCE_LAST: Return the last matching value. + * @GARROW_MAP_LOOKUP_OCCURRENCE_ALL: Return all matching values. + * + * They correspond to the values of + * `arrow::compute::MapLookupOptions::Occurrence`. 
+ * + * Since: 23.0.0 + */ +typedef enum { + GARROW_MAP_LOOKUP_OCCURRENCE_FIRST, + GARROW_MAP_LOOKUP_OCCURRENCE_LAST, + GARROW_MAP_LOOKUP_OCCURRENCE_ALL, +} GArrowMapLookupOccurrence; + +#define GARROW_TYPE_MAP_LOOKUP_OPTIONS (garrow_map_lookup_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowMapLookupOptions, + garrow_map_lookup_options, + GARROW, + MAP_LOOKUP_OPTIONS, + GArrowFunctionOptions) +struct _GArrowMapLookupOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowMapLookupOptions * +garrow_map_lookup_options_new(GArrowScalar *query_key, + GArrowMapLookupOccurrence occurrence); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index e07561dd380..df340cb6cff 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -237,3 +237,8 @@ garrow_make_struct_options_new_raw( const arrow::compute::MakeStructOptions *arrow_options); arrow::compute::MakeStructOptions * garrow_make_struct_options_get_raw(GArrowMakeStructOptions *options); + +GArrowMapLookupOptions * +garrow_map_lookup_options_new_raw(const arrow::compute::MapLookupOptions *arrow_options); +arrow::compute::MapLookupOptions * +garrow_map_lookup_options_get_raw(GArrowMapLookupOptions *options); diff --git a/c_glib/test/test-map-lookup-options.rb b/c_glib/test/test-map-lookup-options.rb new file mode 100644 index 00000000000..5af68e378d9 --- /dev/null +++ b/c_glib/test/test-map-lookup-options.rb @@ -0,0 +1,77 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestMapLookupOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @query_key = Arrow::Int32Scalar.new(1) + @options = Arrow::MapLookupOptions.new(@query_key, + Arrow::MapLookupOccurrence::FIRST) + end + + def test_query_key_property + assert_equal(@query_key, @options.query_key) + new_query_key = Arrow::Int32Scalar.new(2) + @options.query_key = new_query_key + assert_equal(new_query_key, @options.query_key) + end + + def test_occurrence_property + assert_equal(Arrow::MapLookupOccurrence::FIRST, @options.occurrence) + @options.occurrence = :last + assert_equal(Arrow::MapLookupOccurrence::LAST, @options.occurrence) + @options.occurrence = :all + assert_equal(Arrow::MapLookupOccurrence::ALL, @options.occurrence) + @options.occurrence = :first + assert_equal(Arrow::MapLookupOccurrence::FIRST, @options.occurrence) + end + + def build_map_with_duplicate_keys + end + + def test_map_lookup_function + map_array = build_map_array(Arrow::Int32DataType.new, + Arrow::StringDataType.new, + [[ + [1, "first_one"], + [2, "two"], + [1, nil], + [3, "three"], + [1, "second_one"], + [1, "last_one"], + ]]) + args = [Arrow::ArrayDatum.new(map_array)] + map_lookup_function = Arrow::Function.find("map_lookup") + @options.query_key = Arrow::Int32Scalar.new(1) + + @options.occurrence = :first + result = map_lookup_function.execute(args, @options).value + assert_equal(build_string_array(["first_one"]), result) + + @options.occurrence = :last + result = map_lookup_function.execute(args, @options).value + assert_equal(build_string_array(["last_one"]), 
result) + + @options.occurrence = :all + result = map_lookup_function.execute(args, @options).value + assert_equal(build_list_array(Arrow::StringDataType.new, + [["first_one", nil, "second_one", "last_one"]]), + result) + end +end + From fe05d09ec1ebaaeaad43a8933eefc28f77f3b5e9 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 16:03:39 +0100 Subject: [PATCH 14/33] Add GArrowModeOptions --- c_glib/arrow-glib/compute.cpp | 164 +++++++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 13 +++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-mode-options.rb | 69 +++++++++++++ 4 files changed, 251 insertions(+) create mode 100644 c_glib/test/test-mode-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index eacb2c1297a..f31172ac8da 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -287,6 +287,8 @@ G_BEGIN_DECLS * #GArrowMapLookupOptions is a class to customize the `map_lookup` * function. * + * #GArrowModeOptions is a class to customize the `mode` function. + * * There are many functions to compute data on an array. 
*/ @@ -7905,6 +7907,142 @@ garrow_map_lookup_options_new(GArrowScalar *query_key, NULL)); } +enum { + PROP_MODE_OPTIONS_N = 1, + PROP_MODE_OPTIONS_SKIP_NULLS, + PROP_MODE_OPTIONS_MIN_COUNT, +}; + +G_DEFINE_TYPE(GArrowModeOptions, garrow_mode_options, GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_mode_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_mode_options_get_raw(GARROW_MODE_OPTIONS(object)); + + switch (prop_id) { + case PROP_MODE_OPTIONS_N: + options->n = g_value_get_int64(value); + break; + case PROP_MODE_OPTIONS_SKIP_NULLS: + options->skip_nulls = g_value_get_boolean(value); + break; + case PROP_MODE_OPTIONS_MIN_COUNT: + options->min_count = g_value_get_uint(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_mode_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_mode_options_get_raw(GARROW_MODE_OPTIONS(object)); + + switch (prop_id) { + case PROP_MODE_OPTIONS_N: + g_value_set_int64(value, options->n); + break; + case PROP_MODE_OPTIONS_SKIP_NULLS: + g_value_set_boolean(value, options->skip_nulls); + break; + case PROP_MODE_OPTIONS_MIN_COUNT: + g_value_set_uint(value, options->min_count); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_mode_options_init(GArrowModeOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = + static_cast(new arrow::compute::ModeOptions()); +} + +static void +garrow_mode_options_class_init(GArrowModeOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_mode_options_set_property; + gobject_class->get_property = garrow_mode_options_get_property; + + arrow::compute::ModeOptions options; + + GParamSpec *spec; + /** + * GArrowModeOptions:n: + 
* + * Number of distinct most-common values to return. + * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("n", + "N", + "Number of distinct most-common values to return", + 1, + G_MAXINT64, + options.n, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_MODE_OPTIONS_N, spec); + + /** + * GArrowModeOptions:skip-nulls: + * + * Whether NULLs are skipped or not. + * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean("skip-nulls", + "Skip NULLs", + "Whether NULLs are skipped or not", + options.skip_nulls, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_MODE_OPTIONS_SKIP_NULLS, spec); + + /** + * GArrowModeOptions:min-count: + * + * If less than this many non-null values are observed, emit null. + * + * Since: 23.0.0 + */ + spec = + g_param_spec_uint("min-count", + "Min count", + "If less than this many non-null values are observed, emit null", + 0, + G_MAXUINT, + options.min_count, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_MODE_OPTIONS_MIN_COUNT, spec); +} + +/** + * garrow_mode_options_new: + * + * Returns: A newly created #GArrowModeOptions. 
+ * + * Since: 23.0.0 + */ +GArrowModeOptions * +garrow_mode_options_new(void) +{ + return GARROW_MODE_OPTIONS(g_object_new(GARROW_TYPE_MODE_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -8094,6 +8232,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_map_lookup_options_new_raw(arrow_map_lookup_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "ModeOptions") { + const auto arrow_mode_options = + static_cast(arrow_options); + auto options = garrow_mode_options_new_raw(arrow_mode_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -8865,3 +9008,24 @@ garrow_map_lookup_options_get_raw(GArrowMapLookupOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowModeOptions * +garrow_mode_options_new_raw(const arrow::compute::ModeOptions *arrow_options) +{ + auto options = g_object_new(GARROW_TYPE_MODE_OPTIONS, + "n", + arrow_options->n, + "skip-nulls", + arrow_options->skip_nulls, + "min-count", + arrow_options->min_count, + NULL); + return GARROW_MODE_OPTIONS(options); +} + +arrow::compute::ModeOptions * +garrow_mode_options_get_raw(GArrowModeOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index acee68c11ae..71d7f850f26 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1424,4 +1424,17 @@ GArrowMapLookupOptions * garrow_map_lookup_options_new(GArrowScalar *query_key, GArrowMapLookupOccurrence occurrence); +#define GARROW_TYPE_MODE_OPTIONS (garrow_mode_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowModeOptions, garrow_mode_options, GARROW, MODE_OPTIONS, GArrowFunctionOptions) 
+struct _GArrowModeOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowModeOptions * +garrow_mode_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index df340cb6cff..af754dfd7ac 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -242,3 +242,8 @@ GArrowMapLookupOptions * garrow_map_lookup_options_new_raw(const arrow::compute::MapLookupOptions *arrow_options); arrow::compute::MapLookupOptions * garrow_map_lookup_options_get_raw(GArrowMapLookupOptions *options); + +GArrowModeOptions * +garrow_mode_options_new_raw(const arrow::compute::ModeOptions *arrow_options); +arrow::compute::ModeOptions * +garrow_mode_options_get_raw(GArrowModeOptions *options); diff --git a/c_glib/test/test-mode-options.rb b/c_glib/test/test-mode-options.rb new file mode 100644 index 00000000000..2acf0add9bd --- /dev/null +++ b/c_glib/test/test-mode-options.rb @@ -0,0 +1,69 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestModeOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::ModeOptions.new + end + + def test_n + assert_equal(1, @options.n) + @options.n = 2 + assert_equal(2, @options.n) + end + + def test_skip_nulls + assert do + @options.skip_nulls? + end + @options.skip_nulls = false + assert do + not @options.skip_nulls? + end + end + + def test_min_count + assert_equal(0, @options.min_count) + @options.min_count = 1 + assert_equal(1, @options.min_count) + end + + def test_mode_function_with_all_options + args = [ + Arrow::ArrayDatum.new(build_int32_array([1, 2, 2, 3, 3, 3, 4])), + ] + @options.n = 2 + @options.skip_nulls = false + @options.min_count = 2 + mode_function = Arrow::Function.find("mode") + result = mode_function.execute(args, @options).value + expected = build_struct_array( + [ + Arrow::Field.new("mode", Arrow::Int32DataType.new), + Arrow::Field.new("count", Arrow::Int64DataType.new), + ], + [ + {"mode" => 3, "count" => 3}, + {"mode" => 2, "count" => 2}, + ] + ) + assert_equal(expected, result) + end +end + From 05de1cdd79ec9d4920d3702c55e9b9e9e46dd055 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 16:09:55 +0100 Subject: [PATCH 15/33] Add GArrowNullOptions --- c_glib/arrow-glib/compute.cpp | 112 +++++++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 13 ++++ c_glib/arrow-glib/compute.hpp | 5 ++ c_glib/test/test-null-options.rb | 45 +++++++++++++ 4 files changed, 175 insertions(+) create mode 100644 c_glib/test/test-null-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index f31172ac8da..b558c5d0819 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -289,6 +289,8 @@ G_BEGIN_DECLS * * #GArrowModeOptions is a class to customize the `mode` function. * + * #GArrowNullOptions is a class to customize the `is_null` function. + * * There are many functions to compute data on an array. 
*/ @@ -8043,6 +8045,95 @@ garrow_mode_options_new(void) return GARROW_MODE_OPTIONS(g_object_new(GARROW_TYPE_MODE_OPTIONS, NULL)); } +enum { + PROP_NULL_OPTIONS_NAN_IS_NULL = 1, +}; + +G_DEFINE_TYPE(GArrowNullOptions, garrow_null_options, GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_null_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_null_options_get_raw(GARROW_NULL_OPTIONS(object)); + + switch (prop_id) { + case PROP_NULL_OPTIONS_NAN_IS_NULL: + options->nan_is_null = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_null_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_null_options_get_raw(GARROW_NULL_OPTIONS(object)); + + switch (prop_id) { + case PROP_NULL_OPTIONS_NAN_IS_NULL: + g_value_set_boolean(value, options->nan_is_null); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_null_options_init(GArrowNullOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = + static_cast(new arrow::compute::NullOptions()); +} + +static void +garrow_null_options_class_init(GArrowNullOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_null_options_set_property; + gobject_class->get_property = garrow_null_options_get_property; + + arrow::compute::NullOptions options; + + GParamSpec *spec; + /** + * GArrowNullOptions:nan-is-null: + * + * Whether floating-point NaN values are considered null. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean("nan-is-null", + "NaN is null", + "Whether floating-point NaN values are considered null", + options.nan_is_null, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_NULL_OPTIONS_NAN_IS_NULL, spec); +} + +/** + * garrow_null_options_new: + * + * Returns: A newly created #GArrowNullOptions. + * + * Since: 23.0.0 + */ +GArrowNullOptions * +garrow_null_options_new(void) +{ + return GARROW_NULL_OPTIONS(g_object_new(GARROW_TYPE_NULL_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -8237,6 +8328,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_mode_options_new_raw(arrow_mode_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "NullOptions") { + const auto arrow_null_options = + static_cast(arrow_options); + auto options = garrow_null_options_new_raw(arrow_null_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -9029,3 +9125,19 @@ garrow_mode_options_get_raw(GArrowModeOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowNullOptions * +garrow_null_options_new_raw(const arrow::compute::NullOptions *arrow_options) +{ + return GARROW_NULL_OPTIONS(g_object_new(GARROW_TYPE_NULL_OPTIONS, + "nan-is-null", + arrow_options->nan_is_null, + NULL)); +} + +arrow::compute::NullOptions * +garrow_null_options_get_raw(GArrowNullOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 71d7f850f26..9d255f09ea1 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1437,4 +1437,17 @@ GARROW_AVAILABLE_IN_23_0 GArrowModeOptions * 
garrow_mode_options_new(void); +#define GARROW_TYPE_NULL_OPTIONS (garrow_null_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowNullOptions, garrow_null_options, GARROW, NULL_OPTIONS, GArrowFunctionOptions) +struct _GArrowNullOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowNullOptions * +garrow_null_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index af754dfd7ac..30f5bd9d068 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -247,3 +247,8 @@ GArrowModeOptions * garrow_mode_options_new_raw(const arrow::compute::ModeOptions *arrow_options); arrow::compute::ModeOptions * garrow_mode_options_get_raw(GArrowModeOptions *options); + +GArrowNullOptions * +garrow_null_options_new_raw(const arrow::compute::NullOptions *arrow_options); +arrow::compute::NullOptions * +garrow_null_options_get_raw(GArrowNullOptions *options); diff --git a/c_glib/test/test-null-options.rb b/c_glib/test/test-null-options.rb new file mode 100644 index 00000000000..b873a897357 --- /dev/null +++ b/c_glib/test/test-null-options.rb @@ -0,0 +1,45 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestNullOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::NullOptions.new + end + + def test_nan_is_null_property + assert do + !@options.nan_is_null? + end + @options.nan_is_null = true + assert do + @options.nan_is_null? + end + end + + def test_is_null_function + args = [ + Arrow::ArrayDatum.new(build_float_array([1.0, Float::NAN, 2.0, nil, 4.0])), + ] + is_null_function = Arrow::Function.find("is_null") + @options.nan_is_null = true + result = is_null_function.execute(args, @options).value + assert_equal(build_boolean_array([false, true, false, true, false]), result) + end +end + From 8a7e4a2acc9503ff855b2971f27ad497bebc766d Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 16:16:20 +0100 Subject: [PATCH 16/33] Add GArrowPadOptions --- c_glib/arrow-glib/compute.cpp | 170 ++++++++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 13 +++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-pad-options.rb | 63 ++++++++++++ 4 files changed, 251 insertions(+) create mode 100644 c_glib/test/test-pad-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index b558c5d0819..733bafe1449 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -291,6 +291,10 @@ G_BEGIN_DECLS * * #GArrowNullOptions is a class to customize the `is_null` function. * + * #GArrowPadOptions is a class to customize the padding functions such as + * `utf8_lpad`, `utf8_rpad`, `utf8_center`, `ascii_lpad`, `ascii_rpad`, and + * `ascii_center`. + * * There are many functions to compute data on an array. 
*/ @@ -8134,6 +8138,147 @@ garrow_null_options_new(void) return GARROW_NULL_OPTIONS(g_object_new(GARROW_TYPE_NULL_OPTIONS, NULL)); } +enum { + PROP_PAD_OPTIONS_WIDTH = 1, + PROP_PAD_OPTIONS_PADDING, + PROP_PAD_OPTIONS_LEAN_LEFT_ON_ODD_PADDING, +}; + +G_DEFINE_TYPE(GArrowPadOptions, garrow_pad_options, GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_pad_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_pad_options_get_raw(GARROW_PAD_OPTIONS(object)); + + switch (prop_id) { + case PROP_PAD_OPTIONS_WIDTH: + options->width = g_value_get_int64(value); + break; + case PROP_PAD_OPTIONS_PADDING: + options->padding = g_value_get_string(value) ? g_value_get_string(value) : ""; + break; + case PROP_PAD_OPTIONS_LEAN_LEFT_ON_ODD_PADDING: + options->lean_left_on_odd_padding = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_pad_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_pad_options_get_raw(GARROW_PAD_OPTIONS(object)); + + switch (prop_id) { + case PROP_PAD_OPTIONS_WIDTH: + g_value_set_int64(value, options->width); + break; + case PROP_PAD_OPTIONS_PADDING: + g_value_set_string(value, options->padding.c_str()); + break; + case PROP_PAD_OPTIONS_LEAN_LEFT_ON_ODD_PADDING: + g_value_set_boolean(value, options->lean_left_on_odd_padding); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_pad_options_init(GArrowPadOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = + static_cast(new arrow::compute::PadOptions()); +} + +static void +garrow_pad_options_class_init(GArrowPadOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_pad_options_set_property; + gobject_class->get_property =
garrow_pad_options_get_property; + + arrow::compute::PadOptions options; + + GParamSpec *spec; + /** + * GArrowPadOptions:width: + * + * The desired string length. + * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("width", + "Width", + "The desired string length", + 0, + G_MAXINT64, + options.width, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_PAD_OPTIONS_WIDTH, spec); + + /** + * GArrowPadOptions:padding: + * + * What to pad the string with. Should be one codepoint (Unicode)/byte (ASCII). + * + * Since: 23.0.0 + */ + spec = g_param_spec_string( + "padding", + "Padding", + "What to pad the string with. Should be one codepoint (Unicode)/byte (ASCII)", + options.padding.c_str(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_PAD_OPTIONS_PADDING, spec); + + /** + * GArrowPadOptions:lean-left-on-odd-padding: + * + * What to do if there is an odd number of padding characters (in case of centered + * padding). Defaults to aligning on the left (i.e. adding the extra padding character + * on the right). + * + * Since: 23.0.0 + */ + spec = + g_param_spec_boolean("lean-left-on-odd-padding", + "Lean left on odd padding", + "What to do if there is an odd number of padding characters (in " + "case of centered padding). Defaults to aligning on the left " + "(i.e. adding the extra padding character on the right)", + options.lean_left_on_odd_padding, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_PAD_OPTIONS_LEAN_LEFT_ON_ODD_PADDING, + spec); +} + +/** + * garrow_pad_options_new: + * + * Returns: A newly created #GArrowPadOptions. 
+ * + * Since: 23.0.0 + */ +GArrowPadOptions * +garrow_pad_options_new(void) +{ + return GARROW_PAD_OPTIONS(g_object_new(GARROW_TYPE_PAD_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -8333,6 +8478,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_null_options_new_raw(arrow_null_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "PadOptions") { + const auto arrow_pad_options = + static_cast(arrow_options); + auto options = garrow_pad_options_new_raw(arrow_pad_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -9141,3 +9291,23 @@ garrow_null_options_get_raw(GArrowNullOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowPadOptions * +garrow_pad_options_new_raw(const arrow::compute::PadOptions *arrow_options) +{ + return GARROW_PAD_OPTIONS(g_object_new(GARROW_TYPE_PAD_OPTIONS, + "width", + arrow_options->width, + "padding", + arrow_options->padding.c_str(), + "lean-left-on-odd-padding", + arrow_options->lean_left_on_odd_padding, + NULL)); +} + +arrow::compute::PadOptions * +garrow_pad_options_get_raw(GArrowPadOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 9d255f09ea1..00ffcbe222d 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1450,4 +1450,17 @@ GARROW_AVAILABLE_IN_23_0 GArrowNullOptions * garrow_null_options_new(void); +#define GARROW_TYPE_PAD_OPTIONS (garrow_pad_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowPadOptions, garrow_pad_options, GARROW, PAD_OPTIONS, GArrowFunctionOptions) +struct _GArrowPadOptionsClass +{ + GArrowFunctionOptionsClass parent_class; 
// Creates a new GArrowPadOptions from the fields of the given Arrow C++
// PadOptions.
GArrowPadOptions *
garrow_pad_options_new_raw(const arrow::compute::PadOptions *arrow_options);
// Returns the result of garrow_function_options_get_raw() for the given
// wrapper, typed as arrow::compute::PadOptions.
arrow::compute::PadOptions *
garrow_pad_options_get_raw(GArrowPadOptions *options);
# Tests for Arrow::PadOptions: property defaults, property round-trips and
# the effect of the options on the "utf8_center" compute function.
class TestPadOptions < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @options = Arrow::PadOptions.new
  end

  def test_width_property
    assert_equal(0, @options.width)
    @options.width = 5
    assert_equal(5, @options.width)
  end

  def test_padding_property
    assert_equal(" ", @options.padding)
    @options.padding = "0"
    assert_equal("0", @options.padding)
  end

  def test_lean_left_on_odd_padding_property
    assert do
      @options.lean_left_on_odd_padding?
    end
    @options.lean_left_on_odd_padding = false
    assert do
      not @options.lean_left_on_odd_padding?
    end
  end

  def test_utf8_center_function
    args = [
      Arrow::ArrayDatum.new(build_string_array(["a", "ab", "abc"])),
    ]
    utf8_center_function = Arrow::Function.find("utf8_center")

    @options.width = 5
    @options.padding = " "
    # Every output is exactly 5 characters wide. "ab" needs an odd number
    # of padding characters (3); leaning left puts the extra one on the
    # right.
    result = utf8_center_function.execute(args, @options).value
    assert_equal(build_string_array(["  a  ", " ab  ", " abc "]), result)

    # Without leaning left the extra padding character goes on the left,
    # so only the "ab" entry changes.
    @options.lean_left_on_odd_padding = false
    result = utf8_center_function.execute(args, @options).value
    assert_equal(build_string_array(["  a  ", "  ab ", " abc "]), result)
  end
end
* + * #GArrowPairwiseOptions is a class to customize the pairwise functions such as + * `pairwise_diff` and `pairwise_diff_checked`. + * * There are many functions to compute data on an array. */ @@ -8279,6 +8282,100 @@ garrow_pad_options_new(void) return GARROW_PAD_OPTIONS(g_object_new(GARROW_TYPE_PAD_OPTIONS, NULL)); } +enum { + PROP_PAIRWISE_OPTIONS_PERIODS = 1, +}; + +G_DEFINE_TYPE(GArrowPairwiseOptions, + garrow_pairwise_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_pairwise_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_pairwise_options_get_raw(GARROW_PAIRWISE_OPTIONS(object)); + + switch (prop_id) { + case PROP_PAIRWISE_OPTIONS_PERIODS: + options->periods = g_value_get_int64(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_pairwise_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_pairwise_options_get_raw(GARROW_PAIRWISE_OPTIONS(object)); + + switch (prop_id) { + case PROP_PAIRWISE_OPTIONS_PERIODS: + g_value_set_int64(value, options->periods); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_pairwise_options_init(GArrowPairwiseOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = + static_cast(new arrow::compute::PairwiseOptions()); +} + +static void +garrow_pairwise_options_class_init(GArrowPairwiseOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_pairwise_options_set_property; + gobject_class->get_property = garrow_pairwise_options_get_property; + + arrow::compute::PairwiseOptions options; + + GParamSpec *spec; + /** + * GArrowPairwiseOptions:periods: + * + * Periods to shift for applying the binary operation, accepts negative values. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_int64( + "periods", + "Periods", + "Periods to shift for applying the binary operation, accepts negative values", + G_MININT64, + G_MAXINT64, + options.periods, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_PAIRWISE_OPTIONS_PERIODS, spec); +} + +/** + * garrow_pairwise_options_new: + * + * Returns: A newly created #GArrowPairwiseOptions. + * + * Since: 23.0.0 + */ +GArrowPairwiseOptions * +garrow_pairwise_options_new(void) +{ + return GARROW_PAIRWISE_OPTIONS(g_object_new(GARROW_TYPE_PAIRWISE_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -8483,6 +8580,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_pad_options_new_raw(arrow_pad_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "PairwiseOptions") { + const auto arrow_pairwise_options = + static_cast(arrow_options); + auto options = garrow_pairwise_options_new_raw(arrow_pairwise_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -9311,3 +9413,17 @@ garrow_pad_options_get_raw(GArrowPadOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowPairwiseOptions * +garrow_pairwise_options_new_raw(const arrow::compute::PairwiseOptions *arrow_options) +{ + return GARROW_PAIRWISE_OPTIONS( + g_object_new(GARROW_TYPE_PAIRWISE_OPTIONS, "periods", arrow_options->periods, NULL)); +} + +arrow::compute::PairwiseOptions * +garrow_pairwise_options_get_raw(GArrowPairwiseOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 00ffcbe222d..7e64258fb63 100644 --- a/c_glib/arrow-glib/compute.h +++ 
/* GType accessor macro for GArrowPairwiseOptions. */
#define GARROW_TYPE_PAIRWISE_OPTIONS (garrow_pairwise_options_get_type())
GARROW_AVAILABLE_IN_23_0
/* Declares GArrowPairwiseOptions as a derivable type whose parent is
 * GArrowFunctionOptions. */
G_DECLARE_DERIVABLE_TYPE(GArrowPairwiseOptions,
                         garrow_pairwise_options,
                         GARROW,
                         PAIRWISE_OPTIONS,
                         GArrowFunctionOptions)
/* Class structure: adds nothing to the parent class. */
struct _GArrowPairwiseOptionsClass
{
  GArrowFunctionOptionsClass parent_class;
};

GARROW_AVAILABLE_IN_23_0
GArrowPairwiseOptions *
garrow_pairwise_options_new(void);
# Tests for Arrow::PairwiseOptions: the "periods" property and its effect on
# the "pairwise_diff" compute function.
class TestPairwiseOptions < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @options = Arrow::PairwiseOptions.new
  end

  # The default is 1; positive and negative values both round-trip.
  def test_periods_property
    assert_equal(1, @options.periods)
    [2, -1].each do |periods|
      @options.periods = periods
      assert_equal(periods, @options.periods)
    end
  end

  def test_pairwise_diff_function
    input = build_int32_array([1, 2, 4, 7, 11])
    function = Arrow::Function.find("pairwise_diff")
    @options.periods = 2
    result = function.execute([Arrow::ArrayDatum.new(input)], @options).value
    # Each value is diffed against the one two slots earlier, so the first
    # two slots are null.
    expected = build_int32_array([nil, nil, 3, 5, 7])
    assert_equal(expected, result)
  end
end
* + * #GArrowPartitionNthOptions is a class to customize the `partition_nth_indices` + * function. + * * There are many functions to compute data on an array. */ @@ -8376,6 +8379,129 @@ garrow_pairwise_options_new(void) return GARROW_PAIRWISE_OPTIONS(g_object_new(GARROW_TYPE_PAIRWISE_OPTIONS, NULL)); } +enum { + PROP_PARTITION_NTH_OPTIONS_PIVOT = 1, + PROP_PARTITION_NTH_OPTIONS_NULL_PLACEMENT, +}; + +G_DEFINE_TYPE(GArrowPartitionNthOptions, + garrow_partition_nth_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_partition_nth_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_partition_nth_options_get_raw(GARROW_PARTITION_NTH_OPTIONS(object)); + + switch (prop_id) { + case PROP_PARTITION_NTH_OPTIONS_PIVOT: + options->pivot = g_value_get_int64(value); + break; + case PROP_PARTITION_NTH_OPTIONS_NULL_PLACEMENT: + options->null_placement = + static_cast(g_value_get_enum(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_partition_nth_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_partition_nth_options_get_raw(GARROW_PARTITION_NTH_OPTIONS(object)); + + switch (prop_id) { + case PROP_PARTITION_NTH_OPTIONS_PIVOT: + g_value_set_int64(value, options->pivot); + break; + case PROP_PARTITION_NTH_OPTIONS_NULL_PLACEMENT: + g_value_set_enum(value, static_cast(options->null_placement)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_partition_nth_options_init(GArrowPartitionNthOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::PartitionNthOptions()); +} + +static void +garrow_partition_nth_options_class_init(GArrowPartitionNthOptionsClass *klass) +{ + auto gobject_class = 
G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_partition_nth_options_set_property; + gobject_class->get_property = garrow_partition_nth_options_get_property; + + arrow::compute::PartitionNthOptions options; + + GParamSpec *spec; + /** + * GArrowPartitionNthOptions:pivot: + * + * The index into the equivalent sorted array of the partition pivot element. + * + * Since: 23.0.0 + */ + spec = g_param_spec_int64( + "pivot", + "Pivot", + "The index into the equivalent sorted array of the partition pivot element", + 0, + G_MAXINT64, + options.pivot, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_PARTITION_NTH_OPTIONS_PIVOT, spec); + + /** + * GArrowPartitionNthOptions:null-placement: + * + * Whether nulls and NaNs are partitioned at the start or at the end. + * + * Since: 23.0.0 + */ + spec = + g_param_spec_enum("null-placement", + "Null placement", + "Whether nulls and NaNs are partitioned at the start or at the end", + GARROW_TYPE_NULL_PLACEMENT, + static_cast(options.null_placement), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_PARTITION_NTH_OPTIONS_NULL_PLACEMENT, + spec); +} + +/** + * garrow_partition_nth_options_new: + * + * Returns: A newly created #GArrowPartitionNthOptions. 
+ * + * Since: 23.0.0 + */ +GArrowPartitionNthOptions * +garrow_partition_nth_options_new(void) +{ + return GARROW_PARTITION_NTH_OPTIONS( + g_object_new(GARROW_TYPE_PARTITION_NTH_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -8585,6 +8711,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_pairwise_options_new_raw(arrow_pairwise_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "PartitionNthOptions") { + const auto arrow_partition_nth_options = + static_cast(arrow_options); + auto options = garrow_partition_nth_options_new_raw(arrow_partition_nth_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -9427,3 +9558,23 @@ garrow_pairwise_options_get_raw(GArrowPairwiseOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowPartitionNthOptions * +garrow_partition_nth_options_new_raw( + const arrow::compute::PartitionNthOptions *arrow_options) +{ + return GARROW_PARTITION_NTH_OPTIONS( + g_object_new(GARROW_TYPE_PARTITION_NTH_OPTIONS, + "pivot", + arrow_options->pivot, + "null-placement", + static_cast(arrow_options->null_placement), + NULL)); +} + +arrow::compute::PartitionNthOptions * +garrow_partition_nth_options_get_raw(GArrowPartitionNthOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 7e64258fb63..7acc635d2b2 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1479,4 +1479,20 @@ GARROW_AVAILABLE_IN_23_0 GArrowPairwiseOptions * garrow_pairwise_options_new(void); +#define GARROW_TYPE_PARTITION_NTH_OPTIONS (garrow_partition_nth_options_get_type()) +GARROW_AVAILABLE_IN_23_0 
// Creates a new GArrowPartitionNthOptions from the fields of the given
// Arrow C++ PartitionNthOptions.
GArrowPartitionNthOptions *
garrow_partition_nth_options_new_raw(
  const arrow::compute::PartitionNthOptions *arrow_options);
// Returns the result of garrow_function_options_get_raw() for the given
// wrapper, typed as arrow::compute::PartitionNthOptions.
arrow::compute::PartitionNthOptions *
garrow_partition_nth_options_get_raw(GArrowPartitionNthOptions *options);
# Tests for Arrow::PartitionNthOptions: property defaults, property
# round-trips and the effect on the "partition_nth_indices" function.
class TestPartitionNthOptions < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @options = Arrow::PartitionNthOptions.new
  end

  def test_pivot_property
    assert_equal(0, @options.pivot)
    @options.pivot = 2
    assert_equal(2, @options.pivot)
  end

  def test_null_placement_property
    assert_equal(Arrow::NullPlacement::AT_END, @options.null_placement)
    @options.null_placement = :at_start
    assert_equal(Arrow::NullPlacement::AT_START, @options.null_placement)
    @options.null_placement = :at_end
    assert_equal(Arrow::NullPlacement::AT_END, @options.null_placement)
  end

  def test_partition_nth_indices_function
    args = [
      Arrow::ArrayDatum.new(build_int32_array([5, 1, 4, 2, nil, 3])),
    ]
    @options.pivot = 2
    partition_nth_indices_function = Arrow::Function.find("partition_nth_indices")

    # Only the slot at the pivot is guaranteed to hold the index of the
    # element that a full sort would put there; slots on either side are
    # merely partitioned, so the assertions look at slot 2 only.

    # Nulls last: sorted order is [1, 2, 3, 4, 5, nil], so slot 2 holds
    # the index of value 3, which is 5.
    result = partition_nth_indices_function.execute(args, @options).value
    assert_equal(5, result.get_value(2))

    # Nulls first: sorted order is [nil, 1, 2, 3, 4, 5], so slot 2 holds
    # the index of value 2, which is 3.
    @options.null_placement = :at_start
    result = partition_nth_indices_function.execute(args, @options).value
    assert_equal(3, result.get_value(2))
  end
end
* + * #GArrowPivotWiderOptions is a class to customize the `pivot_wider` and + * `hash_pivot_wider` functions. + * * There are many functions to compute data on an array. */ @@ -8502,6 +8505,143 @@ garrow_partition_nth_options_new(void) g_object_new(GARROW_TYPE_PARTITION_NTH_OPTIONS, NULL)); } +enum { + PROP_PIVOT_WIDER_OPTIONS_KEY_NAMES = 1, + PROP_PIVOT_WIDER_OPTIONS_UNEXPECTED_KEY_BEHAVIOR, +}; + +G_DEFINE_TYPE(GArrowPivotWiderOptions, + garrow_pivot_wider_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_pivot_wider_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_pivot_wider_options_get_raw(GARROW_PIVOT_WIDER_OPTIONS(object)); + + switch (prop_id) { + case PROP_PIVOT_WIDER_OPTIONS_KEY_NAMES: + { + auto strv = static_cast(g_value_get_boxed(value)); + options->key_names.clear(); + if (strv) { + for (gchar **p = strv; *p; ++p) { + options->key_names.emplace_back(*p); + } + } + } + break; + case PROP_PIVOT_WIDER_OPTIONS_UNEXPECTED_KEY_BEHAVIOR: + options->unexpected_key_behavior = + static_cast( + g_value_get_enum(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_pivot_wider_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_pivot_wider_options_get_raw(GARROW_PIVOT_WIDER_OPTIONS(object)); + + switch (prop_id) { + case PROP_PIVOT_WIDER_OPTIONS_KEY_NAMES: + { + const auto &names = options->key_names; + auto strv = static_cast(g_new0(gchar *, names.size() + 1)); + for (gsize i = 0; i < names.size(); ++i) { + strv[i] = g_strdup(names[i].c_str()); + } + g_value_take_boxed(value, strv); + } + break; + case PROP_PIVOT_WIDER_OPTIONS_UNEXPECTED_KEY_BEHAVIOR: + g_value_set_enum(value, + static_cast( + options->unexpected_key_behavior)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + 
+static void +garrow_pivot_wider_options_init(GArrowPivotWiderOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::PivotWiderOptions()); +} + +static void +garrow_pivot_wider_options_class_init(GArrowPivotWiderOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_pivot_wider_options_set_property; + gobject_class->get_property = garrow_pivot_wider_options_get_property; + + arrow::compute::PivotWiderOptions options; + + GParamSpec *spec; + /** + * GArrowPivotWiderOptions:key-names: + * + * The values expected in the pivot key column. + * + * Since: 23.0.0 + */ + spec = g_param_spec_boxed("key-names", + "Key names", + "The values expected in the pivot key column", + G_TYPE_STRV, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_PIVOT_WIDER_OPTIONS_KEY_NAMES, + spec); + + /** + * GArrowPivotWiderOptions:unexpected-key-behavior: + * + * The behavior when pivot keys not in key_names are encountered. + * + * Since: 23.0.0 + */ + spec = g_param_spec_enum( + "unexpected-key-behavior", + "Unexpected key behavior", + "The behavior when pivot keys not in key_names are encountered", + GARROW_TYPE_PIVOT_WIDER_UNEXPECTED_KEY_BEHAVIOR, + static_cast(options.unexpected_key_behavior), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_PIVOT_WIDER_OPTIONS_UNEXPECTED_KEY_BEHAVIOR, + spec); +} + +/** + * garrow_pivot_wider_options_new: + * + * Returns: A newly created #GArrowPivotWiderOptions. 
+ * + * Since: 23.0.0 + */ +GArrowPivotWiderOptions * +garrow_pivot_wider_options_new(void) +{ + return GARROW_PIVOT_WIDER_OPTIONS(g_object_new(GARROW_TYPE_PIVOT_WIDER_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -8716,6 +8856,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_partition_nth_options_new_raw(arrow_partition_nth_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "PivotWiderOptions") { + const auto arrow_pivot_wider_options = + static_cast(arrow_options); + auto options = garrow_pivot_wider_options_new_raw(arrow_pivot_wider_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -9578,3 +9723,27 @@ garrow_partition_nth_options_get_raw(GArrowPartitionNthOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowPivotWiderOptions * +garrow_pivot_wider_options_new_raw(const arrow::compute::PivotWiderOptions *arrow_options) +{ + auto strv = static_cast(g_new0(gchar *, arrow_options->key_names.size() + 1)); + for (gsize i = 0; i < arrow_options->key_names.size(); ++i) { + strv[i] = g_strdup(arrow_options->key_names[i].c_str()); + } + return GARROW_PIVOT_WIDER_OPTIONS( + g_object_new(GARROW_TYPE_PIVOT_WIDER_OPTIONS, + "key-names", + strv, + "unexpected-key-behavior", + static_cast( + arrow_options->unexpected_key_behavior), + NULL)); +} + +arrow::compute::PivotWiderOptions * +garrow_pivot_wider_options_get_raw(GArrowPivotWiderOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 7acc635d2b2..9622d1f8713 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1495,4 +1495,37 @@ GARROW_AVAILABLE_IN_23_0 
/**
 * GArrowPivotWiderUnexpectedKeyBehavior:
 * @GARROW_PIVOT_WIDER_UNEXPECTED_KEY_BEHAVIOR_IGNORE: Unexpected pivot keys are ignored
 *   silently.
 * @GARROW_PIVOT_WIDER_UNEXPECTED_KEY_BEHAVIOR_RAISE: Unexpected pivot keys return a
 *   KeyError.
 *
 * They correspond to the values of
 * `arrow::compute::PivotWiderOptions::UnexpectedKeyBehavior`.
 *
 * Since: 23.0.0
 */
/* compute.cpp converts between this enum and the C++ enum with
 * static_cast, so the enumerators must keep the same numeric values as
 * their C++ counterparts. */
typedef enum {
  GARROW_PIVOT_WIDER_UNEXPECTED_KEY_BEHAVIOR_IGNORE,
  GARROW_PIVOT_WIDER_UNEXPECTED_KEY_BEHAVIOR_RAISE,
} GArrowPivotWiderUnexpectedKeyBehavior;
# Tests for Arrow::PivotWiderOptions: defaults and round-trips of the
# "key_names" and "unexpected_key_behavior" properties.
class TestPivotWiderOptions < Test::Unit::TestCase
  include Helper::Buildable

  def setup
    @options = Arrow::PivotWiderOptions.new
  end

  def test_key_names_property
    assert_equal([], @options.key_names)
    names = ["height", "width"]
    @options.key_names = names
    assert_equal(["height", "width"], @options.key_names)
  end

  def test_unexpected_key_behavior_property
    # Ignoring unexpected keys is the default.
    assert_equal(Arrow::PivotWiderUnexpectedKeyBehavior::IGNORE,
                 @options.unexpected_key_behavior)
    [
      [:raise, Arrow::PivotWiderUnexpectedKeyBehavior::RAISE],
      [:ignore, Arrow::PivotWiderUnexpectedKeyBehavior::IGNORE],
    ].each do |nick, expected|
      @options.unexpected_key_behavior = nick
      assert_equal(expected, @options.unexpected_key_behavior)
    end
  end
end
b/c_glib/arrow-glib/compute.cpp index b956319e4d3..b3d8cc5b7e7 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -304,6 +304,9 @@ G_BEGIN_DECLS * #GArrowPivotWiderOptions is a class to customize the `pivot_wider` and * `hash_pivot_wider` functions. * + * #GArrowRankQuantileOptions is a class to customize the `rank_quantile` and + * `rank_normal` functions. + * * There are many functions to compute data on an array. */ @@ -8642,6 +8645,155 @@ garrow_pivot_wider_options_new(void) return GARROW_PIVOT_WIDER_OPTIONS(g_object_new(GARROW_TYPE_PIVOT_WIDER_OPTIONS, NULL)); } +enum { + PROP_RANK_QUANTILE_OPTIONS_NULL_PLACEMENT = 1, +}; + +G_DEFINE_TYPE(GArrowRankQuantileOptions, + garrow_rank_quantile_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_rank_quantile_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_rank_quantile_options_get_raw(GARROW_RANK_QUANTILE_OPTIONS(object)); + + switch (prop_id) { + case PROP_RANK_QUANTILE_OPTIONS_NULL_PLACEMENT: + options->null_placement = + static_cast(g_value_get_enum(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_rank_quantile_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_rank_quantile_options_get_raw(GARROW_RANK_QUANTILE_OPTIONS(object)); + + switch (prop_id) { + case PROP_RANK_QUANTILE_OPTIONS_NULL_PLACEMENT: + g_value_set_enum(value, static_cast(options->null_placement)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_rank_quantile_options_init(GArrowRankQuantileOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::RankQuantileOptions()); +} + +static void 
+garrow_rank_quantile_options_class_init(GArrowRankQuantileOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_rank_quantile_options_set_property; + gobject_class->get_property = garrow_rank_quantile_options_get_property; + + auto options = arrow::compute::RankQuantileOptions::Defaults(); + + GParamSpec *spec; + /** + * GArrowRankQuantileOptions:null-placement: + * + * Whether nulls and NaNs are placed at the start or at the end. + * + * Since: 23.0.0 + */ + spec = g_param_spec_enum("null-placement", + "Null placement", + "Whether nulls and NaNs are placed " + "at the start or at the end.", + GARROW_TYPE_NULL_PLACEMENT, + static_cast(options.null_placement), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_RANK_QUANTILE_OPTIONS_NULL_PLACEMENT, + spec); +} + +/** + * garrow_rank_quantile_options_new: + * + * Returns: A newly created #GArrowRankQuantileOptions. + * + * Since: 23.0.0 + */ +GArrowRankQuantileOptions * +garrow_rank_quantile_options_new(void) +{ + return GARROW_RANK_QUANTILE_OPTIONS( + g_object_new(GARROW_TYPE_RANK_QUANTILE_OPTIONS, nullptr)); +} + +/** + * garrow_rank_quantile_options_get_sort_keys: + * @options: A #GArrowRankQuantileOptions. + * + * Returns: (transfer full) (element-type GArrowSortKey): + * The sort keys to be used. + * + * Since: 23.0.0 + */ +GList * +garrow_rank_quantile_options_get_sort_keys(GArrowRankQuantileOptions *options) +{ + auto arrow_options = garrow_rank_quantile_options_get_raw(options); + return garrow_sort_keys_new_raw(arrow_options->sort_keys); +} + +/** + * garrow_rank_quantile_options_set_sort_keys: + * @options: A #GArrowRankQuantileOptions. + * @sort_keys: (element-type GArrowSortKey): The sort keys to be used. + * + * Set sort keys to be used. 
+ * + * Since: 23.0.0 + */ +void +garrow_rank_quantile_options_set_sort_keys(GArrowRankQuantileOptions *options, + GList *sort_keys) +{ + auto arrow_options = garrow_rank_quantile_options_get_raw(options); + garrow_raw_sort_keys_set(arrow_options->sort_keys, sort_keys); +} + +/** + * garrow_rank_quantile_options_add_sort_key: + * @options: A #GArrowRankQuantileOptions. + * @sort_key: The sort key to be added. + * + * Add a sort key to be used. + * + * Since: 23.0.0 + */ +void +garrow_rank_quantile_options_add_sort_key(GArrowRankQuantileOptions *options, + GArrowSortKey *sort_key) +{ + auto arrow_options = garrow_rank_quantile_options_get_raw(options); + garrow_raw_sort_keys_add(arrow_options->sort_keys, sort_key); +} + G_END_DECLS arrow::Result @@ -8861,6 +9013,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_pivot_wider_options_new_raw(arrow_pivot_wider_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "RankQuantileOptions") { + const auto arrow_rank_quantile_options = + static_cast(arrow_options); + auto options = garrow_rank_quantile_options_new_raw(arrow_rank_quantile_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -9747,3 +9904,24 @@ garrow_pivot_wider_options_get_raw(GArrowPivotWiderOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowRankQuantileOptions * +garrow_rank_quantile_options_new_raw( + const arrow::compute::RankQuantileOptions *arrow_options) +{ + auto options = + GARROW_RANK_QUANTILE_OPTIONS(g_object_new(GARROW_TYPE_RANK_QUANTILE_OPTIONS, + "null-placement", + arrow_options->null_placement, + nullptr)); + auto arrow_new_options = garrow_rank_quantile_options_get_raw(options); + arrow_new_options->sort_keys = arrow_options->sort_keys; + return 
options; +} + +arrow::compute::RankQuantileOptions * +garrow_rank_quantile_options_get_raw(GArrowRankQuantileOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 9622d1f8713..541c4fe3b2a 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1528,4 +1528,31 @@ GARROW_AVAILABLE_IN_23_0 GArrowPivotWiderOptions * garrow_pivot_wider_options_new(void); +#define GARROW_TYPE_RANK_QUANTILE_OPTIONS (garrow_rank_quantile_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowRankQuantileOptions, + garrow_rank_quantile_options, + GARROW, + RANK_QUANTILE_OPTIONS, + GArrowFunctionOptions) +struct _GArrowRankQuantileOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowRankQuantileOptions * +garrow_rank_quantile_options_new(void); +GARROW_AVAILABLE_IN_23_0 +GList * +garrow_rank_quantile_options_get_sort_keys(GArrowRankQuantileOptions *options); +GARROW_AVAILABLE_IN_23_0 +void +garrow_rank_quantile_options_set_sort_keys(GArrowRankQuantileOptions *options, + GList *sort_keys); +GARROW_AVAILABLE_IN_23_0 +void +garrow_rank_quantile_options_add_sort_key(GArrowRankQuantileOptions *options, + GArrowSortKey *sort_key); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index c5f9359bbfa..0bade4eb3ed 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -274,3 +274,9 @@ garrow_pivot_wider_options_new_raw( const arrow::compute::PivotWiderOptions *arrow_options); arrow::compute::PivotWiderOptions * garrow_pivot_wider_options_get_raw(GArrowPivotWiderOptions *options); + +GArrowRankQuantileOptions * +garrow_rank_quantile_options_new_raw( + const arrow::compute::RankQuantileOptions *arrow_options); +arrow::compute::RankQuantileOptions * +garrow_rank_quantile_options_get_raw(GArrowRankQuantileOptions 
*options); diff --git a/c_glib/test/test-rank-quantile-options.rb b/c_glib/test/test-rank-quantile-options.rb new file mode 100644 index 00000000000..f4c2ed9f913 --- /dev/null +++ b/c_glib/test/test-rank-quantile-options.rb @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestRankQuantileOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::RankQuantileOptions.new + end + + def test_sort_keys + sort_keys = [ + Arrow::SortKey.new("column1", :ascending), + Arrow::SortKey.new("column2", :descending), + ] + @options.sort_keys = sort_keys + assert_equal(sort_keys, @options.sort_keys) + end + + def test_add_sort_key + @options.add_sort_key(Arrow::SortKey.new("column1", :ascending)) + @options.add_sort_key(Arrow::SortKey.new("column2", :descending)) + assert_equal([ + Arrow::SortKey.new("column1", :ascending), + Arrow::SortKey.new("column2", :descending), + ], + @options.sort_keys) + end + + def test_null_placement + assert_equal(Arrow::NullPlacement::AT_END, @options.null_placement) + @options.null_placement = :at_start + assert_equal(Arrow::NullPlacement::AT_START, @options.null_placement) + end + + def test_rank_quantile_function + args = [ + Arrow::ArrayDatum.new(build_int32_array([nil, 1, nil, 2, nil])), + ] + @options.null_placement = :at_start + rank_quantile_function = Arrow::Function.find("rank_quantile") + result = rank_quantile_function.execute(args, @options).value + expected = build_double_array([0.3, 0.7, 0.3, 0.9, 0.3]) + assert_equal(expected, result) + end +end + From 2670d07ee04de899e8d21cd36231ebabe83dd40a Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 17:52:52 +0100 Subject: [PATCH 21/33] Add GArrowReplaceSliceOptions --- c_glib/arrow-glib/compute.cpp | 211 ++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 16 ++ c_glib/arrow-glib/compute.hpp | 6 + c_glib/test/test-replace-slice-options.rb | 59 ++++++ 4 files changed, 292 insertions(+) create mode 100644 c_glib/test/test-replace-slice-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index b3d8cc5b7e7..4e8ca7ada16 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -307,6 +307,9 @@ G_BEGIN_DECLS * #GArrowRankQuantileOptions is a class to 
customize the `rank_quantile` and * `rank_normal` functions. * + * #GArrowReplaceSliceOptions is a class to customize the `utf8_replace_slice` and + * `binary_replace_slice` functions. + * * There are many functions to compute data on an array. */ @@ -8794,6 +8797,188 @@ garrow_rank_quantile_options_add_sort_key(GArrowRankQuantileOptions *options, garrow_raw_sort_keys_add(arrow_options->sort_keys, sort_key); } +enum { + PROP_REPLACE_SLICE_OPTIONS_START = 1, + PROP_REPLACE_SLICE_OPTIONS_STOP, + PROP_REPLACE_SLICE_OPTIONS_REPLACEMENT, +}; + +typedef struct _GArrowReplaceSliceOptionsPrivate GArrowReplaceSliceOptionsPrivate; +struct _GArrowReplaceSliceOptionsPrivate +{ + gchar *replacement; +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowReplaceSliceOptions, + garrow_replace_slice_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +#define GARROW_REPLACE_SLICE_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + garrow_replace_slice_options_get_instance_private( \ + GARROW_REPLACE_SLICE_OPTIONS(object))) + +static void +garrow_replace_slice_options_dispose(GObject *object) +{ + auto priv = GARROW_REPLACE_SLICE_OPTIONS_GET_PRIVATE(object); + if (priv->replacement) { + g_free(priv->replacement); + priv->replacement = nullptr; + } + G_OBJECT_CLASS(garrow_replace_slice_options_parent_class)->dispose(object); +} + +static void +garrow_replace_slice_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_replace_slice_options_get_raw(GARROW_REPLACE_SLICE_OPTIONS(object)); + auto priv = GARROW_REPLACE_SLICE_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_REPLACE_SLICE_OPTIONS_START: + options->start = g_value_get_int64(value); + break; + case PROP_REPLACE_SLICE_OPTIONS_STOP: + options->stop = g_value_get_int64(value); + break; + case PROP_REPLACE_SLICE_OPTIONS_REPLACEMENT: + { + const gchar *replacement = g_value_get_string(value); + if (priv->replacement) { + g_free(priv->replacement); + } + 
priv->replacement = g_strdup(replacement); + options->replacement = replacement ? replacement : ""; + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_replace_slice_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_replace_slice_options_get_raw(GARROW_REPLACE_SLICE_OPTIONS(object)); + auto priv = GARROW_REPLACE_SLICE_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_REPLACE_SLICE_OPTIONS_START: + g_value_set_int64(value, options->start); + break; + case PROP_REPLACE_SLICE_OPTIONS_STOP: + g_value_set_int64(value, options->stop); + break; + case PROP_REPLACE_SLICE_OPTIONS_REPLACEMENT: + g_value_set_string(value, + priv->replacement ? priv->replacement + : options->replacement.c_str()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_replace_slice_options_init(GArrowReplaceSliceOptions *object) +{ + auto priv = GARROW_REPLACE_SLICE_OPTIONS_GET_PRIVATE(object); + priv->replacement = nullptr; + auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + arrow_priv->options = static_cast( + new arrow::compute::ReplaceSliceOptions()); + // Sync the private replacement string with the C++ options + auto arrow_options = + garrow_replace_slice_options_get_raw(GARROW_REPLACE_SLICE_OPTIONS(object)); + priv->replacement = g_strdup(arrow_options->replacement.c_str()); +} + +static void +garrow_replace_slice_options_class_init(GArrowReplaceSliceOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_replace_slice_options_dispose; + gobject_class->set_property = garrow_replace_slice_options_set_property; + gobject_class->get_property = garrow_replace_slice_options_get_property; + + arrow::compute::ReplaceSliceOptions options; + + GParamSpec *spec; + /** + * GArrowReplaceSliceOptions:start: + * + * 
Index to start slicing at. + * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("start", + "Start", + "Index to start slicing at", + G_MININT64, + G_MAXINT64, + options.start, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_REPLACE_SLICE_OPTIONS_START, spec); + + /** + * GArrowReplaceSliceOptions:stop: + * + * Index to stop slicing at. + * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("stop", + "Stop", + "Index to stop slicing at", + G_MININT64, + G_MAXINT64, + options.stop, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_REPLACE_SLICE_OPTIONS_STOP, spec); + + /** + * GArrowReplaceSliceOptions:replacement: + * + * String to replace the slice with. + * + * Since: 23.0.0 + */ + spec = g_param_spec_string("replacement", + "Replacement", + "String to replace the slice with", + options.replacement.c_str(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_REPLACE_SLICE_OPTIONS_REPLACEMENT, + spec); +} + +/** + * garrow_replace_slice_options_new: + * + * Returns: A newly created #GArrowReplaceSliceOptions. 
+ * + * Since: 23.0.0 + */ +GArrowReplaceSliceOptions * +garrow_replace_slice_options_new(void) +{ + return GARROW_REPLACE_SLICE_OPTIONS( + g_object_new(GARROW_TYPE_REPLACE_SLICE_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -9018,6 +9203,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_rank_quantile_options_new_raw(arrow_rank_quantile_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "ReplaceSliceOptions") { + const auto arrow_replace_slice_options = + static_cast(arrow_options); + auto options = garrow_replace_slice_options_new_raw(arrow_replace_slice_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -9925,3 +10115,24 @@ garrow_rank_quantile_options_get_raw(GArrowRankQuantileOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowReplaceSliceOptions * +garrow_replace_slice_options_new_raw( + const arrow::compute::ReplaceSliceOptions *arrow_options) +{ + return GARROW_REPLACE_SLICE_OPTIONS(g_object_new(GARROW_TYPE_REPLACE_SLICE_OPTIONS, + "start", + arrow_options->start, + "stop", + arrow_options->stop, + "replacement", + arrow_options->replacement.c_str(), + NULL)); +} + +arrow::compute::ReplaceSliceOptions * +garrow_replace_slice_options_get_raw(GArrowReplaceSliceOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 541c4fe3b2a..35653e8f703 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1555,4 +1555,20 @@ void garrow_rank_quantile_options_add_sort_key(GArrowRankQuantileOptions *options, GArrowSortKey *sort_key); +#define GARROW_TYPE_REPLACE_SLICE_OPTIONS 
(garrow_replace_slice_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowReplaceSliceOptions, + garrow_replace_slice_options, + GARROW, + REPLACE_SLICE_OPTIONS, + GArrowFunctionOptions) +struct _GArrowReplaceSliceOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowReplaceSliceOptions * +garrow_replace_slice_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 0bade4eb3ed..8f0c0cfbda5 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -280,3 +280,9 @@ garrow_rank_quantile_options_new_raw( const arrow::compute::RankQuantileOptions *arrow_options); arrow::compute::RankQuantileOptions * garrow_rank_quantile_options_get_raw(GArrowRankQuantileOptions *options); + +GArrowReplaceSliceOptions * +garrow_replace_slice_options_new_raw( + const arrow::compute::ReplaceSliceOptions *arrow_options); +arrow::compute::ReplaceSliceOptions * +garrow_replace_slice_options_get_raw(GArrowReplaceSliceOptions *options); diff --git a/c_glib/test/test-replace-slice-options.rb b/c_glib/test/test-replace-slice-options.rb new file mode 100644 index 00000000000..05db7b84078 --- /dev/null +++ b/c_glib/test/test-replace-slice-options.rb @@ -0,0 +1,59 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestReplaceSliceOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::ReplaceSliceOptions.new + end + + def test_start_property + assert_equal(0, @options.start) + @options.start = 1 + assert_equal(1, @options.start) + end + + def test_stop_property + assert_equal(0, @options.stop) + @options.stop = 2 + assert_equal(2, @options.stop) + end + + def test_replacement_property + assert_equal("", @options.replacement) + @options.replacement = "XX" + assert_equal("XX", @options.replacement) + end + + def test_utf8_replace_slice_function + args = [ + Arrow::ArrayDatum.new(build_string_array(["hello", "world"])), + ] + @options.start = 1 + @options.stop = 3 + @options.replacement = "XX" + utf8_replace_slice_function = Arrow::Function.find("utf8_replace_slice") + result = utf8_replace_slice_function.execute(args, @options).value + # Replace slice from index 1 to 3 (exclusive) with "XX" + # "hello" -> "hXXlo" (replaces "el" with "XX") + # "world" -> "wXXld" (replaces "or" with "XX") + expected = build_string_array(["hXXlo", "wXXld"]) + assert_equal(expected, result) + end +end + From 280d622f0f0e723ae4eb4aa3b31571ea8d0949a4 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 17:58:31 +0100 Subject: [PATCH 22/33] Add GArrowReplaceSubstringOptions --- c_glib/arrow-glib/compute.cpp | 230 ++++++++++++++++++ c_glib/arrow-glib/compute.h | 17 ++ c_glib/arrow-glib/compute.hpp | 6 + c_glib/test/test-replace-substring-options.rb | 68 ++++++ 4 files changed, 321 insertions(+) create mode 100644 c_glib/test/test-replace-substring-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 4e8ca7ada16..f381d24e0b3 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -310,6 +310,9 @@ G_BEGIN_DECLS * #GArrowReplaceSliceOptions is a class to customize the 
`utf8_replace_slice` and * `binary_replace_slice` functions. * + * #GArrowReplaceSubstringOptions is a class to customize the `replace_substring` and + * `replace_substring_regex` functions. + * * There are many functions to compute data on an array. */ @@ -8979,6 +8982,205 @@ garrow_replace_slice_options_new(void) g_object_new(GARROW_TYPE_REPLACE_SLICE_OPTIONS, NULL)); } +enum { + PROP_REPLACE_SUBSTRING_OPTIONS_PATTERN = 1, + PROP_REPLACE_SUBSTRING_OPTIONS_REPLACEMENT, + PROP_REPLACE_SUBSTRING_OPTIONS_MAX_REPLACEMENTS, +}; + +typedef struct _GArrowReplaceSubstringOptionsPrivate GArrowReplaceSubstringOptionsPrivate; +struct _GArrowReplaceSubstringOptionsPrivate +{ + gchar *pattern; + gchar *replacement; +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowReplaceSubstringOptions, + garrow_replace_substring_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +#define GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + garrow_replace_substring_options_get_instance_private( \ + GARROW_REPLACE_SUBSTRING_OPTIONS(object))) + +static void +garrow_replace_substring_options_dispose(GObject *object) +{ + auto priv = GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object); + if (priv->pattern) { + g_free(priv->pattern); + priv->pattern = nullptr; + } + if (priv->replacement) { + g_free(priv->replacement); + priv->replacement = nullptr; + } + G_OBJECT_CLASS(garrow_replace_substring_options_parent_class)->dispose(object); +} + +static void +garrow_replace_substring_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_replace_substring_options_get_raw(GARROW_REPLACE_SUBSTRING_OPTIONS(object)); + auto priv = GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_REPLACE_SUBSTRING_OPTIONS_PATTERN: + { + const gchar *pattern = g_value_get_string(value); + if (priv->pattern) { + g_free(priv->pattern); + } + priv->pattern = g_strdup(pattern); + options->pattern = pattern ? 
pattern : ""; + } + break; + case PROP_REPLACE_SUBSTRING_OPTIONS_REPLACEMENT: + { + const gchar *replacement = g_value_get_string(value); + if (priv->replacement) { + g_free(priv->replacement); + } + priv->replacement = g_strdup(replacement); + options->replacement = replacement ? replacement : ""; + } + break; + case PROP_REPLACE_SUBSTRING_OPTIONS_MAX_REPLACEMENTS: + options->max_replacements = g_value_get_int64(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_replace_substring_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_replace_substring_options_get_raw(GARROW_REPLACE_SUBSTRING_OPTIONS(object)); + auto priv = GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_REPLACE_SUBSTRING_OPTIONS_PATTERN: + g_value_set_string(value, priv->pattern ? priv->pattern : options->pattern.c_str()); + break; + case PROP_REPLACE_SUBSTRING_OPTIONS_REPLACEMENT: + g_value_set_string(value, + priv->replacement ? 
priv->replacement + : options->replacement.c_str()); + break; + case PROP_REPLACE_SUBSTRING_OPTIONS_MAX_REPLACEMENTS: + g_value_set_int64(value, options->max_replacements); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_replace_substring_options_init(GArrowReplaceSubstringOptions *object) +{ + auto priv = GARROW_REPLACE_SUBSTRING_OPTIONS_GET_PRIVATE(object); + priv->pattern = nullptr; + priv->replacement = nullptr; + auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + arrow_priv->options = static_cast( + new arrow::compute::ReplaceSubstringOptions()); + // Sync the private strings with the C++ options + auto arrow_options = + garrow_replace_substring_options_get_raw(GARROW_REPLACE_SUBSTRING_OPTIONS(object)); + priv->pattern = g_strdup(arrow_options->pattern.c_str()); + priv->replacement = g_strdup(arrow_options->replacement.c_str()); +} + +static void +garrow_replace_substring_options_class_init(GArrowReplaceSubstringOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_replace_substring_options_dispose; + gobject_class->set_property = garrow_replace_substring_options_set_property; + gobject_class->get_property = garrow_replace_substring_options_get_property; + + arrow::compute::ReplaceSubstringOptions options; + + GParamSpec *spec; + /** + * GArrowReplaceSubstringOptions:pattern: + * + * Pattern to match, literal, or regular expression depending on which kernel is used. + * + * Since: 23.0.0 + */ + spec = g_param_spec_string( + "pattern", + "Pattern", + "Pattern to match, literal, or regular expression depending on which kernel is used", + options.pattern.c_str(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_REPLACE_SUBSTRING_OPTIONS_PATTERN, + spec); + + /** + * GArrowReplaceSubstringOptions:replacement: + * + * String to replace the pattern with. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_string("replacement", + "Replacement", + "String to replace the pattern with", + options.replacement.c_str(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_REPLACE_SUBSTRING_OPTIONS_REPLACEMENT, + spec); + + /** + * GArrowReplaceSubstringOptions:max-replacements: + * + * Max number of substrings to replace (-1 means unbounded). + * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("max-replacements", + "Max Replacements", + "Max number of substrings to replace (-1 means unbounded)", + G_MININT64, + G_MAXINT64, + options.max_replacements, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_REPLACE_SUBSTRING_OPTIONS_MAX_REPLACEMENTS, + spec); +} + +/** + * garrow_replace_substring_options_new: + * + * Returns: A newly created #GArrowReplaceSubstringOptions. + * + * Since: 23.0.0 + */ +GArrowReplaceSubstringOptions * +garrow_replace_substring_options_new(void) +{ + return GARROW_REPLACE_SUBSTRING_OPTIONS( + g_object_new(GARROW_TYPE_REPLACE_SUBSTRING_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -9208,6 +9410,12 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_replace_slice_options_new_raw(arrow_replace_slice_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "ReplaceSubstringOptions") { + const auto arrow_replace_substring_options = + static_cast(arrow_options); + auto options = + garrow_replace_substring_options_new_raw(arrow_replace_substring_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -10136,3 +10344,25 @@ garrow_replace_slice_options_get_raw(GArrowReplaceSliceOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + 
+GArrowReplaceSubstringOptions * +garrow_replace_substring_options_new_raw( + const arrow::compute::ReplaceSubstringOptions *arrow_options) +{ + return GARROW_REPLACE_SUBSTRING_OPTIONS( + g_object_new(GARROW_TYPE_REPLACE_SUBSTRING_OPTIONS, + "pattern", + arrow_options->pattern.c_str(), + "replacement", + arrow_options->replacement.c_str(), + "max-replacements", + arrow_options->max_replacements, + NULL)); +} + +arrow::compute::ReplaceSubstringOptions * +garrow_replace_substring_options_get_raw(GArrowReplaceSubstringOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 35653e8f703..8ceab71b0fc 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1571,4 +1571,21 @@ GARROW_AVAILABLE_IN_23_0 GArrowReplaceSliceOptions * garrow_replace_slice_options_new(void); +#define GARROW_TYPE_REPLACE_SUBSTRING_OPTIONS \ + (garrow_replace_substring_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowReplaceSubstringOptions, + garrow_replace_substring_options, + GARROW, + REPLACE_SUBSTRING_OPTIONS, + GArrowFunctionOptions) +struct _GArrowReplaceSubstringOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowReplaceSubstringOptions * +garrow_replace_substring_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 8f0c0cfbda5..a881e974299 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -286,3 +286,9 @@ garrow_replace_slice_options_new_raw( const arrow::compute::ReplaceSliceOptions *arrow_options); arrow::compute::ReplaceSliceOptions * garrow_replace_slice_options_get_raw(GArrowReplaceSliceOptions *options); + +GArrowReplaceSubstringOptions * +garrow_replace_substring_options_new_raw( + const arrow::compute::ReplaceSubstringOptions *arrow_options); 
+arrow::compute::ReplaceSubstringOptions * +garrow_replace_substring_options_get_raw(GArrowReplaceSubstringOptions *options); diff --git a/c_glib/test/test-replace-substring-options.rb b/c_glib/test/test-replace-substring-options.rb new file mode 100644 index 00000000000..838c5c261e1 --- /dev/null +++ b/c_glib/test/test-replace-substring-options.rb @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestReplaceSubstringOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::ReplaceSubstringOptions.new + end + + def test_pattern_property + assert_equal("", @options.pattern) + @options.pattern = "foo" + assert_equal("foo", @options.pattern) + end + + def test_replacement_property + assert_equal("", @options.replacement) + @options.replacement = "bar" + assert_equal("bar", @options.replacement) + end + + def test_max_replacements_property + assert_equal(-1, @options.max_replacements) + @options.max_replacements = 1 + assert_equal(1, @options.max_replacements) + end + + def test_replace_substring_function + args = [ + Arrow::ArrayDatum.new(build_string_array(["foo", "this foo that foo", "bar"])), + ] + @options.pattern = "foo" + @options.replacement = "baz" + replace_substring_function = Arrow::Function.find("replace_substring") + result = replace_substring_function.execute(args, @options).value + expected = build_string_array(["baz", "this baz that baz", "bar"]) + assert_equal(expected, result) + end + + def test_replace_substring_with_max_replacements + args = [ + Arrow::ArrayDatum.new(build_string_array(["this foo that foo"])), + ] + @options.pattern = "foo" + @options.replacement = "baz" + @options.max_replacements = 1 + replace_substring_function = Arrow::Function.find("replace_substring") + result = replace_substring_function.execute(args, @options).value + expected = build_string_array(["this baz that foo"]) + assert_equal(expected, result) + end +end + From bf02ca399f89a808a66c4f1e19dbcd7afa282ca1 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 21:52:15 +0100 Subject: [PATCH 23/33] Add GArrowRoundBinaryOptions --- c_glib/arrow-glib/compute.cpp | 118 +++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 16 +++ c_glib/arrow-glib/compute.hpp | 6 ++ c_glib/test/test-round-binary-options.rb | 43 +++++++++ 4 files changed, 183 insertions(+) create mode 100644 c_glib/test/test-round-binary-options.rb 
diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index f381d24e0b3..73378881d28 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -313,6 +313,8 @@ G_BEGIN_DECLS * #GArrowReplaceSubstringOptions is a class to customize the `replace_substring` and * `replace_substring_regex` functions. * + * #GArrowRoundBinaryOptions is a class to customize the `round_binary` function. + * * There are many functions to compute data on an array. */ @@ -9181,6 +9183,99 @@ garrow_replace_substring_options_new(void) g_object_new(GARROW_TYPE_REPLACE_SUBSTRING_OPTIONS, NULL)); } +enum { + PROP_ROUND_BINARY_OPTIONS_MODE = 1, +}; + +G_DEFINE_TYPE(GArrowRoundBinaryOptions, + garrow_round_binary_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_round_binary_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_round_binary_options_get_raw(GARROW_ROUND_BINARY_OPTIONS(object)); + + switch (prop_id) { + case PROP_ROUND_BINARY_OPTIONS_MODE: + options->round_mode = static_cast(g_value_get_enum(value)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_round_binary_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_round_binary_options_get_raw(GARROW_ROUND_BINARY_OPTIONS(object)); + + switch (prop_id) { + case PROP_ROUND_BINARY_OPTIONS_MODE: + g_value_set_enum(value, static_cast(options->round_mode)); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_round_binary_options_init(GArrowRoundBinaryOptions *object) +{ + auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + arrow_priv->options = static_cast( + new arrow::compute::RoundBinaryOptions()); +} + +static void +garrow_round_binary_options_class_init(GArrowRoundBinaryOptionsClass *klass) +{ 
+ auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_round_binary_options_set_property; + gobject_class->get_property = garrow_round_binary_options_get_property; + + arrow::compute::RoundBinaryOptions options; + + GParamSpec *spec; + /** + * GArrowRoundBinaryOptions:mode: + * + * The rounding and tie-breaking mode. + * + * Since: 23.0.0 + */ + spec = g_param_spec_enum("mode", + "Mode", + "The rounding and tie-breaking mode", + GARROW_TYPE_ROUND_MODE, + static_cast(options.round_mode), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ROUND_BINARY_OPTIONS_MODE, spec); +} + +/** + * garrow_round_binary_options_new: + * + * Returns: A newly created #GArrowRoundBinaryOptions. + * + * Since: 23.0.0 + */ +GArrowRoundBinaryOptions * +garrow_round_binary_options_new(void) +{ + return GARROW_ROUND_BINARY_OPTIONS( + g_object_new(GARROW_TYPE_ROUND_BINARY_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -9416,6 +9511,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt auto options = garrow_replace_substring_options_new_raw(arrow_replace_substring_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "RoundBinaryOptions") { + const auto arrow_round_binary_options = + static_cast(arrow_options); + auto options = garrow_round_binary_options_new_raw(arrow_round_binary_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -10366,3 +10466,21 @@ garrow_replace_substring_options_get_raw(GArrowReplaceSubstringOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowRoundBinaryOptions * +garrow_round_binary_options_new_raw( + const arrow::compute::RoundBinaryOptions *arrow_options) +{ + return GARROW_ROUND_BINARY_OPTIONS( + g_object_new(GARROW_TYPE_ROUND_BINARY_OPTIONS, + 
"mode", + static_cast(arrow_options->round_mode), + NULL)); +} + +arrow::compute::RoundBinaryOptions * +garrow_round_binary_options_get_raw(GArrowRoundBinaryOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 8ceab71b0fc..ffc6eef2cfa 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1588,4 +1588,20 @@ GARROW_AVAILABLE_IN_23_0 GArrowReplaceSubstringOptions * garrow_replace_substring_options_new(void); +#define GARROW_TYPE_ROUND_BINARY_OPTIONS (garrow_round_binary_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowRoundBinaryOptions, + garrow_round_binary_options, + GARROW, + ROUND_BINARY_OPTIONS, + GArrowFunctionOptions) +struct _GArrowRoundBinaryOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowRoundBinaryOptions * +garrow_round_binary_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index a881e974299..8535c35c838 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -292,3 +292,9 @@ garrow_replace_substring_options_new_raw( const arrow::compute::ReplaceSubstringOptions *arrow_options); arrow::compute::ReplaceSubstringOptions * garrow_replace_substring_options_get_raw(GArrowReplaceSubstringOptions *options); + +GArrowRoundBinaryOptions * +garrow_round_binary_options_new_raw( + const arrow::compute::RoundBinaryOptions *arrow_options); +arrow::compute::RoundBinaryOptions * +garrow_round_binary_options_get_raw(GArrowRoundBinaryOptions *options); diff --git a/c_glib/test/test-round-binary-options.rb b/c_glib/test/test-round-binary-options.rb new file mode 100644 index 00000000000..4974acabc81 --- /dev/null +++ b/c_glib/test/test-round-binary-options.rb @@ -0,0 +1,43 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more 
contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestRoundBinaryOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::RoundBinaryOptions.new + end + + def test_mode + assert_equal(Arrow::RoundMode::HALF_TO_EVEN, @options.mode) + @options.mode = :down + assert_equal(Arrow::RoundMode::DOWN, @options.mode) + end + + def test_round_binary_function + args = [ + Arrow::ArrayDatum.new(build_double_array([5.0])), + Arrow::ArrayDatum.new(build_int32_array([-1])), + ] + @options.mode = :half_towards_zero + round_binary_function = Arrow::Function.find("round_binary") + result = round_binary_function.execute(args, @options).value + expected = build_double_array([0.0]) + assert_equal(expected, result) + end +end + From b9a9ae801537d6c0793e9ada4a35ccd2cd1261e9 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 22:08:20 +0100 Subject: [PATCH 24/33] Add GArrowRoundTemporalOptions --- c_glib/arrow-glib/compute.cpp | 231 +++++++++++++++++++++ c_glib/arrow-glib/compute.h | 50 +++++ c_glib/arrow-glib/compute.hpp | 6 + c_glib/test/test-round-temporal-options.rb | 68 ++++++ 4 files changed, 355 insertions(+) create mode 100644 c_glib/test/test-round-temporal-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 
73378881d28..f818fe3402f 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -315,6 +315,9 @@ G_BEGIN_DECLS * * #GArrowRoundBinaryOptions is a class to customize the `round_binary` function. * + * #GArrowRoundTemporalOptions is a class to customize the `round_temporal`, + * `floor_temporal`, and `ceil_temporal` functions. + * * There are many functions to compute data on an array. */ @@ -9276,6 +9279,203 @@ garrow_round_binary_options_new(void) g_object_new(GARROW_TYPE_ROUND_BINARY_OPTIONS, NULL)); } +enum { + PROP_ROUND_TEMPORAL_OPTIONS_MULTIPLE = 1, + PROP_ROUND_TEMPORAL_OPTIONS_UNIT, + PROP_ROUND_TEMPORAL_OPTIONS_WEEK_STARTS_MONDAY, + PROP_ROUND_TEMPORAL_OPTIONS_CEIL_IS_STRICTLY_GREATER, + PROP_ROUND_TEMPORAL_OPTIONS_CALENDAR_BASED_ORIGIN, +}; + +G_DEFINE_TYPE(GArrowRoundTemporalOptions, + garrow_round_temporal_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_round_temporal_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = + garrow_round_temporal_options_get_raw(GARROW_ROUND_TEMPORAL_OPTIONS(object)); + + switch (prop_id) { + case PROP_ROUND_TEMPORAL_OPTIONS_MULTIPLE: + options->multiple = g_value_get_int(value); + break; + case PROP_ROUND_TEMPORAL_OPTIONS_UNIT: + options->unit = static_cast(g_value_get_enum(value)); + break; + case PROP_ROUND_TEMPORAL_OPTIONS_WEEK_STARTS_MONDAY: + options->week_starts_monday = g_value_get_boolean(value); + break; + case PROP_ROUND_TEMPORAL_OPTIONS_CEIL_IS_STRICTLY_GREATER: + options->ceil_is_strictly_greater = g_value_get_boolean(value); + break; + case PROP_ROUND_TEMPORAL_OPTIONS_CALENDAR_BASED_ORIGIN: + options->calendar_based_origin = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_round_temporal_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = + 
garrow_round_temporal_options_get_raw(GARROW_ROUND_TEMPORAL_OPTIONS(object)); + + switch (prop_id) { + case PROP_ROUND_TEMPORAL_OPTIONS_MULTIPLE: + g_value_set_int(value, options->multiple); + break; + case PROP_ROUND_TEMPORAL_OPTIONS_UNIT: + g_value_set_enum(value, static_cast(options->unit)); + break; + case PROP_ROUND_TEMPORAL_OPTIONS_WEEK_STARTS_MONDAY: + g_value_set_boolean(value, options->week_starts_monday); + break; + case PROP_ROUND_TEMPORAL_OPTIONS_CEIL_IS_STRICTLY_GREATER: + g_value_set_boolean(value, options->ceil_is_strictly_greater); + break; + case PROP_ROUND_TEMPORAL_OPTIONS_CALENDAR_BASED_ORIGIN: + g_value_set_boolean(value, options->calendar_based_origin); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_round_temporal_options_init(GArrowRoundTemporalOptions *object) +{ + auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + arrow_priv->options = static_cast( + new arrow::compute::RoundTemporalOptions()); +} + +static void +garrow_round_temporal_options_class_init(GArrowRoundTemporalOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_round_temporal_options_set_property; + gobject_class->get_property = garrow_round_temporal_options_get_property; + + arrow::compute::RoundTemporalOptions options; + + GParamSpec *spec; + /** + * GArrowRoundTemporalOptions:multiple: + * + * Number of units to round to. + * + * Since: 23.0.0 + */ + spec = g_param_spec_int("multiple", + "Multiple", + "Number of units to round to", + G_MININT, + G_MAXINT, + options.multiple, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_ROUND_TEMPORAL_OPTIONS_MULTIPLE, + spec); + + /** + * GArrowRoundTemporalOptions:unit: + * + * The unit used for rounding of time. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_enum("unit", + "Unit", + "The unit used for rounding of time", + GARROW_TYPE_CALENDAR_UNIT, + static_cast(options.unit), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ROUND_TEMPORAL_OPTIONS_UNIT, spec); + + /** + * GArrowRoundTemporalOptions:week-starts-monday: + * + * What day does the week start with (Monday=true, Sunday=false). + * + * Since: 23.0.0 + */ + spec = + g_param_spec_boolean("week-starts-monday", + "Week Starts Monday", + "What day does the week start with (Monday=true, Sunday=false)", + options.week_starts_monday, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_ROUND_TEMPORAL_OPTIONS_WEEK_STARTS_MONDAY, + spec); + + /** + * GArrowRoundTemporalOptions:ceil-is-strictly-greater: + * + * Enable this flag to return a rounded value that is strictly greater than the input. + * This applies for ceiling only. + * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean( + "ceil-is-strictly-greater", + "Ceil Is Strictly Greater", + "Enable this flag to return a rounded value that is strictly greater than the input", + options.ceil_is_strictly_greater, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_ROUND_TEMPORAL_OPTIONS_CEIL_IS_STRICTLY_GREATER, + spec); + + /** + * GArrowRoundTemporalOptions:calendar-based-origin: + * + * By default time is rounded to a multiple of units since 1970-01-01T00:00:00. + * By setting calendar_based_origin to true, time will be rounded to a number + * of units since the last greater calendar unit. + * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean( + "calendar-based-origin", + "Calendar Based Origin", + "By default time is rounded to a multiple of units since 1970-01-01T00:00:00. 
By " + "setting calendar_based_origin to true, time will be rounded to a number of units " + "since the last greater calendar unit", + options.calendar_based_origin, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_ROUND_TEMPORAL_OPTIONS_CALENDAR_BASED_ORIGIN, + spec); +} + +/** + * garrow_round_temporal_options_new: + * + * Returns: A newly created #GArrowRoundTemporalOptions. + * + * Since: 23.0.0 + */ +GArrowRoundTemporalOptions * +garrow_round_temporal_options_new(void) +{ + return GARROW_ROUND_TEMPORAL_OPTIONS( + g_object_new(GARROW_TYPE_ROUND_TEMPORAL_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -9516,6 +9716,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_round_binary_options_new_raw(arrow_round_binary_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "RoundTemporalOptions") { + const auto arrow_round_temporal_options = + static_cast(arrow_options); + auto options = garrow_round_temporal_options_new_raw(arrow_round_temporal_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -10484,3 +10689,29 @@ garrow_round_binary_options_get_raw(GArrowRoundBinaryOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowRoundTemporalOptions * +garrow_round_temporal_options_new_raw( + const arrow::compute::RoundTemporalOptions *arrow_options) +{ + return GARROW_ROUND_TEMPORAL_OPTIONS( + g_object_new(GARROW_TYPE_ROUND_TEMPORAL_OPTIONS, + "multiple", + arrow_options->multiple, + "unit", + static_cast(arrow_options->unit), + "week-starts-monday", + arrow_options->week_starts_monday, + "ceil-is-strictly-greater", + arrow_options->ceil_is_strictly_greater, + "calendar-based-origin", + arrow_options->calendar_based_origin, + NULL)); +} + 
+arrow::compute::RoundTemporalOptions * +garrow_round_temporal_options_get_raw(GArrowRoundTemporalOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index ffc6eef2cfa..4f0e51c21f6 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1604,4 +1604,54 @@ GARROW_AVAILABLE_IN_23_0 GArrowRoundBinaryOptions * garrow_round_binary_options_new(void); +/** + * GArrowCalendarUnit: + * @GARROW_CALENDAR_UNIT_NANOSECOND: Nanosecond + * @GARROW_CALENDAR_UNIT_MICROSECOND: Microsecond + * @GARROW_CALENDAR_UNIT_MILLISECOND: Millisecond + * @GARROW_CALENDAR_UNIT_SECOND: Second + * @GARROW_CALENDAR_UNIT_MINUTE: Minute + * @GARROW_CALENDAR_UNIT_HOUR: Hour + * @GARROW_CALENDAR_UNIT_DAY: Day + * @GARROW_CALENDAR_UNIT_WEEK: Week + * @GARROW_CALENDAR_UNIT_MONTH: Month + * @GARROW_CALENDAR_UNIT_QUARTER: Quarter + * @GARROW_CALENDAR_UNIT_YEAR: Year + * + * They correspond to the values of `arrow::compute::CalendarUnit`. 
+ * + * Since: 23.0.0 + */ +typedef enum { + GARROW_CALENDAR_UNIT_NANOSECOND, + GARROW_CALENDAR_UNIT_MICROSECOND, + GARROW_CALENDAR_UNIT_MILLISECOND, + GARROW_CALENDAR_UNIT_SECOND, + GARROW_CALENDAR_UNIT_MINUTE, + GARROW_CALENDAR_UNIT_HOUR, + GARROW_CALENDAR_UNIT_DAY, + GARROW_CALENDAR_UNIT_WEEK, + GARROW_CALENDAR_UNIT_MONTH, + GARROW_CALENDAR_UNIT_QUARTER, + GARROW_CALENDAR_UNIT_YEAR, +} GArrowCalendarUnit; + +#define GARROW_TYPE_CALENDAR_UNIT (garrow_calendar_unit_get_type()) + +#define GARROW_TYPE_ROUND_TEMPORAL_OPTIONS (garrow_round_temporal_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowRoundTemporalOptions, + garrow_round_temporal_options, + GARROW, + ROUND_TEMPORAL_OPTIONS, + GArrowFunctionOptions) +struct _GArrowRoundTemporalOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowRoundTemporalOptions * +garrow_round_temporal_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 8535c35c838..dc49f59bd0a 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -298,3 +298,9 @@ garrow_round_binary_options_new_raw( const arrow::compute::RoundBinaryOptions *arrow_options); arrow::compute::RoundBinaryOptions * garrow_round_binary_options_get_raw(GArrowRoundBinaryOptions *options); + +GArrowRoundTemporalOptions * +garrow_round_temporal_options_new_raw( + const arrow::compute::RoundTemporalOptions *arrow_options); +arrow::compute::RoundTemporalOptions * +garrow_round_temporal_options_get_raw(GArrowRoundTemporalOptions *options); diff --git a/c_glib/test/test-round-temporal-options.rb b/c_glib/test/test-round-temporal-options.rb new file mode 100644 index 00000000000..8e12b84bd60 --- /dev/null +++ b/c_glib/test/test-round-temporal-options.rb @@ -0,0 +1,68 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestRoundTemporalOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::RoundTemporalOptions.new + end + + def test_multiple + assert_equal(1, @options.multiple) + @options.multiple = 3 + assert_equal(3, @options.multiple) + end + + def test_unit + assert_equal(Arrow::CalendarUnit::DAY, @options.unit) + @options.unit = :hour + assert_equal(Arrow::CalendarUnit::HOUR, @options.unit) + end + + def test_week_starts_monday + assert_equal(true, @options.week_starts_monday?) + @options.week_starts_monday = false + assert_equal(false, @options.week_starts_monday?) + end + + def test_ceil_is_strictly_greater + assert_equal(false, @options.ceil_is_strictly_greater?) + @options.ceil_is_strictly_greater = true + assert_equal(true, @options.ceil_is_strictly_greater?) + end + + def test_calendar_based_origin + assert_equal(false, @options.calendar_based_origin?) + @options.calendar_based_origin = true + assert_equal(true, @options.calendar_based_origin?) 
+ end + + def test_round_temporal_function + # 1504953190000 = 2017-09-09 10:33:10 UTC + args = [ + Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1504953190000])), + ] + @options.multiple = 5 + @options.unit = :minute + round_temporal_function = Arrow::Function.find("round_temporal") + result = round_temporal_function.execute(args, @options).value + # 1504953300000 = 2017-09-09 10:35:00 UTC + expected = build_timestamp_array(:milli, [1504953300000]) + assert_equal(expected, result) + end +end From 4d0c06fb2174a52e555f1d19501ebc80639d19c1 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 22:24:49 +0100 Subject: [PATCH 25/33] Add GArrowSelectKOptions --- c_glib/arrow-glib/compute.cpp | 164 +++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 29 +++++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-select-k-options.rb | 62 ++++++++++ 4 files changed, 260 insertions(+) create mode 100644 c_glib/test/test-select-k-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index f818fe3402f..25de4f1636b 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -318,6 +318,8 @@ G_BEGIN_DECLS * #GArrowRoundTemporalOptions is a class to customize the `round_temporal`, * `floor_temporal`, and `ceil_temporal` functions. * + * #GArrowSelectKOptions is a class to customize the `select_k_unstable` function. + * * There are many functions to compute data on an array. 
*/ @@ -9476,6 +9478,146 @@ garrow_round_temporal_options_new(void) g_object_new(GARROW_TYPE_ROUND_TEMPORAL_OPTIONS, NULL)); } +enum { + PROP_SELECT_K_OPTIONS_K = 1, +}; + +G_DEFINE_TYPE(GArrowSelectKOptions, garrow_select_k_options, GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_select_k_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_select_k_options_get_raw(GARROW_SELECT_K_OPTIONS(object)); + + switch (prop_id) { + case PROP_SELECT_K_OPTIONS_K: + options->k = g_value_get_int64(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_select_k_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_select_k_options_get_raw(GARROW_SELECT_K_OPTIONS(object)); + + switch (prop_id) { + case PROP_SELECT_K_OPTIONS_K: + g_value_set_int64(value, options->k); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_select_k_options_init(GArrowSelectKOptions *object) +{ + auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + arrow_priv->options = + static_cast(new arrow::compute::SelectKOptions()); +} + +static void +garrow_select_k_options_class_init(GArrowSelectKOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_select_k_options_set_property; + gobject_class->get_property = garrow_select_k_options_get_property; + + arrow::compute::SelectKOptions options; + + GParamSpec *spec; + /** + * GArrowSelectKOptions:k: + * + * The number of k elements to keep. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("k", + "K", + "The number of k elements to keep", + G_MININT64, + G_MAXINT64, + options.k, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SELECT_K_OPTIONS_K, spec); +} + +/** + * garrow_select_k_options_new: + * + * Returns: A newly created #GArrowSelectKOptions. + * + * Since: 23.0.0 + */ +GArrowSelectKOptions * +garrow_select_k_options_new(void) +{ + return GARROW_SELECT_K_OPTIONS(g_object_new(GARROW_TYPE_SELECT_K_OPTIONS, NULL)); +} + +/** + * garrow_select_k_options_get_sort_keys: + * @options: A #GArrowSelectKOptions. + * + * Returns: (transfer full) (element-type GArrowSortKey): + * The sort keys to be used. + * + * Since: 23.0.0 + */ +GList * +garrow_select_k_options_get_sort_keys(GArrowSelectKOptions *options) +{ + auto arrow_options = garrow_select_k_options_get_raw(options); + return garrow_sort_keys_new_raw(arrow_options->sort_keys); +} + +/** + * garrow_select_k_options_set_sort_keys: + * @options: A #GArrowSelectKOptions. + * @sort_keys: (element-type GArrowSortKey): The sort keys to be used. + * + * Set sort keys to be used. + * + * Since: 23.0.0 + */ +void +garrow_select_k_options_set_sort_keys(GArrowSelectKOptions *options, GList *sort_keys) +{ + auto arrow_options = garrow_select_k_options_get_raw(options); + garrow_raw_sort_keys_set(arrow_options->sort_keys, sort_keys); +} + +/** + * garrow_select_k_options_add_sort_key: + * @options: A #GArrowSelectKOptions. + * @sort_key: The sort key to be added. + * + * Add a sort key to be used. 
+ * + * Since: 23.0.0 + */ +void +garrow_select_k_options_add_sort_key(GArrowSelectKOptions *options, + GArrowSortKey *sort_key) +{ + auto arrow_options = garrow_select_k_options_get_raw(options); + garrow_raw_sort_keys_add(arrow_options->sort_keys, sort_key); +} + G_END_DECLS arrow::Result @@ -9721,6 +9863,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_round_temporal_options_new_raw(arrow_round_temporal_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "SelectKOptions") { + const auto arrow_select_k_options = + static_cast(arrow_options); + auto options = garrow_select_k_options_new_raw(arrow_select_k_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -10715,3 +10862,20 @@ garrow_round_temporal_options_get_raw(GArrowRoundTemporalOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowSelectKOptions * +garrow_select_k_options_new_raw(const arrow::compute::SelectKOptions *arrow_options) +{ + auto options = GARROW_SELECT_K_OPTIONS( + g_object_new(GARROW_TYPE_SELECT_K_OPTIONS, "k", arrow_options->k, NULL)); + auto arrow_new_options = garrow_select_k_options_get_raw(options); + arrow_new_options->sort_keys = arrow_options->sort_keys; + return options; +} + +arrow::compute::SelectKOptions * +garrow_select_k_options_get_raw(GArrowSelectKOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 4f0e51c21f6..f6680c386c6 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1654,4 +1654,33 @@ GARROW_AVAILABLE_IN_23_0 GArrowRoundTemporalOptions * garrow_round_temporal_options_new(void); +#define 
GARROW_TYPE_SELECT_K_OPTIONS (garrow_select_k_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowSelectKOptions, + garrow_select_k_options, + GARROW, + SELECT_K_OPTIONS, + GArrowFunctionOptions) +struct _GArrowSelectKOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowSelectKOptions * +garrow_select_k_options_new(void); + +GARROW_AVAILABLE_IN_23_0 +GList * +garrow_select_k_options_get_sort_keys(GArrowSelectKOptions *options); + +GARROW_AVAILABLE_IN_23_0 +void +garrow_select_k_options_set_sort_keys(GArrowSelectKOptions *options, GList *sort_keys); + +GARROW_AVAILABLE_IN_23_0 +void +garrow_select_k_options_add_sort_key(GArrowSelectKOptions *options, + GArrowSortKey *sort_key); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index dc49f59bd0a..520a6f31997 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -304,3 +304,8 @@ garrow_round_temporal_options_new_raw( const arrow::compute::RoundTemporalOptions *arrow_options); arrow::compute::RoundTemporalOptions * garrow_round_temporal_options_get_raw(GArrowRoundTemporalOptions *options); + +GArrowSelectKOptions * +garrow_select_k_options_new_raw(const arrow::compute::SelectKOptions *arrow_options); +arrow::compute::SelectKOptions * +garrow_select_k_options_get_raw(GArrowSelectKOptions *options); diff --git a/c_glib/test/test-select-k-options.rb b/c_glib/test/test-select-k-options.rb new file mode 100644 index 00000000000..0e33ea22d66 --- /dev/null +++ b/c_glib/test/test-select-k-options.rb @@ -0,0 +1,62 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestSelectKOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::SelectKOptions.new + end + + def test_k + assert_equal(-1, @options.k) + @options.k = 3 + assert_equal(3, @options.k) + end + + def test_sort_keys + sort_keys = [ + Arrow::SortKey.new("column1", :ascending), + Arrow::SortKey.new("column2", :descending), + ] + @options.sort_keys = sort_keys + assert_equal(sort_keys, @options.sort_keys) + end + + def test_add_sort_key + @options.add_sort_key(Arrow::SortKey.new("column1", :ascending)) + @options.add_sort_key(Arrow::SortKey.new("column2", :descending)) + assert_equal([ + Arrow::SortKey.new("column1", :ascending), + Arrow::SortKey.new("column2", :descending), + ], + @options.sort_keys) + end + + def test_select_k_unstable_function + input_array = build_int32_array([5, 2, 8, 1, 9, 3]) + args = [ + Arrow::ArrayDatum.new(input_array), + ] + @options.k = 3 + @options.add_sort_key(Arrow::SortKey.new("dummy", :descending)) + select_k_unstable_function = Arrow::Function.find("select_k_unstable") + result = select_k_unstable_function.execute(args, @options).value + assert_equal(build_uint64_array([4, 2, 0]), result) + end +end + From ffa7073200555cc5008000e0ace3bb64606248ae Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 22:43:57 +0100 Subject: [PATCH 26/33] Add GArrowSkewOptions --- c_glib/arrow-glib/compute.cpp | 165 +++++++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 13 +++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-skew-options.rb | 61 ++++++++++++ 4 files changed, 
244 insertions(+) create mode 100644 c_glib/test/test-skew-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 25de4f1636b..24618f8e456 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -320,6 +320,8 @@ G_BEGIN_DECLS * * #GArrowSelectKOptions is a class to customize the `select_k_unstable` function. * + * #GArrowSkewOptions is a class to customize the `skew` and `kurtosis` functions. + * * There are many functions to compute data on an array. */ @@ -9618,6 +9620,143 @@ garrow_select_k_options_add_sort_key(GArrowSelectKOptions *options, garrow_raw_sort_keys_add(arrow_options->sort_keys, sort_key); } +enum { + PROP_SKEW_OPTIONS_SKIP_NULLS = 1, + PROP_SKEW_OPTIONS_BIASED, + PROP_SKEW_OPTIONS_MIN_COUNT, +}; + +G_DEFINE_TYPE(GArrowSkewOptions, garrow_skew_options, GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_skew_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_skew_options_get_raw(GARROW_SKEW_OPTIONS(object)); + + switch (prop_id) { + case PROP_SKEW_OPTIONS_SKIP_NULLS: + options->skip_nulls = g_value_get_boolean(value); + break; + case PROP_SKEW_OPTIONS_BIASED: + options->biased = g_value_get_boolean(value); + break; + case PROP_SKEW_OPTIONS_MIN_COUNT: + options->min_count = g_value_get_uint(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_skew_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_skew_options_get_raw(GARROW_SKEW_OPTIONS(object)); + + switch (prop_id) { + case PROP_SKEW_OPTIONS_SKIP_NULLS: + g_value_set_boolean(value, options->skip_nulls); + break; + case PROP_SKEW_OPTIONS_BIASED: + g_value_set_boolean(value, options->biased); + break; + case PROP_SKEW_OPTIONS_MIN_COUNT: + g_value_set_uint(value, options->min_count); + break; + default: + 
G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_skew_options_init(GArrowSkewOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = + static_cast(new arrow::compute::SkewOptions()); +} + +static void +garrow_skew_options_class_init(GArrowSkewOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_skew_options_set_property; + gobject_class->get_property = garrow_skew_options_get_property; + + arrow::compute::SkewOptions options; + + GParamSpec *spec; + /** + * GArrowSkewOptions:skip-nulls: + * + * Whether NULLs are skipped or not. + * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean("skip-nulls", + "Skip NULLs", + "Whether NULLs are skipped or not", + options.skip_nulls, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SKEW_OPTIONS_SKIP_NULLS, spec); + + /** + * GArrowSkewOptions:biased: + * + * Whether the calculated value is biased. + * If false, the value computed includes a correction factor to reduce bias. + * + * Since: 23.0.0 + */ + spec = + g_param_spec_boolean("biased", + "Biased", + "Whether the calculated value is biased. If false, the value " + "computed includes a correction factor to reduce bias", + options.biased, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SKEW_OPTIONS_BIASED, spec); + + /** + * GArrowSkewOptions:min-count: + * + * If less than this many non-null values are observed, emit null. + * + * Since: 23.0.0 + */ + spec = + g_param_spec_uint("min-count", + "Min count", + "If less than this many non-null values are observed, emit null", + 0, + G_MAXUINT, + options.min_count, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SKEW_OPTIONS_MIN_COUNT, spec); +} + +/** + * garrow_skew_options_new: + * + * Returns: A newly created #GArrowSkewOptions. 
+ * + * Since: 23.0.0 + */ +GArrowSkewOptions * +garrow_skew_options_new(void) +{ + return GARROW_SKEW_OPTIONS(g_object_new(GARROW_TYPE_SKEW_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -9868,6 +10007,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_select_k_options_new_raw(arrow_select_k_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "SkewOptions") { + const auto arrow_skew_options = + static_cast(arrow_options); + auto options = garrow_skew_options_new_raw(arrow_skew_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -10879,3 +11023,24 @@ garrow_select_k_options_get_raw(GArrowSelectKOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowSkewOptions * +garrow_skew_options_new_raw(const arrow::compute::SkewOptions *arrow_options) +{ + auto options = g_object_new(GARROW_TYPE_SKEW_OPTIONS, + "skip-nulls", + arrow_options->skip_nulls, + "biased", + arrow_options->biased, + "min-count", + arrow_options->min_count, + NULL); + return GARROW_SKEW_OPTIONS(options); +} + +arrow::compute::SkewOptions * +garrow_skew_options_get_raw(GArrowSkewOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index f6680c386c6..f10edc90584 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1683,4 +1683,17 @@ void garrow_select_k_options_add_sort_key(GArrowSelectKOptions *options, GArrowSortKey *sort_key); +#define GARROW_TYPE_SKEW_OPTIONS (garrow_skew_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowSkewOptions, garrow_skew_options, GARROW, SKEW_OPTIONS, GArrowFunctionOptions) +struct 
_GArrowSkewOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowSkewOptions * +garrow_skew_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 520a6f31997..f4efaf34f3f 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -309,3 +309,8 @@ GArrowSelectKOptions * garrow_select_k_options_new_raw(const arrow::compute::SelectKOptions *arrow_options); arrow::compute::SelectKOptions * garrow_select_k_options_get_raw(GArrowSelectKOptions *options); + +GArrowSkewOptions * +garrow_skew_options_new_raw(const arrow::compute::SkewOptions *arrow_options); +arrow::compute::SkewOptions * +garrow_skew_options_get_raw(GArrowSkewOptions *options); diff --git a/c_glib/test/test-skew-options.rb b/c_glib/test/test-skew-options.rb new file mode 100644 index 00000000000..40cf04add65 --- /dev/null +++ b/c_glib/test/test-skew-options.rb @@ -0,0 +1,61 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestSkewOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::SkewOptions.new + end + + def test_skip_nulls + assert do + @options.skip_nulls? 
+ end + @options.skip_nulls = false + assert do + not @options.skip_nulls? + end + end + + def test_biased + assert do + @options.biased? + end + @options.biased = false + assert do + not @options.biased? + end + end + + def test_min_count + assert_equal(0, @options.min_count) + @options.min_count = 1 + assert_equal(1, @options.min_count) + end + + def test_skew_function + args = [ + Arrow::ArrayDatum.new(build_double_array([1.0, 1.0, 2.0])), + ] + @options.min_count = 4 + skew_function = Arrow::Function.find("skew") + result = skew_function.execute(args, @options).value + assert_equal(0.0, result.value) + end +end + From 0d66a0a0e4ed219b1a68ba1015c60d8dbc194d10 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 22:51:58 +0100 Subject: [PATCH 27/33] Add GArrowSliceOptions --- c_glib/arrow-glib/compute.cpp | 166 ++++++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 13 +++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-slice-options.rb | 55 ++++++++++ 4 files changed, 239 insertions(+) create mode 100644 c_glib/test/test-slice-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 24618f8e456..7fa35b4e5d9 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -322,6 +322,9 @@ G_BEGIN_DECLS * * #GArrowSkewOptions is a class to customize the `skew` and `kurtosis` functions. * + * #GArrowSliceOptions is a class to customize the `utf8_slice_codeunits` and + * `binary_slice` functions. + * * There are many functions to compute data on an array. 
*/ @@ -9757,6 +9760,143 @@ garrow_skew_options_new(void) return GARROW_SKEW_OPTIONS(g_object_new(GARROW_TYPE_SKEW_OPTIONS, NULL)); } +enum { + PROP_SLICE_OPTIONS_START = 1, + PROP_SLICE_OPTIONS_STOP, + PROP_SLICE_OPTIONS_STEP, +}; + +G_DEFINE_TYPE(GArrowSliceOptions, garrow_slice_options, GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_slice_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_slice_options_get_raw(GARROW_SLICE_OPTIONS(object)); + + switch (prop_id) { + case PROP_SLICE_OPTIONS_START: + options->start = g_value_get_int64(value); + break; + case PROP_SLICE_OPTIONS_STOP: + options->stop = g_value_get_int64(value); + break; + case PROP_SLICE_OPTIONS_STEP: + options->step = g_value_get_int64(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_slice_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_slice_options_get_raw(GARROW_SLICE_OPTIONS(object)); + + switch (prop_id) { + case PROP_SLICE_OPTIONS_START: + g_value_set_int64(value, options->start); + break; + case PROP_SLICE_OPTIONS_STOP: + g_value_set_int64(value, options->stop); + break; + case PROP_SLICE_OPTIONS_STEP: + g_value_set_int64(value, options->step); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_slice_options_init(GArrowSliceOptions *object) +{ + auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + arrow_priv->options = + static_cast(new arrow::compute::SliceOptions()); +} + +static void +garrow_slice_options_class_init(GArrowSliceOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_slice_options_set_property; + gobject_class->get_property = garrow_slice_options_get_property; + + arrow::compute::SliceOptions options; + + 
GParamSpec *spec; + /** + * GArrowSliceOptions:start: + * + * Index to start slicing at (inclusive). + * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("start", + "Start", + "Index to start slicing at (inclusive)", + G_MININT64, + G_MAXINT64, + options.start, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SLICE_OPTIONS_START, spec); + + /** + * GArrowSliceOptions:stop: + * + * Index to stop slicing at (exclusive). + * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("stop", + "Stop", + "Index to stop slicing at (exclusive)", + G_MININT64, + G_MAXINT64, + options.stop, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SLICE_OPTIONS_STOP, spec); + + /** + * GArrowSliceOptions:step: + * + * Slice step. + * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("step", + "Step", + "Slice step", + G_MININT64, + G_MAXINT64, + options.step, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SLICE_OPTIONS_STEP, spec); +} + +/** + * garrow_slice_options_new: + * + * Returns: A newly created #GArrowSliceOptions. 
+ * + * Since: 23.0.0 + */ +GArrowSliceOptions * +garrow_slice_options_new(void) +{ + return GARROW_SLICE_OPTIONS(g_object_new(GARROW_TYPE_SLICE_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -10012,6 +10152,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_skew_options_new_raw(arrow_skew_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "SliceOptions") { + const auto arrow_slice_options = + static_cast(arrow_options); + auto options = garrow_slice_options_new_raw(arrow_slice_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -11044,3 +11189,24 @@ garrow_skew_options_get_raw(GArrowSkewOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowSliceOptions * +garrow_slice_options_new_raw(const arrow::compute::SliceOptions *arrow_options) +{ + auto options = g_object_new(GARROW_TYPE_SLICE_OPTIONS, + "start", + arrow_options->start, + "stop", + arrow_options->stop, + "step", + arrow_options->step, + NULL); + return GARROW_SLICE_OPTIONS(options); +} + +arrow::compute::SliceOptions * +garrow_slice_options_get_raw(GArrowSliceOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index f10edc90584..f3fb1f03d7f 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1696,4 +1696,17 @@ GARROW_AVAILABLE_IN_23_0 GArrowSkewOptions * garrow_skew_options_new(void); +#define GARROW_TYPE_SLICE_OPTIONS (garrow_slice_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowSliceOptions, garrow_slice_options, GARROW, SLICE_OPTIONS, GArrowFunctionOptions) +struct _GArrowSliceOptionsClass +{ + 
GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowSliceOptions * +garrow_slice_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index f4efaf34f3f..ac44ead982b 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -314,3 +314,8 @@ GArrowSkewOptions * garrow_skew_options_new_raw(const arrow::compute::SkewOptions *arrow_options); arrow::compute::SkewOptions * garrow_skew_options_get_raw(GArrowSkewOptions *options); + +GArrowSliceOptions * +garrow_slice_options_new_raw(const arrow::compute::SliceOptions *arrow_options); +arrow::compute::SliceOptions * +garrow_slice_options_get_raw(GArrowSliceOptions *options); diff --git a/c_glib/test/test-slice-options.rb b/c_glib/test/test-slice-options.rb new file mode 100644 index 00000000000..27459c91828 --- /dev/null +++ b/c_glib/test/test-slice-options.rb @@ -0,0 +1,55 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestSliceOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::SliceOptions.new + end + + def test_start_property + assert_equal(0, @options.start) + @options.start = 1 + assert_equal(1, @options.start) + end + + def test_stop_property + assert_equal(0, @options.stop) + @options.stop = 5 + assert_equal(5, @options.stop) + end + + def test_step_property + assert_equal(1, @options.step) + @options.step = 2 + assert_equal(2, @options.step) + end + + def test_utf8_slice_codeunits_function + args = [ + Arrow::ArrayDatum.new(build_string_array(["hello", "world", "test"])), + ] + @options.start = 1 + @options.stop = 4 + @options.step = 1 + utf8_slice_codeunits_function = Arrow::Function.find("utf8_slice_codeunits") + result = utf8_slice_codeunits_function.execute(args, @options).value + expected = build_string_array(["ell", "orl", "est"]) + assert_equal(expected, result) + end +end From 2e46099e34eabcb962b734801568b5829cb4b19f Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 23:00:50 +0100 Subject: [PATCH 28/33] Add GArrowSplitOptions --- c_glib/arrow-glib/compute.cpp | 140 ++++++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 13 +++ c_glib/arrow-glib/compute.hpp | 5 ++ c_glib/test/test-split-options.rb | 52 +++++++++++ 4 files changed, 210 insertions(+) create mode 100644 c_glib/test/test-split-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 7fa35b4e5d9..541d9b8eac7 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -325,6 +325,9 @@ G_BEGIN_DECLS * #GArrowSliceOptions is a class to customize the `utf8_slice_codeunits` and * `binary_slice` functions. * + * #GArrowSplitOptions is a class to customize the `ascii_split_whitespace` and + * `utf8_split_whitespace` functions. + * * There are many functions to compute data on an array. 
*/ @@ -9897,6 +9900,119 @@ garrow_slice_options_new(void) return GARROW_SLICE_OPTIONS(g_object_new(GARROW_TYPE_SLICE_OPTIONS, NULL)); } +enum { + PROP_SPLIT_OPTIONS_MAX_SPLITS = 1, + PROP_SPLIT_OPTIONS_REVERSE, +}; + +G_DEFINE_TYPE(GArrowSplitOptions, garrow_split_options, GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_split_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_split_options_get_raw(GARROW_SPLIT_OPTIONS(object)); + + switch (prop_id) { + case PROP_SPLIT_OPTIONS_MAX_SPLITS: + options->max_splits = g_value_get_int64(value); + break; + case PROP_SPLIT_OPTIONS_REVERSE: + options->reverse = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_split_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_split_options_get_raw(GARROW_SPLIT_OPTIONS(object)); + + switch (prop_id) { + case PROP_SPLIT_OPTIONS_MAX_SPLITS: + g_value_set_int64(value, options->max_splits); + break; + case PROP_SPLIT_OPTIONS_REVERSE: + g_value_set_boolean(value, options->reverse); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_split_options_init(GArrowSplitOptions *object) +{ + auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + arrow_priv->options = + static_cast(new arrow::compute::SplitOptions()); +} + +static void +garrow_split_options_class_init(GArrowSplitOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_split_options_set_property; + gobject_class->get_property = garrow_split_options_get_property; + + arrow::compute::SplitOptions options; + + GParamSpec *spec; + /** + * GArrowSplitOptions:max-splits: + * + * Maximum number of splits allowed, or unlimited when -1. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("max-splits", + "Max splits", + "Maximum number of splits allowed, or unlimited when -1", + G_MININT64, + G_MAXINT64, + options.max_splits, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SPLIT_OPTIONS_MAX_SPLITS, spec); + + /** + * GArrowSplitOptions:reverse: + * + * Start splitting from the end of the string (only relevant when max_splits != -1). + * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean( + "reverse", + "Reverse", + "Start splitting from the end of the string (only relevant when max_splits != -1)", + options.reverse, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_SPLIT_OPTIONS_REVERSE, spec); +} + +/** + * garrow_split_options_new: + * + * Returns: A newly created #GArrowSplitOptions. + * + * Since: 23.0.0 + */ +GArrowSplitOptions * +garrow_split_options_new(void) +{ + return GARROW_SPLIT_OPTIONS(g_object_new(GARROW_TYPE_SPLIT_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -10157,6 +10273,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_slice_options_new_raw(arrow_slice_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "SplitOptions") { + const auto arrow_split_options = + static_cast(arrow_options); + auto options = garrow_split_options_new_raw(arrow_split_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -11210,3 +11331,22 @@ garrow_slice_options_get_raw(GArrowSliceOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowSplitOptions * +garrow_split_options_new_raw(const arrow::compute::SplitOptions *arrow_options) +{ + auto options = g_object_new(GARROW_TYPE_SPLIT_OPTIONS, + "max-splits", + 
arrow_options->max_splits, + "reverse", + arrow_options->reverse, + NULL); + return GARROW_SPLIT_OPTIONS(options); +} + +arrow::compute::SplitOptions * +garrow_split_options_get_raw(GArrowSplitOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index f3fb1f03d7f..995d3320880 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1709,4 +1709,17 @@ GARROW_AVAILABLE_IN_23_0 GArrowSliceOptions * garrow_slice_options_new(void); +#define GARROW_TYPE_SPLIT_OPTIONS (garrow_split_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowSplitOptions, garrow_split_options, GARROW, SPLIT_OPTIONS, GArrowFunctionOptions) +struct _GArrowSplitOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowSplitOptions * +garrow_split_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index ac44ead982b..c7e9ccaead6 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -319,3 +319,8 @@ GArrowSliceOptions * garrow_slice_options_new_raw(const arrow::compute::SliceOptions *arrow_options); arrow::compute::SliceOptions * garrow_slice_options_get_raw(GArrowSliceOptions *options); + +GArrowSplitOptions * +garrow_split_options_new_raw(const arrow::compute::SplitOptions *arrow_options); +arrow::compute::SplitOptions * +garrow_split_options_get_raw(GArrowSplitOptions *options); diff --git a/c_glib/test/test-split-options.rb b/c_glib/test/test-split-options.rb new file mode 100644 index 00000000000..943bb2e5f76 --- /dev/null +++ b/c_glib/test/test-split-options.rb @@ -0,0 +1,52 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestSplitOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::SplitOptions.new + end + + def test_max_splits_property + assert_equal(-1, @options.max_splits) + @options.max_splits = 1 + assert_equal(1, @options.max_splits) + end + + def test_reverse_property + assert do + !@options.reverse? + end + @options.reverse = true + assert do + @options.reverse? + end + end + + def test_utf8_split_whitespace_function + args = [ + Arrow::ArrayDatum.new(build_string_array(["hello world test"])), + ] + @options.max_splits = 1 + utf8_split_whitespace_function = Arrow::Function.find("utf8_split_whitespace") + result = utf8_split_whitespace_function.execute(args, @options).value + expected = build_list_array(Arrow::StringDataType.new, [["hello", "world test"]]) + assert_equal(expected, result) + end +end + From 247c9ffe896468e5185fc154547d7e886fea21c5 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 23:05:48 +0100 Subject: [PATCH 29/33] Add GArrowTDigestOptions --- c_glib/arrow-glib/compute.cpp | 247 ++++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 25 +++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-tdigest-options.rb | 72 ++++++++ 4 files changed, 349 insertions(+) create mode 100644 c_glib/test/test-tdigest-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 541d9b8eac7..6e475d48ed9 
100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -328,6 +328,9 @@ G_BEGIN_DECLS * #GArrowSplitOptions is a class to customize the `ascii_split_whitespace` and * `utf8_split_whitespace` functions. * + * #GArrowTDigestOptions is a class to customize the `tdigest` and + * `hash_tdigest` functions. + * * There are many functions to compute data on an array. */ @@ -10013,6 +10016,219 @@ garrow_split_options_new(void) return GARROW_SPLIT_OPTIONS(g_object_new(GARROW_TYPE_SPLIT_OPTIONS, NULL)); } +enum { + PROP_TDIGEST_OPTIONS_DELTA = 1, + PROP_TDIGEST_OPTIONS_BUFFER_SIZE, + PROP_TDIGEST_OPTIONS_SKIP_NULLS, + PROP_TDIGEST_OPTIONS_MIN_COUNT, +}; + +G_DEFINE_TYPE(GArrowTDigestOptions, garrow_tdigest_options, GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_tdigest_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_tdigest_options_get_raw(GARROW_TDIGEST_OPTIONS(object)); + + switch (prop_id) { + case PROP_TDIGEST_OPTIONS_DELTA: + options->delta = g_value_get_uint(value); + break; + case PROP_TDIGEST_OPTIONS_BUFFER_SIZE: + options->buffer_size = g_value_get_uint(value); + break; + case PROP_TDIGEST_OPTIONS_SKIP_NULLS: + options->skip_nulls = g_value_get_boolean(value); + break; + case PROP_TDIGEST_OPTIONS_MIN_COUNT: + options->min_count = g_value_get_uint(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_tdigest_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_tdigest_options_get_raw(GARROW_TDIGEST_OPTIONS(object)); + + switch (prop_id) { + case PROP_TDIGEST_OPTIONS_DELTA: + g_value_set_uint(value, options->delta); + break; + case PROP_TDIGEST_OPTIONS_BUFFER_SIZE: + g_value_set_uint(value, options->buffer_size); + break; + case PROP_TDIGEST_OPTIONS_SKIP_NULLS: + g_value_set_boolean(value, options->skip_nulls); + break; + 
case PROP_TDIGEST_OPTIONS_MIN_COUNT: + g_value_set_uint(value, options->min_count); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_tdigest_options_init(GArrowTDigestOptions *object) +{ + auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + arrow_priv->options = + static_cast(new arrow::compute::TDigestOptions()); +} + +static void +garrow_tdigest_options_class_init(GArrowTDigestOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_tdigest_options_set_property; + gobject_class->get_property = garrow_tdigest_options_get_property; + + auto options = arrow::compute::TDigestOptions::Defaults(); + + GParamSpec *spec; + /** + * GArrowTDigestOptions:delta: + * + * Compression parameter, default 100. + * + * Since: 23.0.0 + */ + spec = g_param_spec_uint("delta", + "Delta", + "Compression parameter, default 100", + 0, + G_MAXUINT32, + options.delta, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_TDIGEST_OPTIONS_DELTA, spec); + + /** + * GArrowTDigestOptions:buffer-size: + * + * Input buffer size, default 500. + * + * Since: 23.0.0 + */ + spec = g_param_spec_uint("buffer-size", + "Buffer size", + "Input buffer size, default 500", + 0, + G_MAXUINT32, + options.buffer_size, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_TDIGEST_OPTIONS_BUFFER_SIZE, spec); + + /** + * GArrowTDigestOptions:skip-nulls: + * + * If true (the default), null values are ignored. Otherwise, if any + * value is null, emit null. + * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean("skip-nulls", + "Skip nulls", + "If true (the default), null values are ignored. 
" + "Otherwise, if any value is null, emit null.", + options.skip_nulls, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_TDIGEST_OPTIONS_SKIP_NULLS, spec); + + /** + * GArrowTDigestOptions:min-count: + * + * If less than this many non-null values are observed, emit null. + * + * Since: 23.0.0 + */ + spec = + g_param_spec_uint("min-count", + "Min count", + "If less than this many non-null values are observed, emit null", + 0, + G_MAXUINT32, + options.min_count, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_TDIGEST_OPTIONS_MIN_COUNT, spec); +} + +/** + * garrow_tdigest_options_new: + * + * Returns: A newly created #GArrowTDigestOptions. + * + * Since: 23.0.0 + */ +GArrowTDigestOptions * +garrow_tdigest_options_new(void) +{ + return GARROW_TDIGEST_OPTIONS(g_object_new(GARROW_TYPE_TDIGEST_OPTIONS, NULL)); +} + +/** + * garrow_tdigest_options_get_qs: + * @options: A #GArrowTDigestOptions. + * @n: (out): The number of `q`s. + * + * Returns: (array length=n) (transfer none): The `q`s to be used. + * + * Since: 23.0.0 + */ +const gdouble * +garrow_tdigest_options_get_qs(GArrowTDigestOptions *options, gsize *n) +{ + auto priv = garrow_tdigest_options_get_raw(options); + if (n) { + *n = priv->q.size(); + } + return priv->q.data(); +} + +/** + * garrow_tdigest_options_set_q: + * @options: A #GArrowTDigestOptions. + * @q: A `q` to be used. + * + * Since: 23.0.0 + */ +void +garrow_tdigest_options_set_q(GArrowTDigestOptions *options, gdouble q) +{ + auto priv = garrow_tdigest_options_get_raw(options); + priv->q.clear(); + priv->q.push_back(q); +} + +/** + * garrow_tdigest_options_set_qs: + * @options: A #GArrowTDigestOptions. + * @qs: (array length=n): `q`s to be used. + * @n: The number of @qs. 
+ * + * Since: 23.0.0 + */ +void +garrow_tdigest_options_set_qs(GArrowTDigestOptions *options, const gdouble *qs, gsize n) +{ + auto priv = garrow_tdigest_options_get_raw(options); + priv->q.clear(); + for (gsize i = 0; i < n; i++) { + priv->q.push_back(qs[i]); + } +} + G_END_DECLS arrow::Result @@ -10278,6 +10494,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_split_options_new_raw(arrow_split_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "TDigestOptions") { + const auto arrow_tdigest_options = + static_cast(arrow_options); + auto options = garrow_tdigest_options_new_raw(arrow_tdigest_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -11350,3 +11571,29 @@ garrow_split_options_get_raw(GArrowSplitOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowTDigestOptions * +garrow_tdigest_options_new_raw(const arrow::compute::TDigestOptions *arrow_options) +{ + auto options = GARROW_TDIGEST_OPTIONS(g_object_new(GARROW_TYPE_TDIGEST_OPTIONS, + "delta", + arrow_options->delta, + "buffer-size", + arrow_options->buffer_size, + "skip-nulls", + arrow_options->skip_nulls, + "min-count", + arrow_options->min_count, + NULL)); + garrow_tdigest_options_set_qs(options, + arrow_options->q.data(), + arrow_options->q.size()); + return options; +} + +arrow::compute::TDigestOptions * +garrow_tdigest_options_get_raw(GArrowTDigestOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 995d3320880..749e71e9046 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1722,4 +1722,29 @@ GARROW_AVAILABLE_IN_23_0 GArrowSplitOptions * 
garrow_split_options_new(void); +#define GARROW_TYPE_TDIGEST_OPTIONS (garrow_tdigest_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowTDigestOptions, + garrow_tdigest_options, + GARROW, + TDIGEST_OPTIONS, + GArrowFunctionOptions) +struct _GArrowTDigestOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowTDigestOptions * +garrow_tdigest_options_new(void); +GARROW_AVAILABLE_IN_23_0 +const gdouble * +garrow_tdigest_options_get_qs(GArrowTDigestOptions *options, gsize *n); +GARROW_AVAILABLE_IN_23_0 +void +garrow_tdigest_options_set_q(GArrowTDigestOptions *options, gdouble q); +GARROW_AVAILABLE_IN_23_0 +void +garrow_tdigest_options_set_qs(GArrowTDigestOptions *options, const gdouble *qs, gsize n); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index c7e9ccaead6..dab6652b037 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -324,3 +324,8 @@ GArrowSplitOptions * garrow_split_options_new_raw(const arrow::compute::SplitOptions *arrow_options); arrow::compute::SplitOptions * garrow_split_options_get_raw(GArrowSplitOptions *options); + +GArrowTDigestOptions * +garrow_tdigest_options_new_raw(const arrow::compute::TDigestOptions *arrow_options); +arrow::compute::TDigestOptions * +garrow_tdigest_options_get_raw(GArrowTDigestOptions *options); diff --git a/c_glib/test/test-tdigest-options.rb b/c_glib/test/test-tdigest-options.rb new file mode 100644 index 00000000000..9a67d2157bf --- /dev/null +++ b/c_glib/test/test-tdigest-options.rb @@ -0,0 +1,72 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestTDigestOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::TDigestOptions.new + end + + def test_delta + assert_equal(100, @options.delta) + @options.delta = 200 + assert_equal(200, @options.delta) + end + + def test_buffer_size + assert_equal(500, @options.buffer_size) + @options.buffer_size = 1000 + assert_equal(1000, @options.buffer_size) + end + + def test_skip_nulls + assert do + @options.skip_nulls? + end + @options.skip_nulls = false + assert do + not @options.skip_nulls? + end + end + + def test_min_count + assert_equal(0, @options.min_count) + @options.min_count = 1 + assert_equal(1, @options.min_count) + end + + def test_q + assert_equal([0.5], @options.qs) + @options.qs = [0.1, 0.2, 0.9] + assert_equal([0.1, 0.2, 0.9], @options.qs) + @options.q = 0.7 + assert_equal([0.7], @options.qs) + end + + def test_tdigest_function + args = [ + Arrow::ArrayDatum.new(build_double_array([1.0, 2.0, 3.0, 4.0, 5.0])), + ] + @options.q = 0.5 + @options.delta = 200 + tdigest_function = Arrow::Function.find("tdigest") + result = tdigest_function.execute(args, @options).value + assert_equal(build_double_array([3.0]), result) + end +end + From 95a0520e385836e2c2bafb18f964d840b7eb9283 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 23:08:23 +0100 Subject: [PATCH 30/33] Add GArrowTrimOptions --- c_glib/arrow-glib/compute.cpp | 152 +++++++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 13 +++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-trim-options.rb | 42 +++++++++ 4 files changed, 
212 insertions(+) create mode 100644 c_glib/test/test-trim-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 6e475d48ed9..fe2a4128033 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -331,6 +331,9 @@ G_BEGIN_DECLS * #GArrowTDigestOptions is a class to customize the `tdigest` and * `hash_tdigest` functions. * + * #GArrowTrimOptions is a class to customize the `utf8_trim`, `utf8_ltrim`, + * `utf8_rtrim`, `ascii_trim`, `ascii_ltrim`, and `ascii_rtrim` functions. + * * There are many functions to compute data on an array. */ @@ -10229,6 +10232,134 @@ garrow_tdigest_options_set_qs(GArrowTDigestOptions *options, const gdouble *qs, } } +enum { + PROP_TRIM_OPTIONS_CHARACTERS = 1, +}; + +typedef struct _GArrowTrimOptionsPrivate GArrowTrimOptionsPrivate; +struct _GArrowTrimOptionsPrivate +{ + gchar *characters; +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowTrimOptions, + garrow_trim_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +#define GARROW_TRIM_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + garrow_trim_options_get_instance_private(GARROW_TRIM_OPTIONS(object))) + +static void +garrow_trim_options_dispose(GObject *object) +{ + auto priv = GARROW_TRIM_OPTIONS_GET_PRIVATE(object); + if (priv->characters) { + g_free(priv->characters); + priv->characters = nullptr; + } + G_OBJECT_CLASS(garrow_trim_options_parent_class)->dispose(object); +} + +static void +garrow_trim_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_trim_options_get_raw(GARROW_TRIM_OPTIONS(object)); + auto priv = GARROW_TRIM_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_TRIM_OPTIONS_CHARACTERS: + { + const gchar *characters = g_value_get_string(value); + if (priv->characters) { + g_free(priv->characters); + } + priv->characters = g_strdup(characters); + options->characters = characters ? 
characters : ""; + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_trim_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_trim_options_get_raw(GARROW_TRIM_OPTIONS(object)); + auto priv = GARROW_TRIM_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_TRIM_OPTIONS_CHARACTERS: + g_value_set_string(value, + priv->characters ? priv->characters : options->characters.c_str()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_trim_options_init(GArrowTrimOptions *object) +{ + auto priv = GARROW_TRIM_OPTIONS_GET_PRIVATE(object); + priv->characters = nullptr; + auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + arrow_priv->options = + static_cast(new arrow::compute::TrimOptions()); + // Sync the private string with the C++ options + auto arrow_options = garrow_trim_options_get_raw(GARROW_TRIM_OPTIONS(object)); + priv->characters = g_strdup(arrow_options->characters.c_str()); +} + +static void +garrow_trim_options_class_init(GArrowTrimOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_trim_options_dispose; + gobject_class->set_property = garrow_trim_options_set_property; + gobject_class->get_property = garrow_trim_options_get_property; + + arrow::compute::TrimOptions options; + + GParamSpec *spec; + /** + * GArrowTrimOptions:characters: + * + * The individual characters to be trimmed from the string. + * + * Since: 23.0.0 + */ + spec = g_param_spec_string("characters", + "Characters", + "The individual characters to be trimmed from the string", + options.characters.c_str(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_TRIM_OPTIONS_CHARACTERS, spec); +} + +/** + * garrow_trim_options_new: + * + * Returns: A newly created #GArrowTrimOptions. 
+ * + * Since: 23.0.0 + */ +GArrowTrimOptions * +garrow_trim_options_new(void) +{ + return GARROW_TRIM_OPTIONS(g_object_new(GARROW_TYPE_TRIM_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -10499,6 +10630,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_tdigest_options_new_raw(arrow_tdigest_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "TrimOptions") { + const auto arrow_trim_options = + static_cast(arrow_options); + auto options = garrow_trim_options_new_raw(arrow_trim_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -11597,3 +11733,19 @@ garrow_tdigest_options_get_raw(GArrowTDigestOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowTrimOptions * +garrow_trim_options_new_raw(const arrow::compute::TrimOptions *arrow_options) +{ + return GARROW_TRIM_OPTIONS(g_object_new(GARROW_TYPE_TRIM_OPTIONS, + "characters", + arrow_options->characters.c_str(), + NULL)); +} + +arrow::compute::TrimOptions * +garrow_trim_options_get_raw(GArrowTrimOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 749e71e9046..30842cff296 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1747,4 +1747,17 @@ GARROW_AVAILABLE_IN_23_0 void garrow_tdigest_options_set_qs(GArrowTDigestOptions *options, const gdouble *qs, gsize n); +#define GARROW_TYPE_TRIM_OPTIONS (garrow_trim_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowTrimOptions, garrow_trim_options, GARROW, TRIM_OPTIONS, GArrowFunctionOptions) +struct _GArrowTrimOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + 
+GARROW_AVAILABLE_IN_23_0 +GArrowTrimOptions * +garrow_trim_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index dab6652b037..29dbaa09e70 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -329,3 +329,8 @@ GArrowTDigestOptions * garrow_tdigest_options_new_raw(const arrow::compute::TDigestOptions *arrow_options); arrow::compute::TDigestOptions * garrow_tdigest_options_get_raw(GArrowTDigestOptions *options); + +GArrowTrimOptions * +garrow_trim_options_new_raw(const arrow::compute::TrimOptions *arrow_options); +arrow::compute::TrimOptions * +garrow_trim_options_get_raw(GArrowTrimOptions *options); diff --git a/c_glib/test/test-trim-options.rb b/c_glib/test/test-trim-options.rb new file mode 100644 index 00000000000..4109c9d1bff --- /dev/null +++ b/c_glib/test/test-trim-options.rb @@ -0,0 +1,42 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestTrimOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::TrimOptions.new + end + + def test_characters_property + assert_equal("", @options.characters) + @options.characters = " \t" + assert_equal(" \t", @options.characters) + end + + def test_utf8_trim_function + args = [ + Arrow::ArrayDatum.new(build_string_array([" hello ", " world "])), + ] + @options.characters = " " + utf8_trim_function = Arrow::Function.find("utf8_trim") + result = utf8_trim_function.execute(args, @options).value + expected = build_string_array(["hello", "world"]) + assert_equal(expected, result) + end +end + From a7dd55382c80460cb7d69bd83135a87ad0e79859 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 23:21:32 +0100 Subject: [PATCH 31/33] Add GArrowWeekOptions --- c_glib/arrow-glib/compute.cpp | 170 +++++++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 13 +++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-week-options.rb | 71 +++++++++++++ 4 files changed, 259 insertions(+) create mode 100644 c_glib/test/test-week-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index fe2a4128033..9b6a6ecaa56 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -334,6 +334,8 @@ G_BEGIN_DECLS * #GArrowTrimOptions is a class to customize the `utf8_trim`, `utf8_ltrim`, * `utf8_rtrim`, `ascii_trim`, `ascii_ltrim`, and `ascii_rtrim` functions. * + * #GArrowWeekOptions is a class to customize the `week` function. + * * There are many functions to compute data on an array. 
*/ @@ -10360,6 +10362,148 @@ garrow_trim_options_new(void) return GARROW_TRIM_OPTIONS(g_object_new(GARROW_TYPE_TRIM_OPTIONS, NULL)); } +enum { + PROP_WEEK_OPTIONS_WEEK_STARTS_MONDAY = 1, + PROP_WEEK_OPTIONS_COUNT_FROM_ZERO, + PROP_WEEK_OPTIONS_FIRST_WEEK_IS_FULLY_IN_YEAR, +}; + +G_DEFINE_TYPE(GArrowWeekOptions, garrow_week_options, GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_week_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_week_options_get_raw(GARROW_WEEK_OPTIONS(object)); + + switch (prop_id) { + case PROP_WEEK_OPTIONS_WEEK_STARTS_MONDAY: + options->week_starts_monday = g_value_get_boolean(value); + break; + case PROP_WEEK_OPTIONS_COUNT_FROM_ZERO: + options->count_from_zero = g_value_get_boolean(value); + break; + case PROP_WEEK_OPTIONS_FIRST_WEEK_IS_FULLY_IN_YEAR: + options->first_week_is_fully_in_year = g_value_get_boolean(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_week_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_week_options_get_raw(GARROW_WEEK_OPTIONS(object)); + + switch (prop_id) { + case PROP_WEEK_OPTIONS_WEEK_STARTS_MONDAY: + g_value_set_boolean(value, options->week_starts_monday); + break; + case PROP_WEEK_OPTIONS_COUNT_FROM_ZERO: + g_value_set_boolean(value, options->count_from_zero); + break; + case PROP_WEEK_OPTIONS_FIRST_WEEK_IS_FULLY_IN_YEAR: + g_value_set_boolean(value, options->first_week_is_fully_in_year); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_week_options_init(GArrowWeekOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = + static_cast(new arrow::compute::WeekOptions()); +} + +static void +garrow_week_options_class_init(GArrowWeekOptionsClass *klass) +{ + auto gobject_class = 
G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_week_options_set_property; + gobject_class->get_property = garrow_week_options_get_property; + + auto options = arrow::compute::WeekOptions::Defaults(); + + GParamSpec *spec; + /** + * GArrowWeekOptions:week-starts-monday: + * + * What day does the week start with (Monday=true, Sunday=false). + * + * Since: 23.0.0 + */ + spec = + g_param_spec_boolean("week-starts-monday", + "Week starts Monday", + "What day does the week start with (Monday=true, Sunday=false)", + options.week_starts_monday, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_WEEK_OPTIONS_WEEK_STARTS_MONDAY, + spec); + + /** + * GArrowWeekOptions:count-from-zero: + * + * Dates from current year that fall into last ISO week of the previous year + * return 0 if true and 52 or 53 if false. + * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean("count-from-zero", + "Count from zero", + "Dates from current year that fall into last ISO week of " + "the previous year return 0 if true and 52 or 53 if false", + options.count_from_zero, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_WEEK_OPTIONS_COUNT_FROM_ZERO, spec); + + /** + * GArrowWeekOptions:first-week-is-fully-in-year: + * + * Must the first week be fully in January (true), or is a week that begins + * on December 29, 30, or 31 considered to be the first week of the new + * year (false)? 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_boolean( + "first-week-is-fully-in-year", + "First week is fully in year", + "Must the first week be fully in January (true), or is a week that begins on " + "December 29, 30, or 31 considered to be the first week of the new year (false)?", + options.first_week_is_fully_in_year, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_WEEK_OPTIONS_FIRST_WEEK_IS_FULLY_IN_YEAR, + spec); +} + +/** + * garrow_week_options_new: + * + * Returns: A newly created #GArrowWeekOptions. + * + * Since: 23.0.0 + */ +GArrowWeekOptions * +garrow_week_options_new(void) +{ + return GARROW_WEEK_OPTIONS(g_object_new(GARROW_TYPE_WEEK_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -10635,6 +10779,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_trim_options_new_raw(arrow_trim_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "WeekOptions") { + const auto arrow_week_options = + static_cast(arrow_options); + auto options = garrow_week_options_new_raw(arrow_week_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -11749,3 +11898,24 @@ garrow_trim_options_get_raw(GArrowTrimOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowWeekOptions * +garrow_week_options_new_raw(const arrow::compute::WeekOptions *arrow_options) +{ + auto options = g_object_new(GARROW_TYPE_WEEK_OPTIONS, + "week-starts-monday", + arrow_options->week_starts_monday, + "count-from-zero", + arrow_options->count_from_zero, + "first-week-is-fully-in-year", + arrow_options->first_week_is_fully_in_year, + NULL); + return GARROW_WEEK_OPTIONS(options); +} + +arrow::compute::WeekOptions * +garrow_week_options_get_raw(GArrowWeekOptions *options) +{ 
+ return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 30842cff296..fdbf9486ea4 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1760,4 +1760,17 @@ GARROW_AVAILABLE_IN_23_0 GArrowTrimOptions * garrow_trim_options_new(void); +#define GARROW_TYPE_WEEK_OPTIONS (garrow_week_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE( + GArrowWeekOptions, garrow_week_options, GARROW, WEEK_OPTIONS, GArrowFunctionOptions) +struct _GArrowWeekOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowWeekOptions * +garrow_week_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 29dbaa09e70..9cb1ef83e75 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -334,3 +334,8 @@ GArrowTrimOptions * garrow_trim_options_new_raw(const arrow::compute::TrimOptions *arrow_options); arrow::compute::TrimOptions * garrow_trim_options_get_raw(GArrowTrimOptions *options); + +GArrowWeekOptions * +garrow_week_options_new_raw(const arrow::compute::WeekOptions *arrow_options); +arrow::compute::WeekOptions * +garrow_week_options_get_raw(GArrowWeekOptions *options); diff --git a/c_glib/test/test-week-options.rb b/c_glib/test/test-week-options.rb new file mode 100644 index 00000000000..7ec3c69b820 --- /dev/null +++ b/c_glib/test/test-week-options.rb @@ -0,0 +1,71 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestWeekOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::WeekOptions.new + end + + def test_week_starts_monday_property + assert do + @options.week_starts_monday? + end + @options.week_starts_monday = false + assert do + !@options.week_starts_monday? + end + end + + def test_count_from_zero_property + assert do + !@options.count_from_zero? + end + @options.count_from_zero = true + assert do + @options.count_from_zero? + end + end + + def test_first_week_is_fully_in_year_property + assert do + !@options.first_week_is_fully_in_year? + end + @options.first_week_is_fully_in_year = true + assert do + @options.first_week_is_fully_in_year? + end + end + + def test_week_function_with_week_starts_monday + omit("Missing tzdata on Windows") if Gem.win_platform? 
+ # January 1, 2023 (Sunday) + args = [ + Arrow::ArrayDatum.new(build_timestamp_array(:milli, [1672531200000])), + ] + @options.week_starts_monday = true + week_function = Arrow::Function.find("week") + result = week_function.execute(args, @options).value + assert_equal(build_int64_array([52]), result) + + @options.week_starts_monday = false + result = week_function.execute(args, @options).value + assert_equal(build_int64_array([1]), result) + end +end + From 31ea9474b4ae42f341a99b219c286255d10b6761 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 23:25:41 +0100 Subject: [PATCH 32/33] Add GArrowWinsorizeOptions --- c_glib/arrow-glib/compute.cpp | 156 ++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 16 +++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-winsorize-options.rb | 50 +++++++++ 4 files changed, 227 insertions(+) create mode 100644 c_glib/test/test-winsorize-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 9b6a6ecaa56..1f3dab1c6e5 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -336,6 +336,8 @@ G_BEGIN_DECLS * * #GArrowWeekOptions is a class to customize the `week` function. * + * #GArrowWinsorizeOptions is a class to customize the `winsorize` function. + * * There are many functions to compute data on an array. 
*/ @@ -10504,6 +10506,136 @@ garrow_week_options_new(void) return GARROW_WEEK_OPTIONS(g_object_new(GARROW_TYPE_WEEK_OPTIONS, NULL)); } +enum { + PROP_WINSORIZE_OPTIONS_LOWER_LIMIT = 1, + PROP_WINSORIZE_OPTIONS_UPPER_LIMIT, +}; + +G_DEFINE_TYPE(GArrowWinsorizeOptions, + garrow_winsorize_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +static void +garrow_winsorize_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_winsorize_options_get_raw(GARROW_WINSORIZE_OPTIONS(object)); + + switch (prop_id) { + case PROP_WINSORIZE_OPTIONS_LOWER_LIMIT: + options->lower_limit = g_value_get_double(value); + break; + case PROP_WINSORIZE_OPTIONS_UPPER_LIMIT: + options->upper_limit = g_value_get_double(value); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_winsorize_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_winsorize_options_get_raw(GARROW_WINSORIZE_OPTIONS(object)); + + switch (prop_id) { + case PROP_WINSORIZE_OPTIONS_LOWER_LIMIT: + g_value_set_double(value, options->lower_limit); + break; + case PROP_WINSORIZE_OPTIONS_UPPER_LIMIT: + g_value_set_double(value, options->upper_limit); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_winsorize_options_init(GArrowWinsorizeOptions *object) +{ + auto priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + priv->options = static_cast( + new arrow::compute::WinsorizeOptions()); +} + +static void +garrow_winsorize_options_class_init(GArrowWinsorizeOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->set_property = garrow_winsorize_options_set_property; + gobject_class->get_property = garrow_winsorize_options_get_property; + + auto options = arrow::compute::WinsorizeOptions(); + + GParamSpec *spec; + /** + * 
GArrowWinsorizeOptions:lower-limit: + * + * The quantile below which all values are replaced with the quantile's value. + * For example, if lower_limit = 0.05, then all values in the lower 5% percentile + * will be replaced with the 5% percentile value. + * + * Since: 23.0.0 + */ + spec = g_param_spec_double( + "lower-limit", + "Lower limit", + "The quantile below which all values are replaced with the quantile's value. For " + "example, if lower_limit = 0.05, then all values in the lower 5% percentile will be " + "replaced with the 5% percentile value", + 0.0, + 1.0, + options.lower_limit, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_WINSORIZE_OPTIONS_LOWER_LIMIT, + spec); + + /** + * GArrowWinsorizeOptions:upper-limit: + * + * The quantile above which all values are replaced with the quantile's value. + * For example, if upper_limit = 0.95, then all values in the upper 5% percentile + * will be replaced with the 95% percentile value. + * + * Since: 23.0.0 + */ + spec = g_param_spec_double( + "upper-limit", + "Upper limit", + "The quantile above which all values are replaced with the quantile's value. For " + "example, if upper_limit = 0.95, then all values in the upper 95% percentile will be " + "replaced with the 95% percentile value", + 0.0, + 1.0, + options.upper_limit, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, + PROP_WINSORIZE_OPTIONS_UPPER_LIMIT, + spec); +} + +/** + * garrow_winsorize_options_new: + * + * Returns: A newly created #GArrowWinsorizeOptions. 
+ * + * Since: 23.0.0 + */ +GArrowWinsorizeOptions * +garrow_winsorize_options_new(void) +{ + return GARROW_WINSORIZE_OPTIONS(g_object_new(GARROW_TYPE_WINSORIZE_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -10784,6 +10916,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_week_options_new_raw(arrow_week_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "WinsorizeOptions") { + const auto arrow_winsorize_options = + static_cast(arrow_options); + auto options = garrow_winsorize_options_new_raw(arrow_winsorize_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -11919,3 +12056,22 @@ garrow_week_options_get_raw(GArrowWeekOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowWinsorizeOptions * +garrow_winsorize_options_new_raw(const arrow::compute::WinsorizeOptions *arrow_options) +{ + auto options = g_object_new(GARROW_TYPE_WINSORIZE_OPTIONS, + "lower-limit", + arrow_options->lower_limit, + "upper-limit", + arrow_options->upper_limit, + NULL); + return GARROW_WINSORIZE_OPTIONS(options); +} + +arrow::compute::WinsorizeOptions * +garrow_winsorize_options_get_raw(GArrowWinsorizeOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index fdbf9486ea4..66184e689fe 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1773,4 +1773,20 @@ GARROW_AVAILABLE_IN_23_0 GArrowWeekOptions * garrow_week_options_new(void); +#define GARROW_TYPE_WINSORIZE_OPTIONS (garrow_winsorize_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowWinsorizeOptions, + garrow_winsorize_options, + GARROW, + WINSORIZE_OPTIONS, + 
GArrowFunctionOptions) +struct _GArrowWinsorizeOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowWinsorizeOptions * +garrow_winsorize_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 9cb1ef83e75..1e95d1aa0d9 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -339,3 +339,8 @@ GArrowWeekOptions * garrow_week_options_new_raw(const arrow::compute::WeekOptions *arrow_options); arrow::compute::WeekOptions * garrow_week_options_get_raw(GArrowWeekOptions *options); + +GArrowWinsorizeOptions * +garrow_winsorize_options_new_raw(const arrow::compute::WinsorizeOptions *arrow_options); +arrow::compute::WinsorizeOptions * +garrow_winsorize_options_get_raw(GArrowWinsorizeOptions *options); diff --git a/c_glib/test/test-winsorize-options.rb b/c_glib/test/test-winsorize-options.rb new file mode 100644 index 00000000000..dcad6e24cb7 --- /dev/null +++ b/c_glib/test/test-winsorize-options.rb @@ -0,0 +1,50 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +class TestWinsorizeOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::WinsorizeOptions.new + end + + def test_lower_limit_property + assert_equal(0.0, @options.lower_limit) + @options.lower_limit = 0.05 + assert_equal(0.05, @options.lower_limit) + end + + def test_upper_limit_property + assert_equal(1.0, @options.upper_limit) + @options.upper_limit = 0.95 + assert_equal(0.95, @options.upper_limit) + end + + def test_winsorize_function + args = [ + Arrow::ArrayDatum.new(build_double_array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, +10.0])), + ] + @options.lower_limit = 0.1 + @options.upper_limit = 0.9 + winsorize_function = Arrow::Function.find("winsorize") + result = winsorize_function.execute(args, @options).value + expected = build_double_array([2.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 9.0]) + assert_equal(expected, result) + end +end + From 83251e0a3a7fd70f7d39bcf5f33b3c35cacdec79 Mon Sep 17 00:00:00 2001 From: Sten Larsson Date: Thu, 4 Dec 2025 23:29:03 +0100 Subject: [PATCH 33/33] Add GArrowZeroFillOptions --- c_glib/arrow-glib/compute.cpp | 176 ++++++++++++++++++++++++++ c_glib/arrow-glib/compute.h | 16 +++ c_glib/arrow-glib/compute.hpp | 5 + c_glib/test/test-zero-fill-options.rb | 49 +++++++ 4 files changed, 246 insertions(+) create mode 100644 c_glib/test/test-zero-fill-options.rb diff --git a/c_glib/arrow-glib/compute.cpp b/c_glib/arrow-glib/compute.cpp index 1f3dab1c6e5..9f3f14032bb 100644 --- a/c_glib/arrow-glib/compute.cpp +++ b/c_glib/arrow-glib/compute.cpp @@ -338,6 +338,8 @@ G_BEGIN_DECLS * * #GArrowWinsorizeOptions is a class to customize the `winsorize` function. * + * #GArrowZeroFillOptions is a class to customize the `utf8_zero_fill` function. + * * There are many functions to compute data on an array. 
*/ @@ -10636,6 +10638,157 @@ garrow_winsorize_options_new(void) return GARROW_WINSORIZE_OPTIONS(g_object_new(GARROW_TYPE_WINSORIZE_OPTIONS, NULL)); } +enum { + PROP_ZERO_FILL_OPTIONS_WIDTH = 1, + PROP_ZERO_FILL_OPTIONS_PADDING, +}; + +typedef struct _GArrowZeroFillOptionsPrivate GArrowZeroFillOptionsPrivate; +struct _GArrowZeroFillOptionsPrivate +{ + gchar *padding; +}; + +G_DEFINE_TYPE_WITH_PRIVATE(GArrowZeroFillOptions, + garrow_zero_fill_options, + GARROW_TYPE_FUNCTION_OPTIONS) + +#define GARROW_ZERO_FILL_OPTIONS_GET_PRIVATE(object) \ + static_cast( \ + garrow_zero_fill_options_get_instance_private(GARROW_ZERO_FILL_OPTIONS(object))) + +static void +garrow_zero_fill_options_dispose(GObject *object) +{ + auto priv = GARROW_ZERO_FILL_OPTIONS_GET_PRIVATE(object); + if (priv->padding) { + g_free(priv->padding); + priv->padding = nullptr; + } + G_OBJECT_CLASS(garrow_zero_fill_options_parent_class)->dispose(object); +} + +static void +garrow_zero_fill_options_set_property(GObject *object, + guint prop_id, + const GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_zero_fill_options_get_raw(GARROW_ZERO_FILL_OPTIONS(object)); + auto priv = GARROW_ZERO_FILL_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_ZERO_FILL_OPTIONS_WIDTH: + options->width = g_value_get_int64(value); + break; + case PROP_ZERO_FILL_OPTIONS_PADDING: + { + const gchar *padding = g_value_get_string(value); + if (priv->padding) { + g_free(priv->padding); + } + priv->padding = g_strdup(padding); + options->padding = padding ? 
padding : ""; + } + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_zero_fill_options_get_property(GObject *object, + guint prop_id, + GValue *value, + GParamSpec *pspec) +{ + auto options = garrow_zero_fill_options_get_raw(GARROW_ZERO_FILL_OPTIONS(object)); + auto priv = GARROW_ZERO_FILL_OPTIONS_GET_PRIVATE(object); + + switch (prop_id) { + case PROP_ZERO_FILL_OPTIONS_WIDTH: + g_value_set_int64(value, options->width); + break; + case PROP_ZERO_FILL_OPTIONS_PADDING: + g_value_set_string(value, priv->padding ? priv->padding : options->padding.c_str()); + break; + default: + G_OBJECT_WARN_INVALID_PROPERTY_ID(object, prop_id, pspec); + break; + } +} + +static void +garrow_zero_fill_options_init(GArrowZeroFillOptions *object) +{ + auto priv = GARROW_ZERO_FILL_OPTIONS_GET_PRIVATE(object); + priv->padding = nullptr; + auto arrow_priv = GARROW_FUNCTION_OPTIONS_GET_PRIVATE(object); + arrow_priv->options = + static_cast(new arrow::compute::ZeroFillOptions()); + // Sync the private string with the C++ options + auto arrow_options = garrow_zero_fill_options_get_raw(GARROW_ZERO_FILL_OPTIONS(object)); + priv->padding = g_strdup(arrow_options->padding.c_str()); +} + +static void +garrow_zero_fill_options_class_init(GArrowZeroFillOptionsClass *klass) +{ + auto gobject_class = G_OBJECT_CLASS(klass); + + gobject_class->dispose = garrow_zero_fill_options_dispose; + gobject_class->set_property = garrow_zero_fill_options_set_property; + gobject_class->get_property = garrow_zero_fill_options_get_property; + + arrow::compute::ZeroFillOptions options; + + GParamSpec *spec; + /** + * GArrowZeroFillOptions:width: + * + * The desired string length. 
+ * + * Since: 23.0.0 + */ + spec = g_param_spec_int64("width", + "Width", + "The desired string length", + G_MININT64, + G_MAXINT64, + options.width, + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ZERO_FILL_OPTIONS_WIDTH, spec); + + /** + * GArrowZeroFillOptions:padding: + * + * What to pad the string with. Should be one codepoint (Unicode). + * + * Since: 23.0.0 + */ + spec = + g_param_spec_string("padding", + "Padding", + "What to pad the string with. Should be one codepoint (Unicode)", + options.padding.c_str(), + static_cast(G_PARAM_READWRITE)); + g_object_class_install_property(gobject_class, PROP_ZERO_FILL_OPTIONS_PADDING, spec); +} + +/** + * garrow_zero_fill_options_new: + * + * Returns: A newly created #GArrowZeroFillOptions. + * + * Since: 23.0.0 + */ +GArrowZeroFillOptions * +garrow_zero_fill_options_new(void) +{ + return GARROW_ZERO_FILL_OPTIONS(g_object_new(GARROW_TYPE_ZERO_FILL_OPTIONS, NULL)); +} + G_END_DECLS arrow::Result @@ -10921,6 +11074,11 @@ garrow_function_options_new_raw(const arrow::compute::FunctionOptions *arrow_opt static_cast(arrow_options); auto options = garrow_winsorize_options_new_raw(arrow_winsorize_options); return GARROW_FUNCTION_OPTIONS(options); + } else if (arrow_type_name == "ZeroFillOptions") { + const auto arrow_zero_fill_options = + static_cast(arrow_options); + auto options = garrow_zero_fill_options_new_raw(arrow_zero_fill_options); + return GARROW_FUNCTION_OPTIONS(options); } else { auto options = g_object_new(GARROW_TYPE_FUNCTION_OPTIONS, NULL); return GARROW_FUNCTION_OPTIONS(options); @@ -12075,3 +12233,21 @@ garrow_winsorize_options_get_raw(GArrowWinsorizeOptions *options) return static_cast( garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); } + +GArrowZeroFillOptions * +garrow_zero_fill_options_new_raw(const arrow::compute::ZeroFillOptions *arrow_options) +{ + return GARROW_ZERO_FILL_OPTIONS(g_object_new(GARROW_TYPE_ZERO_FILL_OPTIONS, + "width", + 
arrow_options->width, + "padding", + arrow_options->padding.c_str(), + NULL)); +} + +arrow::compute::ZeroFillOptions * +garrow_zero_fill_options_get_raw(GArrowZeroFillOptions *options) +{ + return static_cast( + garrow_function_options_get_raw(GARROW_FUNCTION_OPTIONS(options))); +} diff --git a/c_glib/arrow-glib/compute.h b/c_glib/arrow-glib/compute.h index 66184e689fe..5670c7e1d26 100644 --- a/c_glib/arrow-glib/compute.h +++ b/c_glib/arrow-glib/compute.h @@ -1789,4 +1789,20 @@ GARROW_AVAILABLE_IN_23_0 GArrowWinsorizeOptions * garrow_winsorize_options_new(void); +#define GARROW_TYPE_ZERO_FILL_OPTIONS (garrow_zero_fill_options_get_type()) +GARROW_AVAILABLE_IN_23_0 +G_DECLARE_DERIVABLE_TYPE(GArrowZeroFillOptions, + garrow_zero_fill_options, + GARROW, + ZERO_FILL_OPTIONS, + GArrowFunctionOptions) +struct _GArrowZeroFillOptionsClass +{ + GArrowFunctionOptionsClass parent_class; +}; + +GARROW_AVAILABLE_IN_23_0 +GArrowZeroFillOptions * +garrow_zero_fill_options_new(void); + G_END_DECLS diff --git a/c_glib/arrow-glib/compute.hpp b/c_glib/arrow-glib/compute.hpp index 1e95d1aa0d9..ccdc53382eb 100644 --- a/c_glib/arrow-glib/compute.hpp +++ b/c_glib/arrow-glib/compute.hpp @@ -344,3 +344,8 @@ GArrowWinsorizeOptions * garrow_winsorize_options_new_raw(const arrow::compute::WinsorizeOptions *arrow_options); arrow::compute::WinsorizeOptions * garrow_winsorize_options_get_raw(GArrowWinsorizeOptions *options); + +GArrowZeroFillOptions * +garrow_zero_fill_options_new_raw(const arrow::compute::ZeroFillOptions *arrow_options); +arrow::compute::ZeroFillOptions * +garrow_zero_fill_options_get_raw(GArrowZeroFillOptions *options); diff --git a/c_glib/test/test-zero-fill-options.rb b/c_glib/test/test-zero-fill-options.rb new file mode 100644 index 00000000000..6a302af9d15 --- /dev/null +++ b/c_glib/test/test-zero-fill-options.rb @@ -0,0 +1,49 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +class TestZeroFillOptions < Test::Unit::TestCase + include Helper::Buildable + + def setup + @options = Arrow::ZeroFillOptions.new + end + + def test_width_property + assert_equal(0, @options.width) + @options.width = 4 + assert_equal(4, @options.width) + end + + def test_padding_property + assert_equal("0", @options.padding) + @options.padding = "x" + assert_equal("x", @options.padding) + end + + def test_utf8_zero_fill_function + args = [ + Arrow::ArrayDatum.new(build_string_array(["1", "-2", "+3"])), + ] + @options.width = 4 + @options.padding = "0" + utf8_zero_fill_function = Arrow::Function.find("utf8_zero_fill") + result = utf8_zero_fill_function.execute(args, @options).value + expected = build_string_array(["0001", "-002", "+003"]) + assert_equal(expected, result) + end +end +