diff --git a/datafusion/functions-nested/src/cardinality.rs b/datafusion/functions-nested/src/cardinality.rs index 6db0011cd078..58a83feb6676 100644 --- a/datafusion/functions-nested/src/cardinality.rs +++ b/datafusion/functions-nested/src/cardinality.rs @@ -117,8 +117,7 @@ impl ScalarUDFImpl for Cardinality { } } -/// Cardinality SQL function -pub fn cardinality_inner(args: &[ArrayRef]) -> Result { +fn cardinality_inner(args: &[ArrayRef]) -> Result { let [array] = take_function_args("cardinality", args)?; match array.data_type() { Null => Ok(Arc::new(UInt64Array::from_value(0, array.len()))), diff --git a/datafusion/functions-nested/src/concat.rs b/datafusion/functions-nested/src/concat.rs index 9a12db525f95..a565006a2577 100644 --- a/datafusion/functions-nested/src/concat.rs +++ b/datafusion/functions-nested/src/concat.rs @@ -352,8 +352,7 @@ impl ScalarUDFImpl for ArrayConcat { } } -/// Array_concat/Array_cat SQL function -pub(crate) fn array_concat_inner(args: &[ArrayRef]) -> Result { +fn array_concat_inner(args: &[ArrayRef]) -> Result { if args.is_empty() { return exec_err!("array_concat expects at least one argument"); } @@ -453,8 +452,7 @@ fn concat_internal(args: &[ArrayRef]) -> Result { // Kernel functions -/// Array_append SQL function -pub(crate) fn array_append_inner(args: &[ArrayRef]) -> Result { +fn array_append_inner(args: &[ArrayRef]) -> Result { let [array, values] = take_function_args("array_append", args)?; match array.data_type() { DataType::Null => make_array_inner(&[Arc::clone(values)]), @@ -464,8 +462,7 @@ pub(crate) fn array_append_inner(args: &[ArrayRef]) -> Result { } } -/// Array_prepend SQL function -pub(crate) fn array_prepend_inner(args: &[ArrayRef]) -> Result { +fn array_prepend_inner(args: &[ArrayRef]) -> Result { let [values, array] = take_function_args("array_prepend", args)?; match array.data_type() { DataType::Null => make_array_inner(&[Arc::clone(values)]), diff --git a/datafusion/functions-nested/src/dimension.rs 
b/datafusion/functions-nested/src/dimension.rs index b0fc5bee5494..d0fa294fe42d 100644 --- a/datafusion/functions-nested/src/dimension.rs +++ b/datafusion/functions-nested/src/dimension.rs @@ -189,8 +189,7 @@ impl ScalarUDFImpl for ArrayNdims { } } -/// Array_dims SQL function -pub fn array_dims_inner(args: &[ArrayRef]) -> Result { +fn array_dims_inner(args: &[ArrayRef]) -> Result { let [array] = take_function_args("array_dims", args)?; let data: Vec<_> = match array.data_type() { List(_) => as_list_array(&array)? @@ -214,8 +213,7 @@ pub fn array_dims_inner(args: &[ArrayRef]) -> Result { Ok(Arc::new(result)) } -/// Array_ndims SQL function -pub fn array_ndims_inner(args: &[ArrayRef]) -> Result { +fn array_ndims_inner(args: &[ArrayRef]) -> Result { let [array] = take_function_args("array_ndims", args)?; fn general_list_ndims(array: &ArrayRef) -> Result { diff --git a/datafusion/functions-nested/src/distance.rs b/datafusion/functions-nested/src/distance.rs index e2e38fbd0d83..dc8eaa699f87 100644 --- a/datafusion/functions-nested/src/distance.rs +++ b/datafusion/functions-nested/src/distance.rs @@ -141,7 +141,7 @@ impl ScalarUDFImpl for ArrayDistance { } } -pub fn array_distance_inner(args: &[ArrayRef]) -> Result { +fn array_distance_inner(args: &[ArrayRef]) -> Result { let [array1, array2] = take_function_args("array_distance", args)?; match (array1.data_type(), array2.data_type()) { (List(_), List(_)) => general_array_distance::(args), diff --git a/datafusion/functions-nested/src/empty.rs b/datafusion/functions-nested/src/empty.rs index 27a90ab0442b..3f9077575205 100644 --- a/datafusion/functions-nested/src/empty.rs +++ b/datafusion/functions-nested/src/empty.rs @@ -110,8 +110,7 @@ impl ScalarUDFImpl for ArrayEmpty { } } -/// Array_empty SQL function -pub fn array_empty_inner(args: &[ArrayRef]) -> Result { +fn array_empty_inner(args: &[ArrayRef]) -> Result { let [array] = take_function_args("array_empty", args)?; match array.data_type() { List(_) => 
general_array_empty::(array), diff --git a/datafusion/functions-nested/src/except.rs b/datafusion/functions-nested/src/except.rs index d6982ab5a2ab..8b6bcaa0620c 100644 --- a/datafusion/functions-nested/src/except.rs +++ b/datafusion/functions-nested/src/except.rs @@ -126,8 +126,7 @@ impl ScalarUDFImpl for ArrayExcept { } } -/// Array_except SQL function -pub fn array_except_inner(args: &[ArrayRef]) -> Result { +fn array_except_inner(args: &[ArrayRef]) -> Result { let [array1, array2] = take_function_args("array_except", args)?; match (array1.data_type(), array2.data_type()) { diff --git a/datafusion/functions-nested/src/flatten.rs b/datafusion/functions-nested/src/flatten.rs index e84a942fab2a..76c4714de1af 100644 --- a/datafusion/functions-nested/src/flatten.rs +++ b/datafusion/functions-nested/src/flatten.rs @@ -130,8 +130,7 @@ impl ScalarUDFImpl for Flatten { } } -/// Flatten SQL function -pub fn flatten_inner(args: &[ArrayRef]) -> Result { +fn flatten_inner(args: &[ArrayRef]) -> Result { let [array] = take_function_args("flatten", args)?; match array.data_type() { diff --git a/datafusion/functions-nested/src/length.rs b/datafusion/functions-nested/src/length.rs index 060a978185e5..ceceee7bfa52 100644 --- a/datafusion/functions-nested/src/length.rs +++ b/datafusion/functions-nested/src/length.rs @@ -150,8 +150,7 @@ macro_rules! 
array_length_impl { }}; } -/// Array_length SQL function -pub fn array_length_inner(args: &[ArrayRef]) -> Result { +fn array_length_inner(args: &[ArrayRef]) -> Result { if args.len() != 1 && args.len() != 2 { return exec_err!("array_length expects one or two arguments"); } diff --git a/datafusion/functions-nested/src/min_max.rs b/datafusion/functions-nested/src/min_max.rs index 117cfbeaa2b2..1f3623ca243d 100644 --- a/datafusion/functions-nested/src/min_max.rs +++ b/datafusion/functions-nested/src/min_max.rs @@ -113,14 +113,7 @@ impl ScalarUDFImpl for ArrayMax { } } -/// array_max SQL function -/// -/// There is one argument for array_max as the array. -/// `array_max(array)` -/// -/// For example: -/// > array_max(\[1, 3, 2]) -> 3 -pub fn array_max_inner(args: &[ArrayRef]) -> Result { +fn array_max_inner(args: &[ArrayRef]) -> Result { let [array] = take_function_args("array_max", args)?; match array.data_type() { List(_) => array_min_max_helper(as_list_array(array)?, max_batch), @@ -202,7 +195,7 @@ impl ScalarUDFImpl for ArrayMin { } } -pub fn array_min_inner(args: &[ArrayRef]) -> Result { +fn array_min_inner(args: &[ArrayRef]) -> Result { let [array] = take_function_args("array_min", args)?; match array.data_type() { List(_) => array_min_max_helper(as_list_array(array)?, min_batch), diff --git a/datafusion/functions-nested/src/position.rs b/datafusion/functions-nested/src/position.rs index b390bf3c4226..14f2ed3313d4 100644 --- a/datafusion/functions-nested/src/position.rs +++ b/datafusion/functions-nested/src/position.rs @@ -141,8 +141,7 @@ impl ScalarUDFImpl for ArrayPosition { } } -/// Array_position SQL function -pub fn array_position_inner(args: &[ArrayRef]) -> Result { +fn array_position_inner(args: &[ArrayRef]) -> Result { if args.len() < 2 || args.len() > 3 { return exec_err!("array_position expects two or three arguments"); } @@ -152,6 +151,7 @@ pub fn array_position_inner(args: &[ArrayRef]) -> Result { array_type => exec_err!("array_position does not 
support type '{array_type}'."), } } + fn general_position_dispatch(args: &[ArrayRef]) -> Result { let list_array = as_generic_list_array::(&args[0])?; let element_array = &args[1]; @@ -292,8 +292,7 @@ impl ScalarUDFImpl for ArrayPositions { } } -/// Array_positions SQL function -pub fn array_positions_inner(args: &[ArrayRef]) -> Result { +fn array_positions_inner(args: &[ArrayRef]) -> Result { let [array, element] = take_function_args("array_positions", args)?; match &array.data_type() { diff --git a/datafusion/functions-nested/src/remove.rs b/datafusion/functions-nested/src/remove.rs index e1ebc9cda0bf..46111b0c2d12 100644 --- a/datafusion/functions-nested/src/remove.rs +++ b/datafusion/functions-nested/src/remove.rs @@ -284,24 +284,21 @@ impl ScalarUDFImpl for ArrayRemoveAll { } } -/// Array_remove SQL function -pub fn array_remove_inner(args: &[ArrayRef]) -> Result { +fn array_remove_inner(args: &[ArrayRef]) -> Result { let [array, element] = take_function_args("array_remove", args)?; let arr_n = vec![1; array.len()]; array_remove_internal(array, element, &arr_n) } -/// Array_remove_n SQL function -pub fn array_remove_n_inner(args: &[ArrayRef]) -> Result { +fn array_remove_n_inner(args: &[ArrayRef]) -> Result { let [array, element, max] = take_function_args("array_remove_n", args)?; let arr_n = as_int64_array(max)?.values().to_vec(); array_remove_internal(array, element, &arr_n) } -/// Array_remove_all SQL function -pub fn array_remove_all_inner(args: &[ArrayRef]) -> Result { +fn array_remove_all_inner(args: &[ArrayRef]) -> Result { let [array, element] = take_function_args("array_remove_all", args)?; let arr_n = vec![i64::MAX; array.len()]; diff --git a/datafusion/functions-nested/src/repeat.rs b/datafusion/functions-nested/src/repeat.rs index ed66b9e39676..d978081e490c 100644 --- a/datafusion/functions-nested/src/repeat.rs +++ b/datafusion/functions-nested/src/repeat.rs @@ -148,8 +148,7 @@ impl ScalarUDFImpl for ArrayRepeat { } } -/// Array_repeat SQL function 
-pub fn array_repeat_inner(args: &[ArrayRef]) -> Result { +fn array_repeat_inner(args: &[ArrayRef]) -> Result { let element = &args[0]; let count_array = &args[1]; diff --git a/datafusion/functions-nested/src/replace.rs b/datafusion/functions-nested/src/replace.rs index 079c28175d45..53182b58988f 100644 --- a/datafusion/functions-nested/src/replace.rs +++ b/datafusion/functions-nested/src/replace.rs @@ -418,7 +418,7 @@ fn general_replace( )?)) } -pub(crate) fn array_replace_inner(args: &[ArrayRef]) -> Result { +fn array_replace_inner(args: &[ArrayRef]) -> Result { let [array, from, to] = take_function_args("array_replace", args)?; // replace at most one occurrence for each element @@ -437,7 +437,7 @@ pub(crate) fn array_replace_inner(args: &[ArrayRef]) -> Result { } } -pub(crate) fn array_replace_n_inner(args: &[ArrayRef]) -> Result { +fn array_replace_n_inner(args: &[ArrayRef]) -> Result { let [array, from, to, max] = take_function_args("array_replace_n", args)?; // replace the specified number of occurrences @@ -458,7 +458,7 @@ pub(crate) fn array_replace_n_inner(args: &[ArrayRef]) -> Result { } } -pub(crate) fn array_replace_all_inner(args: &[ArrayRef]) -> Result { +fn array_replace_all_inner(args: &[ArrayRef]) -> Result { let [array, from, to] = take_function_args("array_replace_all", args)?; // replace all occurrences (up to "i64::MAX") diff --git a/datafusion/functions-nested/src/resize.rs b/datafusion/functions-nested/src/resize.rs index 09f67a75fd56..c76f7970d206 100644 --- a/datafusion/functions-nested/src/resize.rs +++ b/datafusion/functions-nested/src/resize.rs @@ -152,8 +152,7 @@ impl ScalarUDFImpl for ArrayResize { } } -/// array_resize SQL function -pub(crate) fn array_resize_inner(arg: &[ArrayRef]) -> Result { +fn array_resize_inner(arg: &[ArrayRef]) -> Result { if arg.len() < 2 || arg.len() > 3 { return exec_err!("array_resize needs two or three arguments"); } diff --git a/datafusion/functions-nested/src/set_ops.rs 
b/datafusion/functions-nested/src/set_ops.rs index 71a42531f99e..e3531d1cf8ee 100644 --- a/datafusion/functions-nested/src/set_ops.rs +++ b/datafusion/functions-nested/src/set_ops.rs @@ -501,13 +501,11 @@ fn general_set_op( } } -/// Array_union SQL function fn array_union_inner(args: &[ArrayRef]) -> Result { let [array1, array2] = take_function_args("array_union", args)?; general_set_op(array1, array2, SetOp::Union) } -/// array_intersect SQL function fn array_intersect_inner(args: &[ArrayRef]) -> Result { let [array1, array2] = take_function_args("array_intersect", args)?; general_set_op(array1, array2, SetOp::Intersect) diff --git a/datafusion/functions-nested/src/sort.rs b/datafusion/functions-nested/src/sort.rs index 4a7aa31c755b..8cfc8a297b7b 100644 --- a/datafusion/functions-nested/src/sort.rs +++ b/datafusion/functions-nested/src/sort.rs @@ -164,8 +164,7 @@ impl ScalarUDFImpl for ArraySort { } } -/// Array_sort SQL function -pub fn array_sort_inner(args: &[ArrayRef]) -> Result { +fn array_sort_inner(args: &[ArrayRef]) -> Result { if args.is_empty() || args.len() > 3 { return exec_err!("array_sort expects one to three arguments"); } @@ -218,8 +217,7 @@ pub fn array_sort_inner(args: &[ArrayRef]) -> Result { } } -/// Array_sort SQL function -pub fn array_sort_generic( +fn array_sort_generic( list_array: &GenericListArray, field: &FieldRef, sort_options: Option, diff --git a/datafusion/functions-nested/src/string.rs b/datafusion/functions-nested/src/string.rs index b87ac0f8c41d..e19025cf673e 100644 --- a/datafusion/functions-nested/src/string.rs +++ b/datafusion/functions-nested/src/string.rs @@ -329,8 +329,7 @@ impl ScalarUDFImpl for StringToArray { } } -/// Array_to_string SQL function -pub(super) fn array_to_string_inner(args: &[ArrayRef]) -> Result { +fn array_to_string_inner(args: &[ArrayRef]) -> Result { if args.len() < 2 || args.len() > 3 { return exec_err!("array_to_string expects two or three arguments"); } diff --git 
a/datafusion/functions/src/core/overlay.rs b/datafusion/functions/src/core/overlay.rs index 165bc571afe0..0b3bb2ce7413 100644 --- a/datafusion/functions/src/core/overlay.rs +++ b/datafusion/functions/src/core/overlay.rs @@ -201,7 +201,7 @@ fn overlay(args: &[ArrayRef]) -> Result { } } -pub fn string_overlay(args: &[ArrayRef]) -> Result { +fn string_overlay(args: &[ArrayRef]) -> Result { match args.len() { 3 => { let string_array = as_generic_string_array::(&args[0])?; @@ -227,7 +227,7 @@ pub fn string_overlay(args: &[ArrayRef]) -> Result } } -pub fn string_view_overlay(args: &[ArrayRef]) -> Result { +fn string_view_overlay(args: &[ArrayRef]) -> Result { match args.len() { 3 => { let string_array = as_string_view_array(&args[0])?; diff --git a/datafusion/functions/src/core/version.rs b/datafusion/functions/src/core/version.rs index ef3c5aafa480..006da4b132ad 100644 --- a/datafusion/functions/src/core/version.rs +++ b/datafusion/functions/src/core/version.rs @@ -53,7 +53,7 @@ impl Default for VersionFunc { impl VersionFunc { pub fn new() -> Self { Self { - signature: Signature::exact(vec![], Volatility::Immutable), + signature: Signature::nullary(Volatility::Immutable), } } } diff --git a/datafusion/functions/src/math/iszero.rs b/datafusion/functions/src/math/iszero.rs index 68cd3aca28fd..f053256a4870 100644 --- a/datafusion/functions/src/math/iszero.rs +++ b/datafusion/functions/src/math/iszero.rs @@ -96,7 +96,7 @@ impl ScalarUDFImpl for IsZeroFunc { } /// Iszero SQL function -pub fn iszero(args: &[ArrayRef]) -> Result { +fn iszero(args: &[ArrayRef]) -> Result { match args[0].data_type() { Float64 => Ok(Arc::new(BooleanArray::from_unary( args[0].as_primitive::(), diff --git a/datafusion/functions/src/math/round.rs b/datafusion/functions/src/math/round.rs index 837f0be43240..5f9b1eb6ad58 100644 --- a/datafusion/functions/src/math/round.rs +++ b/datafusion/functions/src/math/round.rs @@ -124,7 +124,7 @@ impl ScalarUDFImpl for RoundFunc { } /// Round SQL function -pub 
fn round(args: &[ArrayRef]) -> Result { +fn round(args: &[ArrayRef]) -> Result { if args.len() != 1 && args.len() != 2 { return exec_err!( "round function requires one or two arguments, got {}", diff --git a/datafusion/functions/src/math/signum.rs b/datafusion/functions/src/math/signum.rs index bbe6178f39b7..2e616fe0fe35 100644 --- a/datafusion/functions/src/math/signum.rs +++ b/datafusion/functions/src/math/signum.rs @@ -107,7 +107,7 @@ impl ScalarUDFImpl for SignumFunc { } /// signum SQL function -pub fn signum(args: &[ArrayRef]) -> Result { +fn signum(args: &[ArrayRef]) -> Result { match args[0].data_type() { Float64 => Ok(Arc::new( args[0] diff --git a/datafusion/functions/src/regex/regexpcount.rs b/datafusion/functions/src/regex/regexpcount.rs index e61b3f764dc6..ae08ca3e920c 100644 --- a/datafusion/functions/src/regex/regexpcount.rs +++ b/datafusion/functions/src/regex/regexpcount.rs @@ -183,7 +183,7 @@ pub fn regexp_count_func(args: &[ArrayRef]) -> Result { /// /// # Errors /// Returns an error if the input arrays have mismatched lengths or if the regular expression fails to compile. -pub fn regexp_count( +fn regexp_count( values: &dyn Array, regex_array: &dyn Datum, start_array: Option<&dyn Datum>, diff --git a/datafusion/functions/src/regex/regexpinstr.rs b/datafusion/functions/src/regex/regexpinstr.rs index 10fddda1a35b..011564866584 100644 --- a/datafusion/functions/src/regex/regexpinstr.rs +++ b/datafusion/functions/src/regex/regexpinstr.rs @@ -205,7 +205,7 @@ pub fn regexp_instr_func(args: &[ArrayRef]) -> Result { /// /// # Errors /// Returns an error if the input arrays have mismatched lengths or if the regular expression fails to compile. 
-pub fn regexp_instr( +fn regexp_instr( values: &dyn Array, regex_array: &dyn Datum, start_array: Option<&dyn Datum>, @@ -233,48 +233,48 @@ pub fn regexp_instr( match (values.data_type(), regex_array.data_type(), flags_array) { (Utf8, Utf8, None) => regexp_instr_inner( - values.as_string::(), - regex_array.as_string::(), + &values.as_string::(), + &regex_array.as_string::(), start_array.map(|start| start.as_primitive::()), nth_array.map(|nth| nth.as_primitive::()), None, subexpr_array.map(|subexpr| subexpr.as_primitive::()), ), (Utf8, Utf8, Some(flags_array)) if *flags_array.data_type() == Utf8 => regexp_instr_inner( - values.as_string::(), - regex_array.as_string::(), + &values.as_string::(), + &regex_array.as_string::(), start_array.map(|start| start.as_primitive::()), nth_array.map(|nth| nth.as_primitive::()), Some(flags_array.as_string::()), subexpr_array.map(|subexpr| subexpr.as_primitive::()), ), (LargeUtf8, LargeUtf8, None) => regexp_instr_inner( - values.as_string::(), - regex_array.as_string::(), + &values.as_string::(), + &regex_array.as_string::(), start_array.map(|start| start.as_primitive::()), nth_array.map(|nth| nth.as_primitive::()), None, subexpr_array.map(|subexpr| subexpr.as_primitive::()), ), (LargeUtf8, LargeUtf8, Some(flags_array)) if *flags_array.data_type() == LargeUtf8 => regexp_instr_inner( - values.as_string::(), - regex_array.as_string::(), + &values.as_string::(), + &regex_array.as_string::(), start_array.map(|start| start.as_primitive::()), nth_array.map(|nth| nth.as_primitive::()), Some(flags_array.as_string::()), subexpr_array.map(|subexpr| subexpr.as_primitive::()), ), (Utf8View, Utf8View, None) => regexp_instr_inner( - values.as_string_view(), - regex_array.as_string_view(), + &values.as_string_view(), + &regex_array.as_string_view(), start_array.map(|start| start.as_primitive::()), nth_array.map(|nth| nth.as_primitive::()), None, subexpr_array.map(|subexpr| subexpr.as_primitive::()), ), (Utf8View, Utf8View, Some(flags_array)) if 
*flags_array.data_type() == Utf8View => regexp_instr_inner( - values.as_string_view(), - regex_array.as_string_view(), + &values.as_string_view(), + &regex_array.as_string_view(), start_array.map(|start| start.as_primitive::()), nth_array.map(|nth| nth.as_primitive::()), Some(flags_array.as_string_view()), @@ -287,10 +287,9 @@ pub fn regexp_instr( } #[allow(clippy::too_many_arguments)] -#[expect(clippy::needless_pass_by_value)] -pub fn regexp_instr_inner<'a, S>( - values: S, - regex_array: S, +fn regexp_instr_inner<'a, S>( + values: &S, + regex_array: &S, start_array: Option<&Int64Array>, nth_array: Option<&Int64Array>, flags_array: Option, diff --git a/datafusion/functions/src/regex/regexpreplace.rs b/datafusion/functions/src/regex/regexpreplace.rs index f9866427139f..29da195c7a92 100644 --- a/datafusion/functions/src/regex/regexpreplace.rs +++ b/datafusion/functions/src/regex/regexpreplace.rs @@ -76,7 +76,7 @@ Additional examples can be found [here](https://github.com/apache/datafusion/blo argument( name = "flags", description = r#"Optional regular expression flags that control the behavior of the regular expression. The following flags are supported: -- **g**: (global) Search globally and don't return after the first match +- **g**: (global) Search globally and don't return after the first match - **i**: case-insensitive: letters match both upper and lower case - **m**: multi-line mode: ^ and $ match begin/end of line - **s**: allow . to match \n @@ -382,49 +382,32 @@ where } } -#[expect(clippy::needless_pass_by_value)] -fn _regexp_replace_early_abort( - input_array: T, - sz: usize, -) -> Result { - // Mimicking the existing behavior of regexp_replace, if any of the scalar arguments - // are actually null, then the result will be an array of the same size as the first argument with all nulls. - // - // Also acts like an early abort mechanism when the input array is empty. 
- Ok(new_null_array(input_array.data_type(), sz)) -} - /// Get the first argument from the given string array. /// /// Note: If the array is empty or the first argument is null, -/// then calls the given early abort function. +/// then aborts early. macro_rules! fetch_string_arg { - ($ARG:expr, $NAME:expr, $EARLY_ABORT:ident, $ARRAY_SIZE:expr) => {{ + ($ARG:expr, $NAME:expr, $ARRAY_SIZE:expr) => {{ let string_array_type = ($ARG).data_type(); match string_array_type { + dt if $ARG.len() == 0 || $ARG.is_null(0) => { + // Mimicking the existing behavior of regexp_replace, if any of the scalar arguments + // are actually null, then the result will be an array of the same size as the first argument with all nulls. + // + // Also acts like an early abort mechanism when the input array is empty. + return Ok(new_null_array(dt, $ARRAY_SIZE)); + } DataType::Utf8 => { let array = as_string_array($ARG)?; - if array.len() == 0 || array.is_null(0) { - return $EARLY_ABORT(array, $ARRAY_SIZE); - } else { - array.value(0) - } + array.value(0) } DataType::LargeUtf8 => { let array = as_large_string_array($ARG)?; - if array.len() == 0 || array.is_null(0) { - return $EARLY_ABORT(array, $ARRAY_SIZE); - } else { - array.value(0) - } + array.value(0) } DataType::Utf8View => { let array = as_string_view_array($ARG)?; - if array.len() == 0 || array.is_null(0) { - return $EARLY_ABORT(array, $ARRAY_SIZE); - } else { - array.value(0) - } + array.value(0) } _ => unreachable!( "Invalid data type for regexp_replace: {}", @@ -443,17 +426,11 @@ fn _regexp_replace_static_pattern_replace( args: &[ArrayRef], ) -> Result { let array_size = args[0].len(); - let pattern = - fetch_string_arg!(&args[1], "pattern", _regexp_replace_early_abort, array_size); - let replacement = fetch_string_arg!( - &args[2], - "replacement", - _regexp_replace_early_abort, - array_size - ); + let pattern = fetch_string_arg!(&args[1], "pattern", array_size); + let replacement = fetch_string_arg!(&args[2], "replacement", 
array_size); let flags = match args.len() { 3 => None, - 4 => Some(fetch_string_arg!(&args[3], "flags", _regexp_replace_early_abort, array_size)), + 4 => Some(fetch_string_arg!(&args[3], "flags", array_size)), other => { return exec_err!( "regexp_replace was called with {other} arguments. It requires at least 3 and at most 4." @@ -538,7 +515,7 @@ fn _regexp_replace_static_pattern_replace( /// Determine which implementation of the regexp_replace to use based /// on the given set of arguments. -pub fn specialize_regexp_replace( +fn specialize_regexp_replace( args: &[ColumnarValue], ) -> Result { // This will serve as a dispatch table where we can diff --git a/datafusion/functions/src/string/chr.rs b/datafusion/functions/src/string/chr.rs index 4d2beafbae53..8706c43214ea 100644 --- a/datafusion/functions/src/string/chr.rs +++ b/datafusion/functions/src/string/chr.rs @@ -33,7 +33,7 @@ use datafusion_macros::user_doc; /// Returns the character with the given code. /// chr(65) = 'A' -pub fn chr(args: &[ArrayRef]) -> Result { +fn chr(args: &[ArrayRef]) -> Result { let integer_array = as_int64_array(&args[0])?; let mut builder = GenericStringBuilder::::with_capacity( diff --git a/datafusion/functions/src/string/common.rs b/datafusion/functions/src/string/common.rs index 6bce289edb4c..34f1b6232d41 100644 --- a/datafusion/functions/src/string/common.rs +++ b/datafusion/functions/src/string/common.rs @@ -32,6 +32,7 @@ use datafusion_common::Result; use datafusion_common::{exec_err, ScalarValue}; use datafusion_expr::ColumnarValue; +#[derive(Copy, Clone)] pub(crate) enum TrimType { Left, Right, @@ -48,7 +49,6 @@ impl Display for TrimType { } } -#[expect(clippy::needless_pass_by_value)] pub(crate) fn general_trim( args: &[ArrayRef], trim_type: TrimType, diff --git a/datafusion/functions/src/string/concat.rs b/datafusion/functions/src/string/concat.rs index a93e70e714e8..3b53660463d4 100644 --- a/datafusion/functions/src/string/concat.rs +++ 
b/datafusion/functions/src/string/concat.rs @@ -287,7 +287,7 @@ impl ScalarUDFImpl for ConcatFunc { } } -pub fn simplify_concat(args: Vec) -> Result { +pub(crate) fn simplify_concat(args: Vec) -> Result { let mut new_args = Vec::with_capacity(args.len()); let mut contiguous_scalar = "".to_string(); diff --git a/datafusion/functions/src/string/split_part.rs b/datafusion/functions/src/string/split_part.rs index b32eba990d76..c8b293f29811 100644 --- a/datafusion/functions/src/string/split_part.rs +++ b/datafusion/functions/src/string/split_part.rs @@ -123,64 +123,64 @@ impl ScalarUDFImpl for SplitPartFunc { let result = match (args[0].data_type(), args[1].data_type()) { (DataType::Utf8View, DataType::Utf8View) => { split_part_impl::<&StringViewArray, &StringViewArray, i32>( - args[0].as_string_view(), - args[1].as_string_view(), + &args[0].as_string_view(), + &args[1].as_string_view(), n_array, ) } (DataType::Utf8View, DataType::Utf8) => { split_part_impl::<&StringViewArray, &GenericStringArray, i32>( - args[0].as_string_view(), - args[1].as_string::(), + &args[0].as_string_view(), + &args[1].as_string::(), n_array, ) } (DataType::Utf8View, DataType::LargeUtf8) => { split_part_impl::<&StringViewArray, &GenericStringArray, i32>( - args[0].as_string_view(), - args[1].as_string::(), + &args[0].as_string_view(), + &args[1].as_string::(), n_array, ) } (DataType::Utf8, DataType::Utf8View) => { split_part_impl::<&GenericStringArray, &StringViewArray, i32>( - args[0].as_string::(), - args[1].as_string_view(), + &args[0].as_string::(), + &args[1].as_string_view(), n_array, ) } (DataType::LargeUtf8, DataType::Utf8View) => { split_part_impl::<&GenericStringArray, &StringViewArray, i64>( - args[0].as_string::(), - args[1].as_string_view(), + &args[0].as_string::(), + &args[1].as_string_view(), n_array, ) } (DataType::Utf8, DataType::Utf8) => { split_part_impl::<&GenericStringArray, &GenericStringArray, i32>( - args[0].as_string::(), - args[1].as_string::(), + 
&args[0].as_string::(), + &args[1].as_string::(), n_array, ) } (DataType::LargeUtf8, DataType::LargeUtf8) => { split_part_impl::<&GenericStringArray, &GenericStringArray, i64>( - args[0].as_string::(), - args[1].as_string::(), + &args[0].as_string::(), + &args[1].as_string::(), n_array, ) } (DataType::Utf8, DataType::LargeUtf8) => { split_part_impl::<&GenericStringArray, &GenericStringArray, i32>( - args[0].as_string::(), - args[1].as_string::(), + &args[0].as_string::(), + &args[1].as_string::(), n_array, ) } (DataType::LargeUtf8, DataType::Utf8) => { split_part_impl::<&GenericStringArray, &GenericStringArray, i64>( - args[0].as_string::(), - args[1].as_string::(), + &args[0].as_string::(), + &args[1].as_string::(), n_array, ) } @@ -200,11 +200,9 @@ impl ScalarUDFImpl for SplitPartFunc { } } -/// impl -#[expect(clippy::needless_pass_by_value)] -pub fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>( - string_array: StringArrType, - delimiter_array: DelimiterArrType, +fn split_part_impl<'a, StringArrType, DelimiterArrType, StringArrayLen>( + string_array: &StringArrType, + delimiter_array: &DelimiterArrType, n_array: &Int64Array, ) -> Result where diff --git a/datafusion/functions/src/string/to_hex.rs b/datafusion/functions/src/string/to_hex.rs index 26be0066c2df..4000f3bb3be2 100644 --- a/datafusion/functions/src/string/to_hex.rs +++ b/datafusion/functions/src/string/to_hex.rs @@ -39,7 +39,7 @@ use datafusion_macros::user_doc; /// Converts the number to its equivalent hexadecimal representation. 
/// to_hex(2147483647) = '7fffffff' -pub fn to_hex(args: &[ArrayRef]) -> Result +fn to_hex(args: &[ArrayRef]) -> Result where T::Native: std::fmt::LowerHex, { diff --git a/datafusion/functions/src/unicode/find_in_set.rs b/datafusion/functions/src/unicode/find_in_set.rs index 649bd631d142..e83e3d99a329 100644 --- a/datafusion/functions/src/unicode/find_in_set.rs +++ b/datafusion/functions/src/unicode/find_in_set.rs @@ -263,10 +263,7 @@ fn find_in_set(str: &ArrayRef, str_list: &ArrayRef) -> Result { } } -pub fn find_in_set_general<'a, T, V>( - string_array: V, - str_list_array: V, -) -> Result +fn find_in_set_general<'a, T, V>(string_array: V, str_list_array: V) -> Result where T: ArrowPrimitiveType, T::Native: OffsetSizeTrait, diff --git a/datafusion/functions/src/unicode/left.rs b/datafusion/functions/src/unicode/left.rs index fceb2a131a2b..ec7ec456ab8b 100644 --- a/datafusion/functions/src/unicode/left.rs +++ b/datafusion/functions/src/unicode/left.rs @@ -122,7 +122,7 @@ impl ScalarUDFImpl for LeftFunc { /// Returns first n characters in the string, or when n is negative, returns all but last |n| characters. /// left('abcde', 2) = 'ab' /// The implementation uses UTF-8 code points as characters -pub fn left(args: &[ArrayRef]) -> Result { +fn left(args: &[ArrayRef]) -> Result { let n_array = as_int64_array(&args[1])?; if args[0].data_type() == &DataType::Utf8View { diff --git a/datafusion/functions/src/unicode/lpad.rs b/datafusion/functions/src/unicode/lpad.rs index b69af247b9ad..6940459b177a 100644 --- a/datafusion/functions/src/unicode/lpad.rs +++ b/datafusion/functions/src/unicode/lpad.rs @@ -129,7 +129,7 @@ impl ScalarUDFImpl for LPadFunc { /// Extends the string to length 'length' by prepending the characters fill (a space by default). /// If the string is already longer than length then it is truncated (on the right). 
/// lpad('hi', 5, 'xy') = 'xyxhi' -pub fn lpad(args: &[ArrayRef]) -> Result { +fn lpad(args: &[ArrayRef]) -> Result { if args.len() <= 1 || args.len() > 3 { return exec_err!( "lpad was called with {} arguments. It requires at least 2 and at most 3.", diff --git a/datafusion/functions/src/unicode/reverse.rs b/datafusion/functions/src/unicode/reverse.rs index b5f870d54bba..56f6048d6b6e 100644 --- a/datafusion/functions/src/unicode/reverse.rs +++ b/datafusion/functions/src/unicode/reverse.rs @@ -106,7 +106,7 @@ impl ScalarUDFImpl for ReverseFunc { /// Reverses the order of the characters in the string `reverse('abcde') = 'edcba'`. /// The implementation uses UTF-8 code points as characters -pub fn reverse(args: &[ArrayRef]) -> Result { +fn reverse(args: &[ArrayRef]) -> Result { if args[0].data_type() == &Utf8View { reverse_impl::(&args[0].as_string_view()) } else { diff --git a/datafusion/functions/src/unicode/right.rs b/datafusion/functions/src/unicode/right.rs index c492f606e9c5..670586e11b4f 100644 --- a/datafusion/functions/src/unicode/right.rs +++ b/datafusion/functions/src/unicode/right.rs @@ -122,7 +122,7 @@ impl ScalarUDFImpl for RightFunc { /// Returns last n characters in the string, or when n is negative, returns all but first |n| characters. 
/// right('abcde', 2) = 'de' /// The implementation uses UTF-8 code points as characters -pub fn right(args: &[ArrayRef]) -> Result { +fn right(args: &[ArrayRef]) -> Result { let n_array = as_int64_array(&args[1])?; if args[0].data_type() == &DataType::Utf8View { // string_view_right(args) diff --git a/datafusion/functions/src/unicode/rpad.rs b/datafusion/functions/src/unicode/rpad.rs index d644df9874eb..a7e951051d7c 100644 --- a/datafusion/functions/src/unicode/rpad.rs +++ b/datafusion/functions/src/unicode/rpad.rs @@ -145,7 +145,7 @@ impl ScalarUDFImpl for RPadFunc { } } -pub fn rpad( +fn rpad( args: &[ArrayRef], ) -> Result { if args.len() < 2 || args.len() > 3 { @@ -205,7 +205,7 @@ pub fn rpad( /// Extends the string to length 'length' by appending the characters fill (a space by default). If the string is already longer than length then it is truncated. /// rpad('hi', 5, 'xy') = 'hixyx' -pub fn rpad_impl<'a, StringArrType, FillArrType, StringArrayLen>( +fn rpad_impl<'a, StringArrType, FillArrType, StringArrayLen>( string_array: &StringArrType, length_array: &Int64Array, fill_array: Option, diff --git a/datafusion/functions/src/unicode/substr.rs b/datafusion/functions/src/unicode/substr.rs index 6eee49d49093..27b194ca2b99 100644 --- a/datafusion/functions/src/unicode/substr.rs +++ b/datafusion/functions/src/unicode/substr.rs @@ -141,7 +141,7 @@ impl ScalarUDFImpl for SubstrFunc { /// substr('alphabet', 3) = 'phabet' /// substr('alphabet', 3, 2) = 'ph' /// The implementation uses UTF-8 code points as characters -pub fn substr(args: &[ArrayRef]) -> Result { +fn substr(args: &[ArrayRef]) -> Result { match args[0].data_type() { DataType::Utf8 => { let string_array = args[0].as_string::(); diff --git a/datafusion/functions/src/unicode/substrindex.rs b/datafusion/functions/src/unicode/substrindex.rs index a7ee7388f901..bf5978720692 100644 --- a/datafusion/functions/src/unicode/substrindex.rs +++ b/datafusion/functions/src/unicode/substrindex.rs @@ -169,7 +169,7 @@ fn 
substr_index(args: &[ArrayRef]) -> Result { } } -pub fn substr_index_general< +fn substr_index_general< 'a, T: ArrowPrimitiveType, V: ArrayAccessor,