From 0d3f04be7c42f86c6d1efdbed371b9fce76719ad Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Mon, 29 Sep 2025 16:16:14 +0900 Subject: [PATCH 1/2] fix: correct edge case where haystack with null element returns false instead of null --- datafusion/functions-nested/src/array_has.rs | 61 ++++++++++++++++++-- 1 file changed, 56 insertions(+), 5 deletions(-) diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index f77cc5dd7b39..d3aab7a64e52 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -333,7 +333,7 @@ fn array_has_dispatch_for_scalar( let is_nested = values.data_type().is_nested(); // If first argument is empty list (second argument is non-null), return false // i.e. array_has([], non-null element) -> false - if values.is_empty() { + if haystack.len() == 0 { return Ok(Arc::new(BooleanArray::new( BooleanBuffer::new_unset(haystack.len()), None, @@ -658,11 +658,22 @@ fn general_array_has_all_and_any_kernel( #[cfg(test)] mod tests { - use arrow::array::create_array; - use datafusion_common::utils::SingleRowListArrayBuilder; + use std::sync::Arc; + + use arrow::{ + array::{ + create_array, Array, ArrayRef, AsArray, BooleanArray, Int32Array, ListArray, + }, + buffer::OffsetBuffer, + datatypes::{DataType, Field}, + }; + use datafusion_common::{ + config::ConfigOptions, utils::SingleRowListArrayBuilder, DataFusionError, + ScalarValue, + }; use datafusion_expr::{ - col, execution_props::ExecutionProps, lit, simplify::ExprSimplifyResult, Expr, - ScalarUDFImpl, + col, execution_props::ExecutionProps, lit, simplify::ExprSimplifyResult, + ColumnarValue, Expr, ScalarFunctionArgs, ScalarUDFImpl, }; use crate::expr_fn::make_array; @@ -737,4 +748,44 @@ mod tests { assert_eq!(args, vec![col("c1"), col("c2")],); } + + #[test] + fn test_array_has_list_empty_child() -> Result<(), DataFusionError> { + let haystack_field = Arc::new(Field::new_list( + "haystack", + 
Field::new_list("", Field::new("", DataType::Int32, true), true), + true, + )); + let needle_field = Arc::new(Field::new("needle", DataType::Int32, true)); + let return_field = Arc::new(Field::new_list( + "return", + Field::new("", DataType::Boolean, true), + true, + )); + + let haystack = ListArray::new( + Field::new_list_field(DataType::Int32, true).into(), + OffsetBuffer::new(vec![0, 0].into()), + Arc::new(Int32Array::from(Vec::<i32>::new())) as ArrayRef, + Some(vec![false].into()), + ); + + let haystack = ColumnarValue::Array(Arc::new(haystack)); + let needle = ColumnarValue::Scalar(ScalarValue::Int32(Some(1))); + + let result = ArrayHas::new().invoke_with_args(ScalarFunctionArgs { + args: vec![haystack, needle], + arg_fields: vec![haystack_field, needle_field], + number_rows: 1, + return_field, + config_options: Arc::new(ConfigOptions::default()), + })?; + + let output = result.into_array(1)?; + let output = output.as_boolean(); + assert_eq!(output.len(), 1); + assert!(output.is_null(0)); + + Ok(()) + } } From 2db977077a350bd6cd25255ca7fbdc32e9e8ad0c Mon Sep 17 00:00:00 2001 From: Jefffrey Date: Mon, 29 Sep 2025 16:29:43 +0900 Subject: [PATCH 2/2] clippy --- datafusion/functions-nested/src/array_has.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/datafusion/functions-nested/src/array_has.rs b/datafusion/functions-nested/src/array_has.rs index d3aab7a64e52..43aa5f4ae60d 100644 --- a/datafusion/functions-nested/src/array_has.rs +++ b/datafusion/functions-nested/src/array_has.rs @@ -661,9 +661,7 @@ mod tests { use std::sync::Arc; use arrow::{ - array::{ - create_array, Array, ArrayRef, AsArray, BooleanArray, Int32Array, ListArray, - }, + array::{create_array, Array, ArrayRef, AsArray, Int32Array, ListArray}, buffer::OffsetBuffer, datatypes::{DataType, Field}, };