diff --git a/datafusion/functions-nested/src/make_array.rs b/datafusion/functions-nested/src/make_array.rs index 32af5df2c6019..427ea06daebb0 100644 --- a/datafusion/functions-nested/src/make_array.rs +++ b/datafusion/functions-nested/src/make_array.rs @@ -198,13 +198,26 @@ pub fn array_array( return plan_err!("Array requires at least one argument"); } + // Widen the element type so inputs that differ only in nested-field + // nullability (e.g. a struct field that is non-nullable in one argument and + // nullable in another) can be combined. Without this, `MutableArrayData` + // below panics with "Arrays with inconsistent types". See issue #22366. + let data_type = merge_nullability( + std::iter::once(data_type.clone()) + .chain(args.iter().map(|arg| arg.data_type().clone())), + ) + .unwrap_or(data_type); + let mut data = vec![]; let mut total_len = 0; for arg in args { let arg_data = if arg.as_any().is::() { ArrayData::new_empty(&data_type) - } else { + } else if arg.data_type() == &data_type { arg.to_data() + } else { + // Only nullability metadata differs; this cast widens it cheaply. + arrow::compute::cast(arg, &data_type)?.to_data() }; total_len += arg_data.len(); data.push(arg_data); @@ -241,8 +254,45 @@ pub fn array_array( )?)) } +/// Merge `data_types` into a single type, OR-ing the nullable flag at every +/// nesting level (struct fields, list elements, ...). `Null` inputs are +/// ignored. +/// +/// Returns `None` when there is no non-null input or the inputs are +/// structurally incompatible (different field names, element types, ...), in +/// which case callers fall back to their previous behavior. +/// +/// This is what allows e.g. `make_array(named_struct('i', 1), some_column)` to +/// succeed when the literal yields a non-nullable field and the column yields a +/// nullable one: the result widens both to nullable. See issue #22366. +fn merge_nullability(data_types: impl IntoIterator) -> Option { + let mut merged: Option = None; + for data_type in data_types { + if data_type.is_null() { + continue; + } + let field = Field::new(Field::LIST_FIELD_DEFAULT_NAME, data_type, true); + match merged { + None => merged = Some(field), + // `Field::try_merge` ORs nullability and recurses into nested types; + // it errors only when the structures are incompatible. + Some(ref mut m) => m.try_merge(&field).ok()?, + } + } + merged.map(|f| f.data_type().clone()) +} + pub fn coerce_types_inner(arg_types: &[DataType], name: &str) -> Result> { if let Ok(unified) = try_type_union_resolution_with_struct(arg_types) { + // `try_type_union_resolution_with_struct` coerces the inner field types + // but preserves each argument's own nested-field nullability, so the + // returned struct types can still differ in nullability. Widen them to + // a single common type, both so the declared return type matches the + // value produced at runtime and so the arguments can actually be + // combined. See issue #22366. + if let Some(merged) = merge_nullability(unified.iter().cloned()) { + return Ok(vec![merged; unified.len()]); + } return Ok(unified); } @@ -256,3 +306,45 @@ pub fn coerce_types_inner(arg_types: &[DataType], name: &str) -> Result ArrayRef { + let field = Arc::new(Field::new("i", DataType::Int32, nullable)); + let values = Arc::new(Int32Array::from(vec![1])) as ArrayRef; + Arc::new(StructArray::new( + Fields::from(vec![field]), + vec![values], + None, + )) + } + + /// Reproduces https://github.com/apache/datafusion/issues/22366: + /// `make_array` over structs that differ only in nested-field nullability + /// must succeed (widening nullable flags) rather than panic in + /// `MutableArrayData`. + #[test] + fn make_array_relaxes_nested_field_nullability() { + let non_nullable = struct_array_with_field_nullable(false); + let nullable = struct_array_with_field_nullable(true); + + let result = make_array_inner(&[non_nullable, nullable]).expect( + "make_array should accept structs differing only in field nullability", + ); + + // The result is a single-row list of two struct elements. + let list = result + .as_any() + .downcast_ref::>() + .expect("expected a List array"); + assert_eq!(list.len(), 1); + assert_eq!(list.value(0).len(), 2); + } +} diff --git a/datafusion/sqllogictest/test_files/array/make_array.slt b/datafusion/sqllogictest/test_files/array/make_array.slt index fa91dcbaabc74..bc4f5a89eee0e 100644 --- a/datafusion/sqllogictest/test_files/array/make_array.slt +++ b/datafusion/sqllogictest/test_files/array/make_array.slt @@ -163,4 +163,38 @@ from arrays_values_without_nulls; [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [8, 9]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [50, 51, 52]] +# make_array over structs built from a literal (non-nullable fields) and a +# column (nullable fields). The element types differ only in nested-field +# nullability; make_array must widen them to a common type rather than fail. +# See https://github.com/apache/datafusion/issues/22366 +statement ok +create table struct_values(i int) as values (1), (NULL), (3); + +query ? +select make_array(named_struct('i', 10), named_struct('i', i)) from struct_values order by 1; +---- +[{i: 10}, {i: 1}] +[{i: 10}, {i: 3}] +[{i: 10}, {i: NULL}] + +query T +select arrow_typeof(make_array(named_struct('i', 10), named_struct('i', i))) from struct_values limit 1; +---- +List(Struct("i": Int64)) + +# nested struct case +query ? +select make_array( + named_struct('a', 1, 'b', named_struct('c', 2)), + named_struct('a', i, 'b', named_struct('c', i)) + ) +from struct_values order by 1; +---- +[{a: 1, b: {c: 2}}, {a: 1, b: {c: 1}}] +[{a: 1, b: {c: 2}}, {a: 3, b: {c: 3}}] +[{a: 1, b: {c: 2}}, {a: NULL, b: {c: NULL}}] + +statement ok +drop table struct_values; + include ./cleanup.slt.part