Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 93 additions & 1 deletion datafusion/functions-nested/src/make_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,13 +198,26 @@ pub fn array_array<O: OffsetSizeTrait>(
return plan_err!("Array requires at least one argument");
}

// Widen the element type so inputs that differ only in nested-field
// nullability (e.g. a struct field that is non-nullable in one argument and
// nullable in another) can be combined. Without this, `MutableArrayData`
// below panics with "Arrays with inconsistent types". See issue #22366.
let data_type = merge_nullability(
std::iter::once(data_type.clone())
.chain(args.iter().map(|arg| arg.data_type().clone())),
)
.unwrap_or(data_type);

let mut data = vec![];
let mut total_len = 0;
for arg in args {
let arg_data = if arg.as_any().is::<NullArray>() {
ArrayData::new_empty(&data_type)
} else {
} else if arg.data_type() == &data_type {
arg.to_data()
} else {
// Only nullability metadata differs; this cast widens it cheaply.
arrow::compute::cast(arg, &data_type)?.to_data()
};
total_len += arg_data.len();
data.push(arg_data);
Expand Down Expand Up @@ -241,8 +254,45 @@ pub fn array_array<O: OffsetSizeTrait>(
)?))
}

/// Merge `data_types` into a single type, OR-ing the nullable flag at every
/// nesting level (struct fields, list elements, ...). `Null` inputs are
/// ignored.
///
/// Returns `None` when there is no non-null input or the inputs are
/// structurally incompatible (different field names, element types, ...), in
/// which case callers fall back to their previous behavior.
///
/// This is what allows e.g. `make_array(named_struct('i', 1), some_column)` to
/// succeed when the literal yields a non-nullable field and the column yields a
/// nullable one: the result widens both to nullable. See issue #22366.
fn merge_nullability(data_types: impl IntoIterator<Item = DataType>) -> Option<DataType> {
let mut merged: Option<Field> = None;
for data_type in data_types {
if data_type.is_null() {
continue;
}
let field = Field::new(Field::LIST_FIELD_DEFAULT_NAME, data_type, true);
match merged {
None => merged = Some(field),
// `Field::try_merge` ORs nullability and recurses into nested types;
// it errors only when the structures are incompatible.
Some(ref mut m) => m.try_merge(&field).ok()?,
}
}
merged.map(|f| f.data_type().clone())
}

pub fn coerce_types_inner(arg_types: &[DataType], name: &str) -> Result<Vec<DataType>> {
if let Ok(unified) = try_type_union_resolution_with_struct(arg_types) {
// `try_type_union_resolution_with_struct` coerces the inner field types
// but preserves each argument's own nested-field nullability, so the
// returned struct types can still differ in nullability. Widen them to
// a single common type, both so the declared return type matches the
// value produced at runtime and so the arguments can actually be
// combined. See issue #22366.
if let Some(merged) = merge_nullability(unified.iter().cloned()) {
return Ok(vec![merged; unified.len()]);
}
return Ok(unified);
}

Expand All @@ -256,3 +306,45 @@ pub fn coerce_types_inner(arg_types: &[DataType], name: &str) -> Result<Vec<Data
)
}
}

#[cfg(test)]
mod tests {
use super::*;
use arrow::array::{Int32Array, StructArray};
use arrow::datatypes::Fields;

/// Builds a single-row struct array `{ i: 1 }` whose `i` field has the
/// given nullability flag. The underlying data is identical regardless of
/// the flag.
fn struct_array_with_field_nullable(nullable: bool) -> ArrayRef {
let field = Arc::new(Field::new("i", DataType::Int32, nullable));
let values = Arc::new(Int32Array::from(vec![1])) as ArrayRef;
Arc::new(StructArray::new(
Fields::from(vec![field]),
vec![values],
None,
))
}

/// Reproduces https://github.com/apache/datafusion/issues/22366:
/// `make_array` over structs that differ only in nested-field nullability
/// must succeed (widening nullable flags) rather than panic in
/// `MutableArrayData`.
#[test]
fn make_array_relaxes_nested_field_nullability() {
let non_nullable = struct_array_with_field_nullable(false);
let nullable = struct_array_with_field_nullable(true);

let result = make_array_inner(&[non_nullable, nullable]).expect(
"make_array should accept structs differing only in field nullability",
);

// The result is a single-row list of two struct elements.
let list = result
.as_any()
.downcast_ref::<GenericListArray<i32>>()
.expect("expected a List array");
assert_eq!(list.len(), 1);
assert_eq!(list.value(0).len(), 2);
}
}
34 changes: 34 additions & 0 deletions datafusion/sqllogictest/test_files/array/make_array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -163,4 +163,38 @@ from arrays_values_without_nulls;
[[31, 32, 33, 34, 35, 26, 37, 38, 39, 40]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [8, 9]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [50, 51, 52]]


# make_array over structs built from a literal (non-nullable fields) and a
# column (nullable fields). The element types differ only in nested-field
# nullability; make_array must widen them to a common type rather than fail.
# See https://github.com/apache/datafusion/issues/22366
statement ok
create table struct_values(i int) as values (1), (NULL), (3);

query ?
select make_array(named_struct('i', 10), named_struct('i', i)) from struct_values order by 1;
----
[{i: 10}, {i: 1}]
[{i: 10}, {i: 3}]
[{i: 10}, {i: NULL}]

query T
select arrow_typeof(make_array(named_struct('i', 10), named_struct('i', i))) from struct_values limit 1;
----
List(Struct("i": Int64))

# nested struct case
query ?
select make_array(
named_struct('a', 1, 'b', named_struct('c', 2)),
named_struct('a', i, 'b', named_struct('c', i))
)
from struct_values order by 1;
----
[{a: 1, b: {c: 2}}, {a: 1, b: {c: 1}}]
[{a: 1, b: {c: 2}}, {a: 3, b: {c: 3}}]
[{a: 1, b: {c: 2}}, {a: NULL, b: {c: NULL}}]

statement ok
drop table struct_values;

include ./cleanup.slt.part