Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions datafusion/core/tests/sql/select.rs
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,38 @@ async fn prepared_statement_type_coercion() -> Result<()> {
Ok(())
}

#[tokio::test]
async fn make_array_null_typed_column_preserves_rows() -> Result<()> {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's remove this test as the SLT one is sufficient

let ctx = SessionContext::new();
let batch = RecordBatch::try_from_iter(vec![
("id", Arc::new(Int32Array::from(vec![1, 2, 3])) as ArrayRef),
("n", Arc::new(NullArray::new(3)) as ArrayRef),
])?;
ctx.register_batch("test", batch)?;

let results = ctx
.sql(
"SELECT id, make_array(n) AS arr, array_length(make_array(n)) AS len \
FROM test \
ORDER BY id",
)
.await?
.collect()
.await?;

assert_snapshot!(batches_to_sort_string(&results), @r"
+----+-----+-----+
| id | arr | len |
+----+-----+-----+
| 1 | [] | 1 |
| 2 | [] | 1 |
| 3 | [] | 1 |
+----+-----+-----+
");

Ok(())
}

#[tokio::test]
async fn test_parameter_type_coercion() -> Result<()> {
let ctx = SessionContext::new();
Expand Down
56 changes: 43 additions & 13 deletions datafusion/functions-nested/src/make_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,7 @@ use arrow::array::{
NullArray, OffsetSizeTrait, new_null_array,
};
use arrow::buffer::OffsetBuffer;
use arrow::datatypes::DataType;
use arrow::datatypes::{DataType::Null, Field};
use arrow::datatypes::{DataType, Field};
use datafusion_common::utils::SingleRowListArrayBuilder;
use datafusion_common::{Result, plan_err};
use datafusion_expr::binary::{
Expand Down Expand Up @@ -96,7 +95,7 @@ impl ScalarUDFImpl for MakeArray {

fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
let element_type = if arg_types.is_empty() {
Null
DataType::Null
} else {
// At this point, all the types in the array should have been coerced to the same one.
arg_types[0].to_owned()
Expand Down Expand Up @@ -130,20 +129,23 @@ impl ScalarUDFImpl for MakeArray {
/// Constructs an array using the input `data` as `ArrayRef`.
/// Returns a reference-counted `Array` instance result.
pub(crate) fn make_array_inner(arrays: &[ArrayRef]) -> Result<ArrayRef> {
let data_type = arrays.iter().find_map(|arg| {
let arg_type = arg.data_type();
(!arg_type.is_null()).then_some(arg_type)
});

let data_type = data_type.unwrap_or(&Null);
if data_type.is_null() {
// Either an empty array or all nulls:
let length = arrays.iter().map(|a| a.len()).sum();
let array = new_null_array(&Null, length);
// Zero arguments are the only case that should build a scalar empty list.
if arrays.is_empty() {
let array = new_null_array(&DataType::Null, 0);
Ok(Arc::new(
SingleRowListArrayBuilder::new(array).build_list_array(),
))
} else {
// All-null inputs still need to flow through `array_array()` so rows
// are built per input row instead of collapsing to one value.
let data_type = arrays
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel we can get this from the return type, which is provided in the ScalarFunctionArgs in invoke_with_args, so with some plumbing we can reuse that instead of having this logic

.iter()
.find_map(|arg| {
let arg_type = arg.data_type();
(!arg_type.is_null()).then_some(arg_type)
})
.unwrap_or(&DataType::Null);

array_array::<i32>(arrays, data_type.clone(), Field::LIST_FIELD_DEFAULT_NAME)
}
}
Expand Down Expand Up @@ -256,3 +258,31 @@ pub fn coerce_types_inner(arg_types: &[DataType], name: &str) -> Result<Vec<Data
)
}
}

#[cfg(test)]
mod tests {
use super::*;
use arrow::array::ListArray;

#[test]
fn make_array_inner_all_null_arrays_preserves_row_count_and_width() {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same for these unit tests

let inputs = vec![
Arc::new(NullArray::new(3)) as ArrayRef,
Arc::new(NullArray::new(3)) as ArrayRef,
];

let result = make_array_inner(&inputs).unwrap();
let list = result.as_any().downcast_ref::<ListArray>().unwrap();

assert_eq!(list.len(), 3);
assert_eq!(list.value_type(), DataType::Null);
assert_eq!(list.values().len(), 6);

for row in 0..list.len() {
assert_eq!(list.value_length(row), 2);
let values = list.value(row);
assert_eq!(values.len(), 2);
assert_eq!(values.logical_null_count(), 2);
}
}
}
65 changes: 45 additions & 20 deletions datafusion/spark/src/function/array/spark_array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,28 +110,53 @@ impl ScalarUDFImpl for SparkArray {
/// Constructs an array using the input `data` as `ArrayRef`.
/// Returns a reference-counted `Array` instance result.
pub fn make_array_inner(arrays: &[ArrayRef]) -> Result<ArrayRef> {
let mut data_type = DataType::Null;
for arg in arrays {
let arg_data_type = arg.data_type();
if !arg_data_type.equals_datatype(&DataType::Null) {
data_type = arg_data_type.clone();
break;
}
// Zero arguments are the only case that should build a scalar empty list.
if arrays.is_empty() {
let array = new_null_array(&DataType::Null, 0);
Ok(Arc::new(
SingleRowListArrayBuilder::new(array)
.with_field_name(Some(ARRAY_FIELD_DEFAULT_NAME.to_string()))
.build_list_array(),
))
} else {
// All-null inputs still need to flow through `array_array()` so rows
// are built per input row instead of collapsing to one value.
let data_type = arrays
.iter()
.find_map(|arg| {
let arg_type = arg.data_type();
(!arg_type.is_null()).then_some(arg_type)
})
.unwrap_or(&DataType::Null);
array_array::<i32>(arrays, data_type.clone(), ARRAY_FIELD_DEFAULT_NAME)
}
}

match data_type {
// Either an empty array or all nulls:
DataType::Null => {
let length = arrays.iter().map(|a| a.len()).sum();
// By default Int32
let array = new_null_array(&DataType::Null, length);
Ok(Arc::new(
SingleRowListArrayBuilder::new(array)
.with_nullable(true)
.with_field_name(Some(ARRAY_FIELD_DEFAULT_NAME.to_string()))
.build_list_array(),
))
#[cfg(test)]
mod tests {
use super::*;
use arrow::array::{ListArray, NullArray};
use arrow::datatypes::DataType;

#[test]
fn spark_array_inner_all_null_arrays_preserves_row_count_and_width() {
let inputs = vec![
Arc::new(NullArray::new(3)) as ArrayRef,
Arc::new(NullArray::new(3)) as ArrayRef,
];

let result = make_array_inner(&inputs).unwrap();
let list = result.as_any().downcast_ref::<ListArray>().unwrap();

assert_eq!(list.len(), 3);
assert_eq!(list.value_type(), DataType::Null);
assert_eq!(list.values().len(), 6);

for row in 0..list.len() {
assert_eq!(list.value_length(row), 2);
let values = list.value(row);
assert_eq!(values.len(), 2);
assert_eq!(values.logical_null_count(), 2);
}
_ => array_array::<i32>(arrays, data_type, ARRAY_FIELD_DEFAULT_NAME),
}
}
10 changes: 10 additions & 0 deletions datafusion/sqllogictest/test_files/array/make_array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,16 @@ select make_array(NULL), make_array(NULL, NULL, NULL), make_array(make_array(NUL
----
[NULL] [NULL, NULL, NULL] [[NULL, NULL], [NULL, NULL]]

# make_array with null-array parameter preserves input rows and list-null value
query I?I
select id, make_array(n), array_length(make_array(n))
from (values (1, NULL), (2, NULL), (3, NULL)) as t(id, n)
order by id;
----
1 [NULL] 1
2 [NULL] 1
3 [NULL] 1

# make_array with 1 column
query ???
select make_array(a), make_array(d), make_array(e) from values;
Expand Down
10 changes: 10 additions & 0 deletions datafusion/sqllogictest/test_files/spark/array/array.slt
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,16 @@ SELECT array(null);
----
[NULL]

# array with null-array parameter preserves input rows and list-null value
query I?I
SELECT id, array(n), size(array(n))
FROM (VALUES (1, NULL), (2, NULL), (3, NULL)) AS t(id, n)
ORDER BY id;
----
1 [NULL] 1
2 [NULL] 1
3 [NULL] 1


query ?
SELECT array(1, NULL, 3);
Expand Down
Loading