From 6c7f309049532461a76c54eef504e56511b87e2a Mon Sep 17 00:00:00 2001 From: Alexander Rafferty Date: Sat, 27 Jul 2024 12:35:56 +1000 Subject: [PATCH 1/3] Remove unnecessary heap allocations in implementation of `named_struct_expr` caused by zipping then unzipping fields and values. --- datafusion/functions/src/core/named_struct.rs | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs index f71b1b00f0fe..85c332745355 100644 --- a/datafusion/functions/src/core/named_struct.rs +++ b/datafusion/functions/src/core/named_struct.rs @@ -70,20 +70,17 @@ fn named_struct_expr(args: &[ColumnarValue]) -> Result { } } - let arrays = ColumnarValue::values_to_arrays(&values)?; - - let fields = names + let fields: Fields = names .into_iter() - .zip(arrays) - .map(|(name, value)| { - ( - Arc::new(Field::new(name, value.data_type().clone(), true)), - value, - ) - }) - .collect::>(); + .zip(&values) + .map(|(name, value)| Arc::new(Field::new(name, value.data_type().clone(), true))) + .collect::>() + .into(); + + let arrays = ColumnarValue::values_to_arrays(&values)?; - Ok(ColumnarValue::Array(Arc::new(StructArray::from(fields)))) + let struct_array = StructArray::new(fields, arrays, None); + Ok(ColumnarValue::Array(Arc::new(struct_array))) } #[derive(Debug)] From b6ab034951c5efe7db678df24eb6f13d90c90f28 Mon Sep 17 00:00:00 2001 From: Alexander Rafferty Date: Sun, 28 Jul 2024 14:57:56 +1000 Subject: [PATCH 2/3] Change implementation of `array_struct` to reduce number of allocations --- datafusion/functions/src/core/struct.rs | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/datafusion/functions/src/core/struct.rs b/datafusion/functions/src/core/struct.rs index c3dee8b1ccb4..74378a9b9cc7 100644 --- a/datafusion/functions/src/core/struct.rs +++ b/datafusion/functions/src/core/struct.rs @@ -29,23 +29,23 @@ fn array_struct(args: &[ArrayRef]) -> Result { return exec_err!("struct requires at least one argument"); } - let vec: Vec<_> = args + let fields = args .iter() .enumerate() .map(|(i, arg)| { let field_name = format!("c{i}"); - Ok(( - Arc::new(Field::new( - field_name.as_str(), - arg.data_type().clone(), - true, - )), - Arc::clone(arg), - )) + Ok(Arc::new(Field::new( + field_name.as_str(), + arg.data_type().clone(), + true, + ))) }) - .collect::>>()?; + .collect::>>()? + .into(); - Ok(Arc::new(StructArray::from(vec))) + let arrays = args.to_vec(); + + Ok(Arc::new(StructArray::new(fields, arrays, None))) } /// put values in a struct array. @@ -53,6 +53,7 @@ fn struct_expr(args: &[ColumnarValue]) -> Result { let arrays = ColumnarValue::values_to_arrays(args)?; Ok(ColumnarValue::Array(array_struct(arrays.as_slice())?)) } + #[derive(Debug)] pub struct StructFunc { signature: Signature, From dcaa7170494f34c3af9e1bd3b9059b70c6e15299 Mon Sep 17 00:00:00 2001 From: Alexander Rafferty Date: Sun, 1 Sep 2024 12:05:33 +1000 Subject: [PATCH 3/3] Remove tests already covered by `struct.slt` --- datafusion/functions/src/core/struct.rs | 45 ------------------------- 1 file changed, 45 deletions(-) diff --git a/datafusion/functions/src/core/struct.rs b/datafusion/functions/src/core/struct.rs index 74378a9b9cc7..bdddbb81beab 100644 --- a/datafusion/functions/src/core/struct.rs +++ b/datafusion/functions/src/core/struct.rs @@ -98,48 +98,3 @@ impl ScalarUDFImpl for StructFunc { struct_expr(args) } } - -#[cfg(test)] -mod tests { - use super::*; - use arrow::array::Int64Array; - use datafusion_common::cast::as_struct_array; - use datafusion_common::ScalarValue; - - #[test] - fn test_struct() { - // struct(1, 2, 3) = {"c0": 1, "c1": 2, "c2": 3} - let args = [ - ColumnarValue::Scalar(ScalarValue::Int64(Some(1))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(2))), - ColumnarValue::Scalar(ScalarValue::Int64(Some(3))), - ]; - let struc = struct_expr(&args) - .expect("failed to initialize function struct") - .into_array(1) - .expect("Failed to convert to array"); - let result = - as_struct_array(&struc).expect("failed to initialize function struct"); - assert_eq!( - &Int64Array::from(vec![1]), - Arc::clone(result.column_by_name("c0").unwrap()) - .as_any() - .downcast_ref::() - .unwrap() - ); - assert_eq!( - &Int64Array::from(vec![2]), - Arc::clone(result.column_by_name("c1").unwrap()) - .as_any() - .downcast_ref::() - .unwrap() - ); - assert_eq!( - &Int64Array::from(vec![3]), - Arc::clone(result.column_by_name("c2").unwrap()) - .as_any() - .downcast_ref::() - .unwrap() - ); - } -}