From 13d78d032fe04b55adae24812fd054bb1b3306a9 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 2 Aug 2021 22:44:53 -0700 Subject: [PATCH 1/2] Support date datatype in max/min. --- datafusion/src/physical_plan/aggregates.rs | 6 ++ .../src/physical_plan/expressions/min_max.rs | 78 ++++++++++++++++++- 2 files changed, 83 insertions(+), 1 deletion(-) diff --git a/datafusion/src/physical_plan/aggregates.rs b/datafusion/src/physical_plan/aggregates.rs index c297a959639a..5a1338f2e6f2 100644 --- a/datafusion/src/physical_plan/aggregates.rs +++ b/datafusion/src/physical_plan/aggregates.rs @@ -188,6 +188,11 @@ static TIMESTAMPS: &[DataType] = &[ DataType::Timestamp(TimeUnit::Nanosecond, None), ]; +static DATES: &[DataType] = &[ + DataType::Date32, + DataType::Date64, +]; + /// the signatures supported by the function `fun`. pub fn signature(fun: &AggregateFunction) -> Signature { // note: the physical expression must accept the type returned by this function or the execution panics. @@ -198,6 +203,7 @@ pub fn signature(fun: &AggregateFunction) -> Signature { .iter() .chain(NUMERICS.iter()) .chain(TIMESTAMPS.iter()) + .chain(DATES.iter()) .cloned() .collect::>(); Signature::Uniform(1, valid) diff --git a/datafusion/src/physical_plan/expressions/min_max.rs b/datafusion/src/physical_plan/expressions/min_max.rs index 46e41f46a0e5..891f2fdbcb62 100644 --- a/datafusion/src/physical_plan/expressions/min_max.rs +++ b/datafusion/src/physical_plan/expressions/min_max.rs @@ -28,7 +28,7 @@ use arrow::compute; use arrow::datatypes::{DataType, TimeUnit}; use arrow::{ array::{ - ArrayRef, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, + ArrayRef, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, @@ -158,6 +158,18 @@ macro_rules! min_max_batch { TimestampNanosecond, $OP ), + DataType::Date32 => typed_min_max_batch!( + $VALUES, + Date32Array, + Date32, + $OP + ), + DataType::Date64 => typed_min_max_batch!( + $VALUES, + Date64Array, + Date64, + $OP + ), other => { // This should have been handled before return Err(DataFusionError::Internal(format!( @@ -280,6 +292,18 @@ macro_rules! min_max { ) => { typed_min_max!(lhs, rhs, TimestampNanosecond, $OP) } + ( + ScalarValue::Date32(lhs), + ScalarValue::Date32(rhs), + ) => { + typed_min_max!(lhs, rhs, Date32, $OP) + } + ( + ScalarValue::Date64(lhs), + ScalarValue::Date64(rhs), + ) => { + typed_min_max!(lhs, rhs, Date64, $OP) + } e => { return Err(DataFusionError::Internal(format!( "MIN/MAX is not expected to receive scalars of incompatible types {:?}", @@ -668,4 +692,56 @@ mod tests { DataType::Float64 ) } + + #[test] + fn min_date32() -> Result<()> { + let a: ArrayRef = + Arc::new(Date32Array::from(vec![1, 2, 3, 4, 5])); + generic_test_op!( + a, + DataType::Date32, + Min, + ScalarValue::Date32(Some(1)), + DataType::Date32 + ) + } + + #[test] + fn min_date64() -> Result<()> { + let a: ArrayRef = + Arc::new(Date64Array::from(vec![1, 2, 3, 4, 5])); + generic_test_op!( + a, + DataType::Date64, + Min, + ScalarValue::Date64(Some(1)), + DataType::Date64 + ) + } + + #[test] + fn max_date32() -> Result<()> { + let a: ArrayRef = + Arc::new(Date32Array::from(vec![1, 2, 3, 4, 5])); + generic_test_op!( + a, + DataType::Date32, + Max, + ScalarValue::Date32(Some(5)), + DataType::Date32 + ) + } + + #[test] + fn max_date64() -> Result<()> { + let a: ArrayRef = + Arc::new(Date64Array::from(vec![1, 2, 3, 4, 5])); + generic_test_op!( + a, + DataType::Date64, + Max, + ScalarValue::Date64(Some(5)), + DataType::Date64 + ) + } } From 3f51ada6f8e07cb8d1ae07040cdff052c3d7df79 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 2 Aug 2021 23:02:19 -0700 Subject: [PATCH 2/2] fix format. --- datafusion/src/physical_plan/aggregates.rs | 5 +-- .../src/physical_plan/expressions/min_max.rs | 34 ++++++------------- 2 files changed, 11 insertions(+), 28 deletions(-) diff --git a/datafusion/src/physical_plan/aggregates.rs b/datafusion/src/physical_plan/aggregates.rs index 5a1338f2e6f2..57c9b61c91fd 100644 --- a/datafusion/src/physical_plan/aggregates.rs +++ b/datafusion/src/physical_plan/aggregates.rs @@ -188,10 +188,7 @@ static TIMESTAMPS: &[DataType] = &[ DataType::Timestamp(TimeUnit::Nanosecond, None), ]; -static DATES: &[DataType] = &[ - DataType::Date32, - DataType::Date64, -]; +static DATES: &[DataType] = &[DataType::Date32, DataType::Date64]; /// the signatures supported by the function `fun`. pub fn signature(fun: &AggregateFunction) -> Signature { diff --git a/datafusion/src/physical_plan/expressions/min_max.rs b/datafusion/src/physical_plan/expressions/min_max.rs index 891f2fdbcb62..6bb4c5b21b86 100644 --- a/datafusion/src/physical_plan/expressions/min_max.rs +++ b/datafusion/src/physical_plan/expressions/min_max.rs @@ -28,10 +28,10 @@ use arrow::compute; use arrow::datatypes::{DataType, TimeUnit}; use arrow::{ array::{ - ArrayRef, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, - Int8Array, LargeStringArray, StringArray, TimestampMicrosecondArray, - TimestampMillisecondArray, TimestampNanosecondArray, TimestampSecondArray, - UInt16Array, UInt32Array, UInt64Array, UInt8Array, + ArrayRef, Date32Array, Date64Array, Float32Array, Float64Array, Int16Array, + Int32Array, Int64Array, Int8Array, LargeStringArray, StringArray, + TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, + TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, }, datatypes::Field, }; @@ -158,18 +158,8 @@ macro_rules! min_max_batch { TimestampNanosecond, $OP ), - DataType::Date32 => typed_min_max_batch!( - $VALUES, - Date32Array, - Date32, - $OP - ), - DataType::Date64 => typed_min_max_batch!( - $VALUES, - Date64Array, - Date64, - $OP - ), + DataType::Date32 => typed_min_max_batch!($VALUES, Date32Array, Date32, $OP), + DataType::Date64 => typed_min_max_batch!($VALUES, Date64Array, Date64, $OP), other => { // This should have been handled before return Err(DataFusionError::Internal(format!( @@ -695,8 +685,7 @@ mod tests { #[test] fn min_date32() -> Result<()> { - let a: ArrayRef = - Arc::new(Date32Array::from(vec![1, 2, 3, 4, 5])); + let a: ArrayRef = Arc::new(Date32Array::from(vec![1, 2, 3, 4, 5])); generic_test_op!( a, DataType::Date32, @@ -708,8 +697,7 @@ mod tests { #[test] fn min_date64() -> Result<()> { - let a: ArrayRef = - Arc::new(Date64Array::from(vec![1, 2, 3, 4, 5])); + let a: ArrayRef = Arc::new(Date64Array::from(vec![1, 2, 3, 4, 5])); generic_test_op!( a, DataType::Date64, @@ -721,8 +709,7 @@ mod tests { #[test] fn max_date32() -> Result<()> { - let a: ArrayRef = - Arc::new(Date32Array::from(vec![1, 2, 3, 4, 5])); + let a: ArrayRef = Arc::new(Date32Array::from(vec![1, 2, 3, 4, 5])); generic_test_op!( a, DataType::Date32, @@ -734,8 +721,7 @@ mod tests { #[test] fn max_date64() -> Result<()> { - let a: ArrayRef = - Arc::new(Date64Array::from(vec![1, 2, 3, 4, 5])); + let a: ArrayRef = Arc::new(Date64Array::from(vec![1, 2, 3, 4, 5])); generic_test_op!( a, DataType::Date64,