diff --git a/datafusion/functions/src/datetime/date_trunc.rs b/datafusion/functions/src/datetime/date_trunc.rs
index 405aabfde991..f055177ca221 100644
--- a/datafusion/functions/src/datetime/date_trunc.rs
+++ b/datafusion/functions/src/datetime/date_trunc.rs
@@ -28,8 +28,9 @@ use arrow::array::types::{
     ArrowTimestampType, TimestampMicrosecondType, TimestampMillisecondType,
     TimestampNanosecondType, TimestampSecondType,
 };
-use arrow::array::{Array, ArrayRef, Int64Array, PrimitiveArray};
+use arrow::array::{Array, ArrayRef, AsArray, PrimitiveArray};
 use arrow::datatypes::DataType::{self, Null, Timestamp, Utf8, Utf8View};
+use arrow::datatypes::Int64Type;
 use arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second};
 use datafusion_common::cast::as_primitive_array;
 use datafusion_common::{
@@ -455,36 +456,46 @@ fn general_date_trunc_array_fine_granularity(
     array: &PrimitiveArray<T>,
     granularity: &str,
 ) -> Result<ArrayRef> {
-    let unit = match (tu, granularity) {
-        (Second, "minute") => Some(Int64Array::new_scalar(60)),
-        (Second, "hour") => Some(Int64Array::new_scalar(3600)),
-        (Second, "day") => Some(Int64Array::new_scalar(86400)),
-
-        (Millisecond, "second") => Some(Int64Array::new_scalar(1_000)),
-        (Millisecond, "minute") => Some(Int64Array::new_scalar(60_000)),
-        (Millisecond, "hour") => Some(Int64Array::new_scalar(3_600_000)),
-        (Millisecond, "day") => Some(Int64Array::new_scalar(86_400_000)),
-
-        (Microsecond, "millisecond") => Some(Int64Array::new_scalar(1_000)),
-        (Microsecond, "second") => Some(Int64Array::new_scalar(1_000_000)),
-        (Microsecond, "minute") => Some(Int64Array::new_scalar(60_000_000)),
-        (Microsecond, "hour") => Some(Int64Array::new_scalar(3_600_000_000)),
-        (Microsecond, "day") => Some(Int64Array::new_scalar(86_400_000_000)),
-
-        (Nanosecond, "microsecond") => Some(Int64Array::new_scalar(1_000)),
-        (Nanosecond, "millisecond") => Some(Int64Array::new_scalar(1_000_000)),
-        (Nanosecond, "second") => Some(Int64Array::new_scalar(1_000_000_000)),
-        (Nanosecond, "minute") => Some(Int64Array::new_scalar(60_000_000_000)),
-        (Nanosecond, "hour") => Some(Int64Array::new_scalar(3_600_000_000_000)),
-        (Nanosecond, "day") => Some(Int64Array::new_scalar(86_400_000_000_000)),
+    let unit: Option<i64> = match (tu, granularity) {
+        (Second, "minute") => Some(60),
+        (Second, "hour") => Some(3600),
+        (Second, "day") => Some(86400),
+
+        (Millisecond, "second") => Some(1_000),
+        (Millisecond, "minute") => Some(60_000),
+        (Millisecond, "hour") => Some(3_600_000),
+        (Millisecond, "day") => Some(86_400_000),
+
+        (Microsecond, "millisecond") => Some(1_000),
+        (Microsecond, "second") => Some(1_000_000),
+        (Microsecond, "minute") => Some(60_000_000),
+        (Microsecond, "hour") => Some(3_600_000_000),
+        (Microsecond, "day") => Some(86_400_000_000),
+
+        (Nanosecond, "microsecond") => Some(1_000),
+        (Nanosecond, "millisecond") => Some(1_000_000),
+        (Nanosecond, "second") => Some(1_000_000_000),
+        (Nanosecond, "minute") => Some(60_000_000_000),
+        (Nanosecond, "hour") => Some(3_600_000_000_000),
+        (Nanosecond, "day") => Some(86_400_000_000_000),
         _ => None,
     };
 
     if let Some(unit) = unit {
         let original_type = array.data_type();
-        let array = arrow::compute::cast(array, &DataType::Int64)?;
-        let array = arrow::compute::kernels::numeric::div(&array, &unit)?;
-        let array = arrow::compute::kernels::numeric::mul(&array, &unit)?;
+        let input = arrow::compute::cast(array, &DataType::Int64)?;
+        // Optimize performance by doing operations in place if possible
+        let array = input.as_primitive::<Int64Type>().clone();
+        drop(input); // ensure the input reference is dropped (so we can reuse the memory if possible)
+        let array = try_unary_mut_or_clone(array, |i| {
+            // Round toward negative infinity so that timestamps before
+            // 1970-01-01T00:00:00Z (negative values) truncate to the start
+            // of their unit. `rem_euclid` is always in `0..unit`, so values
+            // that are already aligned, and values in the first unit after
+            // the epoch, are not shifted back by a whole unit.
+            i.checked_sub(i.rem_euclid(unit))
+                .ok_or_else(|| exec_datafusion_err!("subtraction overflow"))
+        })?;
         let array = arrow::compute::cast(&array, original_type)?;
         Ok(array)
     } else {
@@ -493,6 +504,23 @@ fn general_date_trunc_array_fine_granularity(
     }
 }
 
+/// Applies the fallible unary operation `op` to the values of `array`,
+/// mutating the array in place if possible, or producing a new array if
+/// not.
+fn try_unary_mut_or_clone<F>(
+    array: PrimitiveArray<Int64Type>,
+    op: F,
+) -> Result<PrimitiveArray<Int64Type>>
+where
+    F: Fn(i64) -> Result<i64>,
+{
+    match array.try_unary_mut(&op) {
+        Ok(result) => result,
+        // in-place mutation was not possible, so make a new array
+        Err(array) => array.try_unary(op),
+    }
+}
+
 // truncates a single value with the given timeunit to the specified granularity
 fn general_date_trunc(
     tu: TimeUnit,
diff --git a/datafusion/sqllogictest/test_files/timestamps.slt b/datafusion/sqllogictest/test_files/timestamps.slt
index 84dd7098a2ee..3d03bc326342 100644
--- a/datafusion/sqllogictest/test_files/timestamps.slt
+++ b/datafusion/sqllogictest/test_files/timestamps.slt
@@ -1687,6 +1687,21 @@ SELECT DATE_TRUNC('second', '2022-08-03 14:38:50Z');
 ----
 2022-08-03T14:38:50
 
+# DATE_TRUNC handling of times before the unix epoch (issue 18334)
+query PPP
+SELECT d, DATE_TRUNC('hour', d), DATE_TRUNC('hour', TIMESTAMP '1900-06-15 07:09:00')
+FROM (VALUES (TIMESTAMP '1900-06-15 07:09:00')) AS t(d);
+----
+1900-06-15T07:09:00 1900-06-15T07:00:00 1900-06-15T07:00:00
+
+# DATE_TRUNC must leave already-aligned pre-epoch values and values in the
+# first unit after the epoch on their own unit boundary
+query PP
+SELECT DATE_TRUNC('hour', a), DATE_TRUNC('hour', b)
+FROM (VALUES (TIMESTAMP '1900-06-15 07:00:00', TIMESTAMP '1970-01-01 00:30:00')) AS t(a, b);
+----
+1900-06-15T07:00:00 1970-01-01T00:00:00
+
 # Test that interval can add a timestamp
 query P
 SELECT timestamp '2013-07-01 12:00:00' + INTERVAL '8' DAY;