Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
177 changes: 176 additions & 1 deletion datafusion/functions/src/datetime/date_trunc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,7 @@ impl ScalarUDFImpl for DateTruncFunc {
T::UNIT,
array,
granularity,
tz_opt.clone(),
)?;
return Ok(ColumnarValue::Array(result));
}
Expand Down Expand Up @@ -522,6 +523,7 @@ fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
tu: TimeUnit,
array: &PrimitiveArray<T>,
granularity: DateTruncGranularity,
tz_opt: Option<Arc<str>>,
) -> Result<ArrayRef> {
let unit = match (tu, granularity) {
(Second, DateTruncGranularity::Minute) => NonZeroI64::new(60),
Expand Down Expand Up @@ -556,7 +558,8 @@ fn general_date_trunc_array_fine_granularity<T: ArrowTimestampType>(
.iter()
.map(|v| *v - i64::rem_euclid(*v, unit)),
array.nulls().cloned(),
);
)
.with_timezone_opt(tz_opt);
Ok(Arc::new(array))
} else {
// truncate to the same or smaller unit
Expand Down Expand Up @@ -1094,4 +1097,176 @@ mod tests {
}
});
}

#[test]
fn test_date_trunc_fine_granularity_timezones() {
    // Regression test for the fine-granularity fast path of `date_trunc`:
    // truncating to second/minute/millisecond is a pure modular operation on
    // the epoch value, but the result array must still carry the input's
    // timezone in its DataType (previously the tz was dropped).
    //
    // Each case is a tuple of:
    //   (input timestamp strings, timezone of the array, granularity,
    //    expected truncated timestamp strings)
    // NOTE(review): for fixed-offset and named timezones alike, the expected
    // values are expressed as instants (UTC or offset strings) — truncation at
    // these granularities is timezone-independent, so only the output
    // DataType's tz should differ between cases.
    let cases = [
        // Test "second" granularity
        (
            vec![
                "2020-09-08T13:42:29.190855Z",
                "2020-09-08T13:42:30.500000Z",
                "2020-09-08T13:42:31.999999Z",
            ],
            Some("+00".into()),
            "second",
            vec![
                "2020-09-08T13:42:29.000000Z",
                "2020-09-08T13:42:30.000000Z",
                "2020-09-08T13:42:31.000000Z",
            ],
        ),
        (
            vec![
                "2020-09-08T13:42:29.190855+05",
                "2020-09-08T13:42:30.500000+05",
                "2020-09-08T13:42:31.999999+05",
            ],
            Some("+05".into()),
            "second",
            vec![
                "2020-09-08T13:42:29.000000+05",
                "2020-09-08T13:42:30.000000+05",
                "2020-09-08T13:42:31.000000+05",
            ],
        ),
        (
            vec![
                "2020-09-08T13:42:29.190855Z",
                "2020-09-08T13:42:30.500000Z",
                "2020-09-08T13:42:31.999999Z",
            ],
            Some("Europe/Berlin".into()),
            "second",
            vec![
                "2020-09-08T13:42:29.000000Z",
                "2020-09-08T13:42:30.000000Z",
                "2020-09-08T13:42:31.000000Z",
            ],
        ),
        // Test "minute" granularity
        (
            vec![
                "2020-09-08T13:42:29.190855Z",
                "2020-09-08T13:43:30.500000Z",
                "2020-09-08T13:44:31.999999Z",
            ],
            Some("+00".into()),
            "minute",
            vec![
                "2020-09-08T13:42:00.000000Z",
                "2020-09-08T13:43:00.000000Z",
                "2020-09-08T13:44:00.000000Z",
            ],
        ),
        (
            vec![
                "2020-09-08T13:42:29.190855+08",
                "2020-09-08T13:43:30.500000+08",
                "2020-09-08T13:44:31.999999+08",
            ],
            Some("+08".into()),
            "minute",
            vec![
                "2020-09-08T13:42:00.000000+08",
                "2020-09-08T13:43:00.000000+08",
                "2020-09-08T13:44:00.000000+08",
            ],
        ),
        (
            vec![
                "2020-09-08T13:42:29.190855Z",
                "2020-09-08T13:43:30.500000Z",
                "2020-09-08T13:44:31.999999Z",
            ],
            Some("America/Sao_Paulo".into()),
            "minute",
            vec![
                "2020-09-08T13:42:00.000000Z",
                "2020-09-08T13:43:00.000000Z",
                "2020-09-08T13:44:00.000000Z",
            ],
        ),
        // Test with None (no timezone)
        (
            vec![
                "2020-09-08T13:42:29.190855Z",
                "2020-09-08T13:43:30.500000Z",
                "2020-09-08T13:44:31.999999Z",
            ],
            None,
            "minute",
            vec![
                "2020-09-08T13:42:00.000000Z",
                "2020-09-08T13:43:00.000000Z",
                "2020-09-08T13:44:00.000000Z",
            ],
        ),
        // Test millisecond granularity
        (
            vec![
                "2020-09-08T13:42:29.190855Z",
                "2020-09-08T13:42:29.191999Z",
                "2020-09-08T13:42:29.192500Z",
            ],
            Some("Asia/Kolkata".into()),
            "millisecond",
            vec![
                // Same instants re-expressed in the +05:30 offset; only the
                // sub-millisecond digits are zeroed by the truncation.
                "2020-09-08T19:12:29.190000+05:30",
                "2020-09-08T19:12:29.191000+05:30",
                "2020-09-08T19:12:29.192000+05:30",
            ],
        ),
    ];

    cases
        .iter()
        .for_each(|(original, tz_opt, granularity, expected)| {
            // Build the input array with the case's timezone attached.
            let input = original
                .iter()
                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
                .collect::<TimestampNanosecondArray>()
                .with_timezone_opt(tz_opt.clone());
            // Expected output: same timezone, truncated epoch values.
            let right = expected
                .iter()
                .map(|s| Some(string_to_timestamp_nanos(s).unwrap()))
                .collect::<TimestampNanosecondArray>()
                .with_timezone_opt(tz_opt.clone());
            let batch_len = input.len();
            // Argument fields mirror date_trunc's signature:
            // (granularity: Utf8, timestamps: Timestamp(ns, tz)).
            let arg_fields = vec![
                Field::new("a", DataType::Utf8, false).into(),
                Field::new("b", input.data_type().clone(), false).into(),
            ];
            let args = datafusion_expr::ScalarFunctionArgs {
                args: vec![
                    ColumnarValue::Scalar(ScalarValue::from(*granularity)),
                    ColumnarValue::Array(Arc::new(input)),
                ],
                arg_fields,
                number_rows: batch_len,
                return_field: Field::new(
                    "f",
                    DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
                    true,
                )
                .into(),
                config_options: Arc::new(ConfigOptions::default()),
            };
            let result = DateTruncFunc::new().invoke_with_args(args).unwrap();
            if let ColumnarValue::Array(result) = result {
                // The output DataType must retain the input's timezone —
                // this is the behavior under test.
                assert_eq!(
                    result.data_type(),
                    &DataType::Timestamp(TimeUnit::Nanosecond, tz_opt.clone()),
                    "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
                );
                // And the truncated values must match element-wise.
                let left = as_primitive_array::<TimestampNanosecondType>(&result);
                assert_eq!(
                    left, &right,
                    "Failed for granularity: {granularity}, timezone: {tz_opt:?}"
                );
            } else {
                panic!("unexpected column type");
            }
        });
}
}
55 changes: 34 additions & 21 deletions datafusion/sqllogictest/test_files/timestamps.slt
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ create table ts_data_millis as select arrow_cast(ts / 1000000, 'Timestamp(Millis
statement ok
create table ts_data_secs as select arrow_cast(ts / 1000000000, 'Timestamp(Second, None)') as ts, value from ts_data;

statement ok
create table ts_data_micros_kolkata as select arrow_cast(ts / 1000, 'Timestamp(Microsecond, Some("Asia/Kolkata"))') as ts, value from ts_data;


##########
## Current date Tests
Expand Down Expand Up @@ -1873,27 +1876,6 @@ true false true true



##########
## Common timestamp data
##########

statement ok
drop table ts_data

statement ok
drop table ts_data_nanos

statement ok
drop table ts_data_micros

statement ok
drop table ts_data_millis

statement ok
drop table ts_data_secs



##########
## Timezone impact on scalar functions
#
Expand Down Expand Up @@ -3703,3 +3685,34 @@ SELECT
FROM (SELECT CAST('2005-09-10 13:31:00 +02:00' AS timestamp with time zone) AS a)
----
Timestamp(ns, "+00") 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z 2005-09-10T11:31:00Z

query P
SELECT
date_trunc('millisecond', ts)
FROM ts_data_micros_kolkata
----
2020-09-08T19:12:29.190+05:30
2020-09-08T18:12:29.190+05:30
2020-09-08T17:12:29.190+05:30

##########
## Common timestamp data
##########

statement ok
drop table ts_data

statement ok
drop table ts_data_nanos

statement ok
drop table ts_data_micros

statement ok
drop table ts_data_millis

statement ok
drop table ts_data_secs

statement ok
drop table ts_data_micros_kolkata
Loading