From d9f3fcf79209599f90de7cd6084e91da78e3cdf5 Mon Sep 17 00:00:00 2001 From: TCeason Date: Fri, 21 Nov 2025 13:06:14 +0800 Subject: [PATCH 1/2] optimize: decrease call tzdb parse --- .../src/scalars/timestamp/src/datetime.rs | 139 ++++++++++++++++-- .../02_0012_function_datetimes_tz.test | 35 +++++ 2 files changed, 165 insertions(+), 9 deletions(-) diff --git a/src/query/functions/src/scalars/timestamp/src/datetime.rs b/src/query/functions/src/scalars/timestamp/src/datetime.rs index b5be6ebdbe445..a08d3d59b5977 100644 --- a/src/query/functions/src/scalars/timestamp/src/datetime.rs +++ b/src/query/functions/src/scalars/timestamp/src/datetime.rs @@ -49,6 +49,7 @@ use databend_common_expression::types::timestamp::TIMESTAMP_MAX; use databend_common_expression::types::timestamp::TIMESTAMP_MIN; use databend_common_expression::types::timestamp_tz::string_to_timestamp_tz; use databend_common_expression::types::timestamp_tz::TimestampTzType; +use databend_common_expression::types::AccessType; use databend_common_expression::types::Bitmap; use databend_common_expression::types::DataType; use databend_common_expression::types::DateType; @@ -57,8 +58,10 @@ use databend_common_expression::types::Int32Type; use databend_common_expression::types::IntervalType; use databend_common_expression::types::NullableType; use databend_common_expression::types::NumberType; +use databend_common_expression::types::ReturnType; use databend_common_expression::types::StringType; use databend_common_expression::types::TimestampType; +use databend_common_expression::types::ValueType; use databend_common_expression::types::F64; use databend_common_expression::utils::date_helper::*; use databend_common_expression::vectorize_1_arg; @@ -892,20 +895,138 @@ fn register_timestamp_to_date(registry: &mut FunctionRegistry) { ); fn eval_timestamp_to_date(val: Value, ctx: &mut EvalContext) -> Value { - vectorize_with_builder_1_arg::(|val, output, ctx| { - let tz = &ctx.func_ctx.tz; - 
output.push(calc_timestamp_to_date(val, tz)); - })(val, ctx) + match val { + Value::Scalar(v) => Value::Scalar(calc_timestamp_to_date(v, &ctx.func_ctx.tz)), + Value::Column(column) => { + let generics = ctx.generics.to_vec(); + let len = TimestampType::column_len(&column); + let mut builder = DateType::create_builder(len, &generics); + let mut cache = TimeZoneOffsetCache::new(&ctx.func_ctx.tz); + for v in TimestampType::iter_column(&column) { + let days = cache.days_for(v); + DateType::push_item(&mut builder, days); + } + Value::Column(DateType::build_column(builder)) + } + } } fn calc_timestamp_to_date(val: i64, tz: &TimeZone) -> i32 { - val.to_timestamp(tz) - .date() - .since((Unit::Day, Date::new(1970, 1, 1).unwrap())) - .unwrap() - .get_days() + let mut cache = TimeZoneOffsetCache::new(tz); + cache.days_for(val) + } +} + +struct OffsetInterval { + start: i64, + end: i64, + offset_micros: i64, +} + +struct TimeZoneOffsetCache<'a> { + tz: &'a TimeZone, + interval: Option, + fixed_offset_micros: Option, +} + +impl<'a> TimeZoneOffsetCache<'a> { + fn new(tz: &'a TimeZone) -> Self { + let fixed_offset_micros = determine_fixed_offset(tz); + Self { + tz, + interval: None, + fixed_offset_micros, + } + } + + fn days_for(&mut self, value: i64) -> i32 { + if self.tz == &TimeZone::UTC { + return utc_days_from_micros(value); + } + if let Some(offset) = self.fixed_offset_micros { + return apply_offset_micros(value, offset); + } + + let needs_refresh = match &self.interval { + Some(interval) => value < interval.start || value >= interval.end, + None => true, + }; + if needs_refresh { + self.interval = Some(self.build_interval(value)); + } + + let interval = self.interval.as_ref().unwrap(); + apply_offset_micros(value, interval.offset_micros) + } + + fn build_interval(&self, value: i64) -> OffsetInterval { + let ts = timestamp_from_microseconds_saturating(value); + let offset_micros = tz_offset_micros(self.tz, ts); + + let start = self + .tz + .preceding(ts) + .next() + 
.map(|t| t.timestamp().as_microsecond()) + .unwrap_or(i64::MIN); + let end = self + .tz + .following(ts) + .next() + .map(|t| t.timestamp().as_microsecond()) + .unwrap_or(i64::MAX); + + OffsetInterval { + start, + end, + offset_micros, + } } } +fn tz_offset_micros(tz: &TimeZone, ts: Timestamp) -> i64 { + let offset = tz.to_offset(ts); + (offset.seconds() as i64).saturating_mul(MICROS_PER_SEC) +} + +fn determine_fixed_offset(tz: &TimeZone) -> Option { + if tz == &TimeZone::UTC { + return Some(0); + } + + let epoch = Timestamp::UNIX_EPOCH; + let has_transitions = + tz.preceding(epoch).next().is_some() || tz.following(epoch).next().is_some(); + if has_transitions { + None + } else { + Some(tz_offset_micros(tz, epoch)) + } +} + +fn timestamp_from_microseconds_saturating(value: i64) -> Timestamp { + Timestamp::from_microsecond(value).unwrap_or_else(|_| { + if value < 0 { + Timestamp::MIN + } else { + Timestamp::MAX + } + }) +} + +fn utc_days_from_micros(value: i64) -> i32 { + timestamp_from_microseconds_saturating(value) + .to_zoned(TimeZone::UTC) + .date() + .since((Unit::Day, Date::new(1970, 1, 1).unwrap())) + .unwrap() + .get_days() +} + +fn apply_offset_micros(value: i64, offset_micros: i64) -> i32 { + let adjusted = value.saturating_add(offset_micros); + utc_days_from_micros(adjusted) +} + fn register_timestamp_tz_to_date(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::( "to_date", diff --git a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes_tz.test b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes_tz.test index 11977c90d6272..71f51902133e5 100644 --- a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes_tz.test +++ b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes_tz.test @@ -161,6 +161,41 @@ select next_day(to_date('2024-10-25'), saturday); ---- 2024-10-26 +statement ok +set timezone='America/Toronto' + +statement ok +drop table if exists tz_dst + 
+statement ok +create table tz_dst(ts timestamp) + +statement ok +insert into tz_dst values + (to_timestamp('2024-03-10 01:30:00')), + (to_timestamp('2024-06-10 12:00:00')), + (to_timestamp('2024-11-03 01:30:00')) + +query T +select to_date(to_timestamp('2024-03-10 01:30:00')); +---- +2024-03-10 + +query T +select to_date(to_timestamp('2024-11-03 01:30:00')); +---- +2024-11-03 + +query RT +select to_date(ts) from tz_dst order by ts; +---- +2024-03-10 +2024-06-10 +2024-11-03 + +statement ok +drop table if exists tz_dst + query T select next_day(to_timestamp('2024-10-25 01:00:00'), monday); ---- From 302d5adf8e280cbd6f0a4c07858a6c7779a74c7f Mon Sep 17 00:00:00 2001 From: TCeason Date: Tue, 25 Nov 2025 12:41:06 +0800 Subject: [PATCH 2/2] feat(query): cache LUTs for fast datetime math - add the databend-common-timezone crate and bump jiff to 0.2.16 so the workspace shares a single LUT-backed converter - use the new helpers inside IO parsing, expression utilities, timestamp scalars, and age() to preserve offsets/DST behaviour even for pre-1970 dates - expand SQLLogic/unit coverage plus add a datetime_fast_path bench --- Cargo.lock | 23 +- Cargo.toml | 4 +- src/common/io/Cargo.toml | 1 + .../cursor_ext/cursor_read_datetime_ext.rs | 105 ++--- .../tests/it/cursor_ext/read_datetime_ext.rs | 4 +- src/common/timezone/Cargo.toml | 16 + src/common/timezone/src/lib.rs | 428 ++++++++++++++++++ src/query/expression/Cargo.toml | 1 + src/query/expression/src/utils/date_helper.rs | 291 +++++++++++- src/query/expression/tests/it/types.rs | 9 +- src/query/functions/benches/bench.rs | 194 ++++++++ .../src/scalars/timestamp/Cargo.toml | 1 + .../src/scalars/timestamp/src/datetime.rs | 222 ++++----- .../src/scalars/timestamp/src/interval.rs | 141 +++++- .../11_0001_data_type_date_time.test | 8 +- .../02_0012_function_datetimes_tz.test | 73 ++- 16 files changed, 1265 insertions(+), 256 deletions(-) create mode 100644 src/common/timezone/Cargo.toml create mode 100644 
src/common/timezone/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index 3578dc0c12f76..aacd8b3076d18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3634,6 +3634,7 @@ dependencies = [ "databend-common-hashtable", "databend-common-io", "databend-common-metrics", + "databend-common-timezone", "databend_educe", "divan", "either", @@ -3848,6 +3849,7 @@ dependencies = [ "chrono-tz 0.8.6", "databend-common-base", "databend-common-exception", + "databend-common-timezone", "enquote", "enumflags2", "ethnum", @@ -4967,6 +4969,14 @@ dependencies = [ "tokio", ] +[[package]] +name = "databend-common-timezone" +version = "0.1.0" +dependencies = [ + "jiff", + "rand 0.8.5", +] + [[package]] name = "databend-common-tracing" version = "0.1.0" @@ -5262,6 +5272,7 @@ dependencies = [ "databend-common-column", "databend-common-exception", "databend-common-expression", + "databend-common-timezone", "dtparse", "jiff", "num-traits", @@ -9720,9 +9731,9 @@ dependencies = [ [[package]] name = "jiff" -version = "0.2.13" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f02000660d30638906021176af16b17498bd0d12813dbfe7b276d8bc7f3c0806" +checksum = "49cce2b81f2098e7e3efc35bc2e0a6b7abec9d34128283d7a26fa8f32a6dbb35" dependencies = [ "jiff-static", "jiff-tzdb", @@ -9730,15 +9741,15 @@ dependencies = [ "log", "portable-atomic", "portable-atomic-util", - "serde", - "windows-sys 0.59.0", + "serde_core", + "windows-sys 0.61.2", ] [[package]] name = "jiff-static" -version = "0.2.13" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3c30758ddd7188629c6713fc45d1188af4f44c90582311d0c8d8c9907f60c48" +checksum = "980af8b43c3ad5d8d349ace167ec8170839f753a42d233ba19e08afe1850fa69" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 8dc5a50a915e0..f9030d33c3a56 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,7 @@ members = [ "src/common/tracing", "src/common/storage", 
"src/common/vector", + "src/common/timezone", "src/common/license", "src/common/version", "src/query/ast", @@ -175,6 +176,7 @@ databend-common-storages-stage = { path = "src/query/storages/stage" } databend-common-storages-stream = { path = "src/query/storages/stream" } databend-common-storages-system = { path = "src/query/storages/system" } databend-common-telemetry = { path = "src/common/telemetry" } +databend-common-timezone = { path = "src/common/timezone" } databend-common-tracing = { path = "src/common/tracing" } databend-common-users = { path = "src/query/users" } databend-common-vector = { path = "src/common/vector" } @@ -352,7 +354,7 @@ jaq-core = "1.5.1" jaq-interpret = "1.5.0" jaq-parse = "1.0.3" jaq-std = "1.6.0" -jiff = { version = "0.2.10", features = ["serde", "tzdb-bundle-always"] } +jiff = { version = "0.2.16", features = ["serde", "tzdb-bundle-always"] } jsonb = "0.5.5" jwt-simple = { version = "0.12.10", default-features = false, features = ["pure-rust"] } lenient_semver = "0.4.2" diff --git a/src/common/io/Cargo.toml b/src/common/io/Cargo.toml index fd950e58d3308..5a50960baa6c5 100644 --- a/src/common/io/Cargo.toml +++ b/src/common/io/Cargo.toml @@ -14,6 +14,7 @@ chrono = { workspace = true } chrono-tz = { workspace = true } databend-common-base = { workspace = true } databend-common-exception = { workspace = true } +databend-common-timezone = { workspace = true } enquote = { workspace = true } enumflags2 = { workspace = true } ethnum = { workspace = true } diff --git a/src/common/io/src/cursor_ext/cursor_read_datetime_ext.rs b/src/common/io/src/cursor_ext/cursor_read_datetime_ext.rs index 140de3782f73f..05057a9448a5a 100644 --- a/src/common/io/src/cursor_ext/cursor_read_datetime_ext.rs +++ b/src/common/io/src/cursor_ext/cursor_read_datetime_ext.rs @@ -26,7 +26,7 @@ use chrono_tz::Tz; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_exception::ToErrorCode; -use jiff::civil::date; +use 
databend_common_timezone::fast_utc_from_local; use jiff::civil::Date; use jiff::tz::Offset; use jiff::tz::TimeZone; @@ -45,7 +45,6 @@ pub trait BufferReadDateTimeExt { fn read_timestamp_text(&mut self, tz: &TimeZone) -> Result; fn parse_time_offset( &mut self, - tz: &TimeZone, buf: &mut Vec, dt: &Zoned, west_tz: bool, @@ -87,14 +86,12 @@ where T: AsRef<[u8]> // Only support HH:mm format fn parse_time_offset( &mut self, - tz: &TimeZone, buf: &mut Vec, dt: &Zoned, west_tz: bool, calc_offset: impl Fn(i64, i64, &Zoned) -> Result, ) -> Result { fn get_hour_minute_offset( - tz: &TimeZone, dt: &Zoned, west_tz: bool, calc_offset: &impl Fn(i64, i64, &Zoned) -> Result, @@ -104,24 +101,14 @@ where T: AsRef<[u8]> if (hour_offset == 14 && minute_offset == 0) || ((0..60).contains(&minute_offset) && hour_offset < 14) { - if dt.year() < 1970 { - Ok(date(1970, 1, 1) - .at(0, 0, 0, 0) - .to_zoned(tz.clone()) - .map_err_to_code(ErrorCode::BadBytes, || format!("dt parse error"))?) - } else { - let current_tz_sec = dt.offset().seconds(); - let mut val_tz_sec = - Offset::from_seconds(hour_offset * 3600 + minute_offset * 60) - .map_err_to_code(ErrorCode::BadBytes, || { - "calc offset failed.".to_string() - })? - .seconds(); - if west_tz { - val_tz_sec = -val_tz_sec; - } - calc_offset(current_tz_sec.into(), val_tz_sec.into(), dt) + let current_tz_sec = dt.offset().seconds(); + let mut val_tz_sec = Offset::from_seconds(hour_offset * 3600 + minute_offset * 60) + .map_err_to_code(ErrorCode::BadBytes, || "calc offset failed.".to_string())? + .seconds(); + if west_tz { + val_tz_sec = -val_tz_sec; } + calc_offset(current_tz_sec.into(), val_tz_sec.into(), dt) } else { Err(ErrorCode::BadBytes(format!( "Invalid Timezone Offset: The minute offset '{}' is outside the valid range. 
Expected range is [00-59] within a timezone gap of [-14:00, +14:00]", @@ -146,16 +133,9 @@ where T: AsRef<[u8]> let minute_offset: i32 = lexical_core::FromLexical::from_lexical(buf.as_slice()).map_err_to_code(ErrorCode::BadBytes, || "minute offset parse error".to_string())?; // max utc: 14:00, min utc: 00:00 - get_hour_minute_offset( - tz, - dt, - west_tz, - &calc_offset, - hour_offset, - minute_offset, - ) + get_hour_minute_offset(dt, west_tz, &calc_offset, hour_offset, minute_offset) } else { - get_hour_minute_offset(tz, dt, west_tz, &calc_offset, hour_offset, 0) + get_hour_minute_offset(dt, west_tz, &calc_offset, hour_offset, 0) } } else { Err(ErrorCode::BadBytes(format!( @@ -174,14 +154,7 @@ where T: AsRef<[u8]> buf.clear(); // max utc: 14:00, min utc: 00:00 if (0..15).contains(&hour_offset) { - get_hour_minute_offset( - tz, - dt, - west_tz, - &calc_offset, - hour_offset, - minute_offset, - ) + get_hour_minute_offset(dt, west_tz, &calc_offset, hour_offset, minute_offset) } else { Err(ErrorCode::BadBytes(format!( "Invalid Timezone Offset: The hour offset '{}' is outside the valid range. 
Expected range is [00-14] within a timezone gap of [-14:00, +14:00]", @@ -279,13 +252,9 @@ where T: AsRef<[u8]> buf.clear(); let calc_offset = |current_tz_sec: i64, val_tz_sec: i64, dt: &Zoned| { let offset = (current_tz_sec - val_tz_sec) * 1000 * 1000; - let mut ts = dt.timestamp().as_microsecond(); - ts += offset; - let (mut secs, mut micros) = (ts / 1_000_000, ts % 1_000_000); - if ts < 0 { - secs -= 1; - micros += 1_000_000; - } + let ts = dt.timestamp().as_microsecond() + offset; + let secs = ts.div_euclid(1_000_000); + let micros = ts.rem_euclid(1_000_000); Ok(Timestamp::new(secs, (micros as i32) * 1000) .map_err_to_code(ErrorCode::BadBytes, || { format!("Datetime {} add offset {} with error", dt, offset) @@ -302,7 +271,6 @@ where T: AsRef<[u8]> )?)) } else if self.ignore_byte(b'+') { Ok(DateTimeResType::Datetime(self.parse_time_offset( - tz, &mut buf, &dt, false, @@ -310,7 +278,6 @@ where T: AsRef<[u8]> )?)) } else if self.ignore_byte(b'-') { Ok(DateTimeResType::Datetime(self.parse_time_offset( - tz, &mut buf, &dt, true, @@ -324,6 +291,8 @@ where T: AsRef<[u8]> // only date part if need_date { Ok(DateTimeResType::Date(d)) + } else if let Some(zoned) = fast_local_to_zoned(tz, &d, 0, 0, 0, 0) { + Ok(DateTimeResType::Datetime(zoned)) } else { Ok(DateTimeResType::Datetime( d.to_zoned(tz.clone()) @@ -336,15 +305,41 @@ where T: AsRef<[u8]> } } -// Can not directly unwrap, because of DST. -// e.g. -// set timezone='Europe/London'; -// -- if unwrap() will cause session panic. -// -- https://github.com/chronotope/chrono/blob/v0.4.24/src/offset/mod.rs#L186 -// select to_date(to_timestamp('2021-03-28 01:00:00')); -// Now add a setting enable_dst_hour_fix to control this behavior. If true, try to add a hour. +/// Convert a local civil time into a `Zoned` instant by first attempting the +/// LUT-based `fast_utc_from_local`. When the LUT cannot represent the request +/// (e.g. 
outside 1900–2299 or in a DST gap), fall back to Jiff's slower but +/// fully general conversion. The behavior mirrors ClickHouse/Jiff: gaps return +/// `None`, folds prefer the later instant. +fn fast_local_to_zoned( + tz: &TimeZone, + date: &Date, + hour: u8, + minute: u8, + second: u8, + micro: u32, +) -> Option { + let micros = fast_utc_from_local( + tz, + i32::from(date.year()), + date.month() as u8, + date.day() as u8, + hour, + minute, + second, + micro, + )?; + let ts = Timestamp::from_microsecond(micros).ok()?; + Some(ts.to_zoned(tz.clone())) +} + fn get_local_time(tz: &TimeZone, d: &Date, times: &mut Vec) -> Result { - d.at(times[0] as i8, times[1] as i8, times[2] as i8, 0) + let hour = times[0] as u8; + let minute = times[1] as u8; + let second = times[2] as u8; + if let Some(zoned) = fast_local_to_zoned(tz, d, hour, minute, second, 0) { + return Ok(zoned); + } + d.at(hour as i8, minute as i8, second as i8, 0) .to_zoned(tz.clone()) .map_err_to_code(ErrorCode::BadBytes, || { format!("Invalid time provided in times: {:?}", times) diff --git a/src/common/io/tests/it/cursor_ext/read_datetime_ext.rs b/src/common/io/tests/it/cursor_ext/read_datetime_ext.rs index f7f84d68e4b11..8ffe59748496c 100644 --- a/src/common/io/tests/it/cursor_ext/read_datetime_ext.rs +++ b/src/common/io/tests/it/cursor_ext/read_datetime_ext.rs @@ -53,7 +53,7 @@ fn test_read_timestamp_text() -> Result<()> { "2020-01-01T11:11:11.123+00:00[UTC]", "2055-02-03T02:00:20.234+00:00[UTC]", "2055-02-03T18:00:20.234+00:00[UTC]", - "1970-01-01T00:00:00+00:00[UTC]", + "1022-05-15T19:25:02+00:00[UTC]", ]; let mut res = vec![]; for _ in 0..expected.len() { @@ -123,7 +123,7 @@ fn test_read_date_text() -> Result<()> { "2020-01-01", "2055-02-03", "2055-02-03", - "1970-01-01", + "1022-05-15", "2055-01-01", ]; diff --git a/src/common/timezone/Cargo.toml b/src/common/timezone/Cargo.toml new file mode 100644 index 0000000000000..eb71aec84d983 --- /dev/null +++ b/src/common/timezone/Cargo.toml @@ -0,0 +1,16 
@@ +[package] +name = "databend-common-timezone" +version = { workspace = true } +authors = { workspace = true } +license = { workspace = true } +publish = { workspace = true } +edition = { workspace = true } + +[dependencies] +jiff = { workspace = true } + +[dev-dependencies] +rand = { workspace = true } + +[lints] +workspace = true diff --git a/src/common/timezone/src/lib.rs b/src/common/timezone/src/lib.rs new file mode 100644 index 0000000000000..ea59e0f36ad4b --- /dev/null +++ b/src/common/timezone/src/lib.rs @@ -0,0 +1,428 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; +use std::sync::LazyLock; +use std::sync::RwLock; + +use jiff::civil::date; +use jiff::civil::Date; +use jiff::civil::Time; +use jiff::civil::Weekday; +use jiff::tz::TimeZone; +use jiff::SignedDuration; + +const LUT_MIN_YEAR: i32 = 1900; +const LUT_MAX_YEAR: i32 = 2299; +const SECONDS_PER_DAY: i64 = 86_400; +const MICROS_PER_SEC: i64 = 1_000_000; + +#[derive(Clone)] +struct DayEntry { + /// UTC seconds for the local midnight that starts this day. + /// When a fold occurs (e.g. `America/New_York` 2024-11-03), + /// the “later” midnight is chosen so that every instant maps + /// to exactly one entry. + start_utc: i64, + /// Calendar components for the local day. + year: i32, + month: u8, + day: u8, + weekday: Weekday, + days_in_month: u8, + day_of_year: u16, + /// UTC offset (seconds) that is active at `start_utc`. 
+ offset_at_start: i32, + /// When a DST transition occurs inside this day, this stores the UTC + /// second at which the new offset starts (e.g. `America/Los_Angeles` + /// jumps ahead at `2024-03-10T10:00:00Z`). + transition_utc: Option, + /// Local seconds since midnight when the transition takes place. + /// For the 2024-03-10 DST gap in Los Angeles, this is `2*3600`. + transition_elapsed: Option, + /// Offset delta (seconds) introduced by the transition: +3600 for a DST + /// gap, -3600 for a fold, 0 when no change occurs that day. + offset_change: i32, +} + +impl DayEntry { + fn new(date: Date, tz: &TimeZone) -> Self { + let midnight = date.to_datetime(Time::midnight()); + let ambiguous = tz.to_ambiguous_zoned(midnight); + let needs_later = ambiguous.is_ambiguous(); + let zoned = match (needs_later, ambiguous) { + (true, ambiguous) => ambiguous + .later() + .expect("construct timezone lut: disambiguate midnight via later transition"), + (false, ambiguous) => ambiguous + .compatible() + .expect("construct timezone lut: convert midnight to zoned"), + }; + + let start_ts = zoned.timestamp(); + let start_utc = start_ts.as_second(); + let offset = zoned.offset().seconds(); + + let mut transition_utc = None; + let mut offset_change = 0; + let mut transition_elapsed = None; + + let follow_start = start_ts + .saturating_sub(SignedDuration::from_secs(1)) + .unwrap_or(start_ts); + + let mut transitions = tz.following(follow_start); + for trans in &mut transitions { + let trans_sec = trans.timestamp().as_second(); + if trans_sec < start_utc { + continue; + } + if trans_sec >= start_utc + SECONDS_PER_DAY { + break; + } + + transition_utc = Some(trans_sec); + transition_elapsed = Some((trans_sec - start_utc) as i32); + let before_ts = trans + .timestamp() + .saturating_sub(SignedDuration::from_secs(1)) + .unwrap_or(trans.timestamp()); + let before_offset = tz.to_offset(before_ts).seconds(); + offset_change = trans.offset().seconds() - before_offset; + break; + } + + let 
year = date.year() as i32; + let month = date.month() as u8; + let day = date.day() as u8; + let day_of_year = day_of_year(year, month, day); + + Self { + start_utc, + year, + month, + day, + weekday: date.weekday(), + days_in_month: last_day_of_month(year, month), + day_of_year, + offset_at_start: offset, + transition_utc, + transition_elapsed, + offset_change, + } + } + + fn applies_to(&self, seconds: i64, next: &DayEntry) -> bool { + seconds >= self.start_utc && seconds < next.start_utc + } + + fn build_components(&self, seconds: i64, micros: u32) -> DateTimeComponents { + let mut local_seconds = seconds - self.start_utc; + let mut offset = self.offset_at_start; + if let (Some(trans), Some(transition_elapsed)) = + (self.transition_utc, self.transition_elapsed) + { + if seconds >= trans { + if transition_elapsed == 0 { + if self.offset_change > 0 { + // Gap at the top of the day: skip the missing span but retain + // the post-transition offset that `offset_at_start` already has. + local_seconds += self.offset_change as i64; + } + } else { + local_seconds += self.offset_change as i64; + offset += self.offset_change; + } + } + } + debug_assert!(local_seconds >= 0); + + let hour = (local_seconds / 3600) as u8; + let minute = ((local_seconds % 3600) / 60) as u8; + let second = (local_seconds % 60) as u8; + + DateTimeComponents { + year: self.year, + month: self.month, + day: self.day, + hour, + minute, + second, + micro: micros, + weekday: self.weekday, + days_in_month: self.days_in_month, + day_of_year: self.day_of_year, + offset_seconds: offset, + unix_seconds: seconds, + } + } + + fn local_elapsed_seconds(&self, local_seconds: i64) -> Option { + let mut elapsed = local_seconds; + if let (Some(_), Some(transition_elapsed)) = (self.transition_utc, self.transition_elapsed) + { + let offset_change = self.offset_change as i64; + if offset_change == 0 { + return Some(self.start_utc + elapsed); + } + + if transition_elapsed == 0 && offset_change < 0 { + // This is a fold 
that happens exactly at the day boundary. Since the + // day itself is anchored to the later midnight, treat it as if no + // transition occurs within the day. + } else if offset_change > 0 { + let gap_start = transition_elapsed as i64; + let gap_end = gap_start + offset_change; + if local_seconds < gap_start { + elapsed = local_seconds; + } else if local_seconds >= gap_end { + elapsed = local_seconds - offset_change; + } else { + return None; + } + } else if local_seconds >= transition_elapsed as i64 { + elapsed = local_seconds - offset_change; + } else { + elapsed = local_seconds; + } + } + Some(self.start_utc + elapsed) + } +} + +struct TimeZoneLut { + daynum_offset: i64, + entries: Vec, +} + +impl TimeZoneLut { + fn new(tz: &TimeZone) -> Self { + let mut entries = Vec::with_capacity(days_between(LUT_MIN_YEAR, LUT_MAX_YEAR + 2)); + let mut date = date(LUT_MIN_YEAR as i16, 1, 1); + + loop { + entries.push(DayEntry::new(date, tz)); + if date.year() as i32 == LUT_MAX_YEAR + 1 && date.month() == 1 && date.day() == 1 { + break; + } + date = date + .checked_add(SignedDuration::from_hours(24)) + .expect("construct timezone lut: increment date"); + } + + let daynum_offset = days_before_year(1970) - days_before_year(LUT_MIN_YEAR); + + Self { + daynum_offset, + entries, + } + } + + fn entry_for_local_date(&self, year: i32, month: u8, day: u8) -> Option<&DayEntry> { + let index = day_index_for_date(year, month, day)?; + if index >= self.entries.len() - 1 { + return None; + } + Some(&self.entries[index]) + } + + fn lookup(&self, seconds: i64) -> Option<&DayEntry> { + let guess = seconds.div_euclid(SECONDS_PER_DAY) + self.daynum_offset; + if guess < 0 { + return None; + } + + let last_index = self.entries.len() - 1; + if guess as usize >= last_index { + return None; + } + + let mut index = guess as usize; + + if seconds < self.entries[index].start_utc { + if index == 0 { + return None; + } + index -= 1; + } else if !self.entries[index].applies_to(seconds, &self.entries[index 
+ 1]) { + index += 1; + if index >= last_index { + return None; + } + } + + Some(&self.entries[index]) + } +} + +type LutCache = RwLock)>>; + +static TZ_LUTS: LazyLock = LazyLock::new(|| RwLock::new(Vec::new())); + +fn get_or_init_lut(tz: &TimeZone) -> Arc { + { + let guard = TZ_LUTS.read().unwrap(); + if let Some((_, lut)) = guard.iter().find(|(key, _)| key == tz) { + return lut.clone(); + } + } + + let mut guard = TZ_LUTS.write().unwrap(); + if let Some((_, lut)) = guard.iter().find(|(key, _)| key == tz) { + return lut.clone(); + } + + let lut = Arc::new(TimeZoneLut::new(tz)); + guard.push((tz.clone(), lut.clone())); + lut +} + +#[derive(Debug, Clone)] +pub struct DateTimeComponents { + pub year: i32, + pub month: u8, + pub day: u8, + pub hour: u8, + pub minute: u8, + pub second: u8, + pub micro: u32, + pub weekday: Weekday, + pub days_in_month: u8, + pub day_of_year: u16, + pub offset_seconds: i32, + pub unix_seconds: i64, +} + +impl DateTimeComponents { + pub fn iso_year_week(&self) -> (i32, u32) { + let day = self.day_of_year as i32; + let weekday = self.weekday.to_monday_one_offset() as i32; + let mut week = (day - weekday + 10).div_euclid(7); + let mut year = self.year; + + if week < 1 { + year -= 1; + week = weeks_in_year(year) as i32; + } else { + let weeks_current = weeks_in_year(year) as i32; + if week > weeks_current { + year += 1; + week = 1; + } + } + + (year, week as u32) + } +} + +pub fn fast_components_from_timestamp(micros: i64, tz: &TimeZone) -> Option { + let seconds = micros.div_euclid(1_000_000); + let micros = micros.rem_euclid(1_000_000) as u32; + let lut = get_or_init_lut(tz); + let entry = lut.lookup(seconds)?; + Some(entry.build_components(seconds, micros)) +} + +/// Convert a local calendar time into UTC microseconds using the cached LUT. +/// Returns `None` when the request lies outside the supported year range +/// (1900–2299) or when the local timestamp falls in a DST gap. 
For DST folds +/// the “later” instant is returned, matching how `fast_components_from_timestamp` +/// anchors each day. +pub fn fast_utc_from_local( + tz: &TimeZone, + year: i32, + month: u8, + day: u8, + hour: u8, + minute: u8, + second: u8, + micro: u32, +) -> Option { + if hour >= 24 || minute >= 60 || second >= 60 || micro >= 1_000_000 { + return None; + } + let local_seconds = (hour as i64) * 3600 + (minute as i64) * 60 + (second as i64); + let lut = get_or_init_lut(tz); + let entry = lut.entry_for_local_date(year, month, day)?; + let utc_seconds = entry.local_elapsed_seconds(local_seconds)?; + let total = (utc_seconds as i128) * (MICROS_PER_SEC as i128) + micro as i128; + if total > i64::MAX as i128 || total < i64::MIN as i128 { + return None; + } + Some(total as i64) +} + +fn days_before_year(year: i32) -> i64 { + let y = (year - 1) as i64; + 365 * y + y / 4 - y / 100 + y / 400 +} + +fn days_between(start_year: i32, end_year: i32) -> usize { + (days_before_year(end_year) - days_before_year(start_year)) as usize +} + +const CUMULATIVE_DAYS: [u16; 12] = [0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334]; + +fn day_of_year(year: i32, month: u8, day: u8) -> u16 { + let mut ordinal = CUMULATIVE_DAYS[(month - 1) as usize] + day as u16; + if month > 2 && is_leap_year(year) { + ordinal += 1; + } + ordinal +} + +fn day_index_for_date(year: i32, month: u8, day: u8) -> Option { + if !(LUT_MIN_YEAR..=LUT_MAX_YEAR).contains(&year) { + return None; + } + if month == 0 || month > 12 { + return None; + } + let last = last_day_of_month(year, month); + if day == 0 || day > last { + return None; + } + let ordinal = day_of_year(year, month, day) as i64; + let days_before = days_before_year(year); + let offset = days_before - days_before_year(LUT_MIN_YEAR); + Some((offset + ordinal - 1) as usize) +} + +fn last_day_of_month(year: i32, month: u8) -> u8 { + match month { + 1 | 3 | 5 | 7 | 8 | 10 | 12 => 31, + 4 | 6 | 9 | 11 => 30, + 2 => { + if is_leap_year(year) { + 29 + } 
else { + 28 + } + } + _ => unreachable!("invalid month"), + } +} + +fn is_leap_year(year: i32) -> bool { + (year % 4 == 0 && year % 100 != 0) || year % 400 == 0 +} + +fn weeks_in_year(year: i32) -> u32 { + let first_day = date(year as i16, 1, 1); + match first_day.weekday() { + Weekday::Thursday => 53, + Weekday::Wednesday if is_leap_year(year) => 53, + _ => 52, + } +} diff --git a/src/query/expression/Cargo.toml b/src/query/expression/Cargo.toml index 2bc9634aa4ccb..20b70d4a00f72 100644 --- a/src/query/expression/Cargo.toml +++ b/src/query/expression/Cargo.toml @@ -30,6 +30,7 @@ databend-common-grpc = { workspace = true } databend-common-hashtable = { workspace = true } databend-common-io = { workspace = true } databend-common-metrics = { workspace = true } +databend-common-timezone = { workspace = true } educe = { workspace = true } either = { workspace = true } enum-as-inner = { workspace = true } diff --git a/src/query/expression/src/utils/date_helper.rs b/src/query/expression/src/utils/date_helper.rs index 495ada4f19b83..8825e9a8b524c 100644 --- a/src/query/expression/src/utils/date_helper.rs +++ b/src/query/expression/src/utils/date_helper.rs @@ -15,6 +15,9 @@ use std::sync::LazyLock; use databend_common_exception::Result; +use databend_common_timezone::fast_components_from_timestamp; +use databend_common_timezone::fast_utc_from_local; +use databend_common_timezone::DateTimeComponents; use jiff::civil::date; use jiff::civil::datetime; use jiff::civil::Date; @@ -177,14 +180,67 @@ macro_rules! 
impl_interval_year_month { add_months: bool, ) -> std::result::Result { let ts = us.to_timestamp(tz); + let original_offset = ts.offset().seconds(); + + if let Some(components) = fast_components_from_timestamp(us, tz) { + let new_date = $op( + components.year as i16, + components.month as i8, + components.day as i8, + delta.as_(), + add_months, + )?; + if let Some(mut new_ts) = fast_utc_from_local( + tz, + new_date.year() as i32, + new_date.month() as u8, + new_date.day() as u8, + components.hour, + components.minute, + components.second, + components.micro, + ) { + if let Some(new_components) = fast_components_from_timestamp(new_ts, tz) { + if new_components.offset_seconds != original_offset { + let shift_secs = + (new_components.offset_seconds - original_offset) as i64; + let shift_micros = shift_secs.saturating_mul(MICROS_PER_SEC); + new_ts = new_ts.checked_add(shift_micros).unwrap_or_else(|| { + if shift_micros.is_negative() { + i64::MIN + } else { + i64::MAX + } + }); + } + clamp_timestamp(&mut new_ts); + return Ok(new_ts); + } + } + } + let new_date = $op(ts.year(), ts.month(), ts.day(), delta.as_(), add_months)?; - let mut ts = new_date - .at(ts.hour(), ts.minute(), ts.second(), ts.subsec_nanosecond()) - .to_zoned(tz.clone()) - .map_err(|e| format!("{}", e))? 
- .timestamp() - .as_microsecond(); + let local = + new_date.at(ts.hour(), ts.minute(), ts.second(), ts.subsec_nanosecond()); + let mut zoned = match local.to_zoned(tz.clone()) { + Ok(z) => z, + Err(e) => match local.checked_add(SignedDuration::from_secs(3600)) { + Ok(res2) => res2 + .to_zoned(tz.clone()) + .map_err(|err| format!("{}", err))?, + Err(_) => return Err(format!("{}", e)), + }, + }; + if zoned.offset().seconds() != original_offset { + let shift = (zoned.offset().seconds() - original_offset) as i64; + if let Ok(adj_local) = local.checked_add(SignedDuration::from_secs(shift)) { + if let Ok(adj_zoned) = adj_local.to_zoned(tz.clone()) { + zoned = adj_zoned; + } + } + } + let mut ts = zoned.timestamp().as_microsecond(); clamp_timestamp(&mut ts); Ok(ts) } @@ -195,6 +251,25 @@ macro_rules! impl_interval_year_month { impl_interval_year_month!(EvalYearsImpl, eval_years_base); impl_interval_year_month!(EvalMonthsImpl, eval_months_base); +/// Compare two `DateTimeComponents` by their time-of-day portion only. 
+fn components_time_less_than(a: &DateTimeComponents, b: &DateTimeComponents) -> bool { + (a.hour, a.minute, a.second, a.micro) < (b.hour, b.minute, b.second, b.micro) +} + +fn date_from_components(c: &DateTimeComponents) -> Option { + Date::new(c.year as i16, c.month as i8, c.day as i8).ok() +} + +fn datetime_from_components(c: &DateTimeComponents) -> Option { + let date = date_from_components(c)?; + Some(date.at( + c.hour as i8, + c.minute as i8, + c.second as i8, + (c.micro * 1_000) as i32, + )) +} + impl EvalYearsImpl { pub fn eval_date_diff(date_start: i32, date_end: i32, tz: &TimeZone) -> i32 { let date_start = date_start.to_date(tz); @@ -228,6 +303,12 @@ impl EvalYearsImpl { } pub fn eval_timestamp_diff(date_start: i64, date_end: i64, tz: &TimeZone) -> i64 { + if let (Some(start), Some(end)) = ( + fast_components_from_timestamp(date_start, tz), + fast_components_from_timestamp(date_end, tz), + ) { + return (end.year as i64) - (start.year as i64); + } let date_start = date_start.to_timestamp(tz); let date_end = date_end.to_timestamp(tz); date_end.year() as i64 - date_start.year() as i64 @@ -240,6 +321,23 @@ impl EvalYearsImpl { if date_start > date_end { return -Self::eval_timestamp_between(date_end, date_start, tz); } + if let (Some(start), Some(end)) = ( + fast_components_from_timestamp(date_start, tz), + fast_components_from_timestamp(date_end, tz), + ) { + let mut years = end.year - start.year; + let start_is_feb_29 = start.month == 2 && start.day == 29; + let end_is_feb_28 = end.month == 2 && end.day == 28; + let end_before_start_date = (end.month < start.month) + || (end.month == start.month && end.day < start.day) + || (end.month == start.month + && end.day == start.day + && components_time_less_than(&end, &start)); + if !(start_is_feb_29 && end_is_feb_28) && end_before_start_date { + years -= 1; + } + return years as i64; + } let start = date_start.to_timestamp(tz); let end = date_end.to_timestamp(tz); @@ -298,6 +396,14 @@ impl EvalISOYearsImpl { } 
pub fn eval_timestamp_diff(date_start: i64, date_end: i64, tz: &TimeZone) -> i64 { + if let (Some(start), Some(end)) = ( + fast_components_from_timestamp(date_start, tz), + fast_components_from_timestamp(date_end, tz), + ) { + let (start_year, _) = start.iso_year_week(); + let (end_year, _) = end.iso_year_week(); + return (end_year - start_year) as i64; + } let date_start = date_start.to_timestamp(tz); let date_end = date_end.to_timestamp(tz); date_end.date().iso_week_date().year() as i64 - date_start.iso_week_date().year() as i64 @@ -310,6 +416,23 @@ impl EvalISOYearsImpl { if date_start > date_end { return -Self::eval_timestamp_between(date_end, date_start, tz); } + if let (Some(start), Some(end)) = ( + fast_components_from_timestamp(date_start, tz), + fast_components_from_timestamp(date_end, tz), + ) { + let mut years = end.year - start.year; + let start_is_feb_29 = start.month == 2 && start.day == 29; + let end_is_feb_28 = end.month == 2 && end.day == 28; + let end_before_start_date = (end.month < start.month) + || (end.month == start.month && end.day < start.day) + || (end.month == start.month + && end.day == start.day + && components_time_less_than(&end, &start)); + if !(start_is_feb_29 && end_is_feb_28) && end_before_start_date { + years -= 1; + } + return years as i64; + } let start = date_start.to_timestamp(tz); let end = date_end.to_timestamp(tz); @@ -344,6 +467,11 @@ impl EvalYearWeeksImpl { (iso_week.year() as i32 * 100) + iso_week.week() as i32 } + fn yearweek_from_components(components: &DateTimeComponents) -> i32 { + let (year, week) = components.iso_year_week(); + year * 100 + week as i32 + } + pub fn eval_date_diff(date_start: i32, date_end: i32, tz: &TimeZone) -> i32 { let date_start = date_start.to_date(tz); let date_end = date_end.to_date(tz); @@ -354,6 +482,14 @@ impl EvalYearWeeksImpl { } pub fn eval_timestamp_diff(date_start: i64, date_end: i64, tz: &TimeZone) -> i64 { + if let (Some(start), Some(end)) = ( + 
fast_components_from_timestamp(date_start, tz), + fast_components_from_timestamp(date_end, tz), + ) { + let start_yw = Self::yearweek_from_components(&start) as i64; + let end_yw = Self::yearweek_from_components(&end) as i64; + return end_yw - start_yw; + } let date_start = date_start.to_timestamp(tz); let date_end = date_end.to_timestamp(tz); let end = Self::yearweek(date_end.date()) as i64; @@ -436,6 +572,14 @@ impl EvalQuartersImpl { } pub fn eval_timestamp_diff(date_start: i64, date_end: i64, tz: &TimeZone) -> i64 { + if let (Some(start), Some(end)) = ( + fast_components_from_timestamp(date_start, tz), + fast_components_from_timestamp(date_end, tz), + ) { + let start_quarter = ((start.month as i64 - 1) / 3) + 1; + let end_quarter = ((end.month as i64 - 1) / 3) + 1; + return (end.year as i64 - start.year as i64) * 4 + end_quarter - start_quarter; + } let date_start = date_start.to_timestamp(tz); let date_end = date_end.to_timestamp(tz); (date_end.year() - date_start.year()) as i64 * 4 + ToQuarter::to_number(&date_end) as i64 @@ -566,6 +710,20 @@ impl EvalMonthsImpl { if start > end { return -Self::eval_timestamp_between(end, start, tz); } + if let (Some(start_c), Some(end_c)) = ( + fast_components_from_timestamp(start, tz), + fast_components_from_timestamp(end, tz), + ) { + let year_diff = end_c.year - start_c.year; + let month_diff = end_c.month as i32 - start_c.month as i32; + let mut months = year_diff as i64 * 12 + month_diff as i64; + if (end_c.day < start_c.day) + || (end_c.day == start_c.day && components_time_less_than(&end_c, &start_c)) + { + months -= 1; + } + return months; + } let start = start.to_timestamp(tz); let end = end.to_timestamp(tz); @@ -714,6 +872,31 @@ impl EvalWeeksImpl { if start > end { return -Self::eval_timestamp_between(end, start, tz); } + if let (Some(start_c), Some(end_c)) = ( + fast_components_from_timestamp(start, tz), + fast_components_from_timestamp(end, tz), + ) { + if let (Some(start_date), Some(end_date)) = + 
(date_from_components(&start_c), date_from_components(&end_c)) + { + let mut weeks = Self::calculate_weeks_between_years( + start_date.year() as i32, + end_date.year() as i32, + start_date.iso_week_date().week() as u32, + end_date.iso_week_date().week() as u32, + ) as i64; + let days_since_monday = end_c.weekday.to_monday_one_offset() - 1; + let dur = SignedDuration::from_hours(days_since_monday as i64 * 24); + let monday_of_end_week = end_date.checked_sub(dur).unwrap(); + let monday_dt = monday_of_end_week.at(0, 0, 0, 0); + if let Some(end_dt) = datetime_from_components(&end_c) { + if end_dt < monday_dt { + weeks -= 1; + } + } + return weeks; + } + } let earlier = start.to_timestamp(tz); let later = end.to_timestamp(tz); @@ -896,12 +1079,21 @@ pub fn calc_date_to_timestamp(val: i32, tz: &TimeZone) -> std::result::Result { fn to_number(dt: &Zoned) -> N; + + fn from_components(_components: &DateTimeComponents) -> Option { + None + } } pub struct ToNumberImpl; impl ToNumberImpl { pub fn eval_timestamp, R>(us: i64, tz: &TimeZone) -> R { + if let Some(components) = fast_components_from_timestamp(us, tz) { + if let Some(value) = T::from_components(&components) { + return value; + } + } let dt = us.to_timestamp(tz); T::to_number(&dt) } @@ -943,24 +1135,42 @@ impl ToNumber for ToYYYYMM { fn to_number(dt: &Zoned) -> u32 { dt.year() as u32 * 100 + dt.month() as u32 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some(components.year as u32 * 100 + components.month as u32) + } } impl ToNumber for ToMillennium { fn to_number(dt: &Zoned) -> u16 { dt.year() as u16 / 1000 + 1 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some(components.year as u16 / 1000 + 1) + } } impl ToNumber for ToWeekOfYear { fn to_number(dt: &Zoned) -> u32 { dt.date().iso_week_date().week() as u32 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some(components.iso_year_week().1) + } } impl ToNumber for ToYYYYMMDD { fn to_number(dt: 
&Zoned) -> u32 { dt.year() as u32 * 10_000 + dt.month() as u32 * 100 + dt.day() as u32 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some( + components.year as u32 * 10_000 + components.month as u32 * 100 + components.day as u32, + ) + } } impl ToNumber for ToYYYYMMDDHH { @@ -970,6 +1180,15 @@ impl ToNumber for ToYYYYMMDDHH { + dt.day() as u64 * 100 + dt.hour() as u64 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some( + components.year as u64 * 1_000_000 + + components.month as u64 * 10_000 + + components.day as u64 * 100 + + components.hour as u64, + ) + } } impl ToNumber for ToYYYYMMDDHHMMSS { @@ -981,30 +1200,57 @@ impl ToNumber for ToYYYYMMDDHHMMSS { + dt.minute() as u64 * 100 + dt.second() as u64 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some( + components.year as u64 * 10_000_000_000 + + components.month as u64 * 100_000_000 + + components.day as u64 * 1_000_000 + + components.hour as u64 * 10_000 + + components.minute as u64 * 100 + + components.second as u64, + ) + } } impl ToNumber for ToYear { fn to_number(dt: &Zoned) -> u16 { dt.year() as u16 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some(components.year as u16) + } } impl ToNumber for ToTimezoneHour { fn to_number(dt: &Zoned) -> i16 { dt.offset().seconds().div_ceil(3600) as i16 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some(components.offset_seconds.div_ceil(3600) as i16) + } } impl ToNumber for ToTimezoneMinute { fn to_number(dt: &Zoned) -> i16 { (dt.offset().seconds() % 3600).div_ceil(60) as i16 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some((components.offset_seconds % 3600).div_ceil(60) as i16) + } } impl ToNumber for ToISOYear { fn to_number(dt: &Zoned) -> u16 { dt.date().iso_week_date().year() as _ } + + fn from_components(components: &DateTimeComponents) -> Option { + Some(components.iso_year_week().0 as u16) + } } impl 
ToNumber for ToYYYYWW { @@ -1013,6 +1259,11 @@ impl ToNumber for ToYYYYWW { let year = week_date.year() as u32 * 100; year + dt.date().iso_week_date().week() as u32 } + + fn from_components(components: &DateTimeComponents) -> Option { + let (iso_year, iso_week) = components.iso_year_week(); + Some(iso_year as u32 * 100 + iso_week) + } } impl ToNumber for ToQuarter { @@ -1020,42 +1271,70 @@ impl ToNumber for ToQuarter { // begin with 0 ((dt.month() - 1) / 3 + 1) as u8 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some((components.month - 1) / 3 + 1) + } } impl ToNumber for ToMonth { fn to_number(dt: &Zoned) -> u8 { dt.month() as u8 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some(components.month) + } } impl ToNumber for ToDayOfYear { fn to_number(dt: &Zoned) -> u16 { dt.day_of_year() as u16 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some(components.day_of_year) + } } impl ToNumber for ToDayOfMonth { fn to_number(dt: &Zoned) -> u8 { dt.day() as u8 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some(components.day) + } } impl ToNumber for ToDayOfWeek { fn to_number(dt: &Zoned) -> u8 { dt.weekday().to_monday_one_offset() as u8 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some(components.weekday.to_monday_one_offset() as u8) + } } impl ToNumber for DayOfWeek { fn to_number(dt: &Zoned) -> u8 { dt.weekday().to_sunday_zero_offset() as u8 } + + fn from_components(components: &DateTimeComponents) -> Option { + Some(components.weekday.to_sunday_zero_offset() as u8) + } } impl ToNumber for ToUnixTimestamp { fn to_number(dt: &Zoned) -> i64 { dt.with_time_zone(TimeZone::UTC).timestamp().as_second() } + + fn from_components(components: &DateTimeComponents) -> Option { + Some(components.unix_seconds) + } } #[derive(Clone, Copy)] diff --git a/src/query/expression/tests/it/types.rs b/src/query/expression/tests/it/types.rs index 
ffa94734b06f8..1c997b1e12671 100644 --- a/src/query/expression/tests/it/types.rs +++ b/src/query/expression/tests/it/types.rs @@ -57,9 +57,14 @@ fn test_parse_jiff() { offset ); - let (mut tm, _) = BrokenDownTime::parse_prefix("%s,%Y", "200,2000").unwrap(); + // Jiff 0.2.16 requires a full civil date to build a datetime. For inputs + // that only specify a Unix timestamp (`%s`), verify via `to_timestamp`. + let (mut tm, _) = BrokenDownTime::parse_prefix("%s", "200").unwrap(); tm.set_offset(Some(tz::offset(0 as _))); - assert_eq!("2000-01-01T00:03:20", tm.to_datetime().unwrap().to_string()); + assert_eq!( + "1970-01-01T00:03:20Z", + tm.to_timestamp().unwrap().to_string() + ); } #[test] diff --git a/src/query/functions/benches/bench.rs b/src/query/functions/benches/bench.rs index 84036f24d2d67..218a63dfe87a2 100644 --- a/src/query/functions/benches/bench.rs +++ b/src/query/functions/benches/bench.rs @@ -68,3 +68,197 @@ mod dummy { }); } } + +#[divan::bench_group(max_time = 0.5)] +mod datetime_fast_path { + use std::sync::LazyLock; + + use databend_common_expression::type_check; + use databend_common_expression::types::string::StringColumn; + use databend_common_expression::types::string::StringColumnBuilder; + use databend_common_expression::types::timestamp::microseconds_to_days; + use databend_common_expression::types::timestamp::timestamp_to_string; + use databend_common_expression::types::DataType; + use databend_common_expression::BlockEntry; + use databend_common_expression::Column; + use databend_common_expression::DataBlock; + use databend_common_expression::Evaluator; + use databend_common_expression::Expr; + use databend_common_expression::FunctionContext; + use databend_common_functions::test_utils as parser; + use databend_common_functions::BUILTIN_FUNCTIONS; + use jiff::civil::date; + use jiff::tz::TimeZone; + use rand::rngs::StdRng; + use rand::Rng; + use rand::SeedableRng; + + const ROWS: usize = 100_000; + const SPECIAL_EVERY: usize = 20_000; + + 
static SAMPLES: LazyLock = + LazyLock::new(|| DateTimeSamples::new(ROWS, SPECIAL_EVERY)); + + struct DateTimeSamples { + timestamps: databend_common_column::buffer::Buffer, + dates: databend_common_column::buffer::Buffer, + timestamp_strings: StringColumn, + } + + impl DateTimeSamples { + fn new(rows: usize, interval: usize) -> Self { + let timestamps = generate_timestamp_values(rows, interval); + let dates: Vec = timestamps + .iter() + .map(|&micros| microseconds_to_days(micros)) + .collect(); + let tz_sh = TimeZone::get("Asia/Shanghai").unwrap(); + let mut string_builder = StringColumnBuilder::with_capacity(rows); + for &micros in timestamps.iter() { + let formatted = timestamp_to_string(micros, &tz_sh).to_string(); + string_builder.put_and_commit(formatted); + } + Self { + timestamps: timestamps.into(), + dates: dates.into(), + timestamp_strings: string_builder.build(), + } + } + + fn rows(&self) -> usize { + self.timestamps.len() + } + + fn timestamp_entry(&self) -> BlockEntry { + BlockEntry::Column(Column::Timestamp(self.timestamps.clone())) + } + + fn date_entry(&self) -> BlockEntry { + BlockEntry::Column(Column::Date(self.dates.clone())) + } + + fn string_entry(&self) -> BlockEntry { + BlockEntry::Column(Column::String(self.timestamp_strings.clone())) + } + } + + #[divan::bench] + fn timestamp_extract_components(bencher: divan::Bencher) { + let expr = build_expr( + "tuple(to_year(ts), to_month(ts), to_day_of_year(ts), to_hour(ts))", + &[("ts", DataType::Timestamp)], + ); + let data = &*SAMPLES; + let block = DataBlock::new(vec![data.timestamp_entry()], data.rows()); + let func_ctx = FunctionContext { + tz: TimeZone::get("Asia/Shanghai").unwrap(), + ..Default::default() + }; + let evaluator = Evaluator::new(&block, &func_ctx, &BUILTIN_FUNCTIONS); + + bencher.bench(|| { + let value = evaluator.run(&expr).unwrap(); + divan::black_box(value); + }); + } + + #[divan::bench] + fn timestamp_add_months(bencher: divan::Bencher) { + let expr = build_expr("add_months(ts, 1)",
&[("ts", DataType::Timestamp)]); + let data = &*SAMPLES; + let block = DataBlock::new(vec![data.timestamp_entry()], data.rows()); + let func_ctx = FunctionContext { + tz: TimeZone::get("Asia/Shanghai").unwrap(), + ..Default::default() + }; + let evaluator = Evaluator::new(&block, &func_ctx, &BUILTIN_FUNCTIONS); + + bencher.bench(|| { + let value = evaluator.run(&expr).unwrap(); + divan::black_box(value); + }); + } + + #[divan::bench] + fn date_add_days(bencher: divan::Bencher) { + let expr = build_expr("add_days(d, 7)", &[("d", DataType::Date)]); + let data = &*SAMPLES; + let block = DataBlock::new(vec![data.date_entry()], data.rows()); + let func_ctx = FunctionContext { + tz: TimeZone::get("Asia/Shanghai").unwrap(), + ..Default::default() + }; + let evaluator = Evaluator::new(&block, &func_ctx, &BUILTIN_FUNCTIONS); + + bencher.bench(|| { + let value = evaluator.run(&expr).unwrap(); + divan::black_box(value); + }); + } + + #[divan::bench] + fn string_parse_to_date(bencher: divan::Bencher) { + let expr = build_expr("to_date(to_timestamp(s))", &[("s", DataType::String)]); + let data = &*SAMPLES; + let block = DataBlock::new(vec![data.string_entry()], data.rows()); + let func_ctx = FunctionContext { + tz: TimeZone::get("Asia/Shanghai").unwrap(), + ..Default::default() + }; + let evaluator = Evaluator::new(&block, &func_ctx, &BUILTIN_FUNCTIONS); + + bencher.bench(|| { + let value = evaluator.run(&expr).unwrap(); + divan::black_box(value); + }); + } + + fn build_expr(sql: &str, columns: &[(&str, DataType)]) -> Expr { + let raw_expr = parser::parse_raw_expr(sql, columns); + type_check::check(&raw_expr, &BUILTIN_FUNCTIONS).unwrap() + } + + fn generate_timestamp_values(rows: usize, interval: usize) -> Vec { + let tz_sh = TimeZone::get("Asia/Shanghai").unwrap(); + let tz_alg = TimeZone::get("Africa/Algiers").unwrap(); + let specials = [ + local_micros(&tz_sh, 1941, 3, 14, 23, 55, 0), + local_micros(&tz_sh, 1941, 3, 15, 1, 5, 0), + local_micros(&tz_sh, 1941, 11, 1, 0, 30, 
0), + local_micros(&tz_sh, 1941, 11, 1, 1, 30, 0), + local_micros(&tz_alg, 1939, 11, 18, 23, 30, 0), + local_micros(&tz_alg, 1939, 11, 19, 0, 0, 30), + ]; + + let mut rng = StdRng::seed_from_u64(0x5453_5450); + let mut values = Vec::with_capacity(rows); + for i in 0..rows { + if (i % interval) < specials.len() { + values.push(specials[i % specials.len()]); + } else { + let secs = rng.gen_range(-2_208_988_800_i64..4_102_444_800_i64); + let micros = secs * 1_000_000 + rng.gen_range(0..1_000_000) as i64; + values.push(micros); + } + } + values + } + + fn local_micros( + tz: &TimeZone, + year: i32, + month: u8, + day: u8, + hour: u8, + minute: u8, + second: u8, + ) -> i64 { + let dt = + date(year as i16, month as i8, day as i8).at(hour as i8, minute as i8, second as i8, 0); + tz.to_ambiguous_zoned(dt) + .later() + .unwrap() + .timestamp() + .as_microsecond() + } +} diff --git a/src/query/functions/src/scalars/timestamp/Cargo.toml b/src/query/functions/src/scalars/timestamp/Cargo.toml index 12879b86cb937..83c14fe0dcdec 100644 --- a/src/query/functions/src/scalars/timestamp/Cargo.toml +++ b/src/query/functions/src/scalars/timestamp/Cargo.toml @@ -9,6 +9,7 @@ databend-common-base = { workspace = true } databend-common-column = { workspace = true } databend-common-exception = { workspace = true } databend-common-expression = { workspace = true } +databend-common-timezone = { workspace = true } dtparse = { workspace = true } jiff = { workspace = true } num-traits = { workspace = true } diff --git a/src/query/functions/src/scalars/timestamp/src/datetime.rs b/src/query/functions/src/scalars/timestamp/src/datetime.rs index a08d3d59b5977..35c999031b7f1 100644 --- a/src/query/functions/src/scalars/timestamp/src/datetime.rs +++ b/src/query/functions/src/scalars/timestamp/src/datetime.rs @@ -49,7 +49,6 @@ use databend_common_expression::types::timestamp::TIMESTAMP_MAX; use databend_common_expression::types::timestamp::TIMESTAMP_MIN; use 
databend_common_expression::types::timestamp_tz::string_to_timestamp_tz; use databend_common_expression::types::timestamp_tz::TimestampTzType; -use databend_common_expression::types::AccessType; use databend_common_expression::types::Bitmap; use databend_common_expression::types::DataType; use databend_common_expression::types::DateType; @@ -58,10 +57,8 @@ use databend_common_expression::types::Int32Type; use databend_common_expression::types::IntervalType; use databend_common_expression::types::NullableType; use databend_common_expression::types::NumberType; -use databend_common_expression::types::ReturnType; use databend_common_expression::types::StringType; use databend_common_expression::types::TimestampType; -use databend_common_expression::types::ValueType; use databend_common_expression::types::F64; use databend_common_expression::utils::date_helper::*; use databend_common_expression::vectorize_1_arg; @@ -76,6 +73,8 @@ use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionProperty; use databend_common_expression::FunctionRegistry; use databend_common_expression::Value; +use databend_common_timezone::fast_components_from_timestamp; +use databend_common_timezone::fast_utc_from_local; use dtparse::parse; use jiff::civil::date; use jiff::civil::Date; @@ -511,6 +510,7 @@ fn string_to_format_datetime( return Ok((0, true)); } + let raw_format = format; let format = if ctx.func_ctx.date_format_style == *"oracle" { pg_format_to_strftime(format) } else { @@ -519,6 +519,20 @@ fn string_to_format_datetime( let (mut tm, offset) = BrokenDownTime::parse_prefix(&format, timestamp) .map_err(|err| Box::new(ErrorCode::BadArguments(format!("{err}"))))?; + let parsed_unix_timestamp = tm.timestamp(); + let had_explicit_time = tm.hour().is_some() || tm.minute().is_some() || tm.second().is_some(); + let had_civil_date = tm.year().is_some() + || tm.month().is_some() + || tm.day().is_some() + || tm.day_of_year().is_some() + || 
tm.iso_week_year().is_some() + || tm.iso_week().is_some() + || tm.sunday_based_week().is_some() + || tm.monday_based_week().is_some() + || tm.weekday().is_some(); + let had_subsecond = tm.subsec_nanosecond().is_some(); + let had_meridiem = tm.meridiem().is_some(); + let had_timezone = tm.offset().is_some() || tm.iana_time_zone().is_some(); if !ctx.func_ctx.parse_datetime_ignore_remainder && offset != timestamp.len() { return Err(Box::new(ErrorCode::BadArguments(format!( @@ -536,13 +550,47 @@ fn string_to_format_datetime( let _ = tm.set_second(Some(0)); } - if !ctx.func_ctx.enable_strict_datetime_parser { + // Jiff 0.2.16 requires a complete civil date when converting to a datetime. + // To preserve historical to_timestamp() behaviour (which accepted inputs + // like `%s,%Y`), synthesize missing date parts when we're parsing a + // timestamp, but only when there isn't already alternate date information + // (e.g. ISO week fields) present. Non-timestamp callers can still opt-in by + // disabling the strict parser. + if needs_civil_date_synthesis(&tm, ctx, parse_timestamp) { if tm.day().is_none() { let _ = tm.set_day(Some(1)); } if tm.month().is_none() { let _ = tm.set_month(Some(1)); } + if parse_timestamp && tm.year().is_none() { + let _ = tm.set_year(Some(1970)); + } + } + + if parse_timestamp && parsed_unix_timestamp.is_some() { + let has_conflicting_directives = + had_civil_date || had_explicit_time || had_subsecond || had_meridiem || had_timezone; + if has_conflicting_directives { + return Err(Box::new(ErrorCode::BadArguments(format!( + "Can't parse '{timestamp}' as timestamp with format '{raw_format}'" + )))); + } + + // When `%s` is present the parsed Unix timestamp already encodes the full + // instant, so return it directly instead of trying to synthesize a civil + // date (which would lose the seconds component). 
+ return Ok((parsed_unix_timestamp.unwrap().as_microsecond(), false)); + } + + if parse_timestamp + && parsed_unix_timestamp.is_none() + && tm.offset().is_none() + && tm.iana_time_zone().is_none() + { + if let Some(micros) = fast_timestamp_from_tm(&tm, &ctx.func_ctx.tz) { + return Ok((micros, false)); + } } let z = if tm.offset().is_none() { @@ -562,6 +610,34 @@ fn string_to_format_datetime( Ok((z.timestamp().as_microsecond(), false)) } +fn needs_civil_date_synthesis( + tm: &BrokenDownTime, + ctx: &EvalContext, + parse_timestamp: bool, +) -> bool { + if parse_timestamp || !ctx.func_ctx.enable_strict_datetime_parser { + !(tm.day_of_year().is_some() + || tm.iso_week_year().is_some() + || tm.iso_week().is_some() + || tm.sunday_based_week().is_some() + || tm.monday_based_week().is_some()) + } else { + false + } +} + +fn fast_timestamp_from_tm(tm: &BrokenDownTime, tz: &TimeZone) -> Option { + let year = i32::from(tm.year()?); + let month: u8 = tm.month()?.try_into().ok()?; + let day: u8 = tm.day()?.try_into().ok()?; + let hour: u8 = tm.hour().unwrap_or(0).try_into().ok()?; + let minute: u8 = tm.minute().unwrap_or(0).try_into().ok()?; + let second: u8 = tm.second().unwrap_or(0).try_into().ok()?; + let nanos = tm.subsec_nanosecond().unwrap_or(0); + let micro = (nanos / 1_000).max(0) as u32; + fast_utc_from_local(tz, year, month, day, hour, minute, second, micro) +} + fn register_date_to_timestamp(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::( "to_timestamp", @@ -895,138 +971,38 @@ fn register_timestamp_to_date(registry: &mut FunctionRegistry) { ); fn eval_timestamp_to_date(val: Value, ctx: &mut EvalContext) -> Value { - match val { - Value::Scalar(v) => Value::Scalar(calc_timestamp_to_date(v, &ctx.func_ctx.tz)), - Value::Column(column) => { - let generics = ctx.generics.to_vec(); - let len = TimestampType::column_len(&column); - let mut builder = DateType::create_builder(len, &generics); - let mut cache = 
TimeZoneOffsetCache::new(&ctx.func_ctx.tz); - for v in TimestampType::iter_column(&column) { - let days = cache.days_for(v); - DateType::push_item(&mut builder, days); - } - Value::Column(DateType::build_column(builder)) - } - } + vectorize_with_builder_1_arg::(|val, output, ctx| { + output.push(timestamp_to_date_days(val, &ctx.func_ctx.tz)); + })(val, ctx) } fn calc_timestamp_to_date(val: i64, tz: &TimeZone) -> i32 { - let mut cache = TimeZoneOffsetCache::new(tz); - cache.days_for(val) + timestamp_to_date_days(val, tz) } } -struct OffsetInterval { - start: i64, - end: i64, - offset_micros: i64, +fn timestamp_to_date_days(value: i64, tz: &TimeZone) -> i32 { + timestamp_days_via_lut(value, tz).unwrap_or_else(|| timestamp_days_via_jiff(value, tz)) } -struct TimeZoneOffsetCache<'a> { - tz: &'a TimeZone, - interval: Option, - fixed_offset_micros: Option, +fn timestamp_days_via_lut(value: i64, tz: &TimeZone) -> Option { + let components = fast_components_from_timestamp(value, tz)?; + days_from_components(components.year, components.month, components.day) } -impl<'a> TimeZoneOffsetCache<'a> { - fn new(tz: &'a TimeZone) -> Self { - let fixed_offset_micros = determine_fixed_offset(tz); - Self { - tz, - interval: None, - fixed_offset_micros, - } - } - - fn days_for(&mut self, value: i64) -> i32 { - if self.tz == &TimeZone::UTC { - return utc_days_from_micros(value); - } - if let Some(offset) = self.fixed_offset_micros { - return apply_offset_micros(value, offset); - } - - let needs_refresh = match &self.interval { - Some(interval) => value < interval.start || value >= interval.end, - None => true, - }; - if needs_refresh { - self.interval = Some(self.build_interval(value)); - } - - let interval = self.interval.as_ref().unwrap(); - apply_offset_micros(value, interval.offset_micros) - } - - fn build_interval(&self, value: i64) -> OffsetInterval { - let ts = timestamp_from_microseconds_saturating(value); - let offset_micros = tz_offset_micros(self.tz, ts); - - let start = self 
- .tz - .preceding(ts) - .next() - .map(|t| t.timestamp().as_microsecond()) - .unwrap_or(i64::MIN); - let end = self - .tz - .following(ts) - .next() - .map(|t| t.timestamp().as_microsecond()) - .unwrap_or(i64::MAX); - - OffsetInterval { - start, - end, - offset_micros, - } - } -} - -fn tz_offset_micros(tz: &TimeZone, ts: Timestamp) -> i64 { - let offset = tz.to_offset(ts); - (offset.seconds() as i64).saturating_mul(MICROS_PER_SEC) +fn days_from_components(year: i32, month: u8, day: u8) -> Option { + NaiveDate::from_ymd_opt(year, month as u32, day as u32) + .map(|d| clamp_date((d.num_days_from_ce() - EPOCH_DAYS_FROM_CE) as i64)) } -fn determine_fixed_offset(tz: &TimeZone) -> Option { - if tz == &TimeZone::UTC { - return Some(0); - } - - let epoch = Timestamp::UNIX_EPOCH; - let has_transitions = - tz.preceding(epoch).next().is_some() || tz.following(epoch).next().is_some(); - if has_transitions { - None - } else { - Some(tz_offset_micros(tz, epoch)) - } -} - -fn timestamp_from_microseconds_saturating(value: i64) -> Timestamp { - Timestamp::from_microsecond(value).unwrap_or_else(|_| { - if value < 0 { - Timestamp::MIN - } else { - Timestamp::MAX - } - }) -} - -fn utc_days_from_micros(value: i64) -> i32 { - timestamp_from_microseconds_saturating(value) - .to_zoned(TimeZone::UTC) +fn timestamp_days_via_jiff(value: i64, tz: &TimeZone) -> i32 { + value + .to_timestamp(tz) .date() .since((Unit::Day, Date::new(1970, 1, 1).unwrap())) .unwrap() .get_days() } -fn apply_offset_micros(value: i64, offset_micros: i64) -> i32 { - let adjusted = value.saturating_add(offset_micros); - utc_days_from_micros(adjusted) -} - fn register_timestamp_tz_to_date(registry: &mut FunctionRegistry) { registry.register_passthrough_nullable_1_arg::( "to_date", @@ -1328,7 +1304,7 @@ macro_rules! 
impl_register_arith_functions { |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::( |ts, delta, builder, ctx| { - match EvalYearsImpl::eval_timestamp(ts, &ctx.func_ctx.tz, $signed_wrapper!{delta}, false) { + match EvalYearsImpl::eval_timestamp(ts, &ctx.func_ctx.tz, $signed_wrapper! {delta}, false) { Ok(t) => builder.push(t), Err(e) => { ctx.set_error(builder.len(), e); @@ -1359,7 +1335,7 @@ macro_rules! impl_register_arith_functions { |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::( |ts, delta, builder, ctx| { - match EvalMonthsImpl::eval_timestamp(ts, &ctx.func_ctx.tz, $signed_wrapper!{delta} * 3, false) { + match EvalMonthsImpl::eval_timestamp(ts, &ctx.func_ctx.tz, $signed_wrapper! {delta} * 3, false) { Ok(t) => builder.push(t), Err(e) => { ctx.set_error(builder.len(), e); @@ -1390,7 +1366,7 @@ macro_rules! impl_register_arith_functions { |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::( |ts, delta, builder, ctx| { - match EvalMonthsImpl::eval_timestamp(ts, &ctx.func_ctx.tz, $signed_wrapper!{delta}, false) { + match EvalMonthsImpl::eval_timestamp(ts, &ctx.func_ctx.tz, $signed_wrapper! {delta}, false) { Ok(t) => builder.push(t), Err(e) => { ctx.set_error(builder.len(), e); @@ -1423,7 +1399,7 @@ macro_rules! impl_register_arith_functions { |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::( |ts, delta, builder, ctx| { - match EvalMonthsImpl::eval_timestamp(ts, &ctx.func_ctx.tz, $signed_wrapper!{delta}, true) { + match EvalMonthsImpl::eval_timestamp(ts, &ctx.func_ctx.tz, $signed_wrapper! 
{delta}, true) { Ok(t) => builder.push(t), Err(e) => { ctx.set_error(builder.len(), e); diff --git a/src/query/functions/src/scalars/timestamp/src/interval.rs b/src/query/functions/src/scalars/timestamp/src/interval.rs index 8626bbdbf4175..2e00f046c87eb 100644 --- a/src/query/functions/src/scalars/timestamp/src/interval.rs +++ b/src/query/functions/src/scalars/timestamp/src/interval.rs @@ -37,6 +37,8 @@ use databend_common_expression::EvalContext; use databend_common_expression::FunctionDomain; use databend_common_expression::FunctionRegistry; use databend_common_expression::Value; +use databend_common_timezone::fast_components_from_timestamp; +use databend_common_timezone::DateTimeComponents; use jiff::tz::Offset; use jiff::tz::TimeZone; use jiff::Timestamp; @@ -260,9 +262,16 @@ fn register_interval_add_sub_mul(registry: &mut FunctionRegistry) { is_negative = true; } let tz = &ctx.func_ctx.tz; - let t1 = t1.to_timestamp(tz); - let t2 = t2.to_timestamp(tz); - output.push(calc_age(t1, t2, is_negative)); + if let (Some(c1), Some(c2)) = ( + fast_components_from_timestamp(t1, tz), + fast_components_from_timestamp(t2, tz), + ) { + output.push(calc_age_from_components(&c1, &c2, is_negative)); + } else { + let t1 = t1.to_timestamp(tz); + let t2 = t2.to_timestamp(tz); + output.push(calc_age(t1, t2, is_negative)); + } }, ), ); @@ -273,13 +282,6 @@ fn register_interval_add_sub_mul(registry: &mut FunctionRegistry) { |_, _, _| FunctionDomain::MayThrow, vectorize_with_builder_2_arg::( |t1, t2, output, ctx| { - let fn_to_zoned = |ts_tz: timestamp_tz| { - let ts = Timestamp::from_microsecond(ts_tz.timestamp())?; - let zone = TimeZone::fixed(Offset::from_seconds(ts_tz.seconds_offset())?); - - Result::Ok(ts.to_zoned(zone)) - }; - let mut is_negative = false; let mut t1 = t1; let mut t2 = t2; @@ -287,10 +289,42 @@ fn register_interval_add_sub_mul(registry: &mut FunctionRegistry) { std::mem::swap(&mut t1, &mut t2); is_negative = true; } - let (t1, t2) = match (fn_to_zoned(t1), 
fn_to_zoned(t2)) { + let zone1 = match Offset::from_seconds(t1.seconds_offset()) + .map(TimeZone::fixed) + { + Ok(zone) => zone, + Err(err) => { + ctx.set_error(output.len(), err.to_string()); + return; + } + }; + let zone2 = match Offset::from_seconds(t2.seconds_offset()) + .map(TimeZone::fixed) + { + Ok(zone) => zone, + Err(err) => { + ctx.set_error(output.len(), err.to_string()); + return; + } + }; + if let (Some(c1), Some(c2)) = ( + fast_components_from_timestamp(t1.timestamp(), &zone1), + fast_components_from_timestamp(t2.timestamp(), &zone2), + ) { + output.push(calc_age_from_components(&c1, &c2, is_negative)); + return; + } + let to_zoned = |ts_tz: timestamp_tz, + zone: &TimeZone| + -> std::result::Result { + let ts = + Timestamp::from_microsecond(ts_tz.timestamp()).map_err(|err| err.to_string())?; + Ok(ts.to_zoned(zone.clone())) + }; + let (t1, t2) = match (to_zoned(t1, &zone1), to_zoned(t2, &zone2)) { (Ok(t1), Ok(t2)) => (t1, t2), (Err(err), _) | (_, Err(err)) => { - ctx.set_error(output.len(), err.to_string()); + ctx.set_error(output.len(), err); return; } }; @@ -310,14 +344,28 @@ fn register_interval_add_sub_mul(registry: &mut FunctionRegistry) { let today_date = today_date(&ctx.func_ctx.now, &ctx.func_ctx.tz); match calc_date_to_timestamp(today_date, tz) { Ok(t) => { - let mut t1 = t.to_timestamp(tz); - let mut t2 = t2.to_timestamp(tz); + let mut t1 = t; + let mut t2_val = t2; - if t1 < t2 { - std::mem::swap(&mut t1, &mut t2); + if t1 < t2_val { + std::mem::swap(&mut t1, &mut t2_val); is_negative = true; } - output.push(calc_age(t1, t2, is_negative)); + if let (Some(c1), Some(c2)) = ( + fast_components_from_timestamp(t1, tz), + fast_components_from_timestamp(t2_val, tz), + ) { + output.push(calc_age_from_components(&c1, &c2, is_negative)); + } else { + let mut t1 = t1.to_timestamp(tz); + let mut t2 = t2_val.to_timestamp(tz); + + if t1 < t2 { + std::mem::swap(&mut t1, &mut t2); + is_negative = true; + } + output.push(calc_age(t1, t2, is_negative)); + } } 
Err(e) => { ctx.set_error(output.len(), e); @@ -337,8 +385,21 @@ fn register_interval_add_sub_mul(registry: &mut FunctionRegistry) { let zone = TimeZone::fixed(Offset::from_seconds(t2.seconds_offset())?); let today_date = today_date(&ctx.func_ctx.now, &zone); - let mut t2 = Timestamp::from_microsecond(t2.timestamp())?.to_zoned(zone.clone()); - let mut t1 = calc_date_to_timestamp(today_date, &zone)?.to_timestamp(&zone); + let mut t1 = calc_date_to_timestamp(today_date, &zone)?; + let mut t2_micros = t2.timestamp(); + + if t1 < t2_micros { + std::mem::swap(&mut t1, &mut t2_micros); + is_negative = true; + } + if let (Some(c1), Some(c2)) = ( + fast_components_from_timestamp(t1, &zone), + fast_components_from_timestamp(t2_micros, &zone), + ) { + return Result::Ok(calc_age_from_components(&c1, &c2, is_negative)); + } + let mut t1 = Timestamp::from_microsecond(t1)?.to_zoned(zone.clone()); + let mut t2 = Timestamp::from_microsecond(t2_micros)?.to_zoned(zone.clone()); if t1 < t2 { std::mem::swap(&mut t1, &mut t2); @@ -614,6 +675,48 @@ fn register_number_to_interval(registry: &mut FunctionRegistry) { ); } +fn calc_age_from_components( + t1: &DateTimeComponents, + t2: &DateTimeComponents, + is_negative: bool, +) -> months_days_micros { + let mut years = t1.year - t2.year; + let mut months = t1.month as i32 - t2.month as i32; + let mut days = t1.day as i32 - t2.day as i32; + + let t1_total_nanos = (t1.hour as i64 * 3600 + t1.minute as i64 * 60 + t1.second as i64) + * 1_000_000_000 + + (t1.micro as i64) * 1_000; + let t2_total_nanos = (t2.hour as i64 * 3600 + t2.minute as i64 * 60 + t2.second as i64) + * 1_000_000_000 + + (t2.micro as i64) * 1_000; + let mut total_nanoseconds_diff = t1_total_nanos - t2_total_nanos; + + if total_nanoseconds_diff < 0 { + total_nanoseconds_diff += 24 * 3600 * 1_000_000_000; + days -= 1; + } + + if days < 0 { + days += t2.days_in_month as i32; + months -= 1; + } + + if months < 0 { + months += 12; + years -= 1; + } + + let total_months = months + 
years * 12; + let diff_micros = total_nanoseconds_diff / 1_000; + + if is_negative { + months_days_micros::new(-total_months, -days, -diff_micros) + } else { + months_days_micros::new(total_months, days, diff_micros) + } +} + fn calc_age(t1: Zoned, t2: Zoned, is_negative: bool) -> months_days_micros { let mut years = t1.year() - t2.year(); let mut months = t1.month() - t2.month(); diff --git a/tests/sqllogictests/suites/base/11_data_type/11_0001_data_type_date_time.test b/tests/sqllogictests/suites/base/11_data_type/11_0001_data_type_date_time.test index 58ed6366456c5..b38fc842ab0d0 100644 --- a/tests/sqllogictests/suites/base/11_data_type/11_0001_data_type_date_time.test +++ b/tests/sqllogictests/suites/base/11_data_type/11_0001_data_type_date_time.test @@ -44,22 +44,22 @@ select '9999-12-12 03:25:02.868894'::TIMESTAMP query T select '0099-05-16T03:25:02.868894+08:00'::TIMESTAMP ---- -1970-01-01 00:00:00.000000 +0099-05-15 19:25:02.868894 query T select '1022-05-16T03:25:02.868894+08:00'::TIMESTAMP ---- -1970-01-01 00:00:00.000000 +1022-05-15 19:25:02.868894 query T select '1022-05-16T03:25:02.000000+08:00'::TIMESTAMP ---- -1970-01-01 00:00:00.000000 +1022-05-15 19:25:02.000000 query T select '1000-01-01T03:25:02.868894-07:00'::TIMESTAMP ---- -1970-01-01 00:00:00.000000 +1000-01-01 10:25:02.868894 query T select '2044-05-06T03:25:02.868894-07:00'::TIMESTAMP diff --git a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes_tz.test b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes_tz.test index 71f51902133e5..4e9b2f387eed5 100644 --- a/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes_tz.test +++ b/tests/sqllogictests/suites/query/functions/02_0012_function_datetimes_tz.test @@ -161,41 +161,6 @@ select next_day(to_date('2024-10-25'), saturday); ---- 2024-10-26 -statement ok -set timezone='America/Toronto' - -statement ok -drop table if exists tz_dst - -statement ok -create table tz_dst(ts timestamp) - 
-statement ok -insert into tz_dst values - (to_timestamp('2024-03-10 01:30:00')), - (to_timestamp('2024-06-10 12:00:00')), - (to_timestamp('2024-11-03 01:30:00')) - -query T -select to_date(to_timestamp('2024-03-10 01:30:00')); ----- -2024-03-10 - -query T -select to_date(to_timestamp('2024-11-03 01:30:00')); ----- -2024-11-03 - -query RT -select to_date(ts) from tz_dst order by ts; ----- -2024-03-10 -2024-06-10 -2024-11-03 - -statement ok -drop table if exists tz_dst - query T select next_day(to_timestamp('2024-10-25 01:00:00'), monday); ---- @@ -617,6 +582,35 @@ select count_if(y = true) from (select to_timestamp(to_date(number)) as ts, to_ statement ok SET timezone ='America/Toronto'; +statement ok +create or replace table tz_dst(ts timestamp) + +statement ok +insert into tz_dst values + (to_timestamp('2024-03-10 01:30:00')), + (to_timestamp('2024-06-10 12:00:00')), + (to_timestamp('2024-11-03 01:30:00')) + +query T +select to_date(to_timestamp('2024-03-10 01:30:00')); +---- +2024-03-10 + +query T +select to_date(to_timestamp('2024-11-03 01:30:00')); +---- +2024-11-03 + +query RT +select to_date(ts) from tz_dst order by ts; +---- +2024-03-10 +2024-06-10 +2024-11-03 + +statement ok +drop table if exists tz_dst + ##check to '2106-11-23'::date query T select count_if(y = true) from (select to_timestamp(to_date(number)) as ts, to_date(ts) = to_date(number) y from numbers(50000)); @@ -865,10 +859,8 @@ select to_datetime('1', '%s') ---- 1970-01-01 00:00:01.000000 -query T +query error 1006.*Can't parse '200,2000' as timestamp with format '%s,%Y' select to_timestamp('200,2000', '%s,%Y'); ----- -2000-01-01 00:03:20.000000 statement ok unset parse_datetime_ignore_remainder; @@ -881,6 +873,11 @@ select to_datetime('1', '%s') statement ok set timezone='UTC'; +query T +select to_timestamp('2024-10-2 03:04','%G-%V-%u %H:%M'); +---- +2024-03-05 03:04:00.000000 + query T select to_monday(to_date('1919-04-13','%Y-%m-%d')); ----