From 09788cbeafad5acb3b67b9072ab8a2c09d413992 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Fri, 14 Nov 2025 17:47:11 +0800 Subject: [PATCH 1/6] small refactor --- sql/src/value/arrow_decoder.rs | 55 +++++++++++++++++----------------- 1 file changed, 27 insertions(+), 28 deletions(-) diff --git a/sql/src/value/arrow_decoder.rs b/sql/src/value/arrow_decoder.rs index 9ce0dac9..a39d49cc 100644 --- a/sql/src/value/arrow_decoder.rs +++ b/sql/src/value/arrow_decoder.rs @@ -38,6 +38,33 @@ use crate::error::{ConvertError, Error, Result}; use super::{Interval, NumberValue, Value}; +/// The in-memory representation of the MonthDayMicros variant of the "Interval" logical type. +#[allow(non_camel_case_types)] +#[repr(C)] +struct months_days_micros(pub i128); + +/// Mask for extracting the lower 64 bits (microseconds). +const MICROS_MASK: i128 = 0xFFFFFFFFFFFFFFFF; +/// Mask for extracting the middle 32 bits (days or months). +const DAYS_MONTHS_MASK: i128 = 0xFFFFFFFF; + +impl months_days_micros { + #[inline] + pub fn months(&self) -> i32 { + ((self.0 >> 96) & DAYS_MONTHS_MASK) as i32 + } + + #[inline] + pub fn days(&self) -> i32 { + ((self.0 >> 64) & DAYS_MONTHS_MASK) as i32 + } + + #[inline] + pub fn microseconds(&self) -> i64 { + (self.0 & MICROS_MASK) as i64 + } +} + impl TryFrom<(&ArrowField, &Arc, usize, Tz)> for Value { type Error = Error; fn try_from( @@ -386,31 +413,3 @@ fn parse_geometry(raw_data: &[u8]) -> Result { let wkt = Ewkt::from_wkb(&mut data, WkbDialect::Ewkb)?; Ok(wkt.0) } - -/// The in-memory representation of the MonthDayMicros variant of the "Interval" logical type. -#[derive(Debug, Copy, Clone, Default, PartialEq, PartialOrd, Ord, Eq, Hash)] -#[allow(non_camel_case_types)] -#[repr(C)] -pub struct months_days_micros(pub i128); - -/// Mask for extracting the lower 64 bits (microseconds). -pub const MICROS_MASK: i128 = 0xFFFFFFFFFFFFFFFF; -/// Mask for extracting the middle 32 bits (days or months). -pub const DAYS_MONTHS_MASK: i128 = 0xFFFFFFFF; - -impl months_days_micros { - #[inline] - pub fn months(&self) -> i32 { - ((self.0 >> 96) & DAYS_MONTHS_MASK) as i32 - } - - #[inline] - pub fn days(&self) -> i32 { - ((self.0 >> 64) & DAYS_MONTHS_MASK) as i32 - } - - #[inline] - pub fn microseconds(&self) -> i64 { - (self.0 & MICROS_MASK) as i64 - } -} From 313e088e19d4c2c1aaf31b349db88520ed11c182 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Fri, 14 Nov 2025 17:47:28 +0800 Subject: [PATCH 2/6] add some comments --- sql/src/value/format.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sql/src/value/format.rs b/sql/src/value/format.rs index c4e52756..22d0901a 100644 --- a/sql/src/value/format.rs +++ b/sql/src/value/format.rs @@ -45,7 +45,8 @@ impl std::fmt::Display for Value { } } -// Compatible with Databend, inner values of nested types are quoted. +// Used as output of cli +// Compatible with Databend, strings inside nested types are quoted. fn encode_value(f: &mut std::fmt::Formatter<'_>, val: &Value, raw: bool) -> std::fmt::Result { match val { Value::Null => write!(f, "NULL"), @@ -222,6 +223,8 @@ pub fn display_decimal_256(num: i256, scale: u8) -> String { } impl Value { + // for now only used in ORM to fmt values to insert, + // for Params, rust use Param::as_sql_string, and py/js bindings are handled in binding code pub fn to_sql_string(&self) -> String { match self { Value::Null => "NULL".to_string(), From 440c12152d07f31bebf4ef83c948428f1bcff90d Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Fri, 14 Nov 2025 17:50:31 +0800 Subject: [PATCH 3/6] small refactor --- sql/src/value/string_decoder.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sql/src/value/string_decoder.rs b/sql/src/value/string_decoder.rs index 5b37efee..66798e7c 100644 --- a/sql/src/value/string_decoder.rs +++ b/sql/src/value/string_decoder.rs @@ -27,9 +27,9 @@ use std::io::{BufRead, Cursor}; use super::{NumberValue, Value, DAYS_FROM_CE, TIMESTAMP_TIMEZONE_FORMAT}; -pub(crate) const NULL_VALUE: &str = "NULL"; -pub(crate) const TRUE_VALUE: &str = "1"; -pub(crate) const FALSE_VALUE: &str = "0"; +const NULL_VALUE: &str = "NULL"; +const TRUE_VALUE: &str = "1"; +const FALSE_VALUE: &str = "0"; impl TryFrom<(&DataType, Option, Tz)> for Value { type Error = Error; From 40c4d3d9d968b6e7cc4500381115d63875e5b409 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Fri, 14 Nov 2025 18:11:22 +0800 Subject: [PATCH 4/6] fix timestamp decode in nested value --- .../python/tests/blocking/steps/binding.py | 27 ++++++---- sql/src/value/string_decoder.rs | 53 ++++++++++--------- 2 files changed, 44 insertions(+), 36 deletions(-) diff --git a/bindings/python/tests/blocking/steps/binding.py b/bindings/python/tests/blocking/steps/binding.py index 77cfd6f0..4024a597 100644 --- a/bindings/python/tests/blocking/steps/binding.py +++ b/bindings/python/tests/blocking/steps/binding.py @@ -157,21 +157,28 @@ def _(context): assert row.values()[0] == exp, f"Tuple: {row.values()}" context.conn.exec("set timezone='UTC'") - # wait for release 1.2.839 - # if DB_VERSION >= (1, 2, 839): - # row = context.conn.query_row( - # f"settings(timezone='{tz}') select to_datetime('2024-04-16 12:34:56.789')" - # ) - # exp = datetime(2024, 4, 16, 12, 34, 56, 789000, tzinfo=tz_expected) - # assert row.values()[0] == exp, f"Tuple: {row.values()}" + if DB_VERSION >= (1, 2, 839): + exp = datetime(2024, 4, 16, 12, 34, 56, 789000, tzinfo=tz_expected) + row = context.conn.query_row( + f"settings(timezone='{tz}') select to_datetime('2024-04-16 12:34:56.789')" + ) + assert row.values()[0] == exp, f"Tuple: {row.values()}" + + row = context.conn.query_row( + f"settings(timezone='{tz}') select (to_datetime('2024-04-16 12:34:56.789'), 10)" + ) + assert row.values()[0] == exp, f"Tuple: {row.values()}" tz_expected = timezone(timedelta(hours=6)) row = context.conn.query_row( f"settings(timezone='{tz}') select to_timestamp_tz('2024-04-16 12:34:56.789 +0600')" ) - exp = datetime(2024, 4, 16, 12, 34, 56, 789000, tzinfo=tz_expected) - exp_bug = datetime(2024, 4, 16, 18, 34, 56, 789000, tzinfo=tz_expected) - assert row.values()[0] in (exp, exp_bug), f"Tuple: {row.values()[0]} {exp}" + if DB_VERSION >= (1, 2, 840): + exp = datetime(2024, 4, 16, 12, 34, 56, 789000, tzinfo=tz_expected) + else: + # bug + exp = datetime(2024, 4, 16, 18, 34, 56, 789000, tzinfo=tz_expected) + assert row.values()[0] == exp, f"Tuple: {row.values()[0]} {exp}" @then("Select numbers should iterate all rows") diff --git a/sql/src/value/string_decoder.rs b/sql/src/value/string_decoder.rs index 66798e7c..e74251f8 100644 --- a/sql/src/value/string_decoder.rs +++ b/sql/src/value/string_decoder.rs @@ -97,20 +97,7 @@ impl TryFrom<(&DataType, String, Tz)> for Value { let d = parse_decimal(v.as_str(), *size)?; Ok(Self::Number(d)) } - DataType::Timestamp => { - let naive_dt = NaiveDateTime::parse_from_str(v.as_str(), "%Y-%m-%d %H:%M:%S%.6f")?; - let dt_with_tz = match tz.from_local_datetime(&naive_dt) { - LocalResult::Single(dt) => dt, - LocalResult::None => { - return Err(Error::Parsing(format!( - "time {v} not exists in timezone {tz}" - ))) - } - LocalResult::Ambiguous(dt1, _dt2) => dt1, - }; - let ts = dt_with_tz.timestamp_micros(); - Ok(Self::Timestamp(ts, tz)) - } + DataType::Timestamp => parse_timestamp(v.as_str(), tz), DataType::TimestampTz => { let t = DateTime::::parse_from_str(v.as_str(), TIMESTAMP_TIMEZONE_FORMAT)?; @@ -127,7 +114,7 @@ impl TryFrom<(&DataType, String, Tz)> for Value { DataType::Interval => Ok(Self::Interval(v)), DataType::Array(_) | DataType::Map(_) | DataType::Tuple(_) | DataType::Vector(_) => { let mut reader = Cursor::new(v.as_str()); - let decoder = ValueDecoder {}; + let decoder = ValueDecoder { timezone: tz }; decoder.read_field(t, &mut reader) } DataType::Nullable(inner) => match inner.as_ref() { @@ -146,7 +133,9 @@ impl TryFrom<(&DataType, String, Tz)> for Value { } } -pub(super) struct ValueDecoder {} +struct ValueDecoder { + pub timezone: Tz, +} impl ValueDecoder { pub(super) fn read_field>( @@ -310,16 +299,7 @@ impl ValueDecoder { let mut buf = Vec::new(); reader.read_quoted_text(&mut buf, b'\'')?; let v = unsafe { std::str::from_utf8_unchecked(&buf) }; - let ts = NaiveDateTime::parse_from_str(v, "%Y-%m-%d %H:%M:%S%.6f")? - .and_utc() - .timestamp_micros(); - Ok(Value::Timestamp(ts, Tz::UTC)) - } - - fn read_interval>(&self, reader: &mut Cursor) -> Result { - let mut buf = Vec::new(); - reader.read_quoted_text(&mut buf, b'\'')?; - Ok(Value::Interval(unsafe { String::from_utf8_unchecked(buf) })) + parse_timestamp(v, self.timezone) } fn read_timestamp_tz>(&self, reader: &mut Cursor) -> Result { @@ -330,6 +310,12 @@ impl ValueDecoder { Ok(Value::TimestampTz(t)) } + fn read_interval>(&self, reader: &mut Cursor) -> Result { + let mut buf = Vec::new(); + reader.read_quoted_text(&mut buf, b'\'')?; + Ok(Value::Interval(unsafe { String::from_utf8_unchecked(buf) })) + } + fn read_bitmap>(&self, reader: &mut Cursor) -> Result { let mut buf = Vec::new(); reader.read_quoted_text(&mut buf, b'\'')?; @@ -467,6 +453,21 @@ impl ValueDecoder { } } +fn parse_timestamp(ts_string: &str, tz: Tz) -> Result { + let naive_dt = NaiveDateTime::parse_from_str(ts_string, "%Y-%m-%d %H:%M:%S%.6f")?; + let dt_with_tz = match tz.from_local_datetime(&naive_dt) { + LocalResult::Single(dt) => dt, + LocalResult::None => { + return Err(Error::Parsing(format!( + "time {v} not exists in timezone {tz}" + ))) + } + LocalResult::Ambiguous(dt1, _dt2) => dt1, + }; + let ts = dt_with_tz.timestamp_micros(); + Ok(Value::Timestamp(ts, tz)) +} + fn parse_decimal(text: &str, size: DecimalSize) -> Result { let mut start = 0; let bytes = text.as_bytes(); From 572c5eced0c56f889007194f98e89e6733f10d80 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Fri, 14 Nov 2025 18:16:49 +0800 Subject: [PATCH 5/6] fix --- sql/src/value/string_decoder.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/src/value/string_decoder.rs b/sql/src/value/string_decoder.rs index e74251f8..42901011 100644 --- a/sql/src/value/string_decoder.rs +++ b/sql/src/value/string_decoder.rs @@ -459,7 +459,7 @@ fn parse_timestamp(ts_string: &str, tz: Tz) -> Result { LocalResult::Single(dt) => dt, LocalResult::None => { return Err(Error::Parsing(format!( - "time {v} not exists in timezone {tz}" + "time {ts_string} not exists in timezone {tz}" ))) } LocalResult::Ambiguous(dt1, _dt2) => dt1, From 247a3677ede802f128eecd18af13fad4b64997a2 Mon Sep 17 00:00:00 2001 From: Yang Xiufeng Date: Fri, 14 Nov 2025 20:12:27 +0800 Subject: [PATCH 6/6] fix --- .../python/tests/blocking/steps/binding.py | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/bindings/python/tests/blocking/steps/binding.py b/bindings/python/tests/blocking/steps/binding.py index 4024a597..0957e910 100644 --- a/bindings/python/tests/blocking/steps/binding.py +++ b/bindings/python/tests/blocking/steps/binding.py @@ -154,31 +154,34 @@ def _(context): context.conn.exec(f"set timezone='{tz}'") row = context.conn.query_row("select to_datetime('2024-04-16 12:34:56.789')") exp = datetime(2024, 4, 16, 12, 34, 56, 789000, tzinfo=tz_expected) - assert row.values()[0] == exp, f"Tuple: {row.values()}" + assert row.values()[0] == exp, f"datetime(session level tz): {row.values()}" context.conn.exec("set timezone='UTC'") if DB_VERSION >= (1, 2, 839): - exp = datetime(2024, 4, 16, 12, 34, 56, 789000, tzinfo=tz_expected) row = context.conn.query_row( f"settings(timezone='{tz}') select to_datetime('2024-04-16 12:34:56.789')" ) - assert row.values()[0] == exp, f"Tuple: {row.values()}" + assert row.values()[0] == exp, f"datetime(query level tz): {row.values()}" row = context.conn.query_row( - f"settings(timezone='{tz}') select (to_datetime('2024-04-16 12:34:56.789'), 10)" + f"settings(timezone='{tz}') select to_datetime('2024-04-16 12:34:56.789'), 10" + ) + assert row.values()[0] == exp, ( + f"datetime in Tuple: {row.values()[0]} != {exp}" ) - assert row.values()[0] == exp, f"Tuple: {row.values()}" tz_expected = timezone(timedelta(hours=6)) row = context.conn.query_row( f"settings(timezone='{tz}') select to_timestamp_tz('2024-04-16 12:34:56.789 +0600')" ) - if DB_VERSION >= (1, 2, 840): - exp = datetime(2024, 4, 16, 12, 34, 56, 789000, tzinfo=tz_expected) + exp = datetime(2024, 4, 16, 12, 34, 56, 789000, tzinfo=tz_expected) + exp_bug = datetime(2024, 4, 16, 18, 34, 56, 789000, tzinfo=tz_expected) + if DB_VERSION >= (1, 2, 840) and os.getenv("BODY_FORMAT") == "json": + assert row.values()[0] == exp, f"timestamp_tz: {row.values()[0]} {exp}" else: - # bug - exp = datetime(2024, 4, 16, 18, 34, 56, 789000, tzinfo=tz_expected) - assert row.values()[0] == exp, f"Tuple: {row.values()[0]} {exp}" + assert row.values()[0] == exp_bug, ( + f"timestamp_tz: {row.values()[0]} {exp_bug}" + ) @then("Select numbers should iterate all rows")