-
Notifications
You must be signed in to change notification settings - Fork 3.8k
[fix](variant) preserve TIMESTAMPTZ values in sparse path #63522
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,16 @@ | ||
| NULL | ||
| 0000-01-01 00:00:00 +00:00 | ||
| 0000-01-01 00:00:00.000 +00:00 | ||
| 0000-01-01 00:00:00.001 +00:00 | ||
| 0000-01-01 00:00:00.123 +00:00 | ||
| 0000-01-01 00:00:00.999 +00:00 | ||
| 2023-08-08 20:20:20 +00:00 | ||
| 2023-08-08 20:20:20.000 +00:00 | ||
| 2023-08-08 20:20:20.001 +00:00 | ||
| 2023-08-08 20:20:20.123 +00:00 | ||
| 2023-08-08 20:20:20.999 +00:00 | ||
| 9999-12-31 23:59:59 +08:00 | ||
| 9999-12-31 23:59:59.000 +08:00 | ||
| 9999-12-31 23:59:59.001 +08:00 | ||
| 9999-12-31 23:59:59.123 +08:00 | ||
| 9999-12-31 23:59:59.999 +08:00 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,7 @@ | ||
| -- This file is automatically generated. You should know what you did if you want to edit this | ||
| -- !select_all -- | ||
| 1 2003-12-21 10:29:00+08:00 2003-12-21 10:29:00+08:00 2003-12-21 10:29:00.000+08:00 2003-12-21 15:29:00.000+08:00 2003-12-21 15:29:00.000000+08:00 2003-12-21 07:29:00.000000+08:00 | ||
|
|
||
| -- !select_cast_ts0 -- | ||
| 1 2003-12-21 10:29:00+08:00 2003-12-21 10:29:00.000+08:00 2003-12-21 15:29:00.000000+08:00 | ||
|
|
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,127 @@ | ||
| // Licensed to the Apache Software Foundation (ASF) under one | ||
| // or more contributor license agreements. See the NOTICE file | ||
| // distributed with this work for additional information | ||
| // regarding copyright ownership. The ASF licenses this file | ||
| // to you under the Apache License, Version 2.0 (the | ||
| // "License"); you may not use this file except in compliance | ||
| // with the License. You may obtain a copy of the License at | ||
| // | ||
| // http://www.apache.org/licenses/LICENSE-2.0 | ||
| // | ||
| // Unless required by applicable law or agreed to in writing, | ||
| // software distributed under the License is distributed on an | ||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
| // KIND, either express or implied. See the License for the | ||
| // specific language governing permissions and limitations | ||
| // under the License. | ||
|
|
||
| // Reproduces DORIS-25915: | ||
| // When VARIANT has typed paths that exceed variant_max_subcolumns_count and | ||
| // variant_enable_typed_paths_to_sparse=true, timestamptz values that fall to | ||
| // the sparse path are read back as only the timezone suffix (e.g. "+00:00") | ||
| // instead of the full timestamp. | ||
| suite("test_variant_timestamptz_sparse", "p0"){ | ||
| sql " set time_zone = '+08:00' " | ||
|
|
||
| def tableName = "test_variant_timestamptz_sparse_repro" | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This new regression test does not follow the Doris regression-test rules in |
||
| sql "DROP TABLE IF EXISTS ${tableName}" | ||
|
|
||
| sql """ | ||
| CREATE TABLE ${tableName} ( | ||
| pk int, | ||
| var VARIANT< | ||
| 'c1':bigint, | ||
| 'c2':bigint, | ||
| 'd1':date, | ||
| 'd2':date, | ||
| 'dt0':datetime(0), | ||
| 'dt0n':datetime(0), | ||
| 'dt3':datetime(3), | ||
| 'dt3n':datetime(3), | ||
| 'dt6':datetime(6), | ||
| 'dt6n':datetime(6), | ||
| 'ts0':timestamptz(0), | ||
| 'ts0n':timestamptz(0), | ||
| 'ts3':timestamptz(3), | ||
| 'ts3n':timestamptz(3), | ||
| 'ts6':timestamptz(6), | ||
| 'ts6n':timestamptz(6), | ||
| 'v1':string, | ||
| 'v2':string, | ||
| properties("variant_max_subcolumns_count" = "2","variant_enable_typed_paths_to_sparse" = "true") | ||
| > NULL, | ||
| INDEX c1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "c1"), | ||
| INDEX c2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "c2"), | ||
| INDEX d1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "d1"), | ||
| INDEX d2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "d2"), | ||
| INDEX dt0 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt0"), | ||
| INDEX dt0n (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt0n"), | ||
| INDEX dt3 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt3"), | ||
| INDEX dt3n (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt3n"), | ||
| INDEX dt6 (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt6"), | ||
| INDEX dt6n (`var`) USING INVERTED PROPERTIES("field_pattern" = "dt6n"), | ||
| INDEX v1 (`var`) USING INVERTED PROPERTIES("field_pattern" = "v1"), | ||
| INDEX v2 (`var`) USING INVERTED PROPERTIES("field_pattern" = "v2") | ||
| ) ENGINE=OLAP | ||
| DUPLICATE KEY(pk) | ||
| DISTRIBUTED BY HASH(pk) BUCKETS 1 | ||
| PROPERTIES("replication_num" = "1"); | ||
| """ | ||
|
|
||
| sql """ | ||
| INSERT INTO ${tableName} VALUES | ||
| (1, '{"c1":1699516324,"c2":1690122421,"d1":"2024-02-25","d2":"2024-11-27","dt0":"2002-12-25 12:42:19","dt0n":"2015-02-05 02:36:13","dt3":"2015-02-22 02:09:01","dt3n":"2015-09-16 02:55:07","dt6":"2001-09-19 09:53:52","dt6n":"2003-12-21 02:29:00","ts0":"2003-12-21 02:29:00 +00:00","ts0n":"2003-12-21 02:29:00 +00:00","ts3":"2003-12-21 02:29:00 +00:00","ts3n":"2003-12-21 02:29:00 -05:00","ts6":"2003-12-21 02:29:00 -05:00","ts6n":"2003-12-21 02:29:00 +03:00","v1":"2024-12-10 10:16:19","v2":"2023-12-31 23:59:59"}'); | ||
| """ | ||
|
|
||
| sql "SYNC" | ||
|
|
||
| qt_select_all """ | ||
| SELECT | ||
| pk, | ||
| CAST(var['ts0'] AS string) AS str_ts0, | ||
| CAST(var['ts0n'] AS string) AS str_ts0n, | ||
| CAST(var['ts3'] AS string) AS str_ts3, | ||
| CAST(var['ts3n'] AS string) AS str_ts3n, | ||
| CAST(var['ts6'] AS string) AS str_ts6, | ||
| CAST(var['ts6n'] AS string) AS str_ts6n | ||
| FROM ${tableName} | ||
| ORDER BY pk; | ||
| """ | ||
|
|
||
| qt_select_cast_ts0 """ | ||
| SELECT | ||
| pk, | ||
| CAST(var['ts0'] AS timestamptz(0)) AS cast_ts0, | ||
| CAST(var['ts3'] AS timestamptz(3)) AS cast_ts3, | ||
| CAST(var['ts6'] AS timestamptz(6)) AS cast_ts6 | ||
| FROM ${tableName} | ||
| ORDER BY pk; | ||
| """ | ||
|
|
||
| // Explicit assertion that reproduces the bug. | ||
| // When the bug is present, str_ts0 == "+00:00" (only the timezone), and this | ||
| // assertion fails. When the fix lands, the value contains the full timestamp | ||
| // "2003-12-21 ..." and the assertion passes. | ||
| def rows = sql """ | ||
| SELECT | ||
| CAST(var['ts0'] AS string) AS str_ts0, | ||
| CAST(var['ts3n'] AS string) AS str_ts3n, | ||
| CAST(var['ts6'] AS string) AS str_ts6 | ||
| FROM ${tableName} | ||
| WHERE pk = 1; | ||
| """ | ||
| logger.info("DORIS-25915 repro rows: ${rows}") | ||
|
|
||
| def str_ts0 = rows[0][0].toString() | ||
| def str_ts3n = rows[0][1].toString() | ||
| def str_ts6 = rows[0][2].toString() | ||
|
|
||
| assertTrue(str_ts0.contains("2003-12-21"), | ||
| "DORIS-25915 bug present: var['ts0'] in sparse path returned '${str_ts0}', expected to contain '2003-12-21'") | ||
| assertTrue(str_ts3n.contains("2003-12-21"), | ||
| "DORIS-25915 bug present: var['ts3n'] in sparse path returned '${str_ts3n}', expected to contain '2003-12-21'") | ||
| assertTrue(str_ts6.contains("2003-12-21"), | ||
| "DORIS-25915 bug present: var['ts6'] in sparse path returned '${str_ts6}', expected to contain '2003-12-21'") | ||
|
|
||
| sql "DROP TABLE IF EXISTS ${tableName}" | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This fixes the encoding for newly written sparse TIMESTAMPTZ values, but it also changes a persisted sparse-column value layout from the old
[type][8-byte value]bytes to[type][scale][8-byte value]without any reader compatibility for rowsets already written by the old code. The variant sparse path stores these bytes inColumnString(ColumnVariant::serialize_to_binary_column/SparseColumnMergeIterator::_serialize_nullable_column_to_sparse), andColumnVariant::deserialize_from_binary_columnlater callsDataTypeSerDe::deserialize_binary_to_fieldfollowed byCHECK_EQ(end - start_data, data_ref.size). For an existing 9-byte TIMESTAMPTZ value, the current reader consumes a scale byte plus 8 value bytes (10 bytes total), so after this change old persisted sparse values can read past the StringRef and/or trip the size check during upgrade. Please add a compatibility read path for the legacy 9-byte TIMESTAMPTZ encoding, and add a test that deserializes those old bytes.