fix(cubestore): 'unsorted data' assertion with high-precision timestamps

ilya-biryukov · ilya-biryukov · commit 58a8cb453953 · 2021-08-25T20:46:04.000+03:00
CubeStore used to truncate timestamps to millisecond precision when
writing to parquet, but sort the data with nanosecond precision.

This led to 'unsorted data in merge' assertions.

Ensure we truncate before we sort the data.
Increasing the storage precision is another option, but that involves
backward and forward compatibility issues and requires more planning.
So stick with the current behavior for now.

If you see the 'unsorted data' assertion in the logs, you have to manually
drop the tables where this happens, e.g. by rebuilding the rollups in
CubeJS.
diff --git a/rust/cubestore-sql-tests/src/tests.rs b/rust/cubestore-sql-tests/src/tests.rs
@@ -101,6 +101,7 @@ pub fn sql_tests() -> Vec<(&'static str, TestFn)> {
         t("now", now),
         t("dump", dump),
         t("unsorted_merge_assertion", unsorted_merge_assertion),
+        t("unsorted_data_timestamps", unsorted_data_timestamps),
     ];
 
     fn t<F>(name: &'static str, f: fn(Box<dyn SqlClient>) -> F) -> (&'static str, TestFn)
@@ -3332,6 +3333,42 @@ async fn unsorted_merge_assertion(service: Box<dyn SqlClient>) {
     assert_eq!(to_rows(&r), rows(&[(3, 2, 2), (2, 3, 2), (1, 4, 2)]));
 }
 
+async fn unsorted_data_timestamps(service: Box<dyn SqlClient>) {
+    service.exec_query("CREATE SCHEMA s").await.unwrap();
+    service
+        .exec_query("CREATE TABLE s.data(t timestamp, n string)")
+        .await
+        .unwrap();
+    service
+        .exec_query(
+            "INSERT INTO s.data(t, n) VALUES \
+            ('2020-01-01T00:00:00.000000005Z', 'a'), \
+            ('2020-01-01T00:00:00.000000001Z', 'b'), \
+            ('2020-01-01T00:00:00.000000002Z', 'c')",
+        )
+        .await
+        .unwrap();
+
+    // CubeStore currently truncates the nanosecond digits, so all three rows share the value `t`.
+    // This checks that inserted data is sorted by the truncated values, not by the original
+    // nanoseconds. We rely on implementation details of CubeStore here.
+    let r = service.exec_query("SELECT t, n FROM s.data").await.unwrap();
+
+    let t = timestamp_from_string("2020-01-01T00:00:00Z").unwrap();
+    assert_eq!(to_rows(&r), rows(&[(t, "a"), (t, "b"), (t, "c")]));
+
+    // This query ends up using MergeSortExec; make sure it triggers no assertions.
+    let r = service
+        .exec_query(
+            "SELECT t, n FROM (SELECT * FROM s.data UNION ALL SELECT * FROM s.data) data \
+        GROUP BY 1, 2 \
+        ORDER BY 1, 2",
+        )
+        .await
+        .unwrap();
+    assert_eq!(to_rows(&r), rows(&[(t, "a"), (t, "b"), (t, "c")]));
+}
+
 async fn now(service: Box<dyn SqlClient>) {
     let r = service.exec_query("SELECT now()").await.unwrap();
     assert_eq!(r.get_rows().len(), 1);
diff --git a/rust/cubestore/src/table/mod.rs b/rust/cubestore/src/table/mod.rs
@@ -29,7 +29,10 @@ pub struct TimestampValue {
 }
 
 impl TimestampValue {
-    pub fn new(unix_nano: i64) -> TimestampValue {
+    pub fn new(mut unix_nano: i64) -> TimestampValue {
+        // Hack: truncate here to work around an on-disk vs in-memory precision mismatch.
+        // NOTE(review): disk is said to be millisecond, but `% 1000` on nanos keeps micros; confirm.
+        unix_nano -= unix_nano % 1000;
         TimestampValue { unix_nano }
     }
 

Original file line number	Diff line number	Diff line change
`@@ -29,7 +29,10 @@ pub struct TimestampValue {`
`29`	`29`	`}`
`30`	`30`
`31`	`31`	`impl TimestampValue {`
`32`		`- pub fn new(unix_nano: i64) -> TimestampValue {`
	`32`	`+ pub fn new(mut unix_nano: i64) -> TimestampValue {`
	`33`	`+ // Hack: truncate here to work around an on-disk vs in-memory precision mismatch.`
	`34`	`+ // NOTE(review): disk is said to be millisecond, but % 1000 on nanos keeps micros; confirm.`
	`35`	`+ unix_nano -= unix_nano % 1000;`
`33`	`36`	`TimestampValue { unix_nano }`
`34`	`37`	`}`
`35`	`38`