From 9f3883951dc66130bd1f9382a06f88b0d104f042 Mon Sep 17 00:00:00 2001 From: Alex Qyoun-ae <4062971+MazterQyou@users.noreply.github.com> Date: Mon, 20 Oct 2025 20:51:53 +0400 Subject: [PATCH] feat(cubesql): Push down Tableau year-month-day IN number filter to CubeScan Signed-off-by: Alex Qyoun-ae <4062971+MazterQyou@users.noreply.github.com> --- rust/cubesql/cubesql/src/compile/mod.rs | 147 ++++++- .../src/compile/rewrite/rules/filters.rs | 364 +++++++++++++++++- 2 files changed, 497 insertions(+), 14 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/mod.rs b/rust/cubesql/cubesql/src/compile/mod.rs index 4f0c96d1776d8..efb858b8d5ebf 100644 --- a/rust/cubesql/cubesql/src/compile/mod.rs +++ b/rust/cubesql/cubesql/src/compile/mod.rs @@ -15685,8 +15685,8 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), dimension: "KibanaSampleDataEcommerce.order_date".to_string(), granularity: Some("year".to_string()), date_range: Some(json!(vec![ - "2019-01-01 00:00:00.000".to_string(), - "2023-12-31 23:59:59.999".to_string() + "2019-01-01T00:00:00.000Z".to_string(), + "2023-12-31T23:59:59.999Z".to_string() ])), }]), order: Some(vec![]), @@ -15744,8 +15744,8 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), member: Some("KibanaSampleDataEcommerce.order_date".to_string()), operator: Some("inDateRange".to_string()), values: Some(vec![ - "2019-01-01 00:00:00.000".to_string(), - "2019-03-31 23:59:59.999".to_string(), + "2019-01-01T00:00:00.000Z".to_string(), + "2019-03-31T23:59:59.999Z".to_string(), ]), or: None, and: None, @@ -15754,8 +15754,8 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), member: Some("KibanaSampleDataEcommerce.order_date".to_string()), operator: Some("inDateRange".to_string()), values: Some(vec![ - "2020-01-01 00:00:00.000".to_string(), - "2020-03-31 23:59:59.999".to_string(), + "2020-01-01T00:00:00.000Z".to_string(), + "2020-03-31T23:59:59.999Z".to_string(), ]), or: None, and: None, @@ -15764,8 +15764,8 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), member: 
Some("KibanaSampleDataEcommerce.order_date".to_string()), operator: Some("inDateRange".to_string()), values: Some(vec![ - "2021-01-01 00:00:00.000".to_string(), - "2021-03-31 23:59:59.999".to_string(), + "2021-01-01T00:00:00.000Z".to_string(), + "2021-03-31T23:59:59.999Z".to_string(), ]), or: None, and: None, @@ -15774,8 +15774,8 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), member: Some("KibanaSampleDataEcommerce.order_date".to_string()), operator: Some("inDateRange".to_string()), values: Some(vec![ - "2022-01-01 00:00:00.000".to_string(), - "2022-03-31 23:59:59.999".to_string(), + "2022-01-01T00:00:00.000Z".to_string(), + "2022-03-31T23:59:59.999Z".to_string(), ]), or: None, and: None, @@ -15784,8 +15784,8 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), member: Some("KibanaSampleDataEcommerce.order_date".to_string()), operator: Some("inDateRange".to_string()), values: Some(vec![ - "2023-01-01 00:00:00.000".to_string(), - "2023-03-31 23:59:59.999".to_string(), + "2023-01-01T00:00:00.000Z".to_string(), + "2023-03-31T23:59:59.999Z".to_string(), ]), or: None, and: None, @@ -17792,4 +17792,127 @@ LIMIT {{ limit }}{% endif %}"#.to_string(), } ) } + + #[tokio::test] + async fn test_tableau_year_month_in_number() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let logical_plan = convert_select_to_query_plan( + r#" + SELECT + COUNT("KibanaSampleDataEcommerce"."count") AS "cnt:count:ok", + CAST("KibanaSampleDataEcommerce"."customer_gender" AS TEXT) AS "customer_gender" + FROM "public"."KibanaSampleDataEcommerce" "KibanaSampleDataEcommerce" + WHERE ( + ( + ( + CAST( + TRUNC(EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date")) + AS INTEGER) * 100 + ) + CAST( + TRUNC(EXTRACT(MONTH FROM "KibanaSampleDataEcommerce"."order_date")) + AS INTEGER + ) + ) IN (202501, 202502, 202503, 202504) + AND CAST("KibanaSampleDataEcommerce"."customer_gender" AS TEXT) = 'value' + ) + GROUP BY 2 + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + 
) + .await + .as_logical_plan(); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string(),]), + dimensions: Some(vec![ + "KibanaSampleDataEcommerce.customer_gender".to_string(), + ]), + segments: Some(vec![]), + time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension { + dimension: "KibanaSampleDataEcommerce.order_date".to_string(), + granularity: None, + date_range: Some(json!(vec![ + "2025-01-01T00:00:00.000Z".to_string(), + "2025-04-30T23:59:59.999Z".to_string(), + ])), + }]), + order: Some(vec![]), + filters: Some(vec![V1LoadRequestQueryFilterItem { + member: Some("KibanaSampleDataEcommerce.customer_gender".to_string()), + operator: Some("equals".to_string()), + values: Some(vec!["value".to_string()]), + or: None, + and: None, + },]), + ..Default::default() + } + ) + } + + #[tokio::test] + async fn test_tableau_year_month_day_eq_number() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let logical_plan = convert_select_to_query_plan( + r#" + SELECT + COUNT("KibanaSampleDataEcommerce"."count") AS "cnt:count:ok", + CAST("KibanaSampleDataEcommerce"."customer_gender" AS TEXT) AS "customer_gender" + FROM "public"."KibanaSampleDataEcommerce" "KibanaSampleDataEcommerce" + WHERE ( + ( + ( + ( + CAST(TRUNC( + EXTRACT(YEAR FROM "KibanaSampleDataEcommerce"."order_date") + ) AS INTEGER) * 10000 + ) + ( + CAST(TRUNC( + EXTRACT(MONTH FROM "KibanaSampleDataEcommerce"."order_date") + ) AS INTEGER) * 100 + ) + ) + CAST(TRUNC( + EXTRACT(DAY FROM "KibanaSampleDataEcommerce"."order_date") + ) AS INTEGER) + ) = 20250218 + ) + GROUP BY 2 + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await + .as_logical_plan(); + + assert_eq!( + logical_plan.find_cube_scan().request, + V1LoadRequestQuery { + measures: Some(vec!["KibanaSampleDataEcommerce.count".to_string(),]), + dimensions: Some(vec![ + 
"KibanaSampleDataEcommerce.customer_gender".to_string(), + ]), + segments: Some(vec![]), + time_dimensions: Some(vec![V1LoadRequestQueryTimeDimension { + dimension: "KibanaSampleDataEcommerce.order_date".to_string(), + granularity: None, + date_range: Some(json!(vec![ + "2025-02-18T00:00:00.000Z".to_string(), + "2025-02-18T23:59:59.999Z".to_string(), + ])), + }]), + order: Some(vec![]), + ..Default::default() + } + ) + } } diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs index 6980b200f1a34..a56a1143f9f86 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/filters.rs @@ -37,7 +37,8 @@ use chrono::{ Numeric::{Day, Hour, Minute, Month, Second, Year}, Pad::Zero, }, - DateTime, Datelike, Days, Duration, Months, NaiveDate, NaiveDateTime, Timelike, Weekday, + DateTime, Datelike, Days, Duration, Months, NaiveDate, NaiveDateTime, TimeZone, Timelike, Utc, + Weekday, }; use cubeclient::models::V1CubeMeta; use datafusion::{ @@ -52,6 +53,7 @@ use egg::{Subst, Var}; use std::{ cmp::{max, min}, collections::HashSet, + convert::TryInto, fmt::Display, ops::{Index, IndexMut}, sync::Arc, @@ -2943,6 +2945,274 @@ impl RewriteRules for FilterRules { "?output_date_range", ), ), + // Tableau year/month: YEAR * 100 + MONTH IN (...) 
+ transforming_rewrite( + "tableau-year-month-in-number", + filter_replacer( + inlist_expr( + binary_expr( + binary_expr( + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("year"), "?date_expr".to_string()], + )], + ), + "*", + literal_int(100), + ), + "+", + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("month"), "?date_expr".to_string()], + )], + ), + ), + "?list", + "?negated", + ), + "?alias_to_cube", + "?members", + "?filter_aliases", + ), + filter_replacer( + inlist_expr( + self.fun_expr( + "DateTrunc", + vec![literal_string("month"), "?date_expr".to_string()], + ), + "?new_list", + "?negated", + ), + "?alias_to_cube", + "?members", + "?filter_aliases", + ), + self.transform_tableau_year_month_in_number("?list", "?new_list", false), + ), + // Tableau year/month: YEAR * 100 + MONTH = ... + // Rule above is reused + rewrite( + "tableau-year-month-eq-number", + filter_replacer( + binary_expr( + binary_expr( + binary_expr( + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("year"), "?date_expr".to_string()], + )], + ), + "*", + literal_int(100), + ), + "+", + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("month"), "?date_expr".to_string()], + )], + ), + ), + "=", + "?value", + ), + "?alias_to_cube", + "?members", + "?filter_aliases", + ), + filter_replacer( + inlist_expr( + binary_expr( + binary_expr( + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("year"), "?date_expr".to_string()], + )], + ), + "*", + literal_int(100), + ), + "+", + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("month"), "?date_expr".to_string()], + )], + ), + ), + inlist_expr_list(vec!["?value"], self.config_obj.push_down_pull_up_split()), + "InListExprNegated:false", + ), + "?alias_to_cube", + "?members", + "?filter_aliases", + ), + ), + // Tableau year/month/day: YEAR * 
10000 + MONTH * 100 + DAY IN (...) + transforming_rewrite( + "tableau-year-month-day-in-number", + filter_replacer( + inlist_expr( + binary_expr( + binary_expr( + binary_expr( + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("year"), "?date_expr".to_string()], + )], + ), + "*", + literal_int(10000), + ), + "+", + binary_expr( + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("month"), "?date_expr".to_string()], + )], + ), + "*", + literal_int(100), + ), + ), + "+", + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("day"), "?date_expr".to_string()], + )], + ), + ), + "?list", + "?negated", + ), + "?alias_to_cube", + "?members", + "?filter_aliases", + ), + filter_replacer( + inlist_expr( + self.fun_expr( + "DateTrunc", + vec![literal_string("day"), "?date_expr".to_string()], + ), + "?new_list", + "?negated", + ), + "?alias_to_cube", + "?members", + "?filter_aliases", + ), + self.transform_tableau_year_month_in_number("?list", "?new_list", true), + ), + // Tableau year/month/day: YEAR * 10000 + MONTH * 100 + DAY = ... 
+ // Rule above is reused + rewrite( + "tableau-year-month-day-eq-number", + filter_replacer( + binary_expr( + binary_expr( + binary_expr( + binary_expr( + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("year"), "?date_expr".to_string()], + )], + ), + "*", + literal_int(10000), + ), + "+", + binary_expr( + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("month"), "?date_expr".to_string()], + )], + ), + "*", + literal_int(100), + ), + ), + "+", + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("day"), "?date_expr".to_string()], + )], + ), + ), + "=", + "?value", + ), + "?alias_to_cube", + "?members", + "?filter_aliases", + ), + filter_replacer( + inlist_expr( + binary_expr( + binary_expr( + binary_expr( + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("year"), "?date_expr".to_string()], + )], + ), + "*", + literal_int(10000), + ), + "+", + binary_expr( + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("month"), "?date_expr".to_string()], + )], + ), + "*", + literal_int(100), + ), + ), + "+", + self.fun_expr( + "Trunc", + vec![self.fun_expr( + "DatePart", + vec![literal_string("day"), "?date_expr".to_string()], + )], + ), + ), + inlist_expr_list(vec!["?value"], self.config_obj.push_down_pull_up_split()), + "InListExprNegated:false", + ), + "?alias_to_cube", + "?members", + "?filter_aliases", + ), + ), ]; if self.config_obj.push_down_pull_up_split() { rules.push(list_rewrite( @@ -5550,7 +5820,7 @@ impl FilterRules { dts.push(last_value); } - let format = "%Y-%m-%d %H:%M:%S%.3f"; + let format = "%Y-%m-%dT%H:%M:%S%.3fZ"; let dts = dts .into_iter() .map(|(dt, new_dt)| { @@ -5621,6 +5891,53 @@ impl FilterRules { } } + fn transform_tableau_year_month_in_number( + &self, + list_var: &'static str, + new_list_var: &'static str, + has_day: bool, + ) -> impl Fn(&mut CubeEGraph, &mut Subst) -> bool { 
+ let list_var = var!(list_var); + let new_list_var = var!(new_list_var); + move |egraph, subst| { + let Some(list) = &egraph[subst[list_var]].data.constant_in_list else { + return false; + }; + + let mut new_values = vec![]; + for literal in list { + let Some(timestamp_nanos_opt) = Self::number_to_timestamp_nanos(literal, has_day) + else { + // One of the values cannot be converted, cancel the rule + return false; + }; + if let Some(timestamp_nanos) = timestamp_nanos_opt { + let scalar = ScalarValue::TimestampNanosecond(Some(timestamp_nanos), None); + new_values.push(scalar); + } + } + if new_values.is_empty() { + // No valid values after conversion, cancel the rule + return false; + } + + let ids = new_values + .into_iter() + .map(|literal| { + let value = egraph.add(LogicalPlanLanguage::LiteralExprValue( + LiteralExprValue(literal), + )); + egraph.add(LogicalPlanLanguage::LiteralExpr([value])) + }) + .collect::<Vec<_>>(); + subst.insert( + new_list_var, + egraph.add(LogicalPlanLanguage::InListExprList(ids)), + ); + true + } + } + // The outer Option's purpose is to signal when the type is incorrect // or parsing couldn't interpret the value as a NativeDateTime. // The inner Option is None when the ScalarValue is None. @@ -5648,6 +5965,49 @@ impl FilterRules { Some(Some(dt)) } + // The outer Option's purpose is to signal when the type is incorrect + // or parsing couldn't interpret the value as a date. This leads to the rule + // being cancelled. + // + // The inner Option is None when the value is syntactically correct + // but the date value is invalid. This leads to the rule skipping this value only.
+ fn number_to_timestamp_nanos(value: &ScalarValue, has_day: bool) -> Option<Option<i64>> { + let ScalarValue::Int64(value) = value else { + // Only Int64 types are supported + return None; + }; + + let Some(value) = value else { + // NULL values will never match with IN + return Some(None); + }; + + // Cancel on conversion errors + let year = if has_day { + *value / 10000 + } else { + *value / 100 + } + .try_into() + .ok()?; + let month = if has_day { + (*value / 100) % 100 + } else { + *value % 100 + } + .try_into() + .ok()?; + let day = if has_day { *value % 100 } else { 1 }.try_into().ok()?; + let Some(date) = NaiveDate::from_ymd_opt(year, month, day) else { + // Date is invalid, skip this value + return Some(None); + }; + + let datetime = date.and_hms_opt(0, 0, 0)?; + let timestamp_nanos = Utc.from_utc_datetime(&datetime).timestamp_nanos_opt()?; + Some(Some(timestamp_nanos)) + } + fn naive_datetime_to_range_by_granularity( dt: NaiveDateTime, granularity: &String,