From 77d2848b7ee8ac4b81dff9fe5878551bc9998c5b Mon Sep 17 00:00:00 2001 From: Mikhail Cheshkov Date: Fri, 31 Jan 2025 12:52:39 +0200 Subject: [PATCH] feat(cubesql): Add projection flattening rule Allow flattening a Projection node into the inner WrappedSelect. This should allow executing plans like Projection(Filter(...)) as a single ungrouped wrapper with push-to-Cube --- .../src/compile/rewrite/rules/wrapper/mod.rs | 1 + .../rewrite/rules/wrapper/projection.rs | 255 +++++++++++++++++- .../cubesql/src/compile/test/test_wrapper.rs | 39 +++ 3 files changed, 289 insertions(+), 6 deletions(-) diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs index e729e21217fdc..e231f97d338bc 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/mod.rs @@ -60,6 +60,7 @@ impl RewriteRules for WrapperRules { self.aggregate_merge_rules(&mut rules); self.projection_rules(&mut rules); self.projection_rules_subquery(&mut rules); + self.projection_merge_rules(&mut rules); self.limit_rules(&mut rules); self.filter_rules(&mut rules); self.filter_rules_subquery(&mut rules); diff --git a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/projection.rs b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/projection.rs index 65f80e07b64b6..bfacf3094e29f 100644 --- a/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/projection.rs +++ b/rust/cubesql/cubesql/src/compile/rewrite/rules/wrapper/projection.rs @@ -1,16 +1,17 @@ use crate::{ compile::rewrite::{ - cube_scan_wrapper, projection, + cube_scan_wrapper, projection, rewrite, rewriter::{CubeEGraph, CubeRewrite}, rules::wrapper::WrapperRules, subquery, transforming_rewrite, wrapped_select, wrapped_select_aggr_expr_empty_tail, wrapped_select_filter_expr_empty_tail, wrapped_select_group_expr_empty_tail, wrapped_select_having_expr_empty_tail,
wrapped_select_joins_empty_tail, - wrapped_select_order_expr_empty_tail, wrapped_select_subqueries_empty_tail, - wrapped_select_window_expr_empty_tail, wrapper_pullup_replacer, wrapper_pushdown_replacer, - wrapper_replacer_context, ListType, LogicalPlanLanguage, ProjectionAlias, - WrappedSelectAlias, WrappedSelectPushToCube, WrappedSelectUngroupedScan, - WrapperReplacerContextPushToCube, WrapperReplacerContextUngroupedScan, + wrapped_select_order_expr_empty_tail, wrapped_select_projection_expr_empty_tail, + wrapped_select_subqueries_empty_tail, wrapped_select_window_expr_empty_tail, + wrapper_pullup_replacer, wrapper_pushdown_replacer, wrapper_replacer_context, ListType, + LogicalPlanLanguage, ProjectionAlias, WrappedSelectAlias, WrappedSelectPushToCube, + WrappedSelectUngroupedScan, WrapperReplacerContextPushToCube, + WrapperReplacerContextUngroupedScan, }, copy_flag, var, var_iter, }; @@ -329,6 +330,248 @@ impl WrapperRules { ), )]); } + + pub fn projection_merge_rules(&self, rules: &mut Vec<CubeRewrite>) { + rules.extend(vec![rewrite( + "wrapper-merge-projection-with-inner-wrapped-select", + // Input is not a finished wrapper_pullup_replacer, but WrappedSelect just before pullup + // After pullup replacer would disable push to cube, because any node on top would have WrappedSelect in `from` + // So there would be no CubeScan to push to + // Instead, this rule tries to catch `from` before pulling up, and merge outer Projection into inner WrappedSelect + projection( + "?projection_expr", + cube_scan_wrapper( + wrapped_select( + "WrappedSelectSelectType:Projection", + wrapper_pullup_replacer( + wrapped_select_projection_expr_empty_tail(), + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:false", + "?cube_members", + "?grouped_subqueries", + "?ungrouped_scan", + ), + ), + wrapper_pullup_replacer( + wrapped_select_subqueries_empty_tail(), + wrapper_replacer_context( + "?alias_to_cube", +
"WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:false", + "?cube_members", + "?grouped_subqueries", + "?ungrouped_scan", + ), + ), + wrapper_pullup_replacer( + wrapped_select_group_expr_empty_tail(), + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:false", + "?cube_members", + "?grouped_subqueries", + "?ungrouped_scan", + ), + ), + wrapper_pullup_replacer( + wrapped_select_aggr_expr_empty_tail(), + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:false", + "?cube_members", + "?grouped_subqueries", + "?ungrouped_scan", + ), + ), + wrapper_pullup_replacer( + wrapped_select_window_expr_empty_tail(), + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:false", + "?cube_members", + "?grouped_subqueries", + "?ungrouped_scan", + ), + ), + wrapper_pullup_replacer( + "?inner_from", + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:false", + "?cube_members", + "?grouped_subqueries", + "?ungrouped_scan", + ), + ), + wrapper_pullup_replacer( + "?inner_joins", + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:false", + "?cube_members", + "?grouped_subqueries", + "?ungrouped_scan", + ), + ), + wrapper_pullup_replacer( + "?inner_filter", + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:false", + "?cube_members", + "?grouped_subqueries", + "?ungrouped_scan", + ), + ), + wrapped_select_having_expr_empty_tail(), + // Inner must not have limit and offset, because they are not commutative with aggregation + "WrappedSelectLimit:None", + "WrappedSelectOffset:None", + wrapper_pullup_replacer( + 
wrapped_select_order_expr_empty_tail(), + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:false", + "?cube_members", + "?grouped_subqueries", + "?ungrouped_scan", + ), + ), + "WrappedSelectAlias:None", + "WrappedSelectDistinct:false", + "WrappedSelectPushToCube:true", + "WrappedSelectUngroupedScan:true", + ), + "CubeScanWrapperFinalized:false", + ), + // TODO support merging projection with aliases + "ProjectionAlias:None", + "ProjectionSplit:false", + ), + cube_scan_wrapper( + wrapped_select( + "WrappedSelectSelectType:Projection", + wrapper_pushdown_replacer( + "?projection_expr", + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:true", + "?cube_members", + "?grouped_subqueries", + "WrapperReplacerContextUngroupedScan:true", + ), + ), + wrapper_pullup_replacer( + wrapped_select_subqueries_empty_tail(), + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:true", + "?cube_members", + "?grouped_subqueries", + "WrapperReplacerContextUngroupedScan:true", + ), + ), + wrapper_pullup_replacer( + wrapped_select_group_expr_empty_tail(), + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:true", + "?cube_members", + "?grouped_subqueries", + "WrapperReplacerContextUngroupedScan:true", + ), + ), + wrapper_pullup_replacer( + wrapped_select_aggr_expr_empty_tail(), + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:true", + "?cube_members", + "?grouped_subqueries", + "WrapperReplacerContextUngroupedScan:true", + ), + ), + wrapper_pullup_replacer( + wrapped_select_window_expr_empty_tail(), + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + 
"WrapperReplacerContextInProjection:true", + "?cube_members", + "?grouped_subqueries", + "WrapperReplacerContextUngroupedScan:true", + ), + ), + wrapper_pullup_replacer( + "?inner_from", + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:true", + "?cube_members", + "?grouped_subqueries", + "WrapperReplacerContextUngroupedScan:true", + ), + ), + wrapper_pullup_replacer( + "?inner_joins", + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:true", + "?cube_members", + "?grouped_subqueries", + "WrapperReplacerContextUngroupedScan:true", + ), + ), + wrapper_pullup_replacer( + "?inner_filter", + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:true", + "?cube_members", + "?grouped_subqueries", + "WrapperReplacerContextUngroupedScan:true", + ), + ), + wrapped_select_having_expr_empty_tail(), + "WrappedSelectLimit:None", + "WrappedSelectOffset:None", + wrapper_pullup_replacer( + wrapped_select_order_expr_empty_tail(), + wrapper_replacer_context( + "?alias_to_cube", + "WrapperReplacerContextPushToCube:true", + "WrapperReplacerContextInProjection:true", + "?cube_members", + "?grouped_subqueries", + "WrapperReplacerContextUngroupedScan:true", + ), + ), + "WrappedSelectAlias:None", + "WrappedSelectDistinct:false", + "WrappedSelectPushToCube:true", + "WrappedSelectUngroupedScan:true", + ), + "CubeScanWrapperFinalized:false", + ), + )]); + } + fn transform_projection( &self, expr_var: &'static str, diff --git a/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs b/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs index abc8e1a8b506e..74ad0fa7b65f5 100644 --- a/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs +++ b/rust/cubesql/cubesql/src/compile/test/test_wrapper.rs @@ -1417,3 +1417,42 @@ async fn wrapper_agg_dimension_over_limit() { .sql 
.contains("\"ungrouped\": true")); } + +// TODO allow number measures and add test for those +/// Projection(Filter(CubeScan(ungrouped))) should have projection expressions pushed down to Cube +#[tokio::test] +async fn wrapper_projection_flatten_simple_measure() { + if !Rewriter::sql_push_down_enabled() { + return; + } + init_testing_logger(); + + let query_plan = convert_select_to_query_plan( + // language=PostgreSQL + r#" +SELECT + maxPrice +FROM + MultiTypeCube +WHERE + LOWER(CAST(dim_num0 AS TEXT)) = 'all' +; + "# + .to_string(), + DatabaseProtocol::PostgreSQL, + ) + .await; + + let physical_plan = query_plan.as_physical_plan().await.unwrap(); + println!( + "Physical plan: {}", + displayable(physical_plan.as_ref()).indent() + ); + + let request = query_plan + .as_logical_plan() + .find_cube_scan_wrapped_sql() + .request; + assert_eq!(request.measures.unwrap().len(), 1); + assert_eq!(request.dimensions.unwrap().len(), 0); +}