From 1fc0c5d22d81b0d92a30a3922c9886c012a0e784 Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Tue, 28 Oct 2025 16:36:43 +0100 Subject: [PATCH 1/5] feat(tesseract): Bucketing --- .../src/compiler/CubeValidator.ts | 1 - .../integration/postgres/bucketing.test.ts | 160 ++++++++++++++++++ .../integration/postgres/calc-groups.test.ts | 2 +- .../test/integration/utils/BaseDbRunner.ts | 1 + .../src/cube_bridge/dimension_definition.rs | 2 + .../src/logical_plan/multistage/dimension.rs | 11 +- .../src/physical_plan_builder/builder.rs | 25 ++- .../processors/logical_join.rs | 11 +- .../physical_plan_builder/processors/query.rs | 2 + .../cubesqlplanner/src/plan/builder/join.rs | 1 + .../cubesqlplanner/src/plan/expression.rs | 25 ++- .../cubesqlplanner/src/plan/join.rs | 7 + .../multi_stage/multi_stage_query_planner.rs | 20 ++- .../symbols/common/static_filter.rs | 5 +- .../sql_evaluator/symbols/dimension_symbol.rs | 29 ++++ 15 files changed, 290 insertions(+), 12 deletions(-) create mode 100644 packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts diff --git a/packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts b/packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts index 4171514dafd93..a5887e8356c9f 100644 --- a/packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts +++ b/packages/cubejs-schema-compiler/src/compiler/CubeValidator.ts @@ -715,7 +715,6 @@ const DimensionsSchema = Joi.object().pattern(identifierRegex, Joi.alternatives( }), inherit(BaseDimension, { multiStage: Joi.boolean().valid(true), - type: Joi.any().valid('number').required(), sql: Joi.func().required(), addGroupBy: Joi.func(), }), diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts new file mode 100644 index 0000000000000..9deb93527dead --- /dev/null +++ b/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts @@ -0,0 +1,160 @@ +import { + getEnv, +} from '@cubejs-backend/shared'; +import { prepareYamlCompiler } from '../../unit/PrepareCompiler'; +import { dbRunner } from './PostgresDBRunner'; + +describe('Multi-Stage Bucketing', () => { + jest.setTimeout(200000); + + const { compiler, joinGraph, cubeEvaluator } = prepareYamlCompiler(` +cubes: + - name: orders + sql: > + SELECT 1 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId, 1000 AS revenue UNION ALL + SELECT 2 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId, 1100 AS revenue UNION ALL + SELECT 3 AS id, '2024-03-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId, 1300 AS revenue UNION ALL + SELECT 4 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId, 1400 AS revenue UNION ALL + SELECT 5 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId, 1600 AS revenue UNION ALL + SELECT 6 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 1 AS customerId, 1700 AS revenue UNION ALL + + SELECT 7 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId, 2000 AS revenue UNION ALL + SELECT 8 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId, 2100 AS revenue UNION ALL + SELECT 9 AS id, '2024-03-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId, 2300 AS revenue UNION ALL + SELECT 10 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId, 2500 AS revenue UNION ALL + SELECT 11 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId, 2700 AS revenue UNION ALL + SELECT 12 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 2 AS customerId, 2900 AS revenue UNION ALL + + SELECT 13 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId, 3000 AS revenue UNION ALL + SELECT 14 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId, 2800 AS revenue UNION ALL + SELECT 15 AS id, '2024-03-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId, 2500 AS revenue UNION ALL + SELECT 16 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId, 2300 AS revenue UNION ALL + SELECT 17 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId, 2100 AS revenue UNION ALL + SELECT 18 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 3 AS customerId, 1900 AS revenue UNION ALL + + SELECT 19 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId, 4000 AS revenue UNION ALL + SELECT 20 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId, 4200 AS revenue UNION ALL + SELECT 21 AS id, '2024-03-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId, 3900 AS revenue UNION ALL + SELECT 22 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId, 3700 AS revenue UNION ALL + SELECT 23 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId, 3400 AS revenue UNION ALL + SELECT 24 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 4 AS customerId, 3200 AS revenue UNION ALL + + SELECT 25 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId, 1500 AS revenue UNION ALL + SELECT 26 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId, 1700 AS revenue UNION ALL + SELECT 27 AS id, '2024-03-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId, 2000 AS revenue UNION ALL + SELECT 28 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId, 2200 AS revenue UNION ALL + SELECT 29 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId, 2500 AS revenue UNION ALL + SELECT 30 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 5 AS customerId, 2700 AS revenue UNION ALL + + SELECT 31 AS id, '2023-03-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId, 4500 AS revenue UNION ALL + SELECT 32 AS id, '2023-09-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId, 4300 AS revenue UNION ALL + SELECT 33 AS id, '2024-03-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId, 4100 AS revenue UNION ALL + SELECT 34 AS id, '2024-09-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId, 3900 AS revenue UNION ALL + SELECT 35 AS id, '2025-03-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId, 3700 AS revenue UNION ALL + SELECT 36 AS id, '2025-09-01T00:00:00Z'::timestamptz AS createdAt, 6 AS customerId, 3500 AS revenue + + dimensions: + - name: id + sql: ID + type: number + primary_key: true + + - name: customerId + sql: customerId + type: number + + - name: createdAt + sql: createdAt + type: time + + - name: changeType + sql: "CONCAT('Revenue is ', {revenueChangeType})" + multi_stage: true + type: string + add_group_by: [orders.customerId] + + + measures: + - name: count + type: count + + - name: revenue + sql: revenue + type: sum + + - name: revenueYearAgo + sql: "{revenue}" + multi_stage: true + type: number + time_shift: + - time_dimension: orders.createdAt + interval: 1 year + type: prior + + - name: revenueChangeType + sql: > + CASE + WHEN {revenueYearAgo} IS NULL THEN 'New' + WHEN {revenue} > {revenueYearAgo} THEN 'Grow' + ELSE 'Down' + END + type: string + + + + `); + + if (getEnv('nativeSqlPlanner')) { + it('bucketing', async () => dbRunner.runQueryTest({ + dimensions: ['orders.changeType'], + measures: ['orders.count', 'orders.revenue'], + timeDimensions: [ + { + dimension: 'orders.createdAt', + granularity: 'year', + dateRange: ['2024-01-02T00:00:00', '2026-01-01T00:00:00'] + } + ], + timezone: 'UTC', + order: [{ + id: 'orders.customerId' + }, { id: 'orders.createdAt' }], + }, [ + { + orders__cagr_1_y: null, + orders__date_year: '2022-01-01T00:00:00.000Z', + orders__revenue: '5', + orders__revenue_1_y_ago: null, + }, + { + orders__date_year: '2023-01-01T00:00:00.000Z', + orders__revenue: '15', + orders__revenue_1_y_ago: '5', + orders__cagr_1_y: '2.0000000000000000' + }, + { + orders__date_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '30', + orders__revenue_1_y_ago: '15', + orders__cagr_1_y: '1.0000000000000000' + }, + { + orders__date_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '5', + orders__revenue_1_y_ago: '30', + orders__cagr_1_y: '-0.83333333333333333333' + }, + + { + orders__cagr_1_y: null, + orders__date_year: '2026-01-01T00:00:00.000Z', + orders__revenue: null, + orders__revenue_1_y_ago: '5', + }, + ], + { joinGraph, cubeEvaluator, compiler })); + } else { + // This test is working only in tesseract + test.skip('multi stage over sub query', () => { expect(1).toBe(1); }); + } +}); diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts index 5b6410a8f08e7..fb7acf70c5bda 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts @@ -1317,7 +1317,7 @@ views: ], { joinGraph, cubeEvaluator, compiler }); }); - it('source full switch', async () => { + it('source full switch 1', async () => { await dbRunner.runQueryTest({ dimensions: ['source.currency', 'source.product_category'], measures: ['source.price'], diff --git a/packages/cubejs-schema-compiler/test/integration/utils/BaseDbRunner.ts b/packages/cubejs-schema-compiler/test/integration/utils/BaseDbRunner.ts index 80f3259ef4bbb..ef1c51b33e59d 100644 --- a/packages/cubejs-schema-compiler/test/integration/utils/BaseDbRunner.ts +++ b/packages/cubejs-schema-compiler/test/integration/utils/BaseDbRunner.ts @@ -30,6 +30,7 @@ export class BaseDbRunner { const res = await this.testQuery(sqlAndParams); console.log(JSON.stringify(res)); + console.log('!!! res', res); expect(res).toEqual( expectedResult diff --git a/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/dimension_definition.rs b/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/dimension_definition.rs index 7706f95455705..80967faa7c992 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/dimension_definition.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/cube_bridge/dimension_definition.rs @@ -21,6 +21,8 @@ pub struct DimensionDefinitionStatic { pub owned_by_cube: Option, #[serde(rename = "multiStage")] pub multi_stage: Option, + #[serde(rename = "addGroupByReferences")] + pub add_group_by_references: Option>, #[serde(rename = "subQuery")] pub sub_query: Option, #[serde(rename = "propagateFiltersToSubQuery")] diff --git a/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/multistage/dimension.rs b/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/multistage/dimension.rs index f37f2ce336541..489f754b5a81d 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/multistage/dimension.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/multistage/dimension.rs @@ -3,6 +3,7 @@ use crate::planner::query_properties::OrderByItem; use crate::planner::sql_evaluator::collectors::has_multi_stage_members; use crate::planner::sql_evaluator::MemberSymbol; use cubenativeutils::CubeError; +use itertools::Itertools; use std::rc::Rc; use typed_builder::TypedBuilder; @@ -44,12 +45,20 @@ impl MultiStageDimensionCalculation { } pub fn join_dimensions(&self) -> Result>, CubeError> { - let mut result = vec![]; + let mut result = if let Ok(dimension) = self.multi_stage_dimension.as_dimension() { + dimension.add_group_by().clone().unwrap_or_default() + } else { + vec![] + }; for dim in self.schema.all_dimensions() { if !has_multi_stage_members(dim, true)? { result.push(dim.clone()); } } + let result = result + .into_iter() + .unique_by(|d| d.full_name()) + .collect_vec(); Ok(result) } } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/builder.rs b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/builder.rs index ae0aa72f637c9..cfa31f1f7fa4e 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/builder.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/builder.rs @@ -11,6 +11,7 @@ use crate::planner::sql_evaluator::sql_nodes::SqlNodesFactory; use crate::planner::sql_evaluator::MemberSymbol; use crate::planner::sql_evaluator::ReferencesBuilder; use crate::planner::sql_templates::PlanSqlTemplates; +use crate::planner::VisitorContext; use cubenativeutils::CubeError; use itertools::Itertools; use std::collections::HashMap; @@ -153,7 +154,10 @@ impl PhysicalPlanBuilder { &self, dimension_schema: &Rc, join_builder: &mut JoinBuilder, + context: &PushDownBuilderContext, ) -> Result<(), CubeError> { + let original_join = join_builder.clone().build(); + let references_builder = ReferencesBuilder::new(From::new_from_join(original_join)); let conditions = dimension_schema .join_dimensions .iter() @@ -164,7 +168,26 @@ impl PhysicalPlanBuilder { alias_in_cte, )); - Ok(vec![(sub_query_ref, Expr::new_member(dim.clone()))]) + if let Ok(dimension) = dim.as_dimension() { + if dimension.is_calc_group() { + return Ok(vec![(sub_query_ref, Expr::new_member(dim.clone()))]); + } + } + + let mut context_factory = context.make_sql_nodes_factory()?; + references_builder.resolve_references_for_member( + dim.clone(), + &None, + context_factory.render_references_mut(), + )?; + + let visitor_context = + VisitorContext::new(self.query_tools.clone(), &context_factory, None); + + Ok(vec![( + sub_query_ref, + Expr::new_member_with_context(dim.clone(), Rc::new(visitor_context)), + )]) }) .collect::, _>>()?; diff --git a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/logical_join.rs b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/logical_join.rs index 70700895d7eb5..86968c75895c7 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/logical_join.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/logical_join.rs @@ -36,7 +36,9 @@ impl<'a> LogicalNodeProcessor<'a, LogicalJoin> for LogicalJoinProcessor<'a> { } let root = logical_join.root().clone().unwrap().cube().clone(); - if logical_join.joins().is_empty() && logical_join.dimension_subqueries().is_empty() { + if logical_join.joins().is_empty() && logical_join.dimension_subqueries().is_empty() + //&& multi_stage_dimension.is_none() + { Ok(From::new_from_cube( root.clone(), Some(root.default_alias_with_prefix(&context.alias_prefix)), @@ -81,8 +83,11 @@ impl<'a> LogicalNodeProcessor<'a, LogicalJoin> for LogicalJoinProcessor<'a> { } } if let Some(multi_stage_dimension) = &multi_stage_dimension { - self.builder - .add_multistage_dimension_join(multi_stage_dimension, &mut join_builder)?; + self.builder.add_multistage_dimension_join( + multi_stage_dimension, + &mut join_builder, + &context, + )?; } Ok(From::new_from_join(join_builder.build())) } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs index fef3535028474..a2389094f1898 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs @@ -76,9 +76,11 @@ impl<'a> LogicalNodeProcessor<'a, Query> for QueryProcessor<'a> { let from = if let QuerySource::LogicalJoin(_) = logical_plan.source() { let all_symbols = all_symbols(&logical_plan.schema(), &logical_plan.filter()); let calc_group_dims = collect_calc_group_dims_from_nodes(all_symbols.iter())?; + println!("!!! calc group len: {}", calc_group_dims.len()); let calc_groups_items = calc_group_dims.into_iter().map(|dim| { let values = get_filtered_values(&dim, &filter); + println!("!!!! values len: {}", values.len()); CalcGroupItem { symbol: dim, values, diff --git a/rust/cubesqlplanner/cubesqlplanner/src/plan/builder/join.rs b/rust/cubesqlplanner/cubesqlplanner/src/plan/builder/join.rs index 733888b9bc663..d9af5ad32968f 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/plan/builder/join.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/plan/builder/join.rs @@ -5,6 +5,7 @@ use crate::plan::{ use crate::planner::BaseCube; use std::rc::Rc; +#[derive(Clone)] pub struct JoinBuilder { root: SingleAliasedSource, joins: Vec, diff --git a/rust/cubesqlplanner/cubesqlplanner/src/plan/expression.rs b/rust/cubesqlplanner/cubesqlplanner/src/plan/expression.rs index f69bea0065801..d2b2298dbbdb6 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/plan/expression.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/plan/expression.rs @@ -8,11 +8,22 @@ use std::rc::Rc; #[derive(Clone)] pub struct MemberExpression { pub member: Rc, + pub context: Option>, } impl MemberExpression { pub fn new(member: Rc) -> Self { - Self { member } + Self { + member, + context: None, + } + } + + pub fn new_with_context(member: Rc, context: Rc) -> Self { + Self { + member, + context: Some(context), + } } pub fn to_sql( @@ -44,6 +55,9 @@ impl Expr { pub fn new_member(member: Rc) -> Self { Self::Member(MemberExpression::new(member)) } + pub fn new_member_with_context(member: Rc, context: Rc) -> Self { + Self::Member(MemberExpression::new_with_context(member, context)) + } pub fn new_reference(source: Option, reference: String) -> Self { Self::Reference(QualifiedColumnName::new(source, reference)) } @@ -54,7 +68,14 @@ impl Expr { ) -> Result { match self { Self::Null => Ok(format!("CAST(NULL as integer)")), - Self::Member(member) => member.to_sql(templates, context), + Self::Member(member) => { + let context = if let Some(self_context) = &member.context { + self_context.clone() + } else { + context + }; + member.to_sql(templates, context) + } Self::Reference(reference) => { templates.column_reference(reference.source(), &reference.name()) } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/plan/join.rs b/rust/cubesqlplanner/cubesqlplanner/src/plan/join.rs index 7be8764434f4c..86b2f62509090 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/plan/join.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/plan/join.rs @@ -7,6 +7,7 @@ use lazy_static::lazy_static; use std::rc::Rc; +#[derive(Clone)] pub struct RegularRollingWindowJoinCondition { time_series_source: String, trailing_interval: Option, @@ -90,6 +91,7 @@ impl RegularRollingWindowJoinCondition { } } +#[derive(Clone)] pub struct RollingTotalJoinCondition { time_series_source: String, time_dimension: Expr, @@ -116,6 +118,8 @@ impl RollingTotalJoinCondition { Ok(result) } } + +#[derive(Clone)] pub struct ToDateRollingWindowJoinCondition { time_series_source: String, granularity: Rc, @@ -157,6 +161,7 @@ impl ToDateRollingWindowJoinCondition { } } +#[derive(Clone)] pub struct DimensionJoinCondition { // AND (... OR ...) conditions: Vec>, @@ -212,6 +217,7 @@ impl DimensionJoinCondition { } } +#[derive(Clone)] pub enum JoinCondition { DimensionJoinCondition(DimensionJoinCondition), BaseJoinCondition(Rc), @@ -285,6 +291,7 @@ impl JoinCondition { } } +#[derive(Clone)] pub struct JoinItem { pub from: SingleAliasedSource, pub on: JoinCondition, diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs index 400ed822b2964..b0d53746d8d32 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs @@ -154,13 +154,18 @@ impl MultiStageQueryPlanner { is_ungrupped, ) } else { + let add_group_by = if let Ok(dimension) = base_member.as_dimension() { + dimension.add_group_by().clone().unwrap_or_default() + } else { + vec![] + }; resolved_multi_stage_dimensions .insert(base_member.clone().resolve_reference_chain().full_name()); ( MultiStageInodeMember::new( MultiStageInodeMemberType::Dimension, vec![], - vec![], + add_group_by, None, None, ), @@ -409,6 +414,17 @@ impl MultiStageQueryPlanner { resolved_multi_stage_dimensions, )?; + // Add GROUP BY to the dimension subquery itself + // if a multi-stage dimension has the `add_group_by` field. + let self_state = + if !multi_stage_member.add_group_by_symbols().is_empty() && member.is_dimension() { + let mut self_state = state.clone_state(); + self_state.add_dimensions(multi_stage_member.add_group_by_symbols().clone()); + Rc::new(self_state) + } else { + state.clone() + }; + let alias = format!("cte_{}", descriptions.len()); MultiStageQueryDescription::new( MultiStageMember::new( @@ -417,7 +433,7 @@ impl MultiStageQueryPlanner { is_ungrupped, false, ), - state.clone(), + self_state, input, alias.clone(), ) diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/common/static_filter.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/common/static_filter.rs index 9ca45d757cd73..e4674a27199a4 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/common/static_filter.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/common/static_filter.rs @@ -20,14 +20,17 @@ pub fn find_value_restriction( pub fn get_filtered_values(symbol: &Rc, filter: &Option) -> Vec { if let Ok(dim) = symbol.as_dimension() { if dim.dimension_type() == "switch" { + println!("!!!! values original: {:?}", dim.values()); if let Some(filter) = filter { if let Some(values) = find_value_restriction(&filter.items, symbol) { - return dim + let res = dim .values() .iter() .filter(|v| values.contains(v)) .cloned() .collect(); + println!("!!!! values filtered: {:?}", res); + return res; } } } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs index 2525578db0d22..c3e8850ac8e1a 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs @@ -33,6 +33,7 @@ pub struct DimensionSymbol { definition: Rc, is_reference: bool, // Symbol is a direct reference to another symbol without any calculations is_view: bool, + add_group_by: Option>>, time_shift: Vec, time_shift_pk_full_name: Option, is_self_time_shift_pk: bool, // If the dimension itself is a primary key and has time shifts, we can not reevaluate itself again while processing time shifts to avoid infinite recursion. So we raise this flag instead. @@ -56,6 +57,7 @@ impl DimensionSymbol { values: Vec, case: Option, definition: Rc, + add_group_by: Option>>, time_shift: Vec, time_shift_pk_full_name: Option, is_self_time_shift_pk: bool, @@ -75,6 +77,7 @@ impl DimensionSymbol { longitude, values, definition, + add_group_by, case, is_view, time_shift, @@ -167,6 +170,10 @@ impl DimensionSymbol { self.is_sub_query } + pub fn add_group_by(&self) -> &Option>> { + &self.add_group_by + } + pub fn dimension_type(&self) -> &String { &self.dimension_type } @@ -227,6 +234,11 @@ impl DimensionSymbol { if let Some(member_sql) = &self.longitude { member_sql.extract_symbol_deps(&mut deps); } + if let Some(add_group_by) = &self.add_group_by { + for member_sql in add_group_by { + deps.extend(member_sql.get_dependencies().into_iter()); + } + } if let Some(case) = &self.case { case.extract_symbol_deps(&mut deps); } @@ -247,6 +259,11 @@ impl DimensionSymbol { if let Some(case) = &self.case { case.extract_symbol_deps_with_path(&mut deps); } + if let Some(add_group_by) = &self.add_group_by { + for member_sql in add_group_by { + deps.extend(member_sql.get_dependencies_with_path().into_iter()); + } + } deps } @@ -476,6 +493,17 @@ impl SymbolFactory for DimensionSymbolFactory { vec![] }; + let add_group_by = + if let Some(add_group_by) = &definition.static_data().add_group_by_references { + let symbols = add_group_by + .iter() + .map(|add_group_by| compiler.add_dimension_evaluator(add_group_by.clone())) + .collect::, _>>()?; + Some(symbols) + } else { + None + }; + let is_multi_stage = definition.static_data().multi_stage.unwrap_or(false); //TODO move owned logic to rust @@ -514,6 +542,7 @@ impl SymbolFactory for DimensionSymbolFactory { values, case, definition, + add_group_by, time_shift, time_shift_pk, is_self_time_shift_pk, From 63ef27ca7937ffbc4e00735478e5dfe91d2988d7 Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Tue, 28 Oct 2025 18:50:11 +0100 Subject: [PATCH 2/5] in work --- .../integration/postgres/bucketing.test.ts | 82 +++++++++++++------ .../integration/postgres/calc-groups.test.ts | 79 ------------------ .../src/logical_plan/full_key_aggregate.rs | 5 ++ .../cubesqlplanner/src/logical_plan/schema.rs | 12 +++ .../processors/logical_join.rs | 5 +- .../physical_plan_builder/processors/query.rs | 25 ++++-- .../multi_stage/member_query_planner.rs | 21 +++-- .../multi_stage/multi_stage_query_planner.rs | 23 ++---- .../planners/multi_stage/query_description.rs | 8 ++ .../sql_evaluator/symbols/common/case.rs | 14 ++++ .../symbols/common/static_filter.rs | 2 - .../sql_evaluator/symbols/dimension_symbol.rs | 4 + 12 files changed, 147 insertions(+), 133 deletions(-) diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts index 9deb93527dead..f7b20a9b2c1e9 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts @@ -105,7 +105,7 @@ cubes: `); if (getEnv('nativeSqlPlanner')) { - it('bucketing', async () => dbRunner.runQueryTest({ + it('simple bucketing', async () => dbRunner.runQueryTest({ dimensions: ['orders.changeType'], measures: ['orders.count', 'orders.revenue'], timeDimensions: [ @@ -117,39 +117,75 @@ cubes: ], timezone: 'UTC', order: [{ - id: 'orders.customerId' + id: 'orders.changeType' }, { id: 'orders.createdAt' }], }, [ { - orders__cagr_1_y: null, - orders__date_year: '2022-01-01T00:00:00.000Z', - orders__revenue: '5', - orders__revenue_1_y_ago: null, + orders__change_type: 'Revenue is Down', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__count: '6', + orders__revenue: '20400' }, { - orders__date_year: '2023-01-01T00:00:00.000Z', - orders__revenue: '15', - orders__revenue_1_y_ago: '5', - orders__cagr_1_y: '2.0000000000000000' + orders__change_type: 'Revenue is Down', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__count: '6', + orders__revenue: '17800' }, { - orders__date_year: '2024-01-01T00:00:00.000Z', - orders__revenue: '30', - orders__revenue_1_y_ago: '15', - orders__cagr_1_y: '1.0000000000000000' + orders__change_type: 'Revenue is Grow', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__count: '6', + orders__revenue: '11700' }, { - orders__date_year: '2025-01-01T00:00:00.000Z', - orders__revenue: '5', - orders__revenue_1_y_ago: '30', - orders__cagr_1_y: '-0.83333333333333333333' - }, + orders__change_type: 'Revenue is Grow', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__count: '6', + orders__revenue: '14100' + } + ], + { joinGraph, cubeEvaluator, compiler })); + it('bucketing with multistage measure', async () => dbRunner.runQueryTest({ + dimensions: ['orders.changeType'], + measures: ['orders.revenue', 'orders.revenueYearAgo'], + timeDimensions: [ + { + dimension: 'orders.createdAt', + granularity: 'year', + dateRange: ['2024-01-02T00:00:00', '2026-01-01T00:00:00'] + } + ], + timezone: 'UTC', + order: [{ + id: 'orders.changeType' + }, { id: 'orders.createdAt' }], + }, + [ + { + orders__change_type: 'Revenue is Down', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '20400', + orders__revenue_year_ago: '22800' + }, + { + orders__change_type: 'Revenue is Down', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '17800', + orders__revenue_year_ago: '20400' + }, + { + orders__change_type: 'Revenue is Grow', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '11700', + orders__revenue_year_ago: '9400' + }, { - orders__cagr_1_y: null, - orders__date_year: '2026-01-01T00:00:00.000Z', - orders__revenue: null, - orders__revenue_1_y_ago: '5', + orders__change_type: 'Revenue is Grow', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '14100', + orders__revenue_year_ago: '11700' }, ], { joinGraph, cubeEvaluator, compiler })); diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts index fb7acf70c5bda..7829952e989ab 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts @@ -1198,85 +1198,6 @@ views: { joinGraph, cubeEvaluator, compiler }); }); - it('source product_category_ext and created_at cross join', async () => { - await dbRunner.runQueryTest({ - dimensions: ['source.product_category_ext'], - timeDimensions: [ - { - dimension: 'source.created_at', - granularity: 'month' - } - ], - timezone: 'UTC', - order: [ - { - id: 'source.created_at' - }, - { - id: 'source.product_category_ext' - } - ], - }, [ - { - source__product_category_ext: 'some category-EUR-EUR', - source__created_at_month: '2022-01-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category-USD-USD', - source__created_at_month: '2022-01-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category A-EUR-EUR', - source__created_at_month: '2022-02-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category A-USD-USD', - source__created_at_month: '2022-02-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category B-EUR-EUR', - source__created_at_month: '2022-02-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category B-USD-USD', - source__created_at_month: '2022-02-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category-EUR-EUR', - source__created_at_month: '2022-02-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category-USD-USD', - source__created_at_month: '2022-02-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category A-EUR-EUR', - source__created_at_month: '2022-03-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category A-USD-USD', - source__created_at_month: '2022-03-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category B-EUR-EUR', - source__created_at_month: '2022-03-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category B-USD-USD', - source__created_at_month: '2022-03-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category B-EUR-EUR', - source__created_at_month: '2022-04-01T00:00:00.000Z' - }, - { - source__product_category_ext: 'some category B-USD-USD', - source__created_at_month: '2022-04-01T00:00:00.000Z' - } - ], - { joinGraph, cubeEvaluator, compiler }); - }); - it('source product_category_ext filter', async () => { await dbRunner.runQueryTest({ dimensions: ['source.product_category'], diff --git a/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/full_key_aggregate.rs b/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/full_key_aggregate.rs index b304388a96ab5..fc03c78f9fce5 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/full_key_aggregate.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/full_key_aggregate.rs @@ -9,6 +9,7 @@ pub struct MultiStageSubqueryRef { name: String, #[builder(default)] symbols: Vec>, + schema: Rc, } impl MultiStageSubqueryRef { @@ -19,6 +20,10 @@ impl MultiStageSubqueryRef { pub fn symbols(&self) -> &Vec> { &self.symbols } + + pub fn schema(&self) -> &Rc { + &self.schema + } } impl PrettyPrint for MultiStageSubqueryRef { diff --git a/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/schema.rs b/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/schema.rs index 7964642749069..36061a28f36a2 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/schema.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/logical_plan/schema.rs @@ -1,6 +1,8 @@ +use cubenativeutils::CubeError; use itertools::Itertools; use super::pretty_print::*; +use crate::planner::sql_evaluator::collectors::has_multi_stage_members; use crate::planner::sql_evaluator::MemberSymbol; use std::collections::HashSet; use std::fmt; @@ -82,6 +84,16 @@ impl LogicalSchema { pub fn has_dimensions(&self) -> bool { !self.time_dimensions.is_empty() || !self.dimensions.is_empty() } + + pub fn multi_stage_dimensions(&self) -> Result>, CubeError> { + let mut result = vec![]; + for member in self.all_dimensions() { + if has_multi_stage_members(member, true)? { + result.push(member.clone()) + } + } + Ok(result) + } } impl PrettyPrint for LogicalSchema { diff --git a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/logical_join.rs b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/logical_join.rs index 86968c75895c7..7f29a162359f3 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/logical_join.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/logical_join.rs @@ -36,8 +36,9 @@ impl<'a> LogicalNodeProcessor<'a, LogicalJoin> for LogicalJoinProcessor<'a> { } let root = logical_join.root().clone().unwrap().cube().clone(); - if logical_join.joins().is_empty() && logical_join.dimension_subqueries().is_empty() - //&& multi_stage_dimension.is_none() + if logical_join.joins().is_empty() + && logical_join.dimension_subqueries().is_empty() + && multi_stage_dimension.is_none() { Ok(From::new_from_cube( root.clone(), diff --git a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs index a2389094f1898..107b879d89bce 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs @@ -1,5 +1,7 @@ use super::super::{LogicalNodeProcessor, ProcessableNode, PushDownBuilderContext}; -use crate::logical_plan::{all_symbols, MultiStageMemberLogicalType, Query, QuerySource}; +use crate::logical_plan::{ + all_symbols, pretty_print, pretty_print_rc, MultiStageMemberLogicalType, Query, QuerySource, +}; use crate::physical_plan_builder::PhysicalPlanBuilder; use crate::plan::{ CalcGroupItem, CalcGroupsJoin, Cte, Expr, From, MemberExpression, Select, SelectBuilder, @@ -62,8 +64,23 @@ impl<'a> LogicalNodeProcessor<'a, Query> for QueryProcessor<'a> { } context.remove_multi_stage_dimensions(); - for member in logical_plan.schema().all_dimensions() { - if has_multi_stage_members(member, true)? { + + //FIXME This is hack but good solution require refactor + let resolved_multistage_dimension = + if let QuerySource::FullKeyAggregate(fk_source) = logical_plan.source() { + if let Some(first_cte_ref) = fk_source.multi_stage_subquery_refs().first() { + first_cte_ref.schema().multi_stage_dimensions()? + } else { + vec![] + } + } else { + vec![] + }; + for member in logical_plan.schema().multi_stage_dimensions()? { + if resolved_multistage_dimension + .iter() + .all(|d| d.full_name() != member.full_name()) + { context.add_multi_stage_dimension(member.full_name()); } } @@ -76,11 +93,9 @@ impl<'a> LogicalNodeProcessor<'a, Query> for QueryProcessor<'a> { let from = if let QuerySource::LogicalJoin(_) = logical_plan.source() { let all_symbols = all_symbols(&logical_plan.schema(), &logical_plan.filter()); let calc_group_dims = collect_calc_group_dims_from_nodes(all_symbols.iter())?; - println!("!!! calc group len: {}", calc_group_dims.len()); let calc_groups_items = calc_group_dims.into_iter().map(|dim| { let values = get_filtered_values(&dim, &filter); - println!("!!!! values len: {}", values.len()); CalcGroupItem { symbol: dim, values, diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs index 20a74fe5720d4..3722bf9b13376 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/member_query_planner.rs @@ -169,10 +169,12 @@ impl MultiStageMemberQueryPlanner { time_series_input: MultiStageSubqueryRef::builder() .name(inputs[0].0.clone()) .symbols(inputs[0].1.clone()) + .schema(inputs[0].2.clone()) .build(), measure_input: MultiStageSubqueryRef::builder() .name(inputs[1].0.clone()) .symbols(inputs[1].1.clone()) + .schema(inputs[1].2.clone()) .build(), rolling_time_dimension: rolling_window_desc.time_dimension.clone(), time_dimension_in_measure_input: rolling_window_desc.base_time_dimension.clone(), @@ -229,11 +231,12 @@ impl MultiStageMemberQueryPlanner { let input_sources = self .input_cte_aliases() .into_iter() - .map(|(name, symbols)| { + .map(|(name, symbols, schema)| { Rc::new( MultiStageSubqueryRef::builder() .name(name.clone()) .symbols(symbols.clone()) + .schema(schema) .build(), ) }) @@ -301,11 +304,12 @@ impl MultiStageMemberQueryPlanner { let input_sources = self .input_cte_aliases() .into_iter() - .map(|(name, symbols)| { + .map(|(name, symbols, schema)| { Rc::new( MultiStageSubqueryRef::builder() .name(name.clone()) .symbols(symbols.clone()) + .schema(schema) .build(), ) }) @@ -430,12 +434,19 @@ impl MultiStageMemberQueryPlanner { .into_rc() } - fn input_cte_aliases(&self) -> Vec<(String, Vec>)> { + fn input_cte_aliases(&self) -> Vec<(String, Vec>, Rc)> { self.description .input() .iter() - .map(|d| (d.alias().clone(), vec![d.member_node().clone()])) - .unique_by(|(a, _)| a.clone()) + .map(|d| { + let schema = LogicalSchema::default() + .set_time_dimensions(d.state().time_dimensions().clone()) + .set_dimensions(d.state().dimensions().clone()) + .set_measures(vec![d.member_node().clone()]) + .into_rc(); + (d.alias().clone(), vec![d.member_node().clone()], schema) + }) + .unique_by(|(a, _, _)| a.clone()) .collect_vec() } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs index b0d53746d8d32..00c0e3a05fecf 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/multi_stage_query_planner.rs @@ -83,11 +83,12 @@ impl MultiStageQueryPlanner { &mut resolved_multi_stage_dimensions, )?; if !description.is_multi_stage_dimension() { - let result = ( - description.alias().clone(), - vec![description.member_node().clone()], - ); - top_level_ctes.push(result) + let result = MultiStageSubqueryRef::builder() + .name(description.alias().clone()) + .symbols(vec![description.member_node().clone()]) + .schema(description.schema().clone()) + .build(); + top_level_ctes.push(Rc::new(result)) } } @@ -104,18 +105,6 @@ impl MultiStageQueryPlanner { }) .collect::, _>>()?; - let top_level_ctes = top_level_ctes - .iter() - .map(|(alias, symbols)| { - Rc::new( - MultiStageSubqueryRef::builder() - .name(alias.clone()) - .symbols(symbols.clone()) - .build(), - ) - }) - .collect_vec(); - Ok((all_queries, top_level_ctes)) } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/query_description.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/query_description.rs index afdb4d687e95c..d6db16f4f3aa7 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/query_description.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/planners/multi_stage/query_description.rs @@ -1,4 +1,5 @@ use super::{MultiStageAppliedState, MultiStageMember}; +use crate::logical_plan::LogicalSchema; use crate::planner::sql_evaluator::MemberSymbol; use std::fmt::Debug; use std::rc::Rc; @@ -39,6 +40,13 @@ impl MultiStageQueryDescription { }) } + pub fn schema(&self) -> Rc { + LogicalSchema::default() + .set_time_dimensions(self.state.time_dimensions().clone()) + .set_dimensions(self.state.dimensions().clone()) + .set_measures(vec![self.member_node().clone()]) + .into_rc() + } pub fn member_node(&self) -> &Rc { &self.member.evaluation_node() } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/common/case.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/common/case.rs index 0a924b7cc7d7e..341e630b56ca7 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/common/case.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/common/case.rs @@ -157,6 +157,14 @@ impl CaseSwitchDefinition { res.remove_unreachable_branches(); Ok(res) } + + pub fn is_single_value(&self) -> bool { + let mut values_len = self.items.len(); + if self.else_sql.is_some() { + values_len += 1; + } + values_len == 1 + } fn extract_symbol_deps(&self, result: &mut Vec>) { self.switch.extract_symbol_deps(result); for itm in self.items.iter() { @@ -357,4 +365,10 @@ impl Case { }; Ok(res) } + pub fn is_single_value(&self) -> bool { + match self { + Case::Case(_) => false, + Case::CaseSwitch(case) => case.is_single_value(), + } + } } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/common/static_filter.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/common/static_filter.rs index e4674a27199a4..5c8ddac81c12e 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/common/static_filter.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/common/static_filter.rs @@ -20,7 +20,6 @@ pub fn find_value_restriction( pub fn get_filtered_values(symbol: &Rc, filter: &Option) -> Vec { if let Ok(dim) = symbol.as_dimension() { if dim.dimension_type() == "switch" { - println!("!!!! values original: {:?}", dim.values()); if let Some(filter) = filter { if let Some(values) = find_value_restriction(&filter.items, symbol) { let res = dim @@ -29,7 +28,6 @@ pub fn get_filtered_values(symbol: &Rc, filter: &Option) - .filter(|v| values.contains(v)) .cloned() .collect(); - println!("!!!! values filtered: {:?}", res); return res; } } diff --git a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs index c3e8850ac8e1a..3501c49c1f781 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/planner/sql_evaluator/symbols/dimension_symbol.rs @@ -122,6 +122,10 @@ impl DimensionSymbol { pub(super) fn replace_case(&self, new_case: Case) -> Rc { let mut new = self.clone(); + if new_case.is_single_value() { + //FIXME - Hack: we don’t treat a single-element case as a multi-stage dimension + new.is_multi_stage = false; + } new.case = Some(new_case); Rc::new(new) } From e180bbc2300cd2d91f3f57abf88f8ef18524233f Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Tue, 28 Oct 2025 18:56:06 +0100 Subject: [PATCH 3/5] in work --- .../integration/postgres/bucketing.test.ts | 53 +++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts index f7b20a9b2c1e9..b2928e7ad2e98 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/bucketing.test.ts @@ -73,6 +73,17 @@ cubes: type: string add_group_by: [orders.customerId] + - name: changeTypeComplex + sql: > + CASE + WHEN {revenueYearAgo} IS NULL THEN 'New' + WHEN {revenue} > {revenueYearAgo} THEN 'Grow' + ELSE 'Down' + END + multi_stage: true + type: string + add_group_by: [orders.customerId] + measures: - name: count @@ -189,6 +200,48 @@ cubes: }, ], { joinGraph, cubeEvaluator, compiler })); + it('bucketing with complex bucket dimension', async () => dbRunner.runQueryTest({ + dimensions: ['orders.changeTypeComplex'], + measures: ['orders.revenue', 'orders.revenueYearAgo'], + timeDimensions: [ + { + dimension: 'orders.createdAt', + granularity: 'year', + dateRange: ['2024-01-02T00:00:00', '2026-01-01T00:00:00'] + } + ], + timezone: 'UTC', + order: [{ + id: 'orders.changeTypeComplex' + }, { id: 'orders.createdAt' }], + }, + [ + { + orders__change_type_complex: 'Down', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '20400', + orders__revenue_year_ago: '22800' + }, + { + orders__change_type_complex: 'Down', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '17800', + orders__revenue_year_ago: '20400' + }, + { + orders__change_type_complex: 'Grow', + orders__created_at_year: '2024-01-01T00:00:00.000Z', + orders__revenue: '11700', + orders__revenue_year_ago: '9400' + }, + { + orders__change_type_complex: 'Grow', + orders__created_at_year: '2025-01-01T00:00:00.000Z', + orders__revenue: '14100', + orders__revenue_year_ago: '11700' + }, + ], + { joinGraph, cubeEvaluator, compiler })); } else { // This test is working only in tesseract test.skip('multi stage over sub query', () => { expect(1).toBe(1); }); From d24bb0f9bf5a3a556c1759ae0300cc897b2f4528 Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Tue, 28 Oct 2025 18:57:08 +0100 Subject: [PATCH 4/5] fmt --- .../src/physical_plan_builder/processors/query.rs | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs index 107b879d89bce..edccabe150385 100644 --- a/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs +++ b/rust/cubesqlplanner/cubesqlplanner/src/physical_plan_builder/processors/query.rs @@ -1,14 +1,10 @@ use super::super::{LogicalNodeProcessor, ProcessableNode, PushDownBuilderContext}; -use crate::logical_plan::{ - all_symbols, pretty_print, pretty_print_rc, MultiStageMemberLogicalType, Query, QuerySource, -}; +use crate::logical_plan::{all_symbols, MultiStageMemberLogicalType, Query, QuerySource}; use crate::physical_plan_builder::PhysicalPlanBuilder; use crate::plan::{ CalcGroupItem, CalcGroupsJoin, Cte, Expr, From, MemberExpression, Select, SelectBuilder, }; -use crate::planner::sql_evaluator::collectors::{ - collect_calc_group_dims_from_nodes, has_multi_stage_members, -}; +use crate::planner::sql_evaluator::collectors::collect_calc_group_dims_from_nodes; use crate::planner::sql_evaluator::{get_filtered_values, ReferencesBuilder}; use cubenativeutils::CubeError; use itertools::Itertools; From 08739cc568cee23712004f3c4d8e6f9ab5ffccae Mon Sep 17 00:00:00 2001 From: Aleksandr Romanenko Date: Wed, 29 Oct 2025 10:14:10 +0100 Subject: [PATCH 5/5] fix --- .../test/integration/postgres/calc-groups.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts b/packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts index 7829952e989ab..974e488856c68 100644 --- a/packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts +++ b/packages/cubejs-schema-compiler/test/integration/postgres/calc-groups.test.ts @@ -1238,7 +1238,7 @@ views: ], { joinGraph, cubeEvaluator, compiler }); }); - it('source full switch 1', async () => { + it('source full switch', async () => { await dbRunner.runQueryTest({ dimensions: ['source.currency', 'source.product_category'], measures: ['source.price'],