From a8b6f773b17338be9aa714a2cca632beb10a5644 Mon Sep 17 00:00:00 2001 From: xudong963 Date: Tue, 14 Mar 2023 18:18:04 +0800 Subject: [PATCH 1/6] fix: join binder stack overflow --- src/query/sql/src/planner/binder/join.rs | 12 +++---- src/query/sql/src/planner/binder/table.rs | 40 +++++++++++++++++++++-- 2 files changed, 42 insertions(+), 10 deletions(-) diff --git a/src/query/sql/src/planner/binder/join.rs b/src/query/sql/src/planner/binder/join.rs index 22e2a8b6b04df..91bbe2a4fb9c5 100644 --- a/src/query/sql/src/planner/binder/join.rs +++ b/src/query/sql/src/planner/binder/join.rs @@ -55,17 +55,15 @@ impl Binder { #[async_recursion] pub(super) async fn bind_join( &mut self, - bind_context: &BindContext, + left_context: BindContext, + right_context: BindContext, + left_child: SExpr, + right_child: SExpr, join: &common_ast::ast::Join, ) -> Result<(SExpr, BindContext)> { - let (left_child, left_context) = - self.bind_table_reference(bind_context, &join.left).await?; - let (right_child, right_context) = - self.bind_table_reference(bind_context, &join.right).await?; - check_duplicate_join_tables(&left_context, &right_context)?; - let mut bind_context = bind_context.replace(); + let mut bind_context = BindContext::new(); match &join.op { JoinOperator::LeftOuter | JoinOperator::RightOuter | JoinOperator::FullOuter diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index 70998d70e2d37..cf8d4888e02e7 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -19,6 +19,7 @@ use std::sync::Arc; use chrono::TimeZone; use chrono::Utc; use common_ast::ast::Indirection; +use common_ast::ast::Join; use common_ast::ast::SelectStmt; use common_ast::ast::SelectTarget; use common_ast::ast::Statement; @@ -112,7 +113,7 @@ impl Binder { .await } - pub(super) async fn bind_table_reference( + async fn bind_single_table( &mut self, bind_context: &BindContext, table_ref: &TableReference, @@ -348,7 +349,6 @@ impl Binder { } Ok((s_expr, bind_context)) } - TableReference::Join { span: _, join } => self.bind_join(bind_context, join).await, TableReference::Subquery { span: _, subquery, @@ -382,6 +382,7 @@ impl Binder { self.bind_stage_table(bind_context, stage_info, files_info, alias, None) .await } + TableReference::Join { .. } => unreachable!(), } } @@ -431,7 +432,40 @@ impl Binder { } } - async fn bind_cte( + pub(super) async fn bind_table_reference( + &mut self, + bind_context: &BindContext, + table_ref: &TableReference, + ) -> Result<(SExpr, BindContext)> { + let mut current_ref = table_ref; + let current_ctx = bind_context; + + // Stack to keep track of the joins + let mut join_stack: Vec<&Join> = Vec::new(); + + // Traverse the table reference hierarchy to get to the innermost table + while let TableReference::Join { join, .. } = current_ref { + join_stack.push(join); + current_ref = &join.left; + } + + // Bind the innermost table + let (mut left_expr, mut left_ctx) = + self.bind_single_table(current_ctx, current_ref).await?; + + for join in join_stack.iter() { + let (right_expr, right_ctx) = self.bind_single_table(&left_ctx, &join.right).await?; + let (join_expr, ctx) = self + .bind_join(left_ctx, right_ctx, left_expr, right_expr, join) + .await?; + left_expr = join_expr; + left_ctx = ctx; + } + + Ok((left_expr, left_ctx)) + } + + asycn fn bind_cte( &mut self, bind_context: &BindContext, table_name: &str, From b8ef2fffb890582f4a0d7d8346e9c7f194e1dd0a Mon Sep 17 00:00:00 2001 From: xudong963 Date: Tue, 14 Mar 2023 19:33:19 +0800 Subject: [PATCH 2/6] fix iterator --- src/query/sql/src/planner/binder/join.rs | 3 ++- src/query/sql/src/planner/binder/table.rs | 14 ++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/src/query/sql/src/planner/binder/join.rs b/src/query/sql/src/planner/binder/join.rs index 91bbe2a4fb9c5..138a8e06aa634 100644 --- a/src/query/sql/src/planner/binder/join.rs +++ b/src/query/sql/src/planner/binder/join.rs @@ -55,6 +55,7 @@ impl Binder { #[async_recursion] pub(super) async fn bind_join( &mut self, + bind_context: &BindContext, left_context: BindContext, right_context: BindContext, left_child: SExpr, @@ -63,7 +64,7 @@ impl Binder { ) -> Result<(SExpr, BindContext)> { check_duplicate_join_tables(&left_context, &right_context)?; - let mut bind_context = BindContext::new(); + let mut bind_context = bind_context.replace(); match &join.op { JoinOperator::LeftOuter | JoinOperator::RightOuter | JoinOperator::FullOuter diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index cf8d4888e02e7..c26c1f2e4af56 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -452,11 +452,17 @@ impl Binder { // Bind the innermost table let (mut left_expr, mut left_ctx) = self.bind_single_table(current_ctx, current_ref).await?; - - for join in join_stack.iter() { - let (right_expr, right_ctx) = self.bind_single_table(&left_ctx, &join.right).await?; + for join in join_stack.iter().rev() { + let (right_expr, right_ctx) = self.bind_single_table(current_ctx, &join.right).await?; let (join_expr, ctx) = self - .bind_join(left_ctx, right_ctx, left_expr, right_expr, join) + .bind_join( + current_ctx, + left_ctx, + right_ctx, + left_expr, + right_expr, + join, + ) .await?; left_expr = join_expr; left_ctx = ctx; From bac03241f1c45ed41ffa41c6374961beaf56fb68 Mon Sep 17 00:00:00 2001 From: xudong963 Date: Tue, 14 Mar 2023 21:20:32 +0800 Subject: [PATCH 3/6] fix special case --- src/query/sql/src/planner/binder/table.rs | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index c26c1f2e4af56..e917ec228fc13 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -16,6 +16,7 @@ use std::collections::HashMap; use std::default::Default; use std::sync::Arc; +use async_recursion::async_recursion; use chrono::TimeZone; use chrono::Utc; use common_ast::ast::Indirection; @@ -113,6 +114,7 @@ impl Binder { .await } + #[async_recursion] async fn bind_single_table( &mut self, bind_context: &BindContext, @@ -382,7 +384,11 @@ impl Binder { self.bind_stage_table(bind_context, stage_info, files_info, alias, None) .await } - TableReference::Join { .. } => unreachable!(), + TableReference::Join { .. } => { + // Cover some specif case, such as select * from t0, t1 inner join t2 on t1.a = t2.a + // Such case will be right associate + self.bind_table_reference(bind_context, table_ref).await + } } } From 8fa4f3dce74df28928ed49f500ff9ced801a87df Mon Sep 17 00:00:00 2001 From: xudong963 Date: Wed, 15 Mar 2023 11:45:34 +0800 Subject: [PATCH 4/6] update --- src/query/sql/src/planner/binder/table.rs | 74 +++++++++++++++------- tests/sqllogictests/suites/query/join.test | 2 + 2 files changed, 54 insertions(+), 22 deletions(-) diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index e917ec228fc13..472c574ff38f7 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -384,11 +384,7 @@ impl Binder { self.bind_stage_table(bind_context, stage_info, files_info, alias, None) .await } - TableReference::Join { .. } => { - // Cover some specif case, such as select * from t0, t1 inner join t2 on t1.a = t2.a - // Such case will be right associate - self.bind_table_reference(bind_context, table_ref).await - } + TableReference::Join { .. } => unreachable!(), } } @@ -452,32 +448,66 @@ impl Binder { // Traverse the table reference hierarchy to get to the innermost table while let TableReference::Join { join, .. } = current_ref { join_stack.push(join); - current_ref = &join.left; + + // Check whether the right-hand side is a Join or a TableReference + match &*join.right { + TableReference::Join { .. } => { + // Traverse the right-hand side if the right-hand side is a Join + current_ref = &join.right; + } + _ => { + // Traverse the left-hand side if the right-hand side is a TableReference + current_ref = &join.left; + } + } } // Bind the innermost table - let (mut left_expr, mut left_ctx) = + // current_ref must be left table in its join + let (mut result_expr, mut result_ctx) = self.bind_single_table(current_ctx, current_ref).await?; + for join in join_stack.iter().rev() { - let (right_expr, right_ctx) = self.bind_single_table(current_ctx, &join.right).await?; - let (join_expr, ctx) = self - .bind_join( - current_ctx, - left_ctx, - right_ctx, - left_expr, - right_expr, - join, - ) - .await?; - left_expr = join_expr; - left_ctx = ctx; + match &*join.right { + TableReference::Join { .. } => { + let (left_expr, left_ctx) = + self.bind_single_table(current_ctx, &join.left).await?; + let (join_expr, ctx) = self + .bind_join( + current_ctx, + left_ctx, + result_ctx, + left_expr, + result_expr, + join, + ) + .await?; + result_expr = join_expr; + result_ctx = ctx; + } + _ => { + let (right_expr, right_ctx) = + self.bind_single_table(current_ctx, &join.right).await?; + let (join_expr, ctx) = self + .bind_join( + current_ctx, + result_ctx, + right_ctx, + result_expr, + right_expr, + join, + ) + .await?; + result_expr = join_expr; + result_ctx = ctx; + } + } } - Ok((left_expr, left_ctx)) + Ok((result_expr, result_ctx)) } - asycn fn bind_cte( + async fn bind_cte( &mut self, bind_context: &BindContext, table_name: &str, diff --git a/tests/sqllogictests/suites/query/join.test b/tests/sqllogictests/suites/query/join.test index 8ae6b9ab6f27b..ff37d4fba6863 100644 --- a/tests/sqllogictests/suites/query/join.test +++ b/tests/sqllogictests/suites/query/join.test @@ -512,3 +512,5 @@ query I select count(*) from numbers(10000) as t1 inner join numbers(10000) as t2 on t1.number = t2.number ---- 10000 + +# https://github.com/datafuselabs/databend/issues/10523 From 6595652bb19fae6d40e20309749e582eb2ca80d2 Mon Sep 17 00:00:00 2001 From: xudong963 Date: Wed, 15 Mar 2023 14:36:55 +0800 Subject: [PATCH 5/6] add test --- .../sql/src/planner/optimizer/optimizer.rs | 2 - tests/sqllogictests/suites/query/join.test | 62 +++++++++++++++++++ 2 files changed, 62 insertions(+), 2 deletions(-) diff --git a/src/query/sql/src/planner/optimizer/optimizer.rs b/src/query/sql/src/planner/optimizer/optimizer.rs index b75d74d1a1823..9ecb5db796c25 100644 --- a/src/query/sql/src/planner/optimizer/optimizer.rs +++ b/src/query/sql/src/planner/optimizer/optimizer.rs @@ -149,10 +149,8 @@ pub fn optimize_query( let mut heuristic = HeuristicOptimizer::new(ctx.clone(), bind_context, metadata.clone()); let mut result = heuristic.optimize(s_expr)?; - let mut cascades = CascadesOptimizer::create(ctx.clone(), metadata)?; result = cascades.optimize(result)?; - // So far, we don't have ability to execute distributed query // with reading data from local tales(e.g. system tables). let enable_distributed_query = diff --git a/tests/sqllogictests/suites/query/join.test b/tests/sqllogictests/suites/query/join.test index ff37d4fba6863..6c3a3d2596f60 100644 --- a/tests/sqllogictests/suites/query/join.test +++ b/tests/sqllogictests/suites/query/join.test @@ -514,3 +514,65 @@ select count(*) from numbers(10000) as t1 inner join numbers(10000) as t2 on t1. 10000 # https://github.com/datafuselabs/databend/issues/10523 +statement ok +create table t1 as select * from numbers(1) + +statement ok +set enable_cbo = 0 + +query I +select t1.number FROM + t1 + CROSS JOIN + t1 AS t2, + t1 AS t3, + t1 AS t4, + t1 AS t5, + t1 AS t6, + t1 AS t7, + t1 AS t8, + t1 AS t9, + t1 AS t10, + t1 AS t11, + t1 AS t12, + t1 AS t13, + t1 AS t14, + t1 AS t15, + t1 AS t16, + t1 AS t17, + t1 AS t18, + t1 AS t19, + t1 AS t20 +---- +0 + +query I +SELECT t20.number +FROM +(((((((((((((((((( +t1 AS t20 +CROSS JOIN t1 AS t19) +CROSS JOIN t1 AS t18) +CROSS JOIN t1 AS t17) +CROSS JOIN t1 AS t16) +CROSS JOIN t1 AS t15) +CROSS JOIN t1 AS t14) +CROSS JOIN t1 AS t13) +CROSS JOIN t1 AS t12) +CROSS JOIN t1 AS t11) +CROSS JOIN t1 AS t10) +CROSS JOIN t1 AS t9) +CROSS JOIN t1 AS t8) +CROSS JOIN t1 AS t7) +CROSS JOIN t1 AS t6) +CROSS JOIN t1 AS t5) +CROSS JOIN t1 AS t4) +CROSS JOIN t1 AS t3) +CROSS JOIN t1 AS t2) +CROSS JOIN t1 +---- +0 + +statement ok +set enable_cbo = 1 + From 16caed73abca8048ad3908ee4db88d41f6093879 Mon Sep 17 00:00:00 2001 From: xudong963 Date: Wed, 15 Mar 2023 15:56:19 +0800 Subject: [PATCH 6/6] put test to standalone --- .../suites/mode/standalone/explain/join.test | 67 +++++++++++++++++++ tests/sqllogictests/suites/query/join.test | 64 ------------------ 2 files changed, 67 insertions(+), 64 deletions(-) diff --git a/tests/sqllogictests/suites/mode/standalone/explain/join.test b/tests/sqllogictests/suites/mode/standalone/explain/join.test index c2b48e70f603b..22a2844892476 100644 --- a/tests/sqllogictests/suites/mode/standalone/explain/join.test +++ b/tests/sqllogictests/suites/mode/standalone/explain/join.test @@ -547,3 +547,70 @@ drop table t1 statement ok drop table t2 + + +# https://github.com/datafuselabs/databend/issues/10523 +statement ok +create table t1 as select * from numbers(1) + +statement ok +set enable_cbo = 0 + +query I +select t1.number FROM +t1 +CROSS JOIN +t1 AS t2, +t1 AS t3, +t1 AS t4, +t1 AS t5, +t1 AS t6, +t1 AS t7, +t1 AS t8, +t1 AS t9, +t1 AS t10, +t1 AS t11, +t1 AS t12, +t1 AS t13, +t1 AS t14, +t1 AS t15, +t1 AS t16, +t1 AS t17, +t1 AS t18, +t1 AS t19, +t1 AS t20 +---- +0 + +query I +SELECT t20.number +FROM +(((((((((((((((((( +t1 AS t20 +CROSS JOIN t1 AS t19) +CROSS JOIN t1 AS t18) +CROSS JOIN t1 AS t17) +CROSS JOIN t1 AS t16) +CROSS JOIN t1 AS t15) +CROSS JOIN t1 AS t14) +CROSS JOIN t1 AS t13) +CROSS JOIN t1 AS t12) +CROSS JOIN t1 AS t11) +CROSS JOIN t1 AS t10) +CROSS JOIN t1 AS t9) +CROSS JOIN t1 AS t8) +CROSS JOIN t1 AS t7) +CROSS JOIN t1 AS t6) +CROSS JOIN t1 AS t5) +CROSS JOIN t1 AS t4) +CROSS JOIN t1 AS t3) +CROSS JOIN t1 AS t2) +CROSS JOIN t1 +---- +0 + +statement ok +set enable_cbo = 1 + +statement ok +drop table t1 diff --git a/tests/sqllogictests/suites/query/join.test b/tests/sqllogictests/suites/query/join.test index 6c3a3d2596f60..8ae6b9ab6f27b 100644 --- a/tests/sqllogictests/suites/query/join.test +++ b/tests/sqllogictests/suites/query/join.test @@ -512,67 +512,3 @@ query I select count(*) from numbers(10000) as t1 inner join numbers(10000) as t2 on t1.number = t2.number ---- 10000 - -# https://github.com/datafuselabs/databend/issues/10523 -statement ok -create table t1 as select * from numbers(1) - -statement ok -set enable_cbo = 0 - -query I -select t1.number FROM - t1 - CROSS JOIN - t1 AS t2, - t1 AS t3, - t1 AS t4, - t1 AS t5, - t1 AS t6, - t1 AS t7, - t1 AS t8, - t1 AS t9, - t1 AS t10, - t1 AS t11, - t1 AS t12, - t1 AS t13, - t1 AS t14, - t1 AS t15, - t1 AS t16, - t1 AS t17, - t1 AS t18, - t1 AS t19, - t1 AS t20 ----- -0 - -query I -SELECT t20.number -FROM -(((((((((((((((((( -t1 AS t20 -CROSS JOIN t1 AS t19) -CROSS JOIN t1 AS t18) -CROSS JOIN t1 AS t17) -CROSS JOIN t1 AS t16) -CROSS JOIN t1 AS t15) -CROSS JOIN t1 AS t14) -CROSS JOIN t1 AS t13) -CROSS JOIN t1 AS t12) -CROSS JOIN t1 AS t11) -CROSS JOIN t1 AS t10) -CROSS JOIN t1 AS t9) -CROSS JOIN t1 AS t8) -CROSS JOIN t1 AS t7) -CROSS JOIN t1 AS t6) -CROSS JOIN t1 AS t5) -CROSS JOIN t1 AS t4) -CROSS JOIN t1 AS t3) -CROSS JOIN t1 AS t2) -CROSS JOIN t1 ----- -0 - -statement ok -set enable_cbo = 1 -