From e1e65131448c3f510c5af2bf2a1b86e222ced80f Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Thu, 29 Apr 2021 23:02:45 +0200 Subject: [PATCH 1/4] Workaround where without columns --- datafusion/src/optimizer/filter_push_down.rs | 32 +++++++++++++++++--- datafusion/tests/sql.rs | 14 +++++++++ 2 files changed, 41 insertions(+), 5 deletions(-) diff --git a/datafusion/src/optimizer/filter_push_down.rs b/datafusion/src/optimizer/filter_push_down.rs index 4622e9fc62dc..3caaab6d1abe 100644 --- a/datafusion/src/optimizer/filter_push_down.rs +++ b/datafusion/src/optimizer/filter_push_down.rs @@ -237,17 +237,26 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> { let mut predicates = vec![]; split_members(predicate, &mut predicates); + let mut no_col_predicates = vec![]; + predicates .into_iter() .try_for_each::<_, Result<()>>(|predicate| { let mut columns: HashSet<String> = HashSet::new(); utils::expr_to_column_names(predicate, &mut columns)?; - // collect the predicate - state.filters.push((predicate.clone(), columns)); + if columns.is_empty() { + no_col_predicates.push(predicate) + } else { + // collect the predicate + state.filters.push((predicate.clone(), columns)); + } Ok(()) })?; - - optimize(input, state) + if !no_col_predicates.is_empty() { + Ok(add_filter(optimize(input, state)?, &no_col_predicates)) + } else { + optimize(input, state) + } } LogicalPlan::Projection { input, @@ -368,6 +377,7 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> { new_filters.push(filter_expr.clone()); } } + println!("{:?}", &state); issue_filters( state, @@ -482,6 +492,19 @@ mod tests { Ok(()) } + #[test] + fn filter_no_columns() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(&table_scan) + .filter(lit(0i64).eq(lit(1i64)))?
+ .build()?; + let expected = "\ + Filter: Int64(0) Eq Int64(1)\ + \n TableScan: test projection=None"; + assert_optimized_plan_eq(&plan, expected); + Ok(()) + } + #[test] fn filter_jump_2_plans() -> Result<()> { let table_scan = test_table_scan()?; @@ -490,7 +513,6 @@ mod tests { .project(vec![col("c"), col("b")])? .filter(col("a").eq(lit(1i64)))? .build()?; - // filter is before double projection let expected = "\ Projection: #c, #b\ \n Projection: #a, #b, #c\ diff --git a/datafusion/tests/sql.rs b/datafusion/tests/sql.rs index 70baffc700ba..f7e254ab6659 100644 --- a/datafusion/tests/sql.rs +++ b/datafusion/tests/sql.rs @@ -474,6 +474,20 @@ async fn csv_query_group_by_and_having_and_where() -> Result<()> { Ok(()) } +#[tokio::test] +async fn all_where_empty() -> Result<()> { + let mut ctx = ExecutionContext::new(); + register_aggregate_csv(&mut ctx)?; + let sql = "SELECT * + FROM aggregate_test_100 + WHERE 1=2"; + let mut actual = execute(&mut ctx, sql).await; + actual.sort(); + let expected: Vec<Vec<String>> = vec![]; + assert_eq!(expected, actual); + Ok(()) +} + #[tokio::test] async fn csv_query_having_without_group_by() -> Result<()> { let mut ctx = ExecutionContext::new(); From cc5c2cc7f2f6e60bddcd753adfbd5591720c5ac1 Mon Sep 17 00:00:00 2001 From: "Heres, Daniel" Date: Thu, 29 Apr 2021 23:16:23 +0200 Subject: [PATCH 2/4] Add some docs --- datafusion/src/optimizer/filter_push_down.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/datafusion/src/optimizer/filter_push_down.rs b/datafusion/src/optimizer/filter_push_down.rs index 3caaab6d1abe..6e21134c55e8 100644 --- a/datafusion/src/optimizer/filter_push_down.rs +++ b/datafusion/src/optimizer/filter_push_down.rs @@ -237,6 +237,7 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> { let mut predicates = vec![]; split_members(predicate, &mut predicates); + // Predicates that do not reference any columns (WHERE FALSE, WHERE 1=1, etc.)
let mut no_col_predicates = vec![]; predicates @@ -252,6 +253,9 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> { } Ok(()) })?; + // Predicates without columns will not be pushed down. + // As those contain only literals, they could be optimized using constant folding + // and removal of WHERE TRUE / WHERE FALSE if !no_col_predicates.is_empty() { Ok(add_filter(optimize(input, state)?, &no_col_predicates)) } else { From 2ed38d9af217f6ba86cc9e950653cf5e363e7e20 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Fri, 30 Apr 2021 07:21:01 +0200 Subject: [PATCH 3/4] Remove print statement --- datafusion/src/optimizer/filter_push_down.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/datafusion/src/optimizer/filter_push_down.rs b/datafusion/src/optimizer/filter_push_down.rs index 6e21134c55e8..0e7ccb6ae8b4 100644 --- a/datafusion/src/optimizer/filter_push_down.rs +++ b/datafusion/src/optimizer/filter_push_down.rs @@ -381,7 +381,6 @@ fn optimize(plan: &LogicalPlan, mut state: State) -> Result<LogicalPlan> { new_filters.push(filter_expr.clone()); } } - println!("{:?}", &state); issue_filters( state, From 94c5b3a5b968cb22b86c560a68698c03c8845ad2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dani=C3=ABl=20Heres?= Date: Fri, 30 Apr 2021 07:23:25 +0200 Subject: [PATCH 4/4] Bring back removed comment --- datafusion/src/optimizer/filter_push_down.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/datafusion/src/optimizer/filter_push_down.rs b/datafusion/src/optimizer/filter_push_down.rs index 0e7ccb6ae8b4..356d497491a1 100644 --- a/datafusion/src/optimizer/filter_push_down.rs +++ b/datafusion/src/optimizer/filter_push_down.rs @@ -516,6 +516,7 @@ mod tests { .project(vec![col("c"), col("b")])? .filter(col("a").eq(lit(1i64)))? .build()?; + // filter is before double projection let expected = "\ Projection: #c, #b\ \n Projection: #a, #b, #c\