From 37423dc2968e9e06b9bbd73331cec8cc7c3fae40 Mon Sep 17 00:00:00 2001 From: Javier Goday Date: Thu, 27 May 2021 21:58:19 +0200 Subject: [PATCH 01/15] #410: Remove reundant filters (e.g. c> 5 AND c>5 --> c>5) --- datafusion/src/execution/context.rs | 2 + datafusion/src/optimizer/mod.rs | 1 + .../src/optimizer/remove_duplicate_filters.rs | 257 ++++++++++++++++++ 3 files changed, 260 insertions(+) create mode 100644 datafusion/src/optimizer/remove_duplicate_filters.rs diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index 272e75acba6f..cc0bdb7bc9d8 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -57,6 +57,7 @@ use crate::logical_plan::{ use crate::optimizer::constant_folding::ConstantFolding; use crate::optimizer::filter_push_down::FilterPushDown; use crate::optimizer::limit_push_down::LimitPushDown; +use crate::optimizer::remove_duplicate_filters::RemoveDuplicateFilters; use crate::optimizer::optimizer::OptimizerRule; use crate::optimizer::projection_push_down::ProjectionPushDown; use crate::physical_optimizer::coalesce_batches::CoalesceBatches; @@ -652,6 +653,7 @@ impl ExecutionConfig { Arc::new(EliminateLimit::new()), Arc::new(ProjectionPushDown::new()), Arc::new(FilterPushDown::new()), + Arc::new(RemoveDuplicateFilters::new()), Arc::new(HashBuildProbeOrder::new()), Arc::new(LimitPushDown::new()), ], diff --git a/datafusion/src/optimizer/mod.rs b/datafusion/src/optimizer/mod.rs index 2fb8a3d62950..b5f1c9a6806c 100644 --- a/datafusion/src/optimizer/mod.rs +++ b/datafusion/src/optimizer/mod.rs @@ -25,4 +25,5 @@ pub mod hash_build_probe_order; pub mod limit_push_down; pub mod optimizer; pub mod projection_push_down; +pub mod remove_duplicate_filters; pub mod utils; diff --git a/datafusion/src/optimizer/remove_duplicate_filters.rs b/datafusion/src/optimizer/remove_duplicate_filters.rs new file mode 100644 index 000000000000..12a1fc9e5ae4 --- /dev/null +++ b/datafusion/src/optimizer/remove_duplicate_filters.rs @@ -0,0 +1,257 @@ +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! Remove duplicate filters optimizer rule + +use crate::execution::context::ExecutionProps; +use crate::logical_plan::Expr; +use crate::logical_plan::LogicalPlan; +use crate::optimizer::optimizer::OptimizerRule; +use crate::optimizer::utils; +use crate::optimizer::utils::optimize_explain; +use crate::{error::Result, logical_plan::Operator}; + +/// Remove duplicate filters optimizer +/// # Introduction +pub struct RemoveDuplicateFilters {} + +fn expr_contains<'a>(expr: &'a Expr, needle: &'a Expr) -> bool { + match expr { + Expr::BinaryExpr { + left, + op: Operator::And, + right, + } => expr_contains(left, needle) || expr_contains(right, needle), + Expr::BinaryExpr { + left, + op: Operator::Or, + right, + } => expr_contains(left, needle) || expr_contains(right, needle), + _ => expr == needle, + } +} + +fn as_binary_expr<'a>(expr: &'a Expr) -> Option<&'a Expr> { + match expr { + Expr::BinaryExpr { .. } => Some(expr), + _ => None, + } +} + +fn simplify<'a>(expr: &'a Expr) -> Expr { + match expr { + Expr::BinaryExpr { left, op: _, right } if left == right => simplify(left), + Expr::BinaryExpr { + left, + op: Operator::Or, + right, + } if expr_contains(left, right) => as_binary_expr(left) + .map(|x| match x { + Expr::BinaryExpr { + left: _, + op: Operator::Or, + right: _, + } => x.clone(), + Expr::BinaryExpr { + left: _, + op: Operator::And, + right: _, + } => *right.clone(), + _ => expr.clone(), + }) + .unwrap_or(expr.clone()), + Expr::BinaryExpr { + left, + op: Operator::Or, + right, + } if expr_contains(right, left) => as_binary_expr(right) + .map(|x| match x { + Expr::BinaryExpr { + left: _, + op: Operator::Or, + right: _, + } => *right.clone(), + Expr::BinaryExpr { + left: _, + op: Operator::And, + right: _, + } => x.clone(), + _ => expr.clone(), + }) + .unwrap_or(expr.clone()), + Expr::BinaryExpr { + left, + op: Operator::And, + right, + } if expr_contains(left, right) => as_binary_expr(left) + .map(|x| match x { + Expr::BinaryExpr { + left: _, + op: Operator::Or, + right: _, + } => *right.clone(), + Expr::BinaryExpr { + left: _, + op: Operator::And, + right: _, + } => x.clone(), + _ => expr.clone(), + }) + .unwrap_or(expr.clone()), + Expr::BinaryExpr { + left, + op: Operator::And, + right, + } if expr_contains(right, left) => as_binary_expr(right) + .map(|x| match x { + Expr::BinaryExpr { + left: _, + op: Operator::Or, + right: _, + } => *left.clone(), + Expr::BinaryExpr { + left: _, + op: Operator::And, + right: _, + } => x.clone(), + _ => expr.clone(), + }) + .unwrap_or(expr.clone()), + Expr::BinaryExpr { left, op, right } => Expr::BinaryExpr { + left: Box::new(simplify(&left)), + op: *op, + right: Box::new(simplify(right)), + }, + _ => expr.clone(), + } +} + +#[derive(Debug, Clone, Default)] +struct State { + filters: Vec, +} + +fn optimize(plan: &LogicalPlan) -> Result { + match plan { + LogicalPlan::Filter { input, predicate } => Ok(LogicalPlan::Filter { + input: input.clone(), + predicate: simplify(predicate), + }), + _ => { + let new_inputs = plan + .inputs() + .iter() + .map(|input| optimize(input)) + .collect::>>()?; + + let expr = plan.expressions(); + utils::from_plan(&plan, &expr, &new_inputs) + } + } +} + +impl OptimizerRule for RemoveDuplicateFilters { + fn name(&self) -> &str { + "remove_duplicate_filters" + } + + fn optimize( + &self, + plan: &LogicalPlan, + execution_props: &ExecutionProps, + ) -> Result { + match plan { + LogicalPlan::Explain { + verbose, + plan, + stringified_plans, + schema, + } => { + let schema = schema.as_ref().to_owned().into(); + optimize_explain( + self, + *verbose, + &*plan, + stringified_plans, + &schema, + execution_props, + ) + } + _ => optimize(plan), + } + } +} + +impl RemoveDuplicateFilters { + #[allow(missing_docs)] + pub fn new() -> Self { + Self {} + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::logical_plan::{and, col, lit, LogicalPlanBuilder}; + use crate::test::*; + + fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) { + let rule = RemoveDuplicateFilters::new(); + let optimized_plan = rule + .optimize(plan, &ExecutionProps::new()) + .expect("failed to optimize plan"); + let formatted_plan = format!("{:?}", optimized_plan); + assert_eq!(formatted_plan, expected); + } + + #[test] + fn remove_duplicate_and() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(&table_scan) + .project(vec![col("a")])? + .filter(and(col("b").gt(lit(1)), col("b").gt(lit(1))))? + .build()?; + + assert_optimized_plan_eq( + &plan, + "\ + Filter: #b Gt Int32(1)\ + \n Projection: #a\ + \n TableScan: test projection=None", + ); + Ok(()) + } + + // ((c > 5) AND (d < 6)) AND (c > 5) --> (c > 5) AND (d < 6) + #[test] + fn remove_composed_and() -> Result<()> { + let table_scan = test_table_scan()?; + let plan = LogicalPlanBuilder::from(&table_scan) + .project(vec![col("a")])? + .filter(and( + and(col("a").gt(lit(5)), col("b").lt(lit(6))), + col("a").gt(lit(5)), + ))? + .build()?; + + assert_optimized_plan_eq( + &plan, + "\ + Filter: #a Gt Int32(5) And #b Lt Int32(6)\ + \n Projection: #a\ + \n TableScan: test projection=None", + ); + Ok(()) + } +} From 5cb888affeec939ec3affa9e33d4d15aa41c9c61 Mon Sep 17 00:00:00 2001 From: Javier Goday Date: Fri, 28 May 2021 22:33:10 +0200 Subject: [PATCH 02/15] RemoveDuplicateFilters: fix unit tests --- datafusion/src/execution/context.rs | 2 +- .../src/optimizer/remove_duplicate_filters.rs | 73 ++++++++++++++++--- 2 files changed, 64 insertions(+), 11 deletions(-) diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index cc0bdb7bc9d8..f7989e21ceee 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -57,9 +57,9 @@ use crate::logical_plan::{ use crate::optimizer::constant_folding::ConstantFolding; use crate::optimizer::filter_push_down::FilterPushDown; use crate::optimizer::limit_push_down::LimitPushDown; -use crate::optimizer::remove_duplicate_filters::RemoveDuplicateFilters; use crate::optimizer::optimizer::OptimizerRule; use crate::optimizer::projection_push_down::ProjectionPushDown; +use crate::optimizer::remove_duplicate_filters::RemoveDuplicateFilters; use crate::physical_optimizer::coalesce_batches::CoalesceBatches; use crate::physical_optimizer::merge_exec::AddMergeExec; use crate::physical_optimizer::repartition::Repartition; diff --git a/datafusion/src/optimizer/remove_duplicate_filters.rs b/datafusion/src/optimizer/remove_duplicate_filters.rs index 12a1fc9e5ae4..2ea21cae9098 100644 --- a/datafusion/src/optimizer/remove_duplicate_filters.rs +++ b/datafusion/src/optimizer/remove_duplicate_filters.rs @@ -22,8 +22,13 @@ use crate::optimizer::utils; use crate::optimizer::utils::optimize_explain; use crate::{error::Result, logical_plan::Operator}; -/// Remove duplicate filters optimizer +/// Remove duplicate filters optimizer. /// # Introduction +/// It uses boolean algebra laws to simplify or reduce the number of terms in expressions. +/// +/// Filter: #b Gt Int32(2) And #b Gt Int32(2) +/// is optimized to +/// Filter: #b Gt Int32(2) pub struct RemoveDuplicateFilters {} fn expr_contains<'a>(expr: &'a Expr, needle: &'a Expr) -> bool { @@ -86,7 +91,7 @@ fn simplify<'a>(expr: &'a Expr) -> Expr { left: _, op: Operator::And, right: _, - } => x.clone(), + } => *left.clone(), _ => expr.clone(), }) .unwrap_or(expr.clone()), @@ -137,11 +142,6 @@ fn simplify<'a>(expr: &'a Expr) -> Expr { } } -#[derive(Debug, Clone, Default)] -struct State { - filters: Vec, -} - fn optimize(plan: &LogicalPlan) -> Result { match plan { LogicalPlan::Filter { input, predicate } => Ok(LogicalPlan::Filter { @@ -203,7 +203,7 @@ impl RemoveDuplicateFilters { #[cfg(test)] mod tests { use super::*; - use crate::logical_plan::{and, col, lit, LogicalPlanBuilder}; + use crate::logical_plan::{and, binary_expr, col, lit, Expr, LogicalPlanBuilder}; use crate::test::*; fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) { @@ -216,7 +216,60 @@ mod tests { } #[test] - fn remove_duplicate_and() -> Result<()> { + fn test_simplify_simple_and() -> Result<()> { + // (c > 5) AND (c > 5) + let expr = binary_expr(col("c").gt(lit(5)), Operator::And, col("c").gt(lit(5))); + let expected = col("c").gt(lit(5)); + + assert_eq!(simplify(&expr), expected); + Ok(()) + } + + #[test] + fn test_simplify_composed_and() -> Result<()> { + // ((c > 5) AND (d < 6)) AND (c > 5) + let expr = binary_expr( + binary_expr(col("c").gt(lit(5)), Operator::And, col("d").lt(lit(6))), + Operator::And, + col("c").gt(lit(5)), + ); + let expected = + binary_expr(col("c").gt(lit(5)), Operator::And, col("d").lt(lit(6))); + + assert_eq!(simplify(&expr), expected); + Ok(()) + } + + #[test] + fn test_simplify_negated_and() -> Result<()> { + // (c > 5) AND !(c > 5) -- can't remove + let expr = binary_expr( + col("c").gt(lit(5)), + Operator::And, + Expr::not(col("c").gt(lit(5))), + ); + let expected = expr.clone(); + + assert_eq!(simplify(&expr), expected); + Ok(()) + } + + #[test] + fn test_simplify_or_and() -> Result<()> { + // (c > 5) OR ((d < 6) AND (c > 5) -- can't remove + let expr = binary_expr( + col("c").gt(lit(5)), + Operator::Or, + binary_expr(col("d").lt(lit(6)), Operator::And, col("c").gt(lit(5))), + ); + let expected = col("c").gt(lit(5)); + + assert_eq!(simplify(&expr), expected); + Ok(()) + } + + #[test] + fn test_optimized_plan() -> Result<()> { let table_scan = test_table_scan()?; let plan = LogicalPlanBuilder::from(&table_scan) .project(vec![col("a")])? @@ -235,7 +288,7 @@ mod tests { // ((c > 5) AND (d < 6)) AND (c > 5) --> (c > 5) AND (d < 6) #[test] - fn remove_composed_and() -> Result<()> { + fn test_optimized_plan_with_composed_and() -> Result<()> { let table_scan = test_table_scan()?; let plan = LogicalPlanBuilder::from(&table_scan) .project(vec![col("a")])? From 3c37a55775815d4c0460d13f6a2d79fbdb1051fe Mon Sep 17 00:00:00 2001 From: Javier Goday Date: Fri, 28 May 2021 22:39:31 +0200 Subject: [PATCH 03/15] fix lint --- datafusion/src/optimizer/remove_duplicate_filters.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/src/optimizer/remove_duplicate_filters.rs b/datafusion/src/optimizer/remove_duplicate_filters.rs index 2ea21cae9098..ecb0351436b1 100644 --- a/datafusion/src/optimizer/remove_duplicate_filters.rs +++ b/datafusion/src/optimizer/remove_duplicate_filters.rs @@ -25,7 +25,7 @@ use crate::{error::Result, logical_plan::Operator}; /// Remove duplicate filters optimizer. /// # Introduction /// It uses boolean algebra laws to simplify or reduce the number of terms in expressions. -/// +/// /// Filter: #b Gt Int32(2) And #b Gt Int32(2) /// is optimized to /// Filter: #b Gt Int32(2) From e64c69f0aab67976f0db2b20116c321b81713836 Mon Sep 17 00:00:00 2001 From: Javier Goday Date: Sat, 29 May 2021 15:54:56 +0200 Subject: [PATCH 04/15] fix erroneous simplifications of arithmetic expressions --- .../src/optimizer/remove_duplicate_filters.rs | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/datafusion/src/optimizer/remove_duplicate_filters.rs b/datafusion/src/optimizer/remove_duplicate_filters.rs index ecb0351436b1..aa158629ad0d 100644 --- a/datafusion/src/optimizer/remove_duplicate_filters.rs +++ b/datafusion/src/optimizer/remove_duplicate_filters.rs @@ -54,9 +54,17 @@ fn as_binary_expr<'a>(expr: &'a Expr) -> Option<&'a Expr> { } } +fn operator_is_boolean(op: &Operator) -> bool { + op == &Operator::And || op == &Operator::Or +} + fn simplify<'a>(expr: &'a Expr) -> Expr { match expr { - Expr::BinaryExpr { left, op: _, right } if left == right => simplify(left), + Expr::BinaryExpr { left, op, right } + if left == right && operator_is_boolean(op) => + { + simplify(left) + } Expr::BinaryExpr { left, op: Operator::Or, @@ -268,6 +276,17 @@ mod tests { Ok(()) } + #[test] + fn test_do_not_simplify_arithmetic_expr() -> Result<()> { + let expr_plus = binary_expr(lit(1), Operator::Plus, lit(1)); + let expr_eq = binary_expr(lit(1), Operator::Eq, lit(1)); + + assert_eq!(simplify(&expr_plus), expr_plus); + assert_eq!(simplify(&expr_eq), expr_eq); + + Ok(()) + } + #[test] fn test_optimized_plan() -> Result<()> { let table_scan = test_table_scan()?; From 9fbeb96cf78f1a916ef19198b095aa351451ba63 Mon Sep 17 00:00:00 2001 From: Javier Goday Date: Mon, 31 May 2021 22:00:47 +0200 Subject: [PATCH 05/15] RemoveDuplicateFilters: add more simplification rules (@dandandan) --- .../src/optimizer/remove_duplicate_filters.rs | 284 +++++++++++++++++- 1 file changed, 283 insertions(+), 1 deletion(-) diff --git a/datafusion/src/optimizer/remove_duplicate_filters.rs b/datafusion/src/optimizer/remove_duplicate_filters.rs index aa158629ad0d..dbcd78287350 100644 --- a/datafusion/src/optimizer/remove_duplicate_filters.rs +++ b/datafusion/src/optimizer/remove_duplicate_filters.rs @@ -15,11 +15,12 @@ //! Remove duplicate filters optimizer rule use crate::execution::context::ExecutionProps; -use crate::logical_plan::Expr; use crate::logical_plan::LogicalPlan; +use crate::logical_plan::{lit, Expr}; use crate::optimizer::optimizer::OptimizerRule; use crate::optimizer::utils; use crate::optimizer::utils::optimize_explain; +use crate::scalar::ScalarValue; use crate::{error::Result, logical_plan::Operator}; /// Remove duplicate filters optimizer. @@ -58,8 +59,144 @@ fn operator_is_boolean(op: &Operator) -> bool { op == &Operator::And || op == &Operator::Or } +fn is_one<'a>(s: &'a Expr) -> bool { + match s { + Expr::Literal(ScalarValue::Int8(Some(1))) => true, + Expr::Literal(ScalarValue::Int16(Some(1))) => true, + Expr::Literal(ScalarValue::Int32(Some(1))) => true, + Expr::Literal(ScalarValue::Int64(Some(1))) => true, + Expr::Literal(ScalarValue::UInt8(Some(1))) => true, + Expr::Literal(ScalarValue::UInt16(Some(1))) => true, + Expr::Literal(ScalarValue::UInt32(Some(1))) => true, + Expr::Literal(ScalarValue::UInt64(Some(1))) => true, + Expr::Literal(ScalarValue::Float32(Some(v))) if *v == 1. => true, + Expr::Literal(ScalarValue::Float64(Some(v))) if *v == 1. => true, + _ => false + } +} + +fn is_zero<'a>(s: &'a Expr) -> bool { + match s { + Expr::Literal(ScalarValue::Int8(Some(0))) => true, + Expr::Literal(ScalarValue::Int16(Some(0))) => true, + Expr::Literal(ScalarValue::Int32(Some(0))) => true, + Expr::Literal(ScalarValue::Int64(Some(0))) => true, + Expr::Literal(ScalarValue::UInt8(Some(0))) => true, + Expr::Literal(ScalarValue::UInt16(Some(0))) => true, + Expr::Literal(ScalarValue::UInt32(Some(0))) => true, + Expr::Literal(ScalarValue::UInt64(Some(0))) => true, + Expr::Literal(ScalarValue::Float32(Some(v))) if *v == 0. => true, + Expr::Literal(ScalarValue::Float64(Some(v))) if *v == 0. => true, + _ => false + } +} + +fn is_true<'a>(expr: &'a Expr) -> bool { + match expr { + Expr::Literal(ScalarValue::Boolean(Some(v))) => *v, + _ => false, + } +} + +fn is_false<'a>(expr: &'a Expr) -> bool { + match expr { + Expr::Literal(ScalarValue::Boolean(Some(v))) => *v == false, + _ => false, + } +} + fn simplify<'a>(expr: &'a Expr) -> Expr { match expr { + Expr::BinaryExpr { + left, + op: Operator::Or, + right, + } if is_true(left) || is_true(right) => lit(true), + Expr::BinaryExpr { + left, + op: Operator::Or, + right, + } if is_false(left) => simplify(right), + Expr::BinaryExpr { + left, + op: Operator::Or, + right, + } if is_false(right) => simplify(left), + Expr::BinaryExpr { + left, + op: Operator::Or, + right, + } if left == right => simplify(left), + Expr::BinaryExpr { + left, + op: Operator::And, + right, + } if is_false(left) || is_false(right) => lit(false), + Expr::BinaryExpr { + left, + op: Operator::And, + right, + } if is_true(right) => simplify(left), + Expr::BinaryExpr { + left, + op: Operator::And, + right, + } if is_true(left) => simplify(right), + Expr::BinaryExpr { + left, + op: Operator::And, + right, + } if left == right => simplify(right), + Expr::BinaryExpr { + left, + op: Operator::Minus, + right + } if is_zero(left) => Expr::Negative(Box::new(simplify(right))), + Expr::BinaryExpr { + left, + op: Operator::Minus, + right + } if is_zero(right) => simplify(left), + Expr::BinaryExpr { + left, + op: Operator::Minus, + right + } if left == right => lit(0), + Expr::BinaryExpr { + left, + op: Operator::Multiply, + right + } if is_zero(left) || is_zero(right) => lit(0), + Expr::BinaryExpr { + left, + op: Operator::Multiply, + right + } if is_one(left) => simplify(right), + Expr::BinaryExpr { + left, + op: Operator::Multiply, + right + } if is_one(right) => simplify(left), + Expr::BinaryExpr { + left, + op: Operator::Divide, + right + } if is_one(right) => simplify(left), + Expr::BinaryExpr { + left, + op: Operator::Divide, + right + } if left == right => lit(1), + Expr::BinaryExpr { + left, + op: Operator::Plus, + right + } if is_zero(left) => simplify(right), + Expr::BinaryExpr { + left, + op: Operator::Plus, + right + } if is_zero(right) => simplify(left), Expr::BinaryExpr { left, op, right } if left == right && operator_is_boolean(op) => { @@ -156,6 +293,11 @@ fn optimize(plan: &LogicalPlan) -> Result { input: input.clone(), predicate: simplify(predicate), }), + LogicalPlan::Projection { expr, input, schema } => Ok(LogicalPlan::Projection { + expr: expr.into_iter().map(|x| simplify(x)).collect::>(), + input: input.clone(), + schema: schema.clone(), + }), _ => { let new_inputs = plan .inputs() @@ -223,6 +365,146 @@ mod tests { assert_eq!(formatted_plan, expected); } + #[test] + fn test_simplify_or_true() -> Result<()> { + let expr_a = binary_expr(col("c"), Operator::Or, lit(true)); + let expr_b = binary_expr(lit(true), Operator::Or, col("c")); + let expected = lit(true); + + assert_eq!(simplify(&expr_a), expected); + assert_eq!(simplify(&expr_b), expected); + Ok(()) + } + + #[test] + fn test_simplify_or_false() -> Result<()> { + let expr_a = binary_expr(lit(false), Operator::Or, col("c")); + let expr_b = binary_expr(col("c"), Operator::Or, lit(false)); + let expected = col("c"); + + assert_eq!(simplify(&expr_a), expected); + assert_eq!(simplify(&expr_b), expected); + Ok(()) + } + + #[test] + fn test_simplify_or_same() -> Result<()> { + let expr = binary_expr(col("c"), Operator::Or, col("c")); + let expected = col("c"); + + assert_eq!(simplify(&expr), expected); + Ok(()) + } + + #[test] + fn test_simplify_and_false() -> Result<()> { + let expr_a = binary_expr(lit(false), Operator::And, col("c")); + let expr_b = binary_expr(col("c"), Operator::And, lit(false)); + let expected = lit(false); + + assert_eq!(simplify(&expr_a), expected); + assert_eq!(simplify(&expr_b), expected); + Ok(()) + } + + #[test] + fn test_simplify_and_same() -> Result<()> { + let expr = binary_expr(col("c"), Operator::And, col("c")); + let expected = col("c"); + + assert_eq!(simplify(&expr), expected); + Ok(()) + } + + #[test] + fn test_simplify_and_true() -> Result<()> { + let expr_a = binary_expr(lit(true), Operator::And, col("c")); + let expr_b = binary_expr(col("c"), Operator::And, lit(true)); + let expected = col("c"); + + assert_eq!(simplify(&expr_a), expected); + assert_eq!(simplify(&expr_b), expected); + Ok(()) + } + + #[test] + fn test_simplify_minus_zero() -> Result<()> { + let expr = binary_expr(lit(0), Operator::Minus, col("c")); + let expected = Expr::Negative(Box::new(col("c"))); + + assert_eq!(simplify(&expr), expected); + Ok(()) + } + + #[test] + fn test_simplify_minus_same() -> Result<()> { + let expr = binary_expr(col("c"), Operator::Minus, col("c")); + let expected = lit(0); + + assert_eq!(simplify(&expr), expected); + Ok(()) + } + + #[test] + fn test_simplify_multiply_by_zero() -> Result<()> { + let expr_a = binary_expr(col("c"), Operator::Multiply, lit(0)); + let expr_b = binary_expr(lit(0), Operator::Multiply, col("c")); + let expected = lit(0); + + assert_eq!(simplify(&expr_a), expected); + assert_eq!(simplify(&expr_b), expected); + Ok(()) + } + + #[test] + fn test_simplify_multiply_by_one() -> Result<()> { + let expr_a = binary_expr(col("c"), Operator::Multiply, lit(1)); + let expr_b = binary_expr(lit(1), Operator::Multiply, col("c")); + let expected = col("c"); + + assert_eq!(simplify(&expr_a), expected); + assert_eq!(simplify(&expr_b), expected); + Ok(()) + } + + #[test] + fn test_simplify_divide_by_one() -> Result<()> { + let expr = binary_expr(col("c"), Operator::Divide, lit(1)); + let expected = col("c"); + + assert_eq!(simplify(&expr), expected); + Ok(()) + } + + #[test] + fn test_simplify_divide_by_same() -> Result<()> { + let expr = binary_expr(col("c"), Operator::Divide, col("c")); + let expected = lit(1); + + assert_eq!(simplify(&expr), expected); + Ok(()) + } + + #[test] + fn test_simplify_cancel_sub() -> Result<()> { + let expr = binary_expr(col("c"), Operator::Minus, col("c")); + let expected = lit(0); + + assert_eq!(simplify(&expr), expected); + Ok(()) + } + + #[test] + fn test_simplify_plus_zero() -> Result<()> { + let expr_a = binary_expr(col("c"), Operator::Plus, lit(0)); + let expr_b = binary_expr(lit(0), Operator::Plus, col("c")); + let expected = col("c"); + + assert_eq!(simplify(&expr_a), expected); + assert_eq!(simplify(&expr_b), expected); + Ok(()) + } + #[test] fn test_simplify_simple_and() -> Result<()> { // (c > 5) AND (c > 5) From 4ade2ba8b5229e04eab43dc13794bda39e069d71 Mon Sep 17 00:00:00 2001 From: Javier Goday Date: Tue, 1 Jun 2021 20:57:35 +0200 Subject: [PATCH 06/15] Remove unnecessary lifetime specifiers and other fixes ... --- .../src/optimizer/remove_duplicate_filters.rs | 159 +++--------------- 1 file changed, 27 insertions(+), 132 deletions(-) diff --git a/datafusion/src/optimizer/remove_duplicate_filters.rs b/datafusion/src/optimizer/remove_duplicate_filters.rs index dbcd78287350..d6540084ce7d 100644 --- a/datafusion/src/optimizer/remove_duplicate_filters.rs +++ b/datafusion/src/optimizer/remove_duplicate_filters.rs @@ -27,12 +27,12 @@ use crate::{error::Result, logical_plan::Operator}; /// # Introduction /// It uses boolean algebra laws to simplify or reduce the number of terms in expressions. /// -/// Filter: #b Gt Int32(2) And #b Gt Int32(2) +/// Filter: b > 2 AND b > 2 /// is optimized to -/// Filter: #b Gt Int32(2) +/// Filter: b > 2 pub struct RemoveDuplicateFilters {} -fn expr_contains<'a>(expr: &'a Expr, needle: &'a Expr) -> bool { +fn expr_contains(expr: &Expr, needle: &Expr) -> bool { match expr { Expr::BinaryExpr { left, @@ -48,18 +48,18 @@ fn expr_contains<'a>(expr: &'a Expr, needle: &'a Expr) -> bool { } } -fn as_binary_expr<'a>(expr: &'a Expr) -> Option<&'a Expr> { +fn as_binary_expr(expr: &Expr) -> Option<&Expr> { match expr { Expr::BinaryExpr { .. } => Some(expr), _ => None, } } -fn operator_is_boolean(op: &Operator) -> bool { - op == &Operator::And || op == &Operator::Or +fn operator_is_boolean(op: Operator) -> bool { + op == Operator::And || op == Operator::Or } -fn is_one<'a>(s: &'a Expr) -> bool { +fn is_one(s: &Expr) -> bool { match s { Expr::Literal(ScalarValue::Int8(Some(1))) => true, Expr::Literal(ScalarValue::Int16(Some(1))) => true, @@ -71,41 +71,25 @@ fn is_one<'a>(s: &'a Expr) -> bool { Expr::Literal(ScalarValue::UInt64(Some(1))) => true, Expr::Literal(ScalarValue::Float32(Some(v))) if *v == 1. => true, Expr::Literal(ScalarValue::Float64(Some(v))) if *v == 1. => true, - _ => false - } -} - -fn is_zero<'a>(s: &'a Expr) -> bool { - match s { - Expr::Literal(ScalarValue::Int8(Some(0))) => true, - Expr::Literal(ScalarValue::Int16(Some(0))) => true, - Expr::Literal(ScalarValue::Int32(Some(0))) => true, - Expr::Literal(ScalarValue::Int64(Some(0))) => true, - Expr::Literal(ScalarValue::UInt8(Some(0))) => true, - Expr::Literal(ScalarValue::UInt16(Some(0))) => true, - Expr::Literal(ScalarValue::UInt32(Some(0))) => true, - Expr::Literal(ScalarValue::UInt64(Some(0))) => true, - Expr::Literal(ScalarValue::Float32(Some(v))) if *v == 0. => true, - Expr::Literal(ScalarValue::Float64(Some(v))) if *v == 0. => true, - _ => false + _ => false, } } -fn is_true<'a>(expr: &'a Expr) -> bool { +fn is_true(expr: &Expr) -> bool { match expr { Expr::Literal(ScalarValue::Boolean(Some(v))) => *v, _ => false, } } -fn is_false<'a>(expr: &'a Expr) -> bool { +fn is_false(expr: &Expr) -> bool { match expr { Expr::Literal(ScalarValue::Boolean(Some(v))) => *v == false, _ => false, } } -fn simplify<'a>(expr: &'a Expr) -> Expr { +fn simplify(expr: &Expr) -> Expr { match expr { Expr::BinaryExpr { left, @@ -147,58 +131,28 @@ fn simplify<'a>(expr: &'a Expr) -> Expr { op: Operator::And, right, } if left == right => simplify(right), - Expr::BinaryExpr { - left, - op: Operator::Minus, - right - } if is_zero(left) => Expr::Negative(Box::new(simplify(right))), - Expr::BinaryExpr { - left, - op: Operator::Minus, - right - } if is_zero(right) => simplify(left), - Expr::BinaryExpr { - left, - op: Operator::Minus, - right - } if left == right => lit(0), Expr::BinaryExpr { left, op: Operator::Multiply, - right - } if is_zero(left) || is_zero(right) => lit(0), - Expr::BinaryExpr { - left, - op: Operator::Multiply, - right + right, } if is_one(left) => simplify(right), Expr::BinaryExpr { left, op: Operator::Multiply, - right + right, } if is_one(right) => simplify(left), Expr::BinaryExpr { left, op: Operator::Divide, - right + right, } if is_one(right) => simplify(left), Expr::BinaryExpr { left, op: Operator::Divide, - right + right, } if left == right => lit(1), - Expr::BinaryExpr { - left, - op: Operator::Plus, - right - } if is_zero(left) => simplify(right), - Expr::BinaryExpr { - left, - op: Operator::Plus, - right - } if is_zero(right) => simplify(left), Expr::BinaryExpr { left, op, right } - if left == right && operator_is_boolean(op) => + if left == right && operator_is_boolean(*op) => { simplify(left) } @@ -288,27 +242,17 @@ fn simplify<'a>(expr: &'a Expr) -> Expr { } fn optimize(plan: &LogicalPlan) -> Result { - match plan { - LogicalPlan::Filter { input, predicate } => Ok(LogicalPlan::Filter { - input: input.clone(), - predicate: simplify(predicate), - }), - LogicalPlan::Projection { expr, input, schema } => Ok(LogicalPlan::Projection { - expr: expr.into_iter().map(|x| simplify(x)).collect::>(), - input: input.clone(), - schema: schema.clone(), - }), - _ => { - let new_inputs = plan - .inputs() - .iter() - .map(|input| optimize(input)) - .collect::>>()?; - - let expr = plan.expressions(); - utils::from_plan(&plan, &expr, &new_inputs) - } - } + let new_inputs = plan + .inputs() + .iter() + .map(|input| optimize(input)) + .collect::>>()?; + let expr = plan + .expressions() + .into_iter() + .map(|x| simplify(&x)) + .collect::>(); + utils::from_plan(&plan, &expr, &new_inputs) } impl OptimizerRule for RemoveDuplicateFilters { @@ -427,35 +371,6 @@ mod tests { Ok(()) } - #[test] - fn test_simplify_minus_zero() -> Result<()> { - let expr = binary_expr(lit(0), Operator::Minus, col("c")); - let expected = Expr::Negative(Box::new(col("c"))); - - assert_eq!(simplify(&expr), expected); - Ok(()) - } - - #[test] - fn test_simplify_minus_same() -> Result<()> { - let expr = binary_expr(col("c"), Operator::Minus, col("c")); - let expected = lit(0); - - assert_eq!(simplify(&expr), expected); - Ok(()) - } - - #[test] - fn test_simplify_multiply_by_zero() -> Result<()> { - let expr_a = binary_expr(col("c"), Operator::Multiply, lit(0)); - let expr_b = binary_expr(lit(0), Operator::Multiply, col("c")); - let expected = lit(0); - - assert_eq!(simplify(&expr_a), expected); - assert_eq!(simplify(&expr_b), expected); - Ok(()) - } - #[test] fn test_simplify_multiply_by_one() -> Result<()> { let expr_a = binary_expr(col("c"), Operator::Multiply, lit(1)); @@ -485,26 +400,6 @@ mod tests { Ok(()) } - #[test] - fn test_simplify_cancel_sub() -> Result<()> { - let expr = binary_expr(col("c"), Operator::Minus, col("c")); - let expected = lit(0); - - assert_eq!(simplify(&expr), expected); - Ok(()) - } - - #[test] - fn test_simplify_plus_zero() -> Result<()> { - let expr_a = binary_expr(col("c"), Operator::Plus, lit(0)); - let expr_b = binary_expr(lit(0), Operator::Plus, col("c")); - let expected = col("c"); - - assert_eq!(simplify(&expr_a), expected); - assert_eq!(simplify(&expr_b), expected); - Ok(()) - } - #[test] fn test_simplify_simple_and() -> Result<()> { // (c > 5) AND (c > 5) From 55fb021a4a586caae0f7586e4757ccb51e8ccf5d Mon Sep 17 00:00:00 2001 From: Javier Goday Date: Tue, 1 Jun 2021 22:15:49 +0200 Subject: [PATCH 07/15] RemoveDuplicateFilter: fix is_one matches --- .../src/optimizer/remove_duplicate_filters.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/datafusion/src/optimizer/remove_duplicate_filters.rs b/datafusion/src/optimizer/remove_duplicate_filters.rs index d6540084ce7d..968526be66e5 100644 --- a/datafusion/src/optimizer/remove_duplicate_filters.rs +++ b/datafusion/src/optimizer/remove_duplicate_filters.rs @@ -61,14 +61,14 @@ fn operator_is_boolean(op: Operator) -> bool { fn is_one(s: &Expr) -> bool { match s { - Expr::Literal(ScalarValue::Int8(Some(1))) => true, - Expr::Literal(ScalarValue::Int16(Some(1))) => true, - Expr::Literal(ScalarValue::Int32(Some(1))) => true, - Expr::Literal(ScalarValue::Int64(Some(1))) => true, - Expr::Literal(ScalarValue::UInt8(Some(1))) => true, - Expr::Literal(ScalarValue::UInt16(Some(1))) => true, - Expr::Literal(ScalarValue::UInt32(Some(1))) => true, - Expr::Literal(ScalarValue::UInt64(Some(1))) => true, + Expr::Literal(ScalarValue::Int8(Some(1))) + | Expr::Literal(ScalarValue::Int16(Some(1))) + | Expr::Literal(ScalarValue::Int32(Some(1))) + | Expr::Literal(ScalarValue::Int64(Some(1))) + | Expr::Literal(ScalarValue::UInt8(Some(1))) + | Expr::Literal(ScalarValue::UInt16(Some(1))) + | Expr::Literal(ScalarValue::UInt32(Some(1))) + | Expr::Literal(ScalarValue::UInt64(Some(1))) => true, Expr::Literal(ScalarValue::Float32(Some(v))) if *v == 1. => true, Expr::Literal(ScalarValue::Float64(Some(v))) if *v == 1. => true, _ => false, From 25adf5334e79cdbb3c03b6ab0330f95d7baf9d0f Mon Sep 17 00:00:00 2001 From: Javier Goday Date: Tue, 1 Jun 2021 22:25:06 +0200 Subject: [PATCH 08/15] Change remove_duplicate_filter to simplify_expressions --- datafusion/src/execution/context.rs | 4 ++-- datafusion/src/optimizer/mod.rs | 2 +- ..._duplicate_filters.rs => simplify_expressions.rs} | 12 ++++++------ 3 files changed, 9 insertions(+), 9 deletions(-) rename datafusion/src/optimizer/{remove_duplicate_filters.rs => simplify_expressions.rs} (98%) diff --git a/datafusion/src/execution/context.rs b/datafusion/src/execution/context.rs index f7989e21ceee..27b1962c8687 100644 --- a/datafusion/src/execution/context.rs +++ b/datafusion/src/execution/context.rs @@ -59,7 +59,7 @@ use crate::optimizer::filter_push_down::FilterPushDown; use crate::optimizer::limit_push_down::LimitPushDown; use crate::optimizer::optimizer::OptimizerRule; use crate::optimizer::projection_push_down::ProjectionPushDown; -use crate::optimizer::remove_duplicate_filters::RemoveDuplicateFilters; +use crate::optimizer::simplify_expressions::SimplifyExpressions; use crate::physical_optimizer::coalesce_batches::CoalesceBatches; use crate::physical_optimizer::merge_exec::AddMergeExec; use crate::physical_optimizer::repartition::Repartition; @@ -653,7 +653,7 @@ impl ExecutionConfig { Arc::new(EliminateLimit::new()), Arc::new(ProjectionPushDown::new()), Arc::new(FilterPushDown::new()), - Arc::new(RemoveDuplicateFilters::new()), + Arc::new(SimplifyExpressions::new()), Arc::new(HashBuildProbeOrder::new()), Arc::new(LimitPushDown::new()), ], diff --git a/datafusion/src/optimizer/mod.rs b/datafusion/src/optimizer/mod.rs index b5f1c9a6806c..e360a54f2a96 100644 --- a/datafusion/src/optimizer/mod.rs +++ b/datafusion/src/optimizer/mod.rs @@ -25,5 +25,5 @@ pub mod hash_build_probe_order; pub mod limit_push_down; pub mod optimizer; pub mod projection_push_down; -pub mod remove_duplicate_filters; +pub mod simplify_expressions; pub mod utils; diff --git a/datafusion/src/optimizer/remove_duplicate_filters.rs b/datafusion/src/optimizer/simplify_expressions.rs similarity index 98% rename from datafusion/src/optimizer/remove_duplicate_filters.rs rename to datafusion/src/optimizer/simplify_expressions.rs index 968526be66e5..f564ec49fa97 100644 --- a/datafusion/src/optimizer/remove_duplicate_filters.rs +++ b/datafusion/src/optimizer/simplify_expressions.rs @@ -23,14 +23,14 @@ use crate::optimizer::utils::optimize_explain; use crate::scalar::ScalarValue; use crate::{error::Result, logical_plan::Operator}; -/// Remove duplicate filters optimizer. +/// Simplify expressions optimizer. /// # Introduction /// It uses boolean algebra laws to simplify or reduce the number of terms in expressions. /// /// Filter: b > 2 AND b > 2 /// is optimized to /// Filter: b > 2 -pub struct RemoveDuplicateFilters {} +pub struct SimplifyExpressions {} fn expr_contains(expr: &Expr, needle: &Expr) -> bool { match expr { @@ -255,9 +255,9 @@ fn optimize(plan: &LogicalPlan) -> Result { utils::from_plan(&plan, &expr, &new_inputs) } -impl OptimizerRule for RemoveDuplicateFilters { +impl OptimizerRule for SimplifyExpressions { fn name(&self) -> &str { - "remove_duplicate_filters" + "simplify_expressions" } fn optimize( @@ -287,7 +287,7 @@ impl OptimizerRule for RemoveDuplicateFilters { } } -impl RemoveDuplicateFilters { +impl SimplifyExpressions { #[allow(missing_docs)] pub fn new() -> Self { Self {} @@ -301,7 +301,7 @@ mod tests { use crate::test::*; fn assert_optimized_plan_eq(plan: &LogicalPlan, expected: &str) { - let rule = RemoveDuplicateFilters::new(); + let rule = SimplifyExpressions::new(); let optimized_plan = rule .optimize(plan, &ExecutionProps::new()) .expect("failed to optimize plan"); From 2e777a8580972dcc460e883aefc64afb609416db Mon Sep 17 00:00:00 2001 From: jgoday Date: Wed, 2 Jun 2021 12:13:13 +0200 Subject: [PATCH 09/15] fix simplify expressions --- .../src/optimizer/simplify_expressions.rs | 53 +++++++++++++++---- 1 file changed, 42 insertions(+), 11 deletions(-) diff --git a/datafusion/src/optimizer/simplify_expressions.rs b/datafusion/src/optimizer/simplify_expressions.rs index f564ec49fa97..781f404ef4c8 100644 --- a/datafusion/src/optimizer/simplify_expressions.rs +++ b/datafusion/src/optimizer/simplify_expressions.rs @@ -82,6 +82,13 @@ fn is_true(expr: &Expr) -> bool { } } +fn is_null(expr: &Expr) -> bool { + match expr { + Expr::Literal(v) => v.is_null(), + _ => false, + } +} + fn is_false(expr: &Expr) -> bool { match expr { Expr::Literal(ScalarValue::Boolean(Some(v))) => *v == false, @@ -146,6 +153,11 @@ fn simplify(expr: &Expr) -> Expr { op: Operator::Divide, right, } if is_one(right) => simplify(left), + Expr::BinaryExpr { + left, + op: Operator::Divide, + right, + } if left == right && is_null(left) => *left.clone(), Expr::BinaryExpr { left, op: Operator::Divide, @@ -166,12 +178,12 @@ fn simplify(expr: &Expr) -> Expr { left: _, op: Operator::Or, right: _, - } => x.clone(), + } => simplify(&x.clone()), Expr::BinaryExpr { left: _, op: Operator::And, right: _, - } => *right.clone(), + } => simplify(&*right.clone()), _ => expr.clone(), }) .unwrap_or(expr.clone()), @@ -185,12 +197,12 @@ fn simplify(expr: &Expr) -> Expr { left: _, op: Operator::Or, right: _, - } => *right.clone(), + } => simplify(&*right.clone()), Expr::BinaryExpr { left: _, op: Operator::And, right: _, - } => *left.clone(), + } => simplify(&*left.clone()), _ => expr.clone(), }) .unwrap_or(expr.clone()), @@ -204,12 +216,12 @@ fn simplify(expr: &Expr) -> Expr { left: _, op: Operator::Or, right: _, - } => *right.clone(), + } => simplify(&*right.clone()), Expr::BinaryExpr { left: _, op: Operator::And, right: _, - } => x.clone(), + } => simplify(&x.clone()), _ => expr.clone(), }) .unwrap_or(expr.clone()), @@ -223,12 +235,12 @@ fn simplify(expr: &Expr) -> Expr { left: _, op: Operator::Or, right: _, - } => *left.clone(), + } => simplify(&*left.clone()), Expr::BinaryExpr { left: _, op: Operator::And, right: _, - } => x.clone(), + } => simplify(&x.clone()), _ => expr.clone(), }) .unwrap_or(expr.clone()), @@ -454,7 +466,26 @@ mod tests { } #[test] - fn test_do_not_simplify_arithmetic_expr() -> Result<()> { + fn test_simplify_and_and_false() -> Result<()> { + let expr = binary_expr(lit(ScalarValue::Boolean(None)), Operator::And, lit(false)); + let expr_eq = lit(false); + + assert_eq!(simplify(&expr), expr_eq); + Ok(()) + } + + #[test] + fn test_simplify_divide_null_by_null() -> Result<()> { + let null = Expr::Literal(ScalarValue::Int32(None)); + let expr_plus = binary_expr(null.clone(), Operator::Divide, null.clone()); + let expr_eq = null; + + assert_eq!(simplify(&expr_plus), expr_eq); + Ok(()) + } + + #[test] + fn test_simplify_do_not_simplify_arithmetic_expr() -> Result<()> { let expr_plus = binary_expr(lit(1), Operator::Plus, lit(1)); let expr_eq = binary_expr(lit(1), Operator::Eq, lit(1)); @@ -465,7 +496,7 @@ mod tests { } #[test] - fn test_optimized_plan() -> Result<()> { + fn test_simplify_optimized_plan() -> Result<()> { let table_scan = test_table_scan()?; let plan = LogicalPlanBuilder::from(&table_scan) .project(vec![col("a")])? @@ -484,7 +515,7 @@ mod tests { // ((c > 5) AND (d < 6)) AND (c > 5) --> (c > 5) AND (d < 6) #[test] - fn test_optimized_plan_with_composed_and() -> Result<()> { + fn test_simplify_optimized_plan_with_composed_and() -> Result<()> { let table_scan = test_table_scan()?; let plan = LogicalPlanBuilder::from(&table_scan) .project(vec![col("a")])? From c79899bce6d288e224b677128dd4326348b299d4 Mon Sep 17 00:00:00 2001 From: jgoday Date: Wed, 2 Jun 2021 13:29:40 +0200 Subject: [PATCH 10/15] fix lint and clippy --- datafusion/src/optimizer/simplify_expressions.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/datafusion/src/optimizer/simplify_expressions.rs b/datafusion/src/optimizer/simplify_expressions.rs index 781f404ef4c8..797192560286 100644 --- a/datafusion/src/optimizer/simplify_expressions.rs +++ b/datafusion/src/optimizer/simplify_expressions.rs @@ -91,7 +91,7 @@ fn is_null(expr: &Expr) -> bool { fn is_false(expr: &Expr) -> bool { match expr { - Expr::Literal(ScalarValue::Boolean(Some(v))) => *v == false, + Expr::Literal(ScalarValue::Boolean(Some(v))) => !(*v), _ => false, } } @@ -186,7 +186,7 @@ fn simplify(expr: &Expr) -> Expr { } => simplify(&*right.clone()), _ => expr.clone(), }) - .unwrap_or(expr.clone()), + .unwrap_or_else(|| expr.clone()), Expr::BinaryExpr { left, op: Operator::Or, @@ -205,7 +205,7 @@ fn simplify(expr: &Expr) -> Expr { } => simplify(&*left.clone()), _ => expr.clone(), }) - .unwrap_or(expr.clone()), + .unwrap_or_else(|| expr.clone()), Expr::BinaryExpr { left, op: Operator::And, @@ -224,7 +224,7 @@ fn simplify(expr: &Expr) -> Expr { } => simplify(&x.clone()), _ => expr.clone(), }) - .unwrap_or(expr.clone()), + .unwrap_or_else(|| expr.clone()), Expr::BinaryExpr { left, op: Operator::And, @@ -243,7 +243,7 @@ fn simplify(expr: &Expr) -> Expr { } => simplify(&x.clone()), _ => expr.clone(), }) - .unwrap_or(expr.clone()), + .unwrap_or_else(|| expr.clone()), Expr::BinaryExpr { left, op, right } => Expr::BinaryExpr { left: Box::new(simplify(&left)), op: *op, @@ -467,7 +467,8 @@ mod tests { #[test] fn test_simplify_and_and_false() -> Result<()> { - let expr = binary_expr(lit(ScalarValue::Boolean(None)), Operator::And, lit(false)); + let expr = + binary_expr(lit(ScalarValue::Boolean(None)), Operator::And, lit(false)); let expr_eq = lit(false); assert_eq!(simplify(&expr), expr_eq); From f161d6f21274084dae9e5984cb82b98d8bf60114 Mon Sep 17 00:00:00 2001 From: Javier Goday Date: Thu, 3 Jun 2021 07:34:48 +0200 Subject: [PATCH 11/15] Update datafusion/src/optimizer/simplify_expressions.rs Co-authored-by: Andrew Lamb --- datafusion/src/optimizer/simplify_expressions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/src/optimizer/simplify_expressions.rs b/datafusion/src/optimizer/simplify_expressions.rs index 797192560286..6c8b40744389 100644 --- a/datafusion/src/optimizer/simplify_expressions.rs +++ b/datafusion/src/optimizer/simplify_expressions.rs @@ -453,7 +453,7 @@ mod tests { #[test] fn test_simplify_or_and() -> Result<()> { - // (c > 5) OR ((d < 6) AND (c > 5) -- can't remove + // (c > 5) OR ((d < 6) AND (c > 5) -- can remove let expr = binary_expr( col("c").gt(lit(5)), Operator::Or, From 71b95a23b4ce1601de6e6d846f6cd669290960e4 Mon Sep 17 00:00:00 2001 From: Javier Goday Date: Thu, 3 Jun 2021 07:35:07 +0200 Subject: [PATCH 12/15] Update datafusion/src/optimizer/simplify_expressions.rs Co-authored-by: Andrew Lamb --- datafusion/src/optimizer/simplify_expressions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/src/optimizer/simplify_expressions.rs b/datafusion/src/optimizer/simplify_expressions.rs index 6c8b40744389..be0b05a615fa 100644 --- a/datafusion/src/optimizer/simplify_expressions.rs +++ b/datafusion/src/optimizer/simplify_expressions.rs @@ -323,7 +323,7 @@ mod tests { #[test] fn test_simplify_or_true() -> Result<()> { - let expr_a = binary_expr(col("c"), Operator::Or, lit(true)); + let expr_a = col("c").or(lit(true))); let expr_b = binary_expr(lit(true), Operator::Or, col("c")); let expected = lit(true); From 618f34c209ebb6138492e7e4ca6bdb9f03e47afa Mon Sep 17 00:00:00 2001 From: Javier Goday Date: Thu, 3 Jun 2021 07:35:37 +0200 Subject: [PATCH 13/15] Update datafusion/src/optimizer/simplify_expressions.rs Co-authored-by: Andrew Lamb --- datafusion/src/optimizer/simplify_expressions.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/datafusion/src/optimizer/simplify_expressions.rs b/datafusion/src/optimizer/simplify_expressions.rs index be0b05a615fa..73c893487340 100644 --- a/datafusion/src/optimizer/simplify_expressions.rs +++ b/datafusion/src/optimizer/simplify_expressions.rs @@ -1,3 +1,6 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information // regarding copyright ownership. The ASF licenses this file // to you under the Apache License, Version 2.0 (the // "License"); you may not use this file except in compliance From 538ccdb5a8a1b462148e4fd51a2bfeec051b06a9 Mon Sep 17 00:00:00 2001 From: Javier Goday Date: Thu, 3 Jun 2021 07:35:55 +0200 Subject: [PATCH 14/15] Update datafusion/src/optimizer/simplify_expressions.rs Co-authored-by: Andrew Lamb --- datafusion/src/optimizer/simplify_expressions.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datafusion/src/optimizer/simplify_expressions.rs b/datafusion/src/optimizer/simplify_expressions.rs index 73c893487340..854963a79bc1 100644 --- a/datafusion/src/optimizer/simplify_expressions.rs +++ b/datafusion/src/optimizer/simplify_expressions.rs @@ -15,7 +15,7 @@ // specific language governing permissions and limitations // under the License. -//! Remove duplicate filters optimizer rule +//! Simplify expressions optimizer rule use crate::execution::context::ExecutionProps; use crate::logical_plan::LogicalPlan; From af2c6ac8c76a87cca1dc0ce6241f2e103dfb3d0c Mon Sep 17 00:00:00 2001 From: jgoday Date: Thu, 3 Jun 2021 08:48:03 +0200 Subject: [PATCH 15/15] simplify test expressions --- .../src/optimizer/simplify_expressions.rs | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/datafusion/src/optimizer/simplify_expressions.rs b/datafusion/src/optimizer/simplify_expressions.rs index 854963a79bc1..0697d689c401 100644 --- a/datafusion/src/optimizer/simplify_expressions.rs +++ b/datafusion/src/optimizer/simplify_expressions.rs @@ -326,8 +326,8 @@ mod tests { #[test] fn test_simplify_or_true() -> Result<()> { - let expr_a = col("c").or(lit(true))); - let expr_b = binary_expr(lit(true), Operator::Or, col("c")); + let expr_a = col("c").or(lit(true)); + let expr_b = lit(true).or(col("c")); let expected = lit(true); assert_eq!(simplify(&expr_a), expected); @@ -337,8 +337,8 @@ mod tests { #[test] fn test_simplify_or_false() -> Result<()> { - let expr_a = binary_expr(lit(false), Operator::Or, col("c")); - let expr_b = binary_expr(col("c"), Operator::Or, lit(false)); + let expr_a = lit(false).or(col("c")); + let expr_b = col("c").or(lit(false)); let expected = col("c"); assert_eq!(simplify(&expr_a), expected); @@ -348,7 +348,7 @@ mod tests { #[test] fn test_simplify_or_same() -> Result<()> { - let expr = binary_expr(col("c"), Operator::Or, col("c")); + let expr = col("c").or(col("c")); let expected = col("c"); assert_eq!(simplify(&expr), expected); @@ -357,8 +357,8 @@ mod tests { #[test] fn test_simplify_and_false() -> Result<()> { - let expr_a = binary_expr(lit(false), Operator::And, col("c")); - let expr_b = binary_expr(col("c"), Operator::And, lit(false)); + let expr_a = lit(false).and(col("c")); + let expr_b = col("c").and(lit(false)); let expected = lit(false); assert_eq!(simplify(&expr_a), expected); @@ -368,7 +368,7 @@ mod tests { #[test] fn test_simplify_and_same() -> Result<()> { - let expr = binary_expr(col("c"), Operator::And, col("c")); + let expr = col("c").and(col("c")); let expected = col("c"); assert_eq!(simplify(&expr), expected); @@ -377,8 +377,8 @@ mod tests { #[test] fn test_simplify_and_true() -> Result<()> { - let expr_a = binary_expr(lit(true), Operator::And, col("c")); - let expr_b = binary_expr(col("c"), Operator::And, lit(true)); + let expr_a = lit(true).and(col("c")); + let expr_b = col("c").and(lit(true)); let expected = col("c"); assert_eq!(simplify(&expr_a), expected); @@ -418,7 +418,7 @@ mod tests { #[test] fn test_simplify_simple_and() -> Result<()> { // (c > 5) AND (c > 5) - let expr = binary_expr(col("c").gt(lit(5)), Operator::And, col("c").gt(lit(5))); + let expr = (col("c").gt(lit(5))).and(col("c").gt(lit(5))); let expected = col("c").gt(lit(5)); assert_eq!(simplify(&expr), expected);