From c75922fe866ced92bd99b36572e641c3572a2fc8 Mon Sep 17 00:00:00 2001 From: Matthew Turner Date: Sat, 30 Oct 2021 20:25:30 -0400 Subject: [PATCH 1/6] Add Or branch to constant folding --- datafusion/src/optimizer/constant_folding.rs | 32 ++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 8c29da4172ba..a4957d9b1933 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -210,6 +210,38 @@ impl<'a> ExprRewriter for Simplifier<'a> { right, }, }, + Operator::Or => match (left.as_ref(), right.as_ref()) { + ( + Expr::Literal(ScalarValue::Boolean(l)), + Expr::Literal(ScalarValue::Boolean(r)), + ) => match (l, r) { + (Some(l), Some(r)) => { + Expr::Literal(ScalarValue::Boolean(Some(*l || *r))) + } + _ => Expr::Literal(ScalarValue::Boolean(None)), + }, + (Expr::Literal(ScalarValue::Boolean(b)), _) + if self.is_boolean_type(&right) => + { + match b { + Some(true) => Expr::Literal(ScalarValue::Boolean(Some(true))), + _ => *right, + } + } + (_, Expr::Literal(ScalarValue::Boolean(b))) + if self.is_boolean_type(&left) => + { + match b { + Some(true) => Expr::Literal(ScalarValue::Boolean(Some(true))), + _ => *left, + } + } + _ => Expr::BinaryExpr { + left, + op: Operator::Or, + right, + }, + }, _ => Expr::BinaryExpr { left, op, right }, }, // Not(Not(expr)) --> expr From ff060ea9a7d90a8ea67b8700034dc47b7db09ab7 Mon Sep 17 00:00:00 2001 From: Matthew Turner Date: Sat, 30 Oct 2021 21:30:10 -0400 Subject: [PATCH 2/6] Add test for Or --- datafusion/src/optimizer/constant_folding.rs | 23 ++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index a4957d9b1933..d23a758c3b93 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -843,4 +843,27 @@ mod tests { assert_eq!(expected, actual); } + #[test] + fn optimize_expr_bool() -> Result<()> { + let schema = expr_test_schema(); + let mut rewriter = Simplifier { + schemas: vec![&schema], + }; + + // col || true is always true + assert_eq!( + (col("c2").or(Expr::Literal(ScalarValue::Boolean(Some(true))))) + .rewrite(&mut rewriter)?, + lit(ScalarValue::Boolean(Some(true))), + ); + + // col || false is always col + assert_eq!( + (col("c2").or(Expr::Literal(ScalarValue::Boolean(Some(false))))) + .rewrite(&mut rewriter)?, + col("c2"), + ); + + Ok(()) + } } From e04a8dd149f79455af703a647cca9fc1f40c4a5a Mon Sep 17 00:00:00 2001 From: Matthew Turner Date: Mon, 1 Nov 2021 00:50:23 -0400 Subject: [PATCH 3/6] Simplify or match --- datafusion/src/optimizer/constant_folding.rs | 26 ++++---------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index d23a758c3b93..d0eae9e9fff4 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -211,29 +211,13 @@ impl<'a> ExprRewriter for Simplifier<'a> { }, }, Operator::Or => match (left.as_ref(), right.as_ref()) { - ( - Expr::Literal(ScalarValue::Boolean(l)), - Expr::Literal(ScalarValue::Boolean(r)), - ) => match (l, r) { - (Some(l), Some(r)) => { - Expr::Literal(ScalarValue::Boolean(Some(*l || *r))) - } - _ => Expr::Literal(ScalarValue::Boolean(None)), - }, - (Expr::Literal(ScalarValue::Boolean(b)), _) - if self.is_boolean_type(&right) => + (Expr::Literal(ScalarValue::Boolean(lit)), col) + | (col, Expr::Literal(ScalarValue::Boolean(lit))) + if self.is_boolean_type(col) => { - match b { - Some(true) => Expr::Literal(ScalarValue::Boolean(Some(true))), - _ => *right, - } - } - (_, Expr::Literal(ScalarValue::Boolean(b))) - if self.is_boolean_type(&left) => - { - match b { + match lit { Some(true) => Expr::Literal(ScalarValue::Boolean(Some(true))), - _ => *left, + _ => *col, } } _ => Expr::BinaryExpr { From fc715decc34b5d0f73e395c41534d1c7f485711c Mon Sep 17 00:00:00 2001 From: Matthew Turner Date: Mon, 1 Nov 2021 12:09:35 -0400 Subject: [PATCH 4/6] Add And operator --- datafusion/src/optimizer/constant_folding.rs | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index d0eae9e9fff4..6e9c8c42f424 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -226,6 +226,24 @@ impl<'a> ExprRewriter for Simplifier<'a> { right, }, }, + Operator::And => match (left.as_ref(), right.as_ref()) { + (Expr::Literal(ScalarValue::Boolean(lit)), col) + | (col, Expr::Literal(ScalarValue::Boolean(lit))) + if self.is_boolean_type(col) => + { + match lit { + Some(false) => { + Expr::Literal(ScalarValue::Boolean(Some(false))) + } + _ => *col, + } + } + _ => Expr::BinaryExpr { + left, + op: Operator::And, + right, + }, + }, _ => Expr::BinaryExpr { left, op, right }, }, // Not(Not(expr)) --> expr From c240e8355ad1d4b652200e5de730830b01e63d1b Mon Sep 17 00:00:00 2001 From: Matthew Turner Date: Mon, 1 Nov 2021 15:44:49 -0400 Subject: [PATCH 5/6] Updated Or and And logic --- datafusion/src/optimizer/constant_folding.rs | 78 +++++++++++++++++--- 1 file changed, 67 insertions(+), 11 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 6e9c8c42f424..75da20ccd6f8 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -211,13 +211,29 @@ impl<'a> ExprRewriter for Simplifier<'a> { }, }, Operator::Or => match (left.as_ref(), right.as_ref()) { - (Expr::Literal(ScalarValue::Boolean(lit)), col) - | (col, Expr::Literal(ScalarValue::Boolean(lit))) - if self.is_boolean_type(col) => + ( + Expr::Literal(ScalarValue::Boolean(l)), + Expr::Literal(ScalarValue::Boolean(r)), + ) => match (l, r) { + (Some(l), Some(r)) => { + Expr::Literal(ScalarValue::Boolean(Some(*l || *r))) + } + _ => Expr::Literal(ScalarValue::Boolean(None)), + }, + (Expr::Literal(ScalarValue::Boolean(b)), _) + if self.is_boolean_type(&right) => { - match lit { + match b { Some(true) => Expr::Literal(ScalarValue::Boolean(Some(true))), - _ => *col, + _ => *right, + } + } + (_, Expr::Literal(ScalarValue::Boolean(b))) + if self.is_boolean_type(&left) => + { + match b { + Some(true) => Expr::Literal(ScalarValue::Boolean(Some(true))), + _ => *left, } } _ => Expr::BinaryExpr { @@ -227,15 +243,33 @@ impl<'a> ExprRewriter for Simplifier<'a> { }, }, Operator::And => match (left.as_ref(), right.as_ref()) { - (Expr::Literal(ScalarValue::Boolean(lit)), col) - | (col, Expr::Literal(ScalarValue::Boolean(lit))) - if self.is_boolean_type(col) => + ( + Expr::Literal(ScalarValue::Boolean(l)), + Expr::Literal(ScalarValue::Boolean(r)), + ) => match (l, r) { + (Some(l), Some(r)) => { + Expr::Literal(ScalarValue::Boolean(Some(*l && *r))) + } + _ => Expr::Literal(ScalarValue::Boolean(None)), + }, + (Expr::Literal(ScalarValue::Boolean(b)), _) + if self.is_boolean_type(&right) => + { + match b { + Some(false) => { + Expr::Literal(ScalarValue::Boolean(Some(false))) + } + _ => *right, + } + } + (_, Expr::Literal(ScalarValue::Boolean(b))) + if self.is_boolean_type(&left) => { - match lit { + match b { Some(false) => { Expr::Literal(ScalarValue::Boolean(Some(false))) } - _ => *col, + _ => *left, } } _ => Expr::BinaryExpr { @@ -846,7 +880,7 @@ mod tests { assert_eq!(expected, actual); } #[test] - fn optimize_expr_bool() -> Result<()> { + fn optimize_expr_bool_or() -> Result<()> { let schema = expr_test_schema(); let mut rewriter = Simplifier { schemas: vec![&schema], @@ -866,6 +900,28 @@ mod tests { col("c2"), ); + Ok(()) + } + #[test] + fn optimize_expr_bool_and() -> Result<()> { + let schema = expr_test_schema(); + let mut rewriter = Simplifier { + schemas: vec![&schema], + }; + + // col & true is always col + assert_eq!( + (col("c2").and(Expr::Literal(ScalarValue::Boolean(Some(true))))) + .rewrite(&mut rewriter)?, + col("c2"), + ); + // col & false is always false + assert_eq!( + (col("c2").and(Expr::Literal(ScalarValue::Boolean(Some(false))))) + .rewrite(&mut rewriter)?, + lit(ScalarValue::Boolean(Some(false))), + ); + Ok(()) } } From 056508da56998702f52749cc40cb4e75f32ab87d Mon Sep 17 00:00:00 2001 From: Matthew Turner Date: Tue, 2 Nov 2021 12:19:43 -0400 Subject: [PATCH 6/6] Updated Or and And to handle nulls and added tests --- datafusion/src/optimizer/constant_folding.rs | 136 ++++++++++++++++--- 1 file changed, 115 insertions(+), 21 deletions(-) diff --git a/datafusion/src/optimizer/constant_folding.rs b/datafusion/src/optimizer/constant_folding.rs index 75da20ccd6f8..dace62201e7f 100644 --- a/datafusion/src/optimizer/constant_folding.rs +++ b/datafusion/src/optimizer/constant_folding.rs @@ -211,21 +211,26 @@ impl<'a> ExprRewriter for Simplifier<'a> { }, }, Operator::Or => match (left.as_ref(), right.as_ref()) { - ( - Expr::Literal(ScalarValue::Boolean(l)), - Expr::Literal(ScalarValue::Boolean(r)), - ) => match (l, r) { - (Some(l), Some(r)) => { - Expr::Literal(ScalarValue::Boolean(Some(*l || *r))) - } - _ => Expr::Literal(ScalarValue::Boolean(None)), - }, (Expr::Literal(ScalarValue::Boolean(b)), _) if self.is_boolean_type(&right) => { match b { Some(true) => Expr::Literal(ScalarValue::Boolean(Some(true))), - _ => *right, + Some(false) => match *right { + Expr::Literal(ScalarValue::Boolean(None)) => { + Expr::Literal(ScalarValue::Boolean(None)) + } + _ => *right, + }, + None => match *right { + Expr::Literal(ScalarValue::Boolean(Some(true))) => { + Expr::Literal(ScalarValue::Boolean(Some(true))) + } + Expr::Literal(ScalarValue::Boolean(Some(false))) => { + Expr::Literal(ScalarValue::Boolean(None)) + } + _ => *right, + }, } } (_, Expr::Literal(ScalarValue::Boolean(b))) @@ -233,7 +238,21 @@ impl<'a> ExprRewriter for Simplifier<'a> { { match b { Some(true) => Expr::Literal(ScalarValue::Boolean(Some(true))), - _ => *left, + Some(false) => match *left { + Expr::Literal(ScalarValue::Boolean(None)) => { + Expr::Literal(ScalarValue::Boolean(None)) + } + _ => *left, + }, + None => match *left { + Expr::Literal(ScalarValue::Boolean(Some(true))) => { + Expr::Literal(ScalarValue::Boolean(Some(true))) + } + Expr::Literal(ScalarValue::Boolean(Some(false))) => { + Expr::Literal(ScalarValue::Boolean(None)) + } + _ => *left, + }, } } _ => Expr::BinaryExpr { @@ -243,23 +262,34 @@ impl<'a> ExprRewriter for Simplifier<'a> { }, }, Operator::And => match (left.as_ref(), right.as_ref()) { - ( - Expr::Literal(ScalarValue::Boolean(l)), - Expr::Literal(ScalarValue::Boolean(r)), - ) => match (l, r) { - (Some(l), Some(r)) => { - Expr::Literal(ScalarValue::Boolean(Some(*l && *r))) - } - _ => Expr::Literal(ScalarValue::Boolean(None)), - }, (Expr::Literal(ScalarValue::Boolean(b)), _) if self.is_boolean_type(&right) => { + // match b { + // Some(false) => { + // Expr::Literal(ScalarValue::Boolean(Some(false))) + // } + // _ => *right, + // } match b { + Some(true) => match *right { + Expr::Literal(ScalarValue::Boolean(None)) => { + Expr::Literal(ScalarValue::Boolean(None)) + } + _ => *right, + }, Some(false) => { Expr::Literal(ScalarValue::Boolean(Some(false))) } - _ => *right, + None => match *right { + Expr::Literal(ScalarValue::Boolean(Some(true))) => { + Expr::Literal(ScalarValue::Boolean(None)) + } + Expr::Literal(ScalarValue::Boolean(Some(false))) => { + Expr::Literal(ScalarValue::Boolean(Some(false))) + } + _ => *right, + }, } } (_, Expr::Literal(ScalarValue::Boolean(b))) @@ -900,6 +930,38 @@ mod tests { col("c2"), ); + // true || null is always true + assert_eq!( + (Expr::Literal(ScalarValue::Boolean(Some(true))) + .or(Expr::Literal(ScalarValue::Boolean(None)))) + .rewrite(&mut rewriter)?, + lit(ScalarValue::Boolean(Some(true))), + ); + + // null || true is always true + assert_eq!( + (Expr::Literal(ScalarValue::Boolean(None)) + .or(Expr::Literal(ScalarValue::Boolean(Some(true))))) + .rewrite(&mut rewriter)?, + lit(ScalarValue::Boolean(Some(true))), + ); + + // false || null is always null + assert_eq!( + (Expr::Literal(ScalarValue::Boolean(Some(false))) + .or(Expr::Literal(ScalarValue::Boolean(None)))) + .rewrite(&mut rewriter)?, + lit(ScalarValue::Boolean(None)), + ); + + // null || false is always null + assert_eq!( + (Expr::Literal(ScalarValue::Boolean(None)) + .or(Expr::Literal(ScalarValue::Boolean(Some(false))))) + .rewrite(&mut rewriter)?, + lit(ScalarValue::Boolean(None)), + ); + Ok(()) } #[test] @@ -922,6 +984,38 @@ mod tests { lit(ScalarValue::Boolean(Some(false))), ); + // true && null is always null + assert_eq!( + (Expr::Literal(ScalarValue::Boolean(Some(true))) + .and(Expr::Literal(ScalarValue::Boolean(None)))) + .rewrite(&mut rewriter)?, + lit(ScalarValue::Boolean(None)), + ); + + // null && true is always null + assert_eq!( + (Expr::Literal(ScalarValue::Boolean(None)) + .and(Expr::Literal(ScalarValue::Boolean(Some(true))))) + .rewrite(&mut rewriter)?, + lit(ScalarValue::Boolean(None)), + ); + + // false && null is always false + assert_eq!( + (Expr::Literal(ScalarValue::Boolean(Some(false))) + .and(Expr::Literal(ScalarValue::Boolean(None)))) + .rewrite(&mut rewriter)?, + lit(ScalarValue::Boolean(Some(false))), + ); + + // null && false is always false + assert_eq!( + (Expr::Literal(ScalarValue::Boolean(None)) + .and(Expr::Literal(ScalarValue::Boolean(Some(false))))) + .rewrite(&mut rewriter)?, + lit(ScalarValue::Boolean(Some(false))), + ); + Ok(()) } }