From 7e2535554d5a0661490b74ff4422798d98063214 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 7 Oct 2016 02:54:34 +0000 Subject: [PATCH 1/5] Support And and Or in Canonicalize. --- .../catalyst/expressions/Canonicalize.scala | 3 +++ .../expressions/ExpressionSetSuite.scala | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala index 07ba7d5e4a849..7ef9d1e9c881b 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala @@ -62,6 +62,9 @@ object Canonicalize extends { case a: Add => orderCommutative(a, { case Add(l, r) => Seq(l, r) }).reduce(Add) case m: Multiply => orderCommutative(m, { case Multiply(l, r) => Seq(l, r) }).reduce(Multiply) + case o: Or => orderCommutative(o, { case Or(l, r) => Seq(l, r) }).reduce(Or) + case a: And => orderCommutative(a, { case And(l, r) => Seq(l, r)}).reduce(And) + case EqualTo(l, r) if l.hashCode() > r.hashCode() => EqualTo(r, l) case EqualNullSafe(l, r) if l.hashCode() > r.hashCode() => EqualNullSafe(r, l) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala index 60939ee0eda5d..55ee4f7aed0f1 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala @@ -80,6 +80,24 @@ class ExpressionSetSuite extends SparkFunSuite { setTest(1, Not(aUpper >= 1), aUpper < 1, Not(Literal(1) <= aUpper), Literal(1) > aUpper) setTest(1, Not(aUpper <= 1), aUpper > 1, Not(Literal(1) >= aUpper), Literal(1) < aUpper) + setTest(1, aUpper > bUpper && aUpper <= 10, aUpper <= 10 && aUpper > bUpper) + setTest(1, + aUpper > bUpper && + bUpper > 100 && + aUpper <= 10, + bUpper > 100 && + aUpper <= 10 && + aUpper > bUpper) + + setTest(1, aUpper > bUpper || aUpper <= 10, aUpper <= 10 || aUpper > bUpper) + setTest(1, + aUpper > bUpper || + bUpper > 100 || + aUpper <= 10, + bUpper > 100 || + aUpper <= 10 || + aUpper > bUpper) + test("add to / remove from set") { val initialSet = ExpressionSet(aUpper + 1 :: Nil) From 0a48d864299633402f4eb5a83a2be931473fde1b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 7 Oct 2016 06:42:46 +0000 Subject: [PATCH 2/5] Only reorder deterministic expressions. --- .../catalyst/expressions/Canonicalize.scala | 8 ++++++-- .../expressions/ExpressionSetSuite.scala | 19 +++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala index 7ef9d1e9c881b..e876450c73fde 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Canonicalize.scala @@ -62,8 +62,12 @@ object Canonicalize extends { case a: Add => orderCommutative(a, { case Add(l, r) => Seq(l, r) }).reduce(Add) case m: Multiply => orderCommutative(m, { case Multiply(l, r) => Seq(l, r) }).reduce(Multiply) - case o: Or => orderCommutative(o, { case Or(l, r) => Seq(l, r) }).reduce(Or) - case a: And => orderCommutative(a, { case And(l, r) => Seq(l, r)}).reduce(And) + case o: Or => + orderCommutative(o, { case Or(l, r) if l.deterministic && r.deterministic => Seq(l, r) }) + .reduce(Or) + case a: And => + orderCommutative(a, { case And(l, r) if l.deterministic && r.deterministic => Seq(l, r)}) + .reduce(And) case EqualTo(l, r) if l.hashCode() > r.hashCode() => EqualTo(r, l) case EqualNullSafe(l, r) if l.hashCode() > r.hashCode() => EqualNullSafe(r, l) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala index 55ee4f7aed0f1..7f31c21d89ea6 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala @@ -98,6 +98,25 @@ class ExpressionSetSuite extends SparkFunSuite { aUpper <= 10 || aUpper > bUpper) + // Don't reorder non-deterministic expression in AND/OR. + setTest(2, Rand(1L) > aUpper && aUpper <= 10, aUpper <= 10 && Rand(1L) > aUpper) + setTest(2, + aUpper > bUpper && + bUpper > 100 && + Rand(1L) > aUpper, + bUpper > 100 && + Rand(1L) > aUpper && + aUpper > bUpper) + + setTest(2, Rand(1L) > aUpper || aUpper <= 10, aUpper <= 10 || Rand(1L) > aUpper) + setTest(2, + aUpper > bUpper || + aUpper <= Rand(1L) || + aUpper <= 10, + aUpper <= Rand(1L) || + aUpper <= 10 || + aUpper > bUpper) + test("add to / remove from set") { val initialSet = ExpressionSet(aUpper + 1 :: Nil) From a5baf3a79e8cd66c18095b6e865b02266297d295 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 9 Oct 2016 08:49:19 +0000 Subject: [PATCH 3/5] More test cases of mixing AND and OR. --- .../expressions/ExpressionSetSuite.scala | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala index 7f31c21d89ea6..aeaf6abf67dab 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala @@ -82,21 +82,37 @@ class ExpressionSetSuite extends SparkFunSuite { setTest(1, aUpper > bUpper && aUpper <= 10, aUpper <= 10 && aUpper > bUpper) setTest(1, - aUpper > bUpper && - bUpper > 100 && - aUpper <= 10, - bUpper > 100 && - aUpper <= 10 && - aUpper > bUpper) + aUpper > bUpper && bUpper > 100 && aUpper <= 10, + bUpper > 100 && aUpper <= 10 && aUpper > bUpper) setTest(1, aUpper > bUpper || aUpper <= 10, aUpper <= 10 || aUpper > bUpper) setTest(1, - aUpper > bUpper || - bUpper > 100 || - aUpper <= 10, - bUpper > 100 || - aUpper <= 10 || - aUpper > bUpper) + aUpper > bUpper || bUpper > 100 || aUpper <= 10, + bUpper > 100 || aUpper <= 10 || aUpper > bUpper) + + setTest(1, + bUpper > 100 || aUpper <= 10 && aUpper > bUpper, + bUpper > 100 || (aUpper <= 10 && aUpper > bUpper)) + + setTest(1, + aUpper > 10 && bUpper < 10 || aUpper >= bUpper, + (bUpper < 10 && aUpper > 10) || aUpper >= bUpper) + + setTest(1, + bUpper > 100 || aUpper < 100 && bUpper <= aUpper || aUpper >= 10 && bUpper >= 50, + (aUpper >= 10 && bUpper >= 50) || bUpper > 100 || (aUpper < 100 && bUpper <= aUpper), + (bUpper >= 50 && aUpper >= 10) || (bUpper <= aUpper && aUpper < 100) || bUpper > 100) + + setTest(1, + bUpper > 100 && aUpper < 100 && bUpper <= aUpper || aUpper >= 10 && bUpper >= 50, + (aUpper >= 10 && bUpper >= 50) || (aUpper < 100 && bUpper > 100 && bUpper <= aUpper), + (bUpper >= 50 && aUpper >= 10) || (bUpper <= aUpper && aUpper < 100 && bUpper > 100)) + + setTest(1, + aUpper >= 10 || bUpper <= 10 && aUpper === bUpper && aUpper < 100 || bUpper >= 100, + aUpper === bUpper && aUpper < 100 && bUpper <= 10 || bUpper >= 100 || aUpper >= 10, + aUpper < 100 && bUpper <= 10 && aUpper === bUpper || aUpper >= 10 || bUpper >= 100, + ((bUpper <= 10 && aUpper === bUpper) && aUpper < 100) || (aUpper >= 10 || bUpper >= 100)) // Don't reorder non-deterministic expression in AND/OR. setTest(2, Rand(1L) > aUpper && aUpper <= 10, aUpper <= 10 && Rand(1L) > aUpper) From 25f5d4d068509d93630d56db2155f11cc2a9b301 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 10 Oct 2016 00:55:45 +0000 Subject: [PATCH 4/5] Make test cases more clear. --- .../expressions/ExpressionSetSuite.scala | 50 +++++++++++-------- 1 file changed, 28 insertions(+), 22 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala index aeaf6abf67dab..dcaedb19e3b41 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala @@ -80,6 +80,7 @@ class ExpressionSetSuite extends SparkFunSuite { setTest(1, Not(aUpper >= 1), aUpper < 1, Not(Literal(1) <= aUpper), Literal(1) > aUpper) setTest(1, Not(aUpper <= 1), aUpper > 1, Not(Literal(1) >= aUpper), Literal(1) < aUpper) + // Reordering AND/OR expressions setTest(1, aUpper > bUpper && aUpper <= 10, aUpper <= 10 && aUpper > bUpper) setTest(1, aUpper > bUpper && bUpper > 100 && aUpper <= 10, @@ -91,47 +92,52 @@ class ExpressionSetSuite extends SparkFunSuite { bUpper > 100 || aUpper <= 10 || aUpper > bUpper) setTest(1, - bUpper > 100 || aUpper <= 10 && aUpper > bUpper, + (aUpper <= 10 && aUpper > bUpper) || bUpper > 100, bUpper > 100 || (aUpper <= 10 && aUpper > bUpper)) setTest(1, - aUpper > 10 && bUpper < 10 || aUpper >= bUpper, + aUpper >= bUpper || (aUpper > 10 && bUpper < 10), (bUpper < 10 && aUpper > 10) || aUpper >= bUpper) + // More complicated cases mixing AND/OR + // Three predicates in the following: + // (bUpper > 100) + // (aUpper < 100 && bUpper <= aUpper) + // (aUpper >= 10 && bUpper >= 50) + // They can be reordered and the sub-predicates contained in each of them can be reordered too. setTest(1, - bUpper > 100 || aUpper < 100 && bUpper <= aUpper || aUpper >= 10 && bUpper >= 50, - (aUpper >= 10 && bUpper >= 50) || bUpper > 100 || (aUpper < 100 && bUpper <= aUpper), - (bUpper >= 50 && aUpper >= 10) || (bUpper <= aUpper && aUpper < 100) || bUpper > 100) + (bUpper > 100) || (aUpper < 100 && bUpper <= aUpper) || (aUpper >= 10 && bUpper >= 50), + (aUpper >= 10 && bUpper >= 50) || (bUpper > 100) || (aUpper < 100 && bUpper <= aUpper), + (bUpper >= 50 && aUpper >= 10) || (bUpper <= aUpper && aUpper < 100) || (bUpper > 100)) + // Two predicates in the following: + // (bUpper > 100 && aUpper < 100 && bUpper <= aUpper) + // (aUpper >= 10 && bUpper >= 50) setTest(1, - bUpper > 100 && aUpper < 100 && bUpper <= aUpper || aUpper >= 10 && bUpper >= 50, + (bUpper > 100 && aUpper < 100 && bUpper <= aUpper) || (aUpper >= 10 && bUpper >= 50), (aUpper >= 10 && bUpper >= 50) || (aUpper < 100 && bUpper > 100 && bUpper <= aUpper), (bUpper >= 50 && aUpper >= 10) || (bUpper <= aUpper && aUpper < 100 && bUpper > 100)) + // Three predicates in the following: + // (aUpper >= 10) + // (bUpper <= 10 && aUpper === bUpper && aUpper < 100) + // (bUpper >= 100) setTest(1, - aUpper >= 10 || bUpper <= 10 && aUpper === bUpper && aUpper < 100 || bUpper >= 100, - aUpper === bUpper && aUpper < 100 && bUpper <= 10 || bUpper >= 100 || aUpper >= 10, - aUpper < 100 && bUpper <= 10 && aUpper === bUpper || aUpper >= 10 || bUpper >= 100, - ((bUpper <= 10 && aUpper === bUpper) && aUpper < 100) || (aUpper >= 10 || bUpper >= 100)) + (aUpper >= 10) || (bUpper <= 10 && aUpper === bUpper && aUpper < 100) || (bUpper >= 100), + (aUpper === bUpper && aUpper < 100 && bUpper <= 10) || (bUpper >= 100) || (aUpper >= 10), + (aUpper < 100 && bUpper <= 10 && aUpper === bUpper) || (aUpper >= 10) || (bUpper >= 100), + ((bUpper <= 10 && aUpper === bUpper) && aUpper < 100) || ((aUpper >= 10) || (bUpper >= 100))) // Don't reorder non-deterministic expression in AND/OR. setTest(2, Rand(1L) > aUpper && aUpper <= 10, aUpper <= 10 && Rand(1L) > aUpper) setTest(2, - aUpper > bUpper && - bUpper > 100 && - Rand(1L) > aUpper, - bUpper > 100 && - Rand(1L) > aUpper && - aUpper > bUpper) + aUpper > bUpper && bUpper > 100 && Rand(1L) > aUpper, + bUpper > 100 && Rand(1L) > aUpper && aUpper > bUpper) setTest(2, Rand(1L) > aUpper || aUpper <= 10, aUpper <= 10 || Rand(1L) > aUpper) setTest(2, - aUpper > bUpper || - aUpper <= Rand(1L) || - aUpper <= 10, - aUpper <= Rand(1L) || - aUpper <= 10 || - aUpper > bUpper) + aUpper > bUpper || aUpper <= Rand(1L) || aUpper <= 10, + aUpper <= Rand(1L) || aUpper <= 10 || aUpper > bUpper) test("add to / remove from set") { val initialSet = ExpressionSet(aUpper + 1 :: Nil) From 21958d7e7b2cb0de6a5b6353afc933359e490df2 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 11 Oct 2016 03:08:46 +0000 Subject: [PATCH 5/5] Add partial reorder test case. --- .../expressions/ExpressionSetSuite.scala | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala index dcaedb19e3b41..c587d4f632531 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionSetSuite.scala @@ -139,6 +139,29 @@ class ExpressionSetSuite extends SparkFunSuite { aUpper > bUpper || aUpper <= Rand(1L) || aUpper <= 10, aUpper <= Rand(1L) || aUpper <= 10 || aUpper > bUpper) + // Partial reorder case: we don't reorder non-deterministic expressions, + // but we can reorder sub-expressions in deterministic AND/OR expressions. + // There are two predicates: + // (aUpper > bUpper || bUpper > 100) => we can reorder sub-expressions in it. + // (aUpper === Rand(1L)) + setTest(1, + (aUpper > bUpper || bUpper > 100) && aUpper === Rand(1L), + (bUpper > 100 || aUpper > bUpper) && aUpper === Rand(1L)) + + // There are three predicates: + // (Rand(1L) > aUpper) + // (aUpper <= Rand(1L) && aUpper > bUpper) + // (aUpper > 10 && bUpper > 10) => we can reorder sub-expressions in it. + setTest(1, + Rand(1L) > aUpper || (aUpper <= Rand(1L) && aUpper > bUpper) || (aUpper > 10 && bUpper > 10), + Rand(1L) > aUpper || (aUpper <= Rand(1L) && aUpper > bUpper) || (bUpper > 10 && aUpper > 10)) + + // Same predicates as above, but a negative case when we reorder non-deterministic + // expression in (aUpper <= Rand(1L) && aUpper > bUpper). + setTest(2, + Rand(1L) > aUpper || (aUpper <= Rand(1L) && aUpper > bUpper) || (aUpper > 10 && bUpper > 10), + Rand(1L) > aUpper || (aUpper > bUpper && aUpper <= Rand(1L)) || (aUpper > 10 && bUpper > 10)) + test("add to / remove from set") { val initialSet = ExpressionSet(aUpper + 1 :: Nil)