From ef41e26727c6c1091e94f960237419a63d473f19 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Fri, 8 Apr 2016 15:51:11 -0700 Subject: [PATCH 1/5] [SPARK-14502][SQL] Add optimization for Non-Nullable Binary Comparison Simplification --- .../sql/catalyst/optimizer/Optimizer.scala | 23 +++++ ...eBinaryComparisonSimplificationSuite.scala | 99 +++++++++++++++++++ 2 files changed, 122 insertions(+) create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 619514e8aacbe..005762d1d3997 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -86,6 +86,7 @@ abstract class Optimizer extends RuleExecutor[LogicalPlan] { BooleanSimplification, SimplifyConditionals, RemoveDispensableExpressions, + NonNullableBinaryComparisonSimplification, PruneFilters, EliminateSorts, SimplifyCasts, @@ -786,6 +787,28 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { } } +/** + * Simplifies semantically-equal non-nullable expressions: + * 1) Replace '=', '<=', and '>=' with 'true' literal + * 2) Replace '<' and '>' with 'false' literal + */ +object NonNullableBinaryComparisonSimplification extends Rule[LogicalPlan] with PredicateHelper { + def apply(plan: LogicalPlan): LogicalPlan = plan transform { + case q: LogicalPlan => q transformExpressionsUp { + // True with equality + case a EqualTo b if !a.nullable && !b.nullable && a.semanticEquals(b) => TrueLiteral + case a EqualNullSafe b if !a.nullable && !b.nullable && a.semanticEquals(b) => TrueLiteral + case a GreaterThanOrEqual b if !a.nullable && !b.nullable && a.semanticEquals(b) => + TrueLiteral + case a LessThanOrEqual b if !a.nullable && !b.nullable && a.semanticEquals(b) => TrueLiteral + + // False with inequality + case a GreaterThan b if !a.nullable && !b.nullable && a.semanticEquals(b) => FalseLiteral + case a LessThan b if !a.nullable && !b.nullable && a.semanticEquals(b) => FalseLiteral + } + } +} + /** * Simplifies conditional expressions (if / case). */ diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala new file mode 100644 index 0000000000000..8cf0614050d21 --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.optimizer + +import org.apache.spark.sql.Row +import org.apache.spark.sql.catalyst.analysis._ +import org.apache.spark.sql.catalyst.dsl.expressions._ +import org.apache.spark.sql.catalyst.dsl.plans._ +import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.expressions.Literal.{FalseLiteral, TrueLiteral} +import org.apache.spark.sql.catalyst.plans.PlanTest +import org.apache.spark.sql.catalyst.plans.logical._ +import org.apache.spark.sql.catalyst.rules._ + +class NonNullableBinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper { + + object Optimize extends RuleExecutor[LogicalPlan] { + val batches = + Batch("AnalysisNodes", Once, + EliminateSubqueryAliases) :: + Batch("Constant Folding", FixedPoint(50), + NullPropagation, + ConstantFolding, + BooleanSimplification, + NonNullableBinaryComparisonSimplification, + PruneFilters) :: Nil + } + + val nullableRelation = LocalRelation.fromExternalRows( + Seq('a.int.withNullability(true)), + Seq(null, null, null, null).map(x => Row(x))) + val nonNullableRelation = LocalRelation.fromExternalRows( + Seq('a.int.withNullability(false)), + Seq(1, 2, 3, 4).map(x => Row(x))) + + test("Preserve nullable or non-deterministic exprs") { + for (e <- Seq('a === 'a, Rand(0) === Rand(0))) { + val plan = nullableRelation.where('a === 'a).analyze + val actual = Optimize.execute(plan) + val correctAnswer = plan + comparePlans(actual, correctAnswer) + } + } + + test("Non-Nullable Simplification Primitive") { + val plan = nonNullableRelation + .select('a === 'a, 'a <=> 'a, 'a <= 'a, 'a >= 'a, 'a < 'a, 'a > 'a).analyze + val actual = Optimize.execute(plan) + val correctAnswer = nonNullableRelation + .select( + Alias(TrueLiteral, "(a = a)")(), + Alias(TrueLiteral, "(a <=> a)")(), + Alias(TrueLiteral, "(a <= a)")(), + Alias(TrueLiteral, "(a >= a)")(), + Alias(FalseLiteral, "(a < a)")(), + Alias(FalseLiteral, "(a > a)")()) + .analyze + comparePlans(actual, correctAnswer) + } + + test("TRUE Filter") { + val plan = nonNullableRelation.where('a === 'a && 'a <=> 'a && 'a <= 'a && 'a >= 'a).analyze + val actual = Optimize.execute(plan) + val correctAnswer = nonNullableRelation.analyze + comparePlans(actual, correctAnswer) + } + + test("FALSE Filter") { + val plan = nonNullableRelation.where('a < 'a || 'a > 'a).analyze + val actual = Optimize.execute(plan) + val correctAnswer = LocalRelation(Seq('a.int.withNullability(false)), Seq.empty) + comparePlans(actual, correctAnswer) + } + + test("Expression Normalization") { + val plan = nonNullableRelation.where( + 'a * Literal(100) + Pi() === Pi() + Literal(100) * 'a || + DateAdd(CurrentDate(), 'a + Literal(2)) <= DateAdd(CurrentDate(), Literal(2) + 'a)) + .analyze + val actual = Optimize.execute(plan) + val correctAnswer = nonNullableRelation.analyze + comparePlans(actual, correctAnswer) + } +} From 68b58b6c3492c3d553f5d338e86d7198efd8e59f Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 10 Apr 2016 22:27:32 -0700 Subject: [PATCH 2/5] Replace '||' with '&&' --- .../NonNullableBinaryComparisonSimplificationSuite.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala index 8cf0614050d21..76dcd5e60144d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala @@ -89,7 +89,7 @@ class NonNullableBinaryComparisonSimplificationSuite extends PlanTest with Predi test("Expression Normalization") { val plan = nonNullableRelation.where( - 'a * Literal(100) + Pi() === Pi() + Literal(100) * 'a || + 'a * Literal(100) + Pi() === Pi() + Literal(100) * 'a && DateAdd(CurrentDate(), 'a + Literal(2)) <= DateAdd(CurrentDate(), Literal(2) + 'a)) .analyze val actual = Optimize.execute(plan) From 71bf40f8042d7e2dcbcace0b72b7357f55b5344c Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 10 Apr 2016 22:44:04 -0700 Subject: [PATCH 3/5] Simplify test by removing useless data. --- .../NonNullableBinaryComparisonSimplificationSuite.scala | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala index 76dcd5e60144d..4e2cf4f24c42e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala @@ -41,12 +41,8 @@ class NonNullableBinaryComparisonSimplificationSuite extends PlanTest with Predi PruneFilters) :: Nil } - val nullableRelation = LocalRelation.fromExternalRows( - Seq('a.int.withNullability(true)), - Seq(null, null, null, null).map(x => Row(x))) - val nonNullableRelation = LocalRelation.fromExternalRows( - Seq('a.int.withNullability(false)), - Seq(1, 2, 3, 4).map(x => Row(x))) + val nullableRelation = LocalRelation('a.int.withNullability(true)) + val nonNullableRelation = LocalRelation('a.int.withNullability(false)) test("Preserve nullable or non-deterministic exprs") { for (e <- Seq('a === 'a, Rand(0) === Rand(0))) { From bdc309c41ecebd2c364e21606ebdcf81bc502842 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Sun, 10 Apr 2016 23:48:06 -0700 Subject: [PATCH 4/5] Extend `EqualNullSafe` case to handle non-nullable exprs. --- .../sql/catalyst/optimizer/Optimizer.scala | 13 +++++++------ ... BinaryComparisonSimplificationSuite.scala} | 18 ++++++++++++------ 2 files changed, 19 insertions(+), 12 deletions(-) rename sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/{NonNullableBinaryComparisonSimplificationSuite.scala => BinaryComparisonSimplificationSuite.scala} (84%) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 005762d1d3997..bad115d22f1ae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -86,7 +86,7 @@ abstract class Optimizer extends RuleExecutor[LogicalPlan] { BooleanSimplification, SimplifyConditionals, RemoveDispensableExpressions, - NonNullableBinaryComparisonSimplification, + BinaryComparisonSimplification, PruneFilters, EliminateSorts, SimplifyCasts, @@ -788,16 +788,17 @@ object BooleanSimplification extends Rule[LogicalPlan] with PredicateHelper { } /** - * Simplifies semantically-equal non-nullable expressions: - * 1) Replace '=', '<=', and '>=' with 'true' literal - * 2) Replace '<' and '>' with 'false' literal + * Simplifies binary comparisons with semantically-equal expressions: + * 1) Replace '<=>' with 'true' literal. + * 2) Replace '=', '<=', and '>=' with 'true' literal if both operands are non-nullable. + * 3) Replace '<' and '>' with 'false' literal if both operands are non-nullable. */ -object NonNullableBinaryComparisonSimplification extends Rule[LogicalPlan] with PredicateHelper { +object BinaryComparisonSimplification extends Rule[LogicalPlan] with PredicateHelper { def apply(plan: LogicalPlan): LogicalPlan = plan transform { case q: LogicalPlan => q transformExpressionsUp { // True with equality + case a EqualNullSafe b if a.semanticEquals(b) => TrueLiteral case a EqualTo b if !a.nullable && !b.nullable && a.semanticEquals(b) => TrueLiteral - case a EqualNullSafe b if !a.nullable && !b.nullable && a.semanticEquals(b) => TrueLiteral case a GreaterThanOrEqual b if !a.nullable && !b.nullable && a.semanticEquals(b) => TrueLiteral case a LessThanOrEqual b if !a.nullable && !b.nullable && a.semanticEquals(b) => TrueLiteral diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala similarity index 84% rename from sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala rename to sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala index 4e2cf4f24c42e..c165c4ab4b684 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/NonNullableBinaryComparisonSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.catalyst.optimizer -import org.apache.spark.sql.Row import org.apache.spark.sql.catalyst.analysis._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.dsl.plans._ @@ -27,7 +26,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest import org.apache.spark.sql.catalyst.plans.logical._ import org.apache.spark.sql.catalyst.rules._ -class NonNullableBinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper { +class BinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper { object Optimize extends RuleExecutor[LogicalPlan] { val batches = @@ -37,22 +36,29 @@ class NonNullableBinaryComparisonSimplificationSuite extends PlanTest with Predi NullPropagation, ConstantFolding, BooleanSimplification, - NonNullableBinaryComparisonSimplification, + BinaryComparisonSimplification, PruneFilters) :: Nil } val nullableRelation = LocalRelation('a.int.withNullability(true)) val nonNullableRelation = LocalRelation('a.int.withNullability(false)) - test("Preserve nullable or non-deterministic exprs") { - for (e <- Seq('a === 'a, Rand(0) === Rand(0))) { - val plan = nullableRelation.where('a === 'a).analyze + test("Preserve nullable or non-deterministic exprs in general") { + for (e <- Seq('a === 'a, 'a <= 'a, 'a >= 'a, 'a < 'a, 'a > 'a, Rand(0) === Rand(0))) { + val plan = nullableRelation.where(e).analyze val actual = Optimize.execute(plan) val correctAnswer = plan comparePlans(actual, correctAnswer) } } + test("Nullable Simplification Primitive: <=>") { + val plan = nullableRelation.select('a <=> 'a).analyze + val actual = Optimize.execute(plan) + val correctAnswer = nullableRelation.select(Alias(TrueLiteral, "(a <=> a)")()).analyze + comparePlans(actual, correctAnswer) + } + test("Non-Nullable Simplification Primitive") { val plan = nonNullableRelation .select('a === 'a, 'a <=> 'a, 'a <= 'a, 'a >= 'a, 'a < 'a, 'a > 'a).analyze From 9ca13be7dd0d0994d072407ede52008e1bcffbb6 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 11 Apr 2016 02:17:20 -0700 Subject: [PATCH 5/5] Add non-deterministic test on non-nullable relation and remove TRUE/FALSE Filter tests. --- .../BinaryComparisonSimplificationSuite.scala | 26 +++++++------------ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala index c165c4ab4b684..7cd038570bbdf 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/BinaryComparisonSimplificationSuite.scala @@ -43,8 +43,8 @@ class BinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper val nullableRelation = LocalRelation('a.int.withNullability(true)) val nonNullableRelation = LocalRelation('a.int.withNullability(false)) - test("Preserve nullable or non-deterministic exprs in general") { - for (e <- Seq('a === 'a, 'a <= 'a, 'a >= 'a, 'a < 'a, 'a > 'a, Rand(0) === Rand(0))) { + test("Preserve nullable exprs in general") { + for (e <- Seq('a === 'a, 'a <= 'a, 'a >= 'a, 'a < 'a, 'a > 'a)) { val plan = nullableRelation.where(e).analyze val actual = Optimize.execute(plan) val correctAnswer = plan @@ -52,6 +52,14 @@ class BinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper } } + test("Preserve non-deterministic exprs") { + val plan = nonNullableRelation + .where(Rand(0) === Rand(0) && Rand(1) <=> Rand(1)).analyze + val actual = Optimize.execute(plan) + val correctAnswer = plan + comparePlans(actual, correctAnswer) + } + test("Nullable Simplification Primitive: <=>") { val plan = nullableRelation.select('a <=> 'a).analyze val actual = Optimize.execute(plan) @@ -75,20 +83,6 @@ class BinaryComparisonSimplificationSuite extends PlanTest with PredicateHelper comparePlans(actual, correctAnswer) } - test("TRUE Filter") { - val plan = nonNullableRelation.where('a === 'a && 'a <=> 'a && 'a <= 'a && 'a >= 'a).analyze - val actual = Optimize.execute(plan) - val correctAnswer = nonNullableRelation.analyze - comparePlans(actual, correctAnswer) - } - - test("FALSE Filter") { - val plan = nonNullableRelation.where('a < 'a || 'a > 'a).analyze - val actual = Optimize.execute(plan) - val correctAnswer = LocalRelation(Seq('a.int.withNullability(false)), Seq.empty) - comparePlans(actual, correctAnswer) - } - test("Expression Normalization") { val plan = nonNullableRelation.where( 'a * Literal(100) + Pi() === Pi() + Literal(100) * 'a &&