From 8d496f63dd3e760a6c165738152afe44a5203fda Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 11 Apr 2016 17:01:19 -0700 Subject: [PATCH 1/4] [SPARK-14545][SQL] Improve `LikeSimplification` by adding `a%b` rule --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 4 ++++ .../optimizer/LikeSimplificationSuite.scala | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index bad115d22f1ae..46cbae1072886 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -519,6 +519,7 @@ object LikeSimplification extends Rule[LogicalPlan] { // Cases like "something\%" are not optimized, but this does not affect correctness. private val startsWith = "([^_%]+)%".r private val endsWith = "%([^_%]+)".r + private val startsAndEndsWith = "([^_%]+)%([^_%]+)".r private val contains = "%([^_%]+)%".r private val equalTo = "([^_%]*)".r @@ -529,6 +530,9 @@ object LikeSimplification extends Rule[LogicalPlan] { StartsWith(l, Literal(pattern)) case endsWith(pattern) => EndsWith(l, Literal(pattern)) + case startsAndEndsWith(prefix, postfix) if !prefix.endsWith("\\") => + And(GreaterThanOrEqual(Length(l), Literal(prefix.size + postfix.size)), + And(StartsWith(l, Literal(prefix)), EndsWith(l, Literal(postfix)))) case contains(pattern) if !pattern.endsWith("\\") => Contains(l, Literal(pattern)) case equalTo(pattern) => diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala index 741bc113cfcda..d4d95ec0f251d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala @@ -61,6 +61,20 @@ class LikeSimplificationSuite extends PlanTest { comparePlans(optimized, correctAnswer) } + test("simplify Like into StartsAndEndsWith") { + val originalQuery = + testRelation + .where(('a like "abc\\%def") || ('a like "abc%def")) + + val optimized = Optimize.execute(originalQuery.analyze) + val correctAnswer = testRelation + .where(('a like "abc\\%def") || + (Length('a) >= 6 && (StartsWith('a, "abc") && EndsWith('a, "def")))) + .analyze + + comparePlans(optimized, correctAnswer) + } + test("simplify Like into Contains") { val originalQuery = testRelation From 16ab6c87e234a6bf7d5acbc8407868c2cefd2b35 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Tue, 12 Apr 2016 10:09:08 -0700 Subject: [PATCH 2/4] Add comments and change testcase name. --- .../org/apache/spark/sql/catalyst/optimizer/Optimizer.scala | 2 ++ .../spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 46cbae1072886..c01be27297433 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -530,6 +530,8 @@ object LikeSimplification extends Rule[LogicalPlan] { StartsWith(l, Literal(pattern)) case endsWith(pattern) => EndsWith(l, Literal(pattern)) + // 'a%a' pattern is basically same with 'a%' && '%a'. + // However, the additional `Length` condition is required to prevent 'a' match 'a%a'. case startsAndEndsWith(prefix, postfix) if !prefix.endsWith("\\") => And(GreaterThanOrEqual(Length(l), Literal(prefix.size + postfix.size)), And(StartsWith(l, Literal(prefix)), EndsWith(l, Literal(postfix)))) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala index d4d95ec0f251d..fdde89d079bc0 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/optimizer/LikeSimplificationSuite.scala @@ -61,7 +61,7 @@ class LikeSimplificationSuite extends PlanTest { comparePlans(optimized, correctAnswer) } - test("simplify Like into StartsAndEndsWith") { + test("simplify Like into startsWith and EndsWith") { val originalQuery = testRelation .where(('a like "abc\\%def") || ('a like "abc%def")) From 5753437bc84558b9f18a77d4c0194e7c57c66686 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 14 Apr 2016 10:54:11 -0700 Subject: [PATCH 3/4] Rename variables --- .../sql/catalyst/optimizer/Optimizer.scala | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index c01be27297433..67bba602bcdaa 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -524,23 +524,23 @@ object LikeSimplification extends Rule[LogicalPlan] { private val equalTo = "([^_%]*)".r def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case Like(l, Literal(utf, StringType)) => - utf.toString match { - case startsWith(pattern) if !pattern.endsWith("\\") => - StartsWith(l, Literal(pattern)) - case endsWith(pattern) => - EndsWith(l, Literal(pattern)) + case Like(l, Literal(pattern, StringType)) => + pattern.toString match { + case startsWith(prefix) if !prefix.endsWith("\\") => + StartsWith(l, Literal(prefix)) + case endsWith(postfix) => + EndsWith(l, Literal(postfix)) // 'a%a' pattern is basically same with 'a%' && '%a'. // However, the additional `Length` condition is required to prevent 'a' match 'a%a'. case startsAndEndsWith(prefix, postfix) if !prefix.endsWith("\\") => And(GreaterThanOrEqual(Length(l), Literal(prefix.size + postfix.size)), And(StartsWith(l, Literal(prefix)), EndsWith(l, Literal(postfix)))) - case contains(pattern) if !pattern.endsWith("\\") => - Contains(l, Literal(pattern)) - case equalTo(pattern) => - EqualTo(l, Literal(pattern)) + case contains(infix) if !infix.endsWith("\\") => + Contains(l, Literal(infix)) + case equalTo(str) => + EqualTo(l, Literal(str)) case _ => - Like(l, Literal.create(utf, StringType)) + Like(l, Literal.create(pattern, StringType)) } } } From 111a78c8b91cc2f81ca8ad50c1730d7f54cb8186 Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Thu, 14 Apr 2016 11:13:53 -0700 Subject: [PATCH 4/4] Rename `l` with `input` --- .../spark/sql/catalyst/optimizer/Optimizer.scala | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala index 67bba602bcdaa..eac04c80def67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/Optimizer.scala @@ -524,23 +524,23 @@ object LikeSimplification extends Rule[LogicalPlan] { private val equalTo = "([^_%]*)".r def apply(plan: LogicalPlan): LogicalPlan = plan transformAllExpressions { - case Like(l, Literal(pattern, StringType)) => + case Like(input, Literal(pattern, StringType)) => pattern.toString match { case startsWith(prefix) if !prefix.endsWith("\\") => - StartsWith(l, Literal(prefix)) + StartsWith(input, Literal(prefix)) case endsWith(postfix) => - EndsWith(l, Literal(postfix)) + EndsWith(input, Literal(postfix)) // 'a%a' pattern is basically same with 'a%' && '%a'. // However, the additional `Length` condition is required to prevent 'a' match 'a%a'. case startsAndEndsWith(prefix, postfix) if !prefix.endsWith("\\") => - And(GreaterThanOrEqual(Length(l), Literal(prefix.size + postfix.size)), - And(StartsWith(l, Literal(prefix)), EndsWith(l, Literal(postfix)))) + And(GreaterThanOrEqual(Length(input), Literal(prefix.size + postfix.size)), + And(StartsWith(input, Literal(prefix)), EndsWith(input, Literal(postfix)))) case contains(infix) if !infix.endsWith("\\") => - Contains(l, Literal(infix)) + Contains(input, Literal(infix)) case equalTo(str) => - EqualTo(l, Literal(str)) + EqualTo(input, Literal(str)) case _ => - Like(l, Literal.create(pattern, StringType)) + Like(input, Literal.create(pattern, StringType)) } } }