From 69f0fb614f2efefdced341294c1de8d84fcb4708 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 21 Jul 2015 14:40:34 +0800 Subject: [PATCH 1/8] Add code generation for Like and RLike. --- .../expressions/stringOperations.scala | 92 +++++++++++++++++++ 1 file changed, 92 insertions(+) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 5c1908d55576a..7eece7fbce10e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -187,6 +187,64 @@ case class Like(left: Expression, right: Expression) override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).matches() override def toString: String = s"$left LIKE $right" + + override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + val patternClass = classOf[Pattern].getName + + val literalRight: String = right match { + case x @ Literal(value: String, StringType) => escape(value) + case _ => null + } + + val leftGen = left.gen(ctx) + val rightGen = right.gen(ctx) + + val patternCode = + if (literalRight != null) { + s"${patternClass} pattern = $patternClass.compile($literalRight);" + } else { + s""" + StringBuilder regex = new StringBuilder("(?s)"); + for (int idx = 1; idx < rightStr.length(); idx++) { + char prev = rightStr.charAt(idx - 1); + char curr = rightStr.charAt(idx); + if (prev == '\\\\') { + if (curr == '_') { + regex.append("_"); + } else if (curr == '%') { + regex.append("%"); + } else { + regex.append(${patternClass}.quote("\\\\" + curr)); + } + } else { + if (curr != '\\\\') { + if (curr == '_') { + regex.append("."); + } else if (curr == '%') { + regex.append(".*"); + } else { + regex.append(${patternClass}.quote((new Character(curr)).toString())); + } + } + } + } + ${patternClass} pattern = ${patternClass}.compile(regex.toString()); + """ + } + + s""" + ${leftGen.code} + ${rightGen.code} + + boolean ${ev.isNull} = ${leftGen.isNull} || ${rightGen.isNull}; + ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; + if (!${ev.isNull}) { + String rightStr = " " + ${rightGen.primitive}.toString(); + $patternCode + ${ev.primitive} = pattern.matcher(${leftGen.primitive}.toString()).matches(); + } + """ + } } @@ -196,6 +254,40 @@ case class RLike(left: Expression, right: Expression) override def escape(v: String): String = v override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).find(0) override def toString: String = s"$left RLIKE $right" + + override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { + val patternClass = classOf[Pattern].getName + + val literalRight: String = right match { + case x @ Literal(value: String, StringType) => escape(value) + case _ => null + } + + val leftGen = left.gen(ctx) + val rightGen = right.gen(ctx) + + val patternCode = + if (literalRight != null) { + s"${patternClass} pattern = $patternClass.compile($literalRight);" + } else { + s""" + ${patternClass} pattern = ${patternClass}.compile(rightStr); + """ + } + + s""" + ${leftGen.code} + ${rightGen.code} + + boolean ${ev.isNull} = ${leftGen.isNull} || ${rightGen.isNull}; + ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; + if (!${ev.isNull}) { + String rightStr = ${rightGen.primitive}.toString(); + $patternCode + ${ev.primitive} = pattern.matcher(${leftGen.primitive}.toString()).find(0); + } + """ + } } From 6cffe3c250c6790c8cb059269375ae0268c71ccd Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 21 Jul 2015 16:12:09 +0800 Subject: [PATCH 2/8] For comments. --- .../expressions/stringOperations.scala | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 7eece7fbce10e..3f82e992c8775 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -190,6 +190,9 @@ case class Like(left: Expression, right: Expression) override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { val patternClass = classOf[Pattern].getName + val sb = classOf[StringBuilder].getName + val regex = ctx.freshName("regex") + val pattern = ctx.freshName("pattern") val literalRight: String = right match { case x @ Literal(value: String, StringType) => escape(value) @@ -204,44 +207,46 @@ case class Like(left: Expression, right: Expression) s"${patternClass} pattern = $patternClass.compile($literalRight);" } else { s""" - StringBuilder regex = new StringBuilder("(?s)"); + $sb $regex = new $sb("(?s)"); for (int idx = 1; idx < rightStr.length(); idx++) { char prev = rightStr.charAt(idx - 1); char curr = rightStr.charAt(idx); if (prev == '\\\\') { if (curr == '_') { - regex.append("_"); + $regex.append("_"); } else if (curr == '%') { - regex.append("%"); + $regex.append("%"); } else { - regex.append(${patternClass}.quote("\\\\" + curr)); + $regex.append(${patternClass}.quote("\\\\" + curr)); } } else { if (curr != '\\\\') { if (curr == '_') { - regex.append("."); + $regex.append("."); } else if (curr == '%') { - regex.append(".*"); + $regex.append(".*"); } else { - regex.append(${patternClass}.quote((new Character(curr)).toString())); + $regex.append(${patternClass}.quote((new Character(curr)).toString())); } } } } - ${patternClass} pattern = ${patternClass}.compile(regex.toString()); + ${patternClass} $pattern = ${patternClass}.compile($regex.toString()); """ } s""" ${leftGen.code} - ${rightGen.code} - - boolean ${ev.isNull} = ${leftGen.isNull} || ${rightGen.isNull}; + boolean ${ev.isNull} = ${leftGen.isNull}; ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { - String rightStr = " " + ${rightGen.primitive}.toString(); - $patternCode - ${ev.primitive} = pattern.matcher(${leftGen.primitive}.toString()).matches(); + ${rightGen.code} + ${ev.isNull} = ${rightGen.isNull}; + if (!${ev.isNull}) { + String rightStr = " " + ${rightGen.primitive}.toString(); + $patternCode + ${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).matches(); + } } """ } From a0fb76e9ef072f85b478fa5b75cf0515a94a0306 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 21 Jul 2015 16:24:23 +0800 Subject: [PATCH 3/8] For comments. --- .../expressions/stringOperations.scala | 21 +++++++++++-------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 3f82e992c8775..1f1a9fa22914a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -204,7 +204,7 @@ case class Like(left: Expression, right: Expression) val patternCode = if (literalRight != null) { - s"${patternClass} pattern = $patternClass.compile($literalRight);" + s"${patternClass} $pattern = ${patternClass}.compile($literalRight);" } else { s""" $sb $regex = new $sb("(?s)"); @@ -262,6 +262,7 @@ case class RLike(left: Expression, right: Expression) override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { val patternClass = classOf[Pattern].getName + val pattern = ctx.freshName("pattern") val literalRight: String = right match { case x @ Literal(value: String, StringType) => escape(value) @@ -273,23 +274,25 @@ case class RLike(left: Expression, right: Expression) val patternCode = if (literalRight != null) { - s"${patternClass} pattern = $patternClass.compile($literalRight);" + s"${patternClass} $pattern = ${patternClass}.compile($literalRight);" } else { s""" - ${patternClass} pattern = ${patternClass}.compile(rightStr); + ${patternClass} $pattern = ${patternClass}.compile(rightStr); """ } s""" ${leftGen.code} - ${rightGen.code} - - boolean ${ev.isNull} = ${leftGen.isNull} || ${rightGen.isNull}; + boolean ${ev.isNull} = ${leftGen.isNull}; ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; if (!${ev.isNull}) { - String rightStr = ${rightGen.primitive}.toString(); - $patternCode - ${ev.primitive} = pattern.matcher(${leftGen.primitive}.toString()).find(0); + ${rightGen.code} + ${ev.isNull} = ${rightGen.isNull}; + if (!${ev.isNull}) { + String rightStr = ${rightGen.primitive}.toString(); + $patternCode + ${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).find(0); + } } """ } From aea58e0737a60a9f3dcdab49c2c8dfd66d1f8e49 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 21 Jul 2015 17:28:30 +0800 Subject: [PATCH 4/8] For comments. --- .../expressions/stringOperations.scala | 65 ++++--------------- .../spark/sql/catalyst/util/StringUtils.scala | 47 ++++++++++++++ .../sql/catalyst/util/StringUtilsSuite.scala | 34 ++++++++++ 3 files changed, 92 insertions(+), 54 deletions(-) create mode 100644 sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala create mode 100644 sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 4f49a0170cb85..ec5e93eb8b477 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -24,6 +24,7 @@ import java.util.regex.{MatchResult, Pattern} import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.UnresolvedException import org.apache.spark.sql.catalyst.expressions.codegen._ +import org.apache.spark.sql.catalyst.util.StringUtils import org.apache.spark.sql.types._ import org.apache.spark.unsafe.types.UTF8String @@ -161,28 +162,7 @@ trait StringRegexExpression extends ImplicitCastInputTypes { case class Like(left: Expression, right: Expression) extends BinaryExpression with StringRegexExpression with CodegenFallback { - // replace the _ with .{1} exactly match 1 time of any character - // replace the % with .*, match 0 or more times with any character - override def escape(v: String): String = - if (!v.isEmpty) { - "(?s)" + (' ' +: v.init).zip(v).flatMap { - case (prev, '\\') => "" - case ('\\', c) => - c match { - case '_' => "_" - case '%' => "%" - case _ => Pattern.quote("\\" + c) - } - case (prev, c) => - c match { - case '_' => "." - case '%' => ".*" - case _ => Pattern.quote(Character.toString(c)) - } - }.mkString - } else { - v - } + override def escape(v: String): String = StringUtils.escapeLikeRegex(v) override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).matches() @@ -190,8 +170,7 @@ case class Like(left: Expression, right: Expression) override protected def genCode(ctx: CodeGenContext, ev: GeneratedExpressionCode): String = { val patternClass = classOf[Pattern].getName - val sb = classOf[StringBuilder].getName - val regex = ctx.freshName("regex") + val escapeFunc = StringUtils.getClass.getName.stripSuffix("$") + ".escapeLikeRegex" val pattern = ctx.freshName("pattern") val literalRight: String = right match { @@ -204,35 +183,11 @@ case class Like(left: Expression, right: Expression) val patternCode = if (literalRight != null) { - s"${patternClass} $pattern = ${patternClass}.compile($literalRight);" + ctx.addMutableState(patternClass, pattern, + s"${patternClass}.compile($literalRight)") + "" } else { - s""" - $sb $regex = new $sb("(?s)"); - for (int idx = 1; idx < rightStr.length(); idx++) { - char prev = rightStr.charAt(idx - 1); - char curr = rightStr.charAt(idx); - if (prev == '\\\\') { - if (curr == '_') { - $regex.append("_"); - } else if (curr == '%') { - $regex.append("%"); - } else { - $regex.append(${patternClass}.quote("\\\\" + curr)); - } - } else { - if (curr != '\\\\') { - if (curr == '_') { - $regex.append("."); - } else if (curr == '%') { - $regex.append(".*"); - } else { - $regex.append(${patternClass}.quote((new Character(curr)).toString())); - } - } - } - } - ${patternClass} $pattern = ${patternClass}.compile($regex.toString()); - """ + s"${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr));" } s""" @@ -243,7 +198,7 @@ case class Like(left: Expression, right: Expression) ${rightGen.code} ${ev.isNull} = ${rightGen.isNull}; if (!${ev.isNull}) { - String rightStr = " " + ${rightGen.primitive}.toString(); + String rightStr = ${rightGen.primitive}.toString(); $patternCode ${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).matches(); } @@ -274,7 +229,9 @@ case class RLike(left: Expression, right: Expression) val patternCode = if (literalRight != null) { - s"${patternClass} $pattern = ${patternClass}.compile($literalRight);" + ctx.addMutableState(patternClass, pattern, + s"${patternClass}.compile($literalRight)") + "" } else { s""" ${patternClass} $pattern = ${patternClass}.compile(rightStr); diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala new file mode 100644 index 0000000000000..9ddfb3a0d3759 --- /dev/null +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/StringUtils.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import java.util.regex.Pattern + +object StringUtils { + + // replace the _ with .{1} exactly match 1 time of any character + // replace the % with .*, match 0 or more times with any character + def escapeLikeRegex(v: String): String = { + if (!v.isEmpty) { + "(?s)" + (' ' +: v.init).zip(v).flatMap { + case (prev, '\\') => "" + case ('\\', c) => + c match { + case '_' => "_" + case '%' => "%" + case _ => Pattern.quote("\\" + c) + } + case (prev, c) => + c match { + case '_' => "." + case '%' => ".*" + case _ => Pattern.quote(Character.toString(c)) + } + }.mkString + } else { + v + } + } +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala new file mode 100644 index 0000000000000..d6f273f9e568a --- /dev/null +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/StringUtilsSuite.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.catalyst.util + +import org.apache.spark.SparkFunSuite +import org.apache.spark.sql.catalyst.util.StringUtils._ + +class StringUtilsSuite extends SparkFunSuite { + + test("escapeLikeRegex") { + assert(escapeLikeRegex("abdef") === "(?s)\\Qa\\E\\Qb\\E\\Qd\\E\\Qe\\E\\Qf\\E") + assert(escapeLikeRegex("a\\__b") === "(?s)\\Qa\\E_.\\Qb\\E") + assert(escapeLikeRegex("a_%b") === "(?s)\\Qa\\E..*\\Qb\\E") + assert(escapeLikeRegex("a%\\%b") === "(?s)\\Qa\\E.*%\\Qb\\E") + assert(escapeLikeRegex("a%") === "(?s)\\Qa\\E.*") + assert(escapeLikeRegex("**") === "(?s)\\Q*\\E\\Q*\\E") + assert(escapeLikeRegex("a_b") === "(?s)\\Qa\\E.\\Qb\\E") + } +} From 696d451e1ca528ec81fb7d42009a86f2dae53df6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 22 Jul 2015 15:10:18 +0800 Subject: [PATCH 5/8] Check expression foldable. --- .../expressions/stringOperations.scala | 54 +++++++++++-------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 857b843bdd4f6..f442aa2bc78b8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -21,6 +21,8 @@ import java.text.DecimalFormat import java.util.Locale import java.util.regex.{MatchResult, Pattern} +import org.apache.commons.lang3.StringEscapeUtils + import org.apache.spark.sql.catalyst.InternalRow import org.apache.spark.sql.catalyst.analysis.UnresolvedException import org.apache.spark.sql.catalyst.expressions.codegen._ @@ -173,21 +175,27 @@ case class Like(left: Expression, right: Expression) val escapeFunc = StringUtils.getClass.getName.stripSuffix("$") + ".escapeLikeRegex" val pattern = ctx.freshName("pattern") - val literalRight: String = right match { - case x @ Literal(value: String, StringType) => escape(value) - case _ => null - } - val leftGen = left.gen(ctx) val rightGen = right.gen(ctx) val patternCode = - if (literalRight != null) { - ctx.addMutableState(patternClass, pattern, - s"${patternClass}.compile($literalRight)") - "" + if (right.foldable) { + val rVal = right.eval() + if (rVal != null) { + val regexStr = + StringEscapeUtils.escapeJava(escape(rVal.asInstanceOf[UTF8String].toString())) + ctx.addMutableState(patternClass, pattern, + s"""$pattern = ${patternClass}.compile("$regexStr");""") + s"${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).matches();" + } else { + "" + } } else { - s"${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr));" + s""" + String rightStr = ${rightGen.primitive}.toString(); + ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr)); + ${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).matches(); + """ } s""" @@ -198,9 +206,7 @@ case class Like(left: Expression, right: Expression) ${rightGen.code} ${ev.isNull} = ${rightGen.isNull}; if (!${ev.isNull}) { - String rightStr = ${rightGen.primitive}.toString(); $patternCode - ${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).matches(); } } """ @@ -219,22 +225,26 @@ case class RLike(left: Expression, right: Expression) val patternClass = classOf[Pattern].getName val pattern = ctx.freshName("pattern") - val literalRight: String = right match { - case x @ Literal(value: String, StringType) => escape(value) - case _ => null - } - val leftGen = left.gen(ctx) val rightGen = right.gen(ctx) val patternCode = - if (literalRight != null) { - ctx.addMutableState(patternClass, pattern, - s"${patternClass}.compile($literalRight)") - "" + if (right.foldable) { + val rVal = right.eval() + if (rVal != null) { + val regexStr = + StringEscapeUtils.escapeJava(rVal.asInstanceOf[UTF8String].toString()) + ctx.addMutableState(patternClass, pattern, + s"""$pattern = ${patternClass}.compile("$regexStr");""") + s"${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).find(0);" + } else { + "" + } } else { s""" + String rightStr = ${rightGen.primitive}.toString(); ${patternClass} $pattern = ${patternClass}.compile(rightStr); + ${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).find(0); """ } @@ -246,9 +256,7 @@ case class RLike(left: Expression, right: Expression) ${rightGen.code} ${ev.isNull} = ${rightGen.isNull}; if (!${ev.isNull}) { - String rightStr = ${rightGen.primitive}.toString(); $patternCode - ${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).find(0); } } """ From 50df9a869d9f86bd1bc6c42ea66628ec819cbaf3 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 27 Jul 2015 21:40:09 +0800 Subject: [PATCH 6/8] Use nullSafeCodeGen. --- .../expressions/stringOperations.scala | 105 +++++++----------- 1 file changed, 40 insertions(+), 65 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index badee56289ca0..71c20a4b37629 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -175,41 +175,29 @@ case class Like(left: Expression, right: Expression) val escapeFunc = StringUtils.getClass.getName.stripSuffix("$") + ".escapeLikeRegex" val pattern = ctx.freshName("pattern") - val leftGen = left.gen(ctx) - val rightGen = right.gen(ctx) - - val patternCode = - if (right.foldable) { - val rVal = right.eval() - if (rVal != null) { - val regexStr = - StringEscapeUtils.escapeJava(escape(rVal.asInstanceOf[UTF8String].toString())) - ctx.addMutableState(patternClass, pattern, - s"""$pattern = ${patternClass}.compile("$regexStr");""") - s"${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).matches();" + nullSafeCodeGen(ctx, ev, (eval1, eval2) => { + val patternCode = + if (right.foldable) { + val rVal = right.eval() + if (rVal != null) { + val regexStr = + StringEscapeUtils.escapeJava(escape(rVal.asInstanceOf[UTF8String].toString())) + ctx.addMutableState(patternClass, pattern, + s"""$pattern = ${patternClass}.compile("$regexStr");""") + s"${ev.primitive} = $pattern.matcher(${eval1}.toString()).matches();" + } else { + "" + } } else { - "" + s""" + String rightStr = ${eval2}.toString(); + ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr)); + ${ev.primitive} = $pattern.matcher(${eval1}.toString()).matches(); + """ } - } else { - s""" - String rightStr = ${rightGen.primitive}.toString(); - ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr)); - ${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).matches(); - """ - } - s""" - ${leftGen.code} - boolean ${ev.isNull} = ${leftGen.isNull}; - ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; - if (!${ev.isNull}) { - ${rightGen.code} - ${ev.isNull} = ${rightGen.isNull}; - if (!${ev.isNull}) { - $patternCode - } - } - """ + s"$patternCode" + }) } } @@ -225,41 +213,28 @@ case class RLike(left: Expression, right: Expression) val patternClass = classOf[Pattern].getName val pattern = ctx.freshName("pattern") - val leftGen = left.gen(ctx) - val rightGen = right.gen(ctx) - - val patternCode = - if (right.foldable) { - val rVal = right.eval() - if (rVal != null) { - val regexStr = - StringEscapeUtils.escapeJava(rVal.asInstanceOf[UTF8String].toString()) - ctx.addMutableState(patternClass, pattern, - s"""$pattern = ${patternClass}.compile("$regexStr");""") - s"${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).find(0);" + nullSafeCodeGen(ctx, ev, (eval1, eval2) => { + val patternCode = + if (right.foldable) { + val rVal = right.eval() + if (rVal != null) { + val regexStr = + StringEscapeUtils.escapeJava(rVal.asInstanceOf[UTF8String].toString()) + ctx.addMutableState(patternClass, pattern, + s"""$pattern = ${patternClass}.compile("$regexStr");""") + s"${ev.primitive} = $pattern.matcher(${eval1}.toString()).find(0);" + } else { + "" + } } else { - "" + s""" + String rightStr = ${eval2}.toString(); + ${patternClass} $pattern = ${patternClass}.compile(rightStr); + ${ev.primitive} = $pattern.matcher(${eval1}.toString()).find(0); + """ } - } else { - s""" - String rightStr = ${rightGen.primitive}.toString(); - ${patternClass} $pattern = ${patternClass}.compile(rightStr); - ${ev.primitive} = $pattern.matcher(${leftGen.primitive}.toString()).find(0); - """ - } - - s""" - ${leftGen.code} - boolean ${ev.isNull} = ${leftGen.isNull}; - ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; - if (!${ev.isNull}) { - ${rightGen.code} - ${ev.isNull} = ${rightGen.isNull}; - if (!${ev.isNull}) { - $patternCode - } - } - """ + s"$patternCode" + }) } } From ccd1b438319ac120ae00859b10bb804907e64195 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 31 Jul 2015 12:26:35 +0800 Subject: [PATCH 7/8] For comments. --- .../expressions/stringOperations.scala | 107 +++++++++++------- 1 file changed, 64 insertions(+), 43 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index b1047503771af..99a62343f138d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -174,29 +174,39 @@ case class Like(left: Expression, right: Expression) val escapeFunc = StringUtils.getClass.getName.stripSuffix("$") + ".escapeLikeRegex" val pattern = ctx.freshName("pattern") - nullSafeCodeGen(ctx, ev, (eval1, eval2) => { - val patternCode = - if (right.foldable) { - val rVal = right.eval() - if (rVal != null) { - val regexStr = - StringEscapeUtils.escapeJava(escape(rVal.asInstanceOf[UTF8String].toString())) - ctx.addMutableState(patternClass, pattern, - s"""$pattern = ${patternClass}.compile("$regexStr");""") - s"${ev.primitive} = $pattern.matcher(${eval1}.toString()).matches();" - } else { - "" + if (right.foldable) { + val rVal = right.eval() + if (rVal != null) { + val regexStr = + StringEscapeUtils.escapeJava(escape(rVal.asInstanceOf[UTF8String].toString())) + ctx.addMutableState(patternClass, pattern, + s"""$pattern = ${patternClass}.compile("$regexStr");""") + + // We don't use nullSafeCodeGen here because we don't want to re-evaluate right again. + val eval = left.gen(ctx) + s""" + ${eval.code} + boolean ${ev.isNull} = ${eval.isNull}; + ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; + if (!${ev.isNull}) { + ${ev.primitive} = $pattern.matcher(${eval.primitive}.toString()).matches(); } - } else { - s""" - String rightStr = ${eval2}.toString(); - ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr)); - ${ev.primitive} = $pattern.matcher(${eval1}.toString()).matches(); - """ - } - - s"$patternCode" - }) + """ + } else { + s""" + boolean ${ev.isNull} = true; + ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; + """ + } + } else { + nullSafeCodeGen(ctx, ev, (eval1, eval2) => { + s""" + String rightStr = ${eval2}.toString(); + ${patternClass} $pattern = ${patternClass}.compile($escapeFunc(rightStr)); + ${ev.primitive} = $pattern.matcher(${eval1}.toString()).matches(); + """ + }) + } } } @@ -212,28 +222,39 @@ case class RLike(left: Expression, right: Expression) val patternClass = classOf[Pattern].getName val pattern = ctx.freshName("pattern") - nullSafeCodeGen(ctx, ev, (eval1, eval2) => { - val patternCode = - if (right.foldable) { - val rVal = right.eval() - if (rVal != null) { - val regexStr = - StringEscapeUtils.escapeJava(rVal.asInstanceOf[UTF8String].toString()) - ctx.addMutableState(patternClass, pattern, - s"""$pattern = ${patternClass}.compile("$regexStr");""") - s"${ev.primitive} = $pattern.matcher(${eval1}.toString()).find(0);" - } else { - "" + if (right.foldable) { + val rVal = right.eval() + if (rVal != null) { + val regexStr = + StringEscapeUtils.escapeJava(rVal.asInstanceOf[UTF8String].toString()) + ctx.addMutableState(patternClass, pattern, + s"""$pattern = ${patternClass}.compile("$regexStr");""") + + // We don't use nullSafeCodeGen here because we don't want to re-evaluate right again. + val eval = left.gen(ctx) + s""" + ${eval.code} + boolean ${ev.isNull} = ${eval.isNull}; + ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; + if (!${ev.isNull}) { + ${ev.primitive} = $pattern.matcher(${eval.primitive}.toString()).find(0); } - } else { - s""" - String rightStr = ${eval2}.toString(); - ${patternClass} $pattern = ${patternClass}.compile(rightStr); - ${ev.primitive} = $pattern.matcher(${eval1}.toString()).find(0); - """ - } - s"$patternCode" - }) + """ + } else { + s""" + boolean ${ev.isNull} = true; + ${ctx.javaType(dataType)} ${ev.primitive} = ${ctx.defaultValue(dataType)}; + """ + } + } else { + nullSafeCodeGen(ctx, ev, (eval1, eval2) => { + s""" + String rightStr = ${eval2}.toString(); + ${patternClass} $pattern = ${patternClass}.compile(rightStr); + ${ev.primitive} = $pattern.matcher(${eval1}.toString()).find(0); + """ + }) + } } } From fe5641bde1687a0f223fa3973e8fbd6387e21e9a Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 31 Jul 2015 12:38:31 +0800 Subject: [PATCH 8/8] Add test for NonFoldableLiteral. --- .../expressions/StringExpressionsSuite.scala | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala index 07b952531ec2e..3ecd0d374c46b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/StringExpressionsSuite.scala @@ -191,6 +191,15 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Literal.create(null, StringType).like("a"), null) checkEvaluation(Literal.create("a", StringType).like(Literal.create(null, StringType)), null) checkEvaluation(Literal.create(null, StringType).like(Literal.create(null, StringType)), null) + checkEvaluation( + Literal.create("a", StringType).like(NonFoldableLiteral.create("a", StringType)), true) + checkEvaluation( + Literal.create("a", StringType).like(NonFoldableLiteral.create(null, StringType)), null) + checkEvaluation( + Literal.create(null, StringType).like(NonFoldableLiteral.create("a", StringType)), null) + checkEvaluation( + Literal.create(null, StringType).like(NonFoldableLiteral.create(null, StringType)), null) + checkEvaluation("abdef" like "abdef", true) checkEvaluation("a_%b" like "a\\__b", true) checkEvaluation("addb" like "a_%b", true) @@ -232,6 +241,13 @@ class StringExpressionsSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(Literal.create(null, StringType) rlike "abdef", null) checkEvaluation("abdef" rlike Literal.create(null, StringType), null) checkEvaluation(Literal.create(null, StringType) rlike Literal.create(null, StringType), null) + checkEvaluation("abdef" rlike NonFoldableLiteral.create("abdef", StringType), true) + checkEvaluation("abdef" rlike NonFoldableLiteral.create(null, StringType), null) + checkEvaluation( + Literal.create(null, StringType) rlike NonFoldableLiteral.create("abdef", StringType), null) + checkEvaluation( + Literal.create(null, StringType) rlike NonFoldableLiteral.create(null, StringType), null) + checkEvaluation("abdef" rlike "abdef", true) checkEvaluation("abbbbc" rlike "a.*c", true)