From 2c8929e6b9fe7f9d8ed38af12d0474bd9b0bbd13 Mon Sep 17 00:00:00 2001 From: Cheng Hao Date: Tue, 25 Mar 2014 10:35:22 +0800 Subject: [PATCH 1/5] Update the unit test for expression evaluation --- .../ExpressionEvaluationSuite.scala | 179 ++++++++++++++---- 1 file changed, 146 insertions(+), 33 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index c8fd581aa7b47..0a684855fa332 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -26,7 +26,112 @@ import org.apache.spark.sql.catalyst.types._ /* Implicit conversions */ import org.apache.spark.sql.catalyst.dsl.expressions._ -class ExpressionEvaluationSuite extends FunSuite { + +/** + * Root class of expression evaluation test + */ +trait ExprEvalTest { + type Execution = (Row => Row) + + def engine: Execution +} + +case class InterpretExprEvalTest(exprs: Seq[Expression]) extends ExprEvalTest { + override def engine: Execution = new InterpretedProjection(exprs) +} + +class InterpretExpressionEvaluationSuite extends ExpressionEvaluationSuite { + override def executor(exprs: Seq[Expression]) = InterpretExprEvalTest(exprs) +} + +trait ExpressionEvaluationSuite extends FunSuite { + /** + * The sub classes need to create the ExprEvalTest object + */ + def executor(exprs: Seq[Expression]): ExprEvalTest + + val data: Row = new GenericRow(Array(1, null, 1.0, true, 4, 5, null, "abcccd")) + + // TODO add to DSL + val c1 = BoundReference(0, AttributeReference("a", IntegerType)()) + val c2 = BoundReference(1, AttributeReference("b", IntegerType)()) + val c3 = BoundReference(2, AttributeReference("c", DoubleType)()) + val c4 = BoundReference(3, AttributeReference("d", BooleanType)()) + val c5 = BoundReference(4, AttributeReference("e", IntegerType)()) + val c6 = BoundReference(5, AttributeReference("f", IntegerType)()) + val c7 = BoundReference(6, AttributeReference("g", StringType)()) + val c8 = BoundReference(7, AttributeReference("h", StringType)()) + + def verify(expected: Seq[(Boolean, Any)], result: Row, input: Row) { + Seq.tabulate(expected.size) { i => + expected(i) match { + case (false, expected) => { + assert(result.isNullAt(i) == false, s"Input:($input), Output field:$i shouldn't be null") + val real = result.apply(i) + assert(real == expected, s"Input:($input), Output field:$i is expected as $expected, but got $real") + } + case (true, _) => { + assert(result.isNullAt(i), s"Input:($input), Output field:$i is expected as null") + } + } + } + } + + def verify(expecteds: Seq[Seq[(Boolean, Any)]], results: Seq[Row], inputs: Seq[Row]) { + Range(0, expecteds.length).foreach { i => + verify(expecteds(i), results(i), inputs(i)) + } + } + + def run(exprs: Seq[Expression], expected: Seq[(Boolean, Any)], input: Row) { + val tester = executor(exprs) + verify(expected, tester.engine.apply(input), input) + } + + def run(exprs: Seq[Expression], expecteds: Seq[Seq[(Boolean, Any)]], inputs: Seq[Row]) { + val tester = executor(exprs) + + verify(expecteds, inputs.map(tester.engine.apply(_)), inputs) + } + + test("logical") { + val expected = Seq[(Boolean, Any)]( + (false, false), + (true, -1), + (false, true), + (false, true), + (false, false)) + + val exprs = Seq[Expression](And(LessThan(Cast(c1, DoubleType), c3), LessThan(c1, c2)), + Or(LessThan(Cast(c1, DoubleType), c3), LessThan(c1, c2)), + IsNull(c2), + IsNotNull(c3), + Not(c4)) + + run(exprs, expected, data) + } + + test("arithmetic") { + val exprs = Array[Expression]( + Add(c1, c2), + Add(c1, c5), + Divide(c1, c5), + Subtract(c1, c5), + Multiply(c1, c5), + Remainder(c1, c5), + UnaryMinus(c1) + ) + val expecteds = Seq[(Boolean, Any)]( + (true, 0), + (false, 5), + (false, 0), + (false, -3), + (false, 4), + (false, 1), + (false, -1)) + + run(exprs, expecteds, data) + } test("literals") { assert((Literal(1) + Literal(1)).apply(null) === 2) @@ -53,22 +158,21 @@ class ExpressionEvaluationSuite extends FunSuite { * Unknown Unknown */ - val notTrueTable = - (true, false) :: - (false, true) :: - (null, null) :: Nil - + val b1 = BoundReference(0, AttributeReference("a", BooleanType)()) + val b2 = BoundReference(1, AttributeReference("b", BooleanType)()) + test("3VL Not") { - notTrueTable.foreach { - case (v, answer) => - val expr = Not(Literal(v, BooleanType)) - val result = expr.apply(null) - if (result != answer) - fail(s"$expr should not evaluate to $result, expected: $answer") } + val table = (true, false) :: (false, true) :: (null, null) :: Nil + + val exprs = Array[Expression](Not(b1)) + val inputs = table.map { case(v, answer) => new GenericRow(Array(v)) } + val expected = table.map { case(v, answer) => Seq((answer == null, answer)) } + + run(exprs, expected, inputs) } - booleanLogicTest("AND", _ && _, - (true, true, true) :: + test("3VL AND") { + val table = (true, true, true) :: (true, false, false) :: (true, null, null) :: (false, true, false) :: @@ -76,10 +180,17 @@ class ExpressionEvaluationSuite extends FunSuite { (false, null, false) :: (null, true, null) :: (null, false, false) :: - (null, null, null) :: Nil) + (null, null, null) :: Nil + + val exprs = Seq[Expression](And(b1, b2)) + val inputs = table.map { case(v1, v2, answer) => new GenericRow(Array(v1, v2)) } + val expected = table.map { case(v1, v2, answer) => Seq((answer == null, answer)) } + + run(exprs, expected, inputs) + } - booleanLogicTest("OR", _ || _, - (true, true, true) :: + test("3VL OR") { + val table = (true, true, true) :: (true, false, true) :: (true, null, true) :: (false, true, true) :: @@ -87,10 +198,17 @@ class ExpressionEvaluationSuite extends FunSuite { (false, null, null) :: (null, true, true) :: (null, false, null) :: - (null, null, null) :: Nil) - - booleanLogicTest("=", _ === _, - (true, true, true) :: + (null, null, null) :: Nil + + val exprs = Array[Expression](Or(b1, b2)) + val inputs = table.map { case(v1, v2, answer) => new GenericRow(Array(v1, v2)) } + val expected = table.map { case(v1, v2, answer) => Seq((answer == null, answer)) } + + run(exprs, expected, inputs) + } + + test("3VL Equals") { + val table = (true, true, true) :: (true, false, false) :: (true, null, null) :: (false, true, false) :: @@ -98,17 +216,12 @@ class ExpressionEvaluationSuite extends FunSuite { (false, null, null) :: (null, true, null) :: (null, false, null) :: - (null, null, null) :: Nil) - - def booleanLogicTest(name: String, op: (Expression, Expression) => Expression, truthTable: Seq[(Any, Any, Any)]) { - test(s"3VL $name") { - truthTable.foreach { - case (l,r,answer) => - val expr = op(Literal(l, BooleanType), Literal(r, BooleanType)) - val result = expr.apply(null) - if (result != answer) - fail(s"$expr should not evaluate to $result, expected: $answer") - } - } + (null, null, null) :: Nil + + val exprs = Array[Expression](Equals(b1, b2)) + val inputs = table.map { case(v1, v2, answer) => new GenericRow(Array(v1, v2)) } + val expected = table.map { case(v1, v2, answer) => Seq((answer == null, answer)) } + + run(exprs, expected, inputs) } } From 91cfd3375f4603e94944952f2b6bc2b8c5d4468e Mon Sep 17 00:00:00 2001 From: Cheng Hao Date: Tue, 25 Mar 2014 15:20:43 +0800 Subject: [PATCH 2/5] add implementation for rlike/like Conflicts: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala --- .../apache/spark/sql/catalyst/SqlParser.scala | 4 + .../expressions/stringOperations.scala | 110 +++++++++++++++++- .../ExpressionEvaluationSuite.scala | 97 +++++++++++++-- .../org/apache/spark/sql/hive/HiveQl.scala | 6 +- 4 files changed, 200 insertions(+), 17 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 9dec4e3d9e4c2..83b836f94f7cc 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -114,6 +114,8 @@ class SqlParser extends StandardTokenParsers { protected val NULL = Keyword("NULL") protected val ON = Keyword("ON") protected val OR = Keyword("OR") + protected val LIKE = Keyword("LIKE") + protected val RLIKE = Keyword("RLIKE") protected val ORDER = Keyword("ORDER") protected val OUTER = Keyword("OUTER") protected val RIGHT = Keyword("RIGHT") @@ -267,6 +269,8 @@ class SqlParser extends StandardTokenParsers { termExpression ~ ">=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => GreaterThanOrEqual(e1, e2) } | termExpression ~ "!=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(Equals(e1, e2)) } | termExpression ~ "<>" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(Equals(e1, e2)) } | + termExpression ~ RLIKE ~ termExpression ^^ { case e1 ~ _ ~ e2 => RLike(e1, e2) } | + termExpression ~ LIKE ~ termExpression ^^ { case e1 ~ _ ~ e2 => Like(e1, e2) } | termExpression ~ IN ~ "(" ~ rep1sep(termExpression, ",") <~ ")" ^^ { case e1 ~ _ ~ _ ~ e2 => In(e1, e2) } | diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 7584fe03cf745..04e066237e2ae 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -20,10 +20,114 @@ package catalyst package expressions import org.apache.spark.sql.catalyst.types.BooleanType +import java.util.regex.Pattern -case class Like(left: Expression, right: Expression) extends BinaryExpression { - def dataType = BooleanType - def nullable = left.nullable // Right cannot be null. +import catalyst.types.StringType +import catalyst.types.BooleanType +import catalyst.trees.TreeNode + +import catalyst.errors.`package`.TreeNodeException +import org.apache.spark.sql.catalyst.types.DataType + + +/** + * Thrown when an invalid RegEx string is found. + */ +class InvalidRegExException[TreeType <: TreeNode[_]](tree: TreeType, reason: String) extends + errors.TreeNodeException(tree, s"$reason", null) + +trait StringRegexExpression { + self: BinaryExpression => + + type EvaluatedType = Any + + def escape(v: String): String + def nullable: Boolean = true + def dataType: DataType = BooleanType + + // try cache the pattern for Literal + private lazy val cache: Pattern = right match { + case x @ Literal(value: String, StringType) => compile(value) + case _ => null + } + + protected def compile(str: Any): Pattern = str match { + // TODO or let it be null if couldn't compile the regex? + case x: String if(x != null) => Pattern.compile(escape(x)) + case x: String => null + case _ => throw new InvalidRegExException(this, "$str can not be compiled to regex pattern") + } + + protected def pattern(str: String) = if(cache == null) compile(str) else cache + + protected def filter: PartialFunction[(Row, (String, String)), Any] = { + case (row, (null, r)) => { false } + case (row, (l, null)) => { false } + case (row, (l, r)) => { + val regex = pattern(r) + if(regex == null) { + null + } else { + regex.matcher(l).matches + } + } + } + + override def apply(input: Row): Any = { + val l = left.apply(input) + if(l == null) { + null + } else { + val r = right.apply(input) + if(r == null) { + null + } else { + filter.lift(input, (l.asInstanceOf[String], r.asInstanceOf[String])).get + } + } + } +} + +/** + * Simple RegEx pattern matching function + */ +case class Like(left: Expression, right: Expression) + extends BinaryExpression with StringRegexExpression { + def symbol = "LIKE" + + // replace the _ with .{1} exactly match 1 time of any character + // replace the % with .*, match 0 or more times with any character + override def escape(v: String) = { + val sb = new StringBuilder() + var i = 0; + while (i < v.length) { + // Make a special case for "\\_" and "\\%" + val n = v.charAt(i); + if (n == '\\' && i + 1 < v.length && (v.charAt(i + 1) == '_' || v.charAt(i + 1) == '%')) { + sb.append(v.charAt(i + 1)) + i += 1 + } else { + if (n == '_') { + sb.append("."); + } else if (n == '%') { + sb.append(".*"); + } else { + sb.append(Pattern.quote(Character.toString(n))); + } + } + + i += 1 + } + + sb.toString() + } } +case class RLike(left: Expression, right: Expression) + extends BinaryExpression with StringRegexExpression { + + def symbol = "RLIKE" + + override def escape(v: String) = v +} diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index 0a684855fa332..6a108b1639463 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -26,6 +26,11 @@ import org.apache.spark.sql.catalyst.types._ /* Implicit conversions */ import org.apache.spark.sql.catalyst.dsl.expressions._ +import types._ +import expressions._ +import dsl._ +import dsl.expressions._ + /** * Root class of expression evaluation test @@ -50,7 +55,7 @@ trait ExpressionEvaluationSuite extends FunSuite { */ def executor(exprs: Seq[Expression]): ExprEvalTest - val data: Row = new GenericRow(Array(1, null, 1.0, true, 4, 5, null, "abcccd")) + val data: Row = new GenericRow(Array(1, null, 1.0, true, 4, 5, null, "abcccd", "a%")) // TODO add to DSL val c1 = BoundReference(0, AttributeReference("a", IntegerType)()) @@ -61,37 +66,72 @@ trait ExpressionEvaluationSuite extends FunSuite { val c6 = BoundReference(5, AttributeReference("f", IntegerType)()) val c7 = BoundReference(6, AttributeReference("g", StringType)()) val c8 = BoundReference(7, AttributeReference("h", StringType)()) + val c9 = BoundReference(8, AttributeReference("i", StringType)()) - def verify(expected: Seq[(Boolean, Any)], result: Row, input: Row) { + /** + * Compare each of the field if it equals the expected value. + * + * expected is a sequence of (Any, Any), + * and the first element indicates: + * true: the expected value is field is null + * false: the expected value is not null + * Exception Class: the expected exception class while computing the value + * the second element is the real value when first element equals false(not null) + */ + def verify(expected: Seq[(Any, Any)], result: Row, input: Row) { Seq.tabulate(expected.size) { i => expected(i) match { case (false, expected) => { - assert(result.isNullAt(i) == false, s"Input:($input), Output field:$i shouldn't be null") + assert(result.isNullAt(i) == false, + s"Input:($input), Output field:$i shouldn't be null") + val real = result.apply(i) - assert(real == expected, s"Input:($input), Output field:$i is expected as $expected, but got $real") + assert(real == expected, + s"Input:($input), Output field:$i is expected as $expected, but got $real") } case (true, _) => { - assert(result.isNullAt(i), s"Input:($input), Output field:$i is expected as null") + assert(result.isNullAt(i) == true, s"Input:($input), Output field:$i is expected as null") + } + case (exception: Class[_], _) => { + assert(result.isNullAt(i) == false, + s"Input:($input), Output field:$i should be exception") + + val real = result.apply(i).getClass.getName + val expect = exception.getName + assert(real == expect, + s"Input:($input), Output field:$i expect exception $expect, but got $real") } } } } - def verify(expecteds: Seq[Seq[(Boolean, Any)]], results: Seq[Row], inputs: Seq[Row]) { + def verify(expecteds: Seq[Seq[(Any, Any)]], results: Seq[Row], inputs: Seq[Row]) { Range(0, expecteds.length).foreach { i => verify(expecteds(i), results(i), inputs(i)) } } - def run(exprs: Seq[Expression], expected: Seq[(Boolean, Any)], input: Row) { + def proc(tester: ExprEvalTest, input: Row): Row = { + try { + tester.engine.apply(input) + } catch { + case x: Any => { + println(x.printStackTrace()) + new GenericRow(Array(x.asInstanceOf[Any])) + } + } + } + + def run(exprs: Seq[Expression], expected: Seq[(Any, Any)], input: Row) { val tester = executor(exprs) - verify(expected, tester.engine.apply(input), input) + + verify(expected, proc(tester,input), input) } - def run(exprs: Seq[Expression], expecteds: Seq[Seq[(Boolean, Any)]], inputs: Seq[Row]) { + def run(exprs: Seq[Expression], expecteds: Seq[Seq[(Any, Any)]], inputs: Seq[Row]) { val tester = executor(exprs) - verify(expecteds, inputs.map(tester.engine.apply(_)), inputs) + verify(expecteds, inputs.map(proc(tester,_)), inputs) } test("logical") { @@ -133,6 +173,43 @@ trait ExpressionEvaluationSuite extends FunSuite { run(exprs, expecteds, data) } + test("string like / rlike") { + val exprs = Seq( + Like(c7, Literal("a", StringType)), + Like(c7, Literal(null, StringType)), + Like(c8, Literal(null, StringType)), + Like(c8, Literal("a_c", StringType)), + Like(c8, Literal("a%c", StringType)), + Like(c8, Literal("a%d", StringType)), + Like(c8, Literal("a\\%d", StringType)), // to escape the % + Like(c8, c9), + RLike(c7, Literal("a+", StringType)), + RLike(c7, Literal(null, StringType)), + RLike(c8, Literal(null, StringType)), + RLike(c8, Literal("a.*", StringType)) + ) + + val expecteds = Seq( + (true, false), + (true, false), + (true, false), + (false, false), + (false, false), + (false, true), + (false, false), + (false, true), + (true, false), + (true, false), + (true, false), + (false, true)) + + run(exprs, expecteds, data) + + val expr = Seq(RLike(c8, Literal("[a.(*])", StringType))) + val expected = Seq((classOf[java.util.regex.PatternSyntaxException], false)) + run(expr, expected, data) + } + test("literals") { assert((Literal(1) + Literal(1)).apply(null) === 2) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 8e76a7348e957..7a2ecb165ff91 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -848,10 +848,8 @@ object HiveQl { case Token(">=", left :: right:: Nil) => GreaterThanOrEqual(nodeToExpr(left), nodeToExpr(right)) case Token("<", left :: right:: Nil) => LessThan(nodeToExpr(left), nodeToExpr(right)) case Token("<=", left :: right:: Nil) => LessThanOrEqual(nodeToExpr(left), nodeToExpr(right)) - case Token("LIKE", left :: right:: Nil) => - UnresolvedFunction("LIKE", Seq(nodeToExpr(left), nodeToExpr(right))) - case Token("RLIKE", left :: right:: Nil) => - UnresolvedFunction("RLIKE", Seq(nodeToExpr(left), nodeToExpr(right))) + case Token("LIKE", left :: right:: Nil) => Like(nodeToExpr(left), nodeToExpr(right)) + case Token("RLIKE", left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right)) case Token("REGEXP", left :: right:: Nil) => UnresolvedFunction("REGEXP", Seq(nodeToExpr(left), nodeToExpr(right))) case Token("TOK_FUNCTION", Token("TOK_ISNOTNULL", Nil) :: child :: Nil) => From 319edb73644b4b0f310d593fb4a46bc611e2cfa7 Mon Sep 17 00:00:00 2001 From: Cheng Hao Date: Wed, 26 Mar 2014 09:38:08 +0800 Subject: [PATCH 3/5] change to spark code style --- .../sql/catalyst/expressions/stringOperations.scala | 10 ++++------ .../expressions/ExpressionEvaluationSuite.scala | 9 +++------ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 04e066237e2ae..918cfed9d5482 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -19,15 +19,13 @@ package org.apache.spark.sql package catalyst package expressions -import org.apache.spark.sql.catalyst.types.BooleanType import java.util.regex.Pattern -import catalyst.types.StringType -import catalyst.types.BooleanType -import catalyst.trees.TreeNode - -import catalyst.errors.`package`.TreeNodeException import org.apache.spark.sql.catalyst.types.DataType +import org.apache.spark.sql.catalyst.types.StringType +import org.apache.spark.sql.catalyst.types.BooleanType +import org.apache.spark.sql.catalyst.trees.TreeNode +import org.apache.spark.sql.catalyst.errors.`package`.TreeNodeException /** diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index 6a108b1639463..776f4495f346d 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -26,10 +26,8 @@ import org.apache.spark.sql.catalyst.types._ /* Implicit conversions */ import org.apache.spark.sql.catalyst.dsl.expressions._ -import types._ -import expressions._ -import dsl._ -import dsl.expressions._ +import org.apache.spark.sql.catalyst.types._ +import org.apache.spark.sql.catalyst.dsl._ /** @@ -42,7 +40,7 @@ trait ExprEvalTest { } case class InterpretExprEvalTest(exprs: Seq[Expression]) extends ExprEvalTest { - override def engine: Execution = new InterpretedProjection(exprs) + override def engine: Execution = new Projection(exprs) } class InterpretExpressionEvaluationSuite extends ExpressionEvaluationSuite { @@ -116,7 +114,6 @@ trait ExpressionEvaluationSuite extends FunSuite { tester.engine.apply(input) } catch { case x: Any => { - println(x.printStackTrace()) new GenericRow(Array(x.asInstanceOf[Any])) } } From aeeb1d78dd2dfcac5f9fd0c750f7273387152efc Mon Sep 17 00:00:00 2001 From: Cheng Hao Date: Thu, 27 Mar 2014 13:12:50 +0800 Subject: [PATCH 4/5] Simplify the implementation/unit test of RLike/Like --- .../apache/spark/sql/catalyst/SqlParser.scala | 2 + .../expressions/stringOperations.scala | 42 +-- .../ExpressionEvaluationSuite.scala | 338 ++++++------------ .../org/apache/spark/sql/hive/HiveQl.scala | 3 +- 4 files changed, 135 insertions(+), 250 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala index 83b836f94f7cc..0c851c2ee2183 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/SqlParser.scala @@ -116,6 +116,7 @@ class SqlParser extends StandardTokenParsers { protected val OR = Keyword("OR") protected val LIKE = Keyword("LIKE") protected val RLIKE = Keyword("RLIKE") + protected val REGEXP = Keyword("REGEXP") protected val ORDER = Keyword("ORDER") protected val OUTER = Keyword("OUTER") protected val RIGHT = Keyword("RIGHT") @@ -270,6 +271,7 @@ class SqlParser extends StandardTokenParsers { termExpression ~ "!=" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(Equals(e1, e2)) } | termExpression ~ "<>" ~ termExpression ^^ { case e1 ~ _ ~ e2 => Not(Equals(e1, e2)) } | termExpression ~ RLIKE ~ termExpression ^^ { case e1 ~ _ ~ e2 => RLike(e1, e2) } | + termExpression ~ REGEXP ~ termExpression ^^ { case e1 ~ _ ~ e2 => RLike(e1, e2) } | termExpression ~ LIKE ~ termExpression ^^ { case e1 ~ _ ~ e2 => Like(e1, e2) } | termExpression ~ IN ~ "(" ~ rep1sep(termExpression, ",") <~ ")" ^^ { case e1 ~ _ ~ _ ~ e2 => In(e1, e2) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 918cfed9d5482..5ff2b1f85f039 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -28,18 +28,12 @@ import org.apache.spark.sql.catalyst.trees.TreeNode import org.apache.spark.sql.catalyst.errors.`package`.TreeNodeException -/** - * Thrown when an invalid RegEx string is found. - */ -class InvalidRegExException[TreeType <: TreeNode[_]](tree: TreeType, reason: String) extends - errors.TreeNodeException(tree, s"$reason", null) - trait StringRegexExpression { self: BinaryExpression => type EvaluatedType = Any - def escape(v: String): String + def escape(v: String): String = v def nullable: Boolean = true def dataType: DataType = BooleanType @@ -49,28 +43,15 @@ trait StringRegexExpression { case _ => null } - protected def compile(str: Any): Pattern = str match { - // TODO or let it be null if couldn't compile the regex? - case x: String if(x != null) => Pattern.compile(escape(x)) - case x: String => null - case _ => throw new InvalidRegExException(this, "$str can not be compiled to regex pattern") + protected def compile(str: String): Pattern = if(str == null) { + null + } else { + // Let it raise exception if couldn't compile the regex string + Pattern.compile(escape(str)) } - + protected def pattern(str: String) = if(cache == null) compile(str) else cache - protected def filter: PartialFunction[(Row, (String, String)), Any] = { - case (row, (null, r)) => { false } - case (row, (l, null)) => { false } - case (row, (l, r)) => { - val regex = pattern(r) - if(regex == null) { - null - } else { - regex.matcher(l).matches - } - } - } - override def apply(input: Row): Any = { val l = left.apply(input) if(l == null) { @@ -80,7 +61,12 @@ trait StringRegexExpression { if(r == null) { null } else { - filter.lift(input, (l.asInstanceOf[String], r.asInstanceOf[String])).get + val regex = pattern(r.asInstanceOf[String]) + if(regex == null) { + null + } else { + regex.matcher(l.asInstanceOf[String]).matches + } } } } @@ -126,6 +112,4 @@ case class RLike(left: Expression, right: Expression) extends BinaryExpression with StringRegexExpression { def symbol = "RLIKE" - - override def escape(v: String) = v } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index 776f4495f346d..efcffcfe5f1ff 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -26,186 +26,7 @@ import org.apache.spark.sql.catalyst.types._ /* Implicit conversions */ import org.apache.spark.sql.catalyst.dsl.expressions._ -import org.apache.spark.sql.catalyst.types._ -import org.apache.spark.sql.catalyst.dsl._ - - -/** - * Root class of expression evaluation test - */ -trait ExprEvalTest { - type Execution = (Row => Row) - - def engine: Execution -} - -case class InterpretExprEvalTest(exprs: Seq[Expression]) extends ExprEvalTest { - override def engine: Execution = new Projection(exprs) -} - -class InterpretExpressionEvaluationSuite extends ExpressionEvaluationSuite { - override def executor(exprs: Seq[Expression]) = InterpretExprEvalTest(exprs) -} - -trait ExpressionEvaluationSuite extends FunSuite { - /** - * The sub classes need to create the ExprEvalTest object - */ - def executor(exprs: Seq[Expression]): ExprEvalTest - - val data: Row = new GenericRow(Array(1, null, 1.0, true, 4, 5, null, "abcccd", "a%")) - - // TODO add to DSL - val c1 = BoundReference(0, AttributeReference("a", IntegerType)()) - val c2 = BoundReference(1, AttributeReference("b", IntegerType)()) - val c3 = BoundReference(2, AttributeReference("c", DoubleType)()) - val c4 = BoundReference(3, AttributeReference("d", BooleanType)()) - val c5 = BoundReference(4, AttributeReference("e", IntegerType)()) - val c6 = BoundReference(5, AttributeReference("f", IntegerType)()) - val c7 = BoundReference(6, AttributeReference("g", StringType)()) - val c8 = BoundReference(7, AttributeReference("h", StringType)()) - val c9 = BoundReference(8, AttributeReference("i", StringType)()) - - /** - * Compare each of the field if it equals the expected value. - * - * expected is a sequence of (Any, Any), - * and the first element indicates: - * true: the expected value is field is null - * false: the expected value is not null - * Exception Class: the expected exception class while computing the value - * the second element is the real value when first element equals false(not null) - */ - def verify(expected: Seq[(Any, Any)], result: Row, input: Row) { - Seq.tabulate(expected.size) { i => - expected(i) match { - case (false, expected) => { - assert(result.isNullAt(i) == false, - s"Input:($input), Output field:$i shouldn't be null") - - val real = result.apply(i) - assert(real == expected, - s"Input:($input), Output field:$i is expected as $expected, but got $real") - } - case (true, _) => { - assert(result.isNullAt(i) == true, s"Input:($input), Output field:$i is expected as null") - } - case (exception: Class[_], _) => { - assert(result.isNullAt(i) == false, - s"Input:($input), Output field:$i should be exception") - - val real = result.apply(i).getClass.getName - val expect = exception.getName - assert(real == expect, - s"Input:($input), Output field:$i expect exception $expect, but got $real") - } - } - } - } - - def verify(expecteds: Seq[Seq[(Any, Any)]], results: Seq[Row], inputs: Seq[Row]) { - Range(0, expecteds.length).foreach { i => - verify(expecteds(i), results(i), inputs(i)) - } - } - - def proc(tester: ExprEvalTest, input: Row): Row = { - try { - tester.engine.apply(input) - } catch { - case x: Any => { - new GenericRow(Array(x.asInstanceOf[Any])) - } - } - } - - def run(exprs: Seq[Expression], expected: Seq[(Any, Any)], input: Row) { - val tester = executor(exprs) - - verify(expected, proc(tester,input), input) - } - - def run(exprs: Seq[Expression], expecteds: Seq[Seq[(Any, Any)]], inputs: Seq[Row]) { - val tester = executor(exprs) - - verify(expecteds, inputs.map(proc(tester,_)), inputs) - } - - test("logical") { - val expected = Seq[(Boolean, Any)]( - (false, false), - (true, -1), - (false, true), - (false, true), - (false, false)) - - val exprs = Seq[Expression](And(LessThan(Cast(c1, DoubleType), c3), LessThan(c1, c2)), - Or(LessThan(Cast(c1, DoubleType), c3), LessThan(c1, c2)), - IsNull(c2), - IsNotNull(c3), - Not(c4)) - - run(exprs, expected, data) - } - - test("arithmetic") { - val exprs = Array[Expression]( - Add(c1, c2), - Add(c1, c5), - Divide(c1, c5), - Subtract(c1, c5), - Multiply(c1, c5), - Remainder(c1, c5), - UnaryMinus(c1) - ) - val expecteds = Seq[(Boolean, Any)]( - (true, 0), - (false, 5), - (false, 0), - (false, -3), - (false, 4), - (false, 1), - (false, -1)) - - run(exprs, expecteds, data) - } - - test("string like / rlike") { - val exprs = Seq( - Like(c7, Literal("a", StringType)), - Like(c7, Literal(null, StringType)), - Like(c8, Literal(null, StringType)), - Like(c8, Literal("a_c", StringType)), - Like(c8, Literal("a%c", StringType)), - Like(c8, Literal("a%d", StringType)), - Like(c8, Literal("a\\%d", StringType)), // to escape the % - Like(c8, c9), - RLike(c7, Literal("a+", StringType)), - RLike(c7, Literal(null, StringType)), - RLike(c8, Literal(null, StringType)), - RLike(c8, Literal("a.*", StringType)) - ) - - val expecteds = Seq( - (true, false), - (true, false), - (true, false), - (false, false), - (false, false), - (false, true), - (false, false), - (false, true), - (true, false), - (true, false), - (true, false), - (false, true)) - - run(exprs, expecteds, data) - - val expr = Seq(RLike(c8, Literal("[a.(*])", StringType))) - val expected = Seq((classOf[java.util.regex.PatternSyntaxException], false)) - run(expr, expected, data) - } +class ExpressionEvaluationSuite extends FunSuite { test("literals") { assert((Literal(1) + Literal(1)).apply(null) === 2) @@ -232,21 +53,22 @@ trait ExpressionEvaluationSuite extends FunSuite { * Unknown Unknown */ - val b1 = BoundReference(0, AttributeReference("a", BooleanType)()) - val b2 = BoundReference(1, AttributeReference("b", BooleanType)()) - - test("3VL Not") { - val table = (true, false) :: (false, true) :: (null, null) :: Nil + val notTrueTable = + (true, false) :: + (false, true) :: + (null, null) :: Nil - val exprs = Array[Expression](Not(b1)) - val inputs = table.map { case(v, answer) => new GenericRow(Array(v)) } - val expected = table.map { case(v, answer) => Seq((answer == null, answer)) } - - run(exprs, expected, inputs) + test("3VL Not") { + notTrueTable.foreach { + case (v, answer) => + val expr = Not(Literal(v, BooleanType)) + val result = expr.apply(null) + if (result != answer) + fail(s"$expr should not evaluate to $result, expected: $answer") } } - test("3VL AND") { - val table = (true, true, true) :: + booleanLogicTest("AND", _ && _, + (true, true, true) :: (true, false, false) :: (true, null, null) :: (false, true, false) :: @@ -254,17 +76,10 @@ trait ExpressionEvaluationSuite extends FunSuite { (false, null, false) :: (null, true, null) :: (null, false, false) :: - (null, null, null) :: Nil - - val exprs = Seq[Expression](And(b1, b2)) - val inputs = table.map { case(v1, v2, answer) => new GenericRow(Array(v1, v2)) } - val expected = table.map { case(v1, v2, answer) => Seq((answer == null, answer)) } - - run(exprs, expected, inputs) - } + (null, null, null) :: Nil) - test("3VL OR") { - val table = (true, true, true) :: + booleanLogicTest("OR", _ || _, + (true, true, true) :: (true, false, true) :: (true, null, true) :: (false, true, true) :: @@ -272,17 +87,10 @@ trait ExpressionEvaluationSuite extends FunSuite { (false, null, null) :: (null, true, true) :: (null, false, null) :: - (null, null, null) :: Nil - - val exprs = Array[Expression](Or(b1, b2)) - val inputs = table.map { case(v1, v2, answer) => new GenericRow(Array(v1, v2)) } - val expected = table.map { case(v1, v2, answer) => Seq((answer == null, answer)) } - - run(exprs, expected, inputs) - } - - test("3VL Equals") { - val table = (true, true, true) :: + (null, null, null) :: Nil) + + booleanLogicTest("=", _ === _, + (true, true, true) :: (true, false, false) :: (true, null, null) :: (false, true, false) :: @@ -290,12 +98,104 @@ trait ExpressionEvaluationSuite extends FunSuite { (false, null, null) :: (null, true, null) :: (null, false, null) :: - (null, null, null) :: Nil - - val exprs = Array[Expression](Equals(b1, b2)) - val inputs = table.map { case(v1, v2, answer) => new GenericRow(Array(v1, v2)) } - val expected = table.map { case(v1, v2, answer) => Seq((answer == null, answer)) } + (null, null, null) :: Nil) + + def booleanLogicTest(name: String, op: (Expression, Expression) => Expression, truthTable: Seq[(Any, Any, Any)]) { + test(s"3VL $name") { + truthTable.foreach { + case (l,r,answer) => + val expr = op(Literal(l, BooleanType), Literal(r, BooleanType)) + val result = expr.apply(null) + if (result != answer) + fail(s"$expr should not evaluate to $result, expected: $answer") + } + } + } + + val c1 = BoundReference(0, AttributeReference("a", StringType)()) // null + val c2 = BoundReference(1, AttributeReference("b", StringType)()) // "addb" + val c3 = BoundReference(2, AttributeReference("c", StringType)()) // "a" + val c4 = BoundReference(3, AttributeReference("d", StringType)()) // "abdef" + val c5 = BoundReference(4, AttributeReference("e", StringType)()) // "a_%b" + val c6 = BoundReference(5, AttributeReference("f", StringType)()) // "a\\__b" + val c7 = BoundReference(6, AttributeReference("g", StringType)()) // "a%\\%b" + val c8 = BoundReference(7, AttributeReference("h", StringType)()) // "a%" + val c9 = BoundReference(8, AttributeReference("i", StringType)()) // "**" + + val cs1: String = null + val cs2 = "addb" + val cs3 = "a" + val cs4 = "abdef" + val cs5 = "a_%b" + val cs6 = "a\\__b" + val cs7 = "a%\\%b" + val cs8 = "a%" + val cs9 = "**" + val regexData: Row = new GenericRow(Array[Any](cs1, cs2, cs3, cs4, cs5, cs6, cs7, cs8, cs9)) - run(exprs, expected, inputs) + regexTest(regexData, "Like - pattern with Dynamic regex string", Like(_, _), + (c1, c3, null) :: // null, "a" + (c1, c1, null) :: // null, null + (c4, c4, true) :: // "abdef", "abdef" + (c5, c6, true) :: // "a_%b", "a\\__b" + (c2, c5, true) :: // "addb", "a_%b" + (c2, c6, false) :: // "addb", "a\\__b" + (c2, c7, false) :: // "addb", "a%\\%b" + (c5, c7, true) :: // "a_%b", "a%\\%b" + (c2, c8, true) :: // "addb", "a%" + (c2, c9, false) :: // "addb", "**" + Nil + ) + + regexTest(regexData, "Like - pattern with Literal regex string", Like(_, _), + (Literal(cs1), Literal(cs3), null) :: // null, "a" + (Literal(cs1), Literal(cs1), null) :: // null, null + (Literal(cs4), Literal(cs4), true) :: // "abdef", "abdef" + (Literal(cs5), Literal(cs6), true) :: // "a_%b", "a\\__b" + (Literal(cs2), Literal(cs5), true) :: // "addb", "a_%b" + (Literal(cs2), Literal(cs6), false) :: // "addb", "a\\__b" + (Literal(cs2), Literal(cs7), false) :: // "addb", "a%\\%b" + (Literal(cs5), Literal(cs7), true) :: // "a_%b", "a%\\%b" + (Literal(cs2), Literal(cs8), true) :: // "addb", "a%" + (Literal(cs2), Literal(cs9), false) :: // "addb", "**" + Nil + ) + + regexTest(regexData, "RLike - pattern with Literal regex string", RLike(_, _), + (Literal(cs4), Literal(cs4), true) :: // "abdef", "abdef" + (Literal("abbbbc"), Literal("a.*c"), true) :: + (Literal("abbbbc"), Literal("**"), classOf[java.util.regex.PatternSyntaxException]) :: + Nil + ) + + def regexTest(row: Row, name: String, op: (Expression, Expression) => Expression, + truthTable: Seq[(Expression, Expression, Any)]) { + + test(s"regex: $name") { + truthTable.foreach { + case (l, r, null) => + val expr = op(l, r) + val result = expr.apply(row) + if (result != null) fail(s"$expr should not evaluate to $result, expected: null") + case (l, r, answer: Class[_]) => + val expr = op(l, r) + try{ + expr.apply(row) + // will fail if no exception thrown + fail(s"$expr should throw exception ${answer.getCanonicalName()}, but it didn't") + } catch { + // raise by fail() method + case x if (x.isInstanceOf[org.scalatest.exceptions.TestFailedException]) => throw x + // the same exception as expected it, do nothing + case x if answer.getCanonicalName() == x.getClass().getCanonicalName() => + case x => fail(s"$expr should not throw exception $x, expected: $answer") + } + case (l, r, answer) => + val expr = op(l, r) + val result = expr.apply(row) + if (result != answer) + fail(s"$expr should not evaluate to $result, expected: $answer") + } + } } } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala index 7a2ecb165ff91..280dbfc1be7e6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveQl.scala @@ -850,8 +850,7 @@ object HiveQl { case Token("<=", left :: right:: Nil) => LessThanOrEqual(nodeToExpr(left), nodeToExpr(right)) case Token("LIKE", left :: right:: Nil) => Like(nodeToExpr(left), nodeToExpr(right)) case Token("RLIKE", left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right)) - case Token("REGEXP", left :: right:: Nil) => - UnresolvedFunction("REGEXP", Seq(nodeToExpr(left), nodeToExpr(right))) + case Token("REGEXP", left :: right:: Nil) => RLike(nodeToExpr(left), nodeToExpr(right)) case Token("TOK_FUNCTION", Token("TOK_ISNOTNULL", Nil) :: child :: Nil) => IsNotNull(nodeToExpr(child)) case Token("TOK_FUNCTION", Token("TOK_ISNULL", Nil) :: child :: Nil) => From 84f72e9012828062fe41d1d849b91996c4b70175 Mon Sep 17 00:00:00 2001 From: Cheng Hao Date: Fri, 28 Mar 2014 13:14:48 +0800 Subject: [PATCH 5/5] fix bug in RLike/Like & Simplify the unit test --- .../spark/sql/catalyst/dsl/package.scala | 10 +- .../expressions/stringOperations.scala | 10 +- .../ExpressionEvaluationSuite.scala | 160 +++++++++--------- 3 files changed, 95 insertions(+), 85 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala index e6255bcafa8aa..6e9ef607114d3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/dsl/package.scala @@ -71,6 +71,9 @@ package object dsl { def === (other: Expression) = Equals(expr, other) def != (other: Expression) = Not(Equals(expr, other)) + def like(other: Expression) = Like(expr, other) + def rlike(other: Expression) = RLike(expr, other) + def asc = SortOrder(expr, Ascending) def desc = SortOrder(expr, Descending) @@ -91,7 +94,10 @@ package object dsl { implicit def symbolToUnresolvedAttribute(s: Symbol) = analysis.UnresolvedAttribute(s.name) implicit class DslSymbol(sym: Symbol) extends ImplicitAttribute { def s = sym.name } - implicit class DslString(val s: String) extends ImplicitAttribute + implicit class DslString(val s: String) extends ImplicitOperators { + def expr: Expression = Literal(s) + def attr = analysis.UnresolvedAttribute(s) + } abstract class ImplicitAttribute extends ImplicitOperators { def s: String @@ -111,6 +117,8 @@ package object dsl { // Protobuf terminology def required = a.withNullability(false) + + def at(ordinal: Int) = BoundReference(ordinal, a) } } diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala index 5ff2b1f85f039..7d665e2e02aa8 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringOperations.scala @@ -33,7 +33,9 @@ trait StringRegexExpression { type EvaluatedType = Any - def escape(v: String): String = v + def escape(v: String): String + def matches(regex: Pattern, str: String): Boolean + def nullable: Boolean = true def dataType: DataType = BooleanType @@ -65,7 +67,7 @@ trait StringRegexExpression { if(regex == null) { null } else { - regex.matcher(l.asInstanceOf[String]).matches + matches(regex, l.asInstanceOf[String]) } } } @@ -106,10 +108,14 @@ case class Like(left: Expression, right: Expression) sb.toString() } + + override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).matches() } case class RLike(left: Expression, right: Expression) extends BinaryExpression with StringRegexExpression { def symbol = "RLIKE" + override def escape(v: String): String = v + override def matches(regex: Pattern, str: String): Boolean = regex.matcher(str).find(0) } diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala index efcffcfe5f1ff..162c36a0d673e 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/ExpressionEvaluationSuite.scala @@ -111,91 +111,87 @@ class ExpressionEvaluationSuite extends FunSuite { } } } + + def evaluate(expression: Expression, inputRow: Row = EmptyRow): Any = { + expression.apply(inputRow) + } + + def checkEvaluation(expression: Expression, expected: Any, inputRow: Row = EmptyRow): Unit = { + val actual = try evaluate(expression, inputRow) catch { + case e: Exception => fail(s"Exception evaluating $expression", e) + } + if(actual != expected) { + val input = if(inputRow == EmptyRow) "" else s", input: $inputRow" + fail(s"Incorrect Evaluation: $expression, actual: $actual, expected: $expected$input") + } + } + + test("LIKE literal Regular Expression") { + checkEvaluation(Literal(null, StringType).like("a"), null) + checkEvaluation(Literal(null, StringType).like(Literal(null, StringType)), null) + checkEvaluation("abdef" like "abdef", true) + checkEvaluation("a_%b" like "a\\__b", true) + checkEvaluation("addb" like "a_%b", true) + checkEvaluation("addb" like "a\\__b", false) + checkEvaluation("addb" like "a%\\%b", false) + checkEvaluation("a_%b" like "a%\\%b", true) + checkEvaluation("addb" like "a%", true) + checkEvaluation("addb" like "**", false) + checkEvaluation("abc" like "a%", true) + checkEvaluation("abc" like "b%", false) + checkEvaluation("abc" like "bc%", false) + } - val c1 = BoundReference(0, AttributeReference("a", StringType)()) // null - val c2 = BoundReference(1, AttributeReference("b", StringType)()) // "addb" - val c3 = BoundReference(2, AttributeReference("c", StringType)()) // "a" - val c4 = BoundReference(3, AttributeReference("d", StringType)()) // "abdef" - val c5 = BoundReference(4, AttributeReference("e", StringType)()) // "a_%b" - val c6 = BoundReference(5, AttributeReference("f", StringType)()) // "a\\__b" - val c7 = BoundReference(6, AttributeReference("g", StringType)()) // "a%\\%b" - val c8 = BoundReference(7, AttributeReference("h", StringType)()) // "a%" - val c9 = BoundReference(8, AttributeReference("i", StringType)()) // "**" - - val cs1: String = null - val cs2 = "addb" - val cs3 = "a" - val cs4 = "abdef" - val cs5 = "a_%b" - val cs6 = "a\\__b" - val cs7 = "a%\\%b" - val cs8 = "a%" - val cs9 = "**" - val regexData: Row = new GenericRow(Array[Any](cs1, cs2, cs3, cs4, cs5, cs6, cs7, cs8, cs9)) + test("LIKE Non-literal Regular Expression") { + val regEx = 'a.string.at(0) + checkEvaluation("abcd" like regEx, null, new GenericRow(Array[Any](null))) + checkEvaluation("abdef" like regEx, true, new GenericRow(Array[Any]("abdef"))) + checkEvaluation("a_%b" like regEx, true, new GenericRow(Array[Any]("a\\__b"))) + checkEvaluation("addb" like regEx, true, new GenericRow(Array[Any]("a_%b"))) + checkEvaluation("addb" like regEx, false, new GenericRow(Array[Any]("a\\__b"))) + checkEvaluation("addb" like regEx, false, new GenericRow(Array[Any]("a%\\%b"))) + checkEvaluation("a_%b" like regEx, true, new GenericRow(Array[Any]("a%\\%b"))) + checkEvaluation("addb" like regEx, true, new GenericRow(Array[Any]("a%"))) + checkEvaluation("addb" like regEx, false, new GenericRow(Array[Any]("**"))) + checkEvaluation("abc" like regEx, true, new GenericRow(Array[Any]("a%"))) + checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("b%"))) + checkEvaluation("abc" like regEx, false, new GenericRow(Array[Any]("bc%"))) + } + + test("RLIKE literal Regular Expression") { + checkEvaluation("abdef" rlike "abdef", true) + checkEvaluation("abbbbc" rlike "a.*c", true) - regexTest(regexData, "Like - pattern with Dynamic regex string", Like(_, _), - (c1, c3, null) :: // null, "a" - (c1, c1, null) :: // null, null - (c4, c4, true) :: // "abdef", "abdef" - (c5, c6, true) :: // "a_%b", "a\\__b" - (c2, c5, true) :: // "addb", "a_%b" - (c2, c6, false) :: // "addb", "a\\__b" - (c2, c7, false) :: // "addb", "a%\\%b" - (c5, c7, true) :: // "a_%b", "a%\\%b" - (c2, c8, true) :: // "addb", "a%" - (c2, c9, false) :: // "addb", "**" - Nil - ) - - regexTest(regexData, "Like - pattern with Literal regex string", Like(_, _), - (Literal(cs1), Literal(cs3), null) :: // null, "a" - (Literal(cs1), Literal(cs1), null) :: // null, null - (Literal(cs4), Literal(cs4), true) :: // "abdef", "abdef" - (Literal(cs5), Literal(cs6), true) :: // "a_%b", "a\\__b" - (Literal(cs2), Literal(cs5), true) :: // "addb", "a_%b" - (Literal(cs2), Literal(cs6), false) :: // "addb", "a\\__b" - (Literal(cs2), Literal(cs7), false) :: // "addb", "a%\\%b" - (Literal(cs5), Literal(cs7), true) :: // "a_%b", "a%\\%b" - (Literal(cs2), Literal(cs8), true) :: // "addb", "a%" - (Literal(cs2), Literal(cs9), false) :: // "addb", "**" - Nil - ) - - regexTest(regexData, "RLike - pattern with Literal regex string", RLike(_, _), - (Literal(cs4), Literal(cs4), true) :: // "abdef", "abdef" - (Literal("abbbbc"), Literal("a.*c"), true) :: - (Literal("abbbbc"), Literal("**"), classOf[java.util.regex.PatternSyntaxException]) :: - Nil - ) - - def regexTest(row: Row, name: String, op: (Expression, Expression) => Expression, - truthTable: Seq[(Expression, Expression, Any)]) { + checkEvaluation("fofo" rlike "^fo", true) + checkEvaluation("fo\no" rlike "^fo\no$", true) + checkEvaluation("Bn" rlike "^Ba*n", true) + checkEvaluation("afofo" rlike "fo", true) + checkEvaluation("afofo" rlike "^fo", false) + checkEvaluation("Baan" rlike "^Ba?n", false) + checkEvaluation("axe" rlike "pi|apa", false) + checkEvaluation("pip" rlike "^(pi)*$", false) - test(s"regex: $name") { - truthTable.foreach { - case (l, r, null) => - val expr = op(l, r) - val result = expr.apply(row) - if (result != null) fail(s"$expr should not evaluate to $result, expected: null") - case (l, r, answer: Class[_]) => - val expr = op(l, r) - try{ - expr.apply(row) - // will fail if no exception thrown - fail(s"$expr should throw exception ${answer.getCanonicalName()}, but it didn't") - } catch { - // raise by fail() method - case x if (x.isInstanceOf[org.scalatest.exceptions.TestFailedException]) => throw x - // the same exception as expected it, do nothing - case x if answer.getCanonicalName() == x.getClass().getCanonicalName() => - case x => fail(s"$expr should not throw exception $x, expected: $answer") - } - case (l, r, answer) => - val expr = op(l, r) - val result = expr.apply(row) - if (result != answer) - fail(s"$expr should not evaluate to $result, expected: $answer") - } + checkEvaluation("abc" rlike "^ab", true) + checkEvaluation("abc" rlike "^bc", false) + checkEvaluation("abc" rlike "^ab", true) + checkEvaluation("abc" rlike "^bc", false) + + intercept[java.util.regex.PatternSyntaxException] { + evaluate("abbbbc" rlike "**") + } + } + + test("RLIKE Non-literal Regular Expression") { + val regEx = 'a.string.at(0) + checkEvaluation("abdef" rlike regEx, true, new GenericRow(Array[Any]("abdef"))) + checkEvaluation("abbbbc" rlike regEx, true, new GenericRow(Array[Any]("a.*c"))) + checkEvaluation("fofo" rlike regEx, true, new GenericRow(Array[Any]("^fo"))) + checkEvaluation("fo\no" rlike regEx, true, new GenericRow(Array[Any]("^fo\no$"))) + checkEvaluation("Bn" rlike regEx, true, new GenericRow(Array[Any]("^Ba*n"))) + + intercept[java.util.regex.PatternSyntaxException] { + evaluate("abbbbc" rlike regEx, new GenericRow(Array[Any]("**"))) } } } +