Skip to content

Commit

Permalink
[SPARK-21654][SQL] Complement SQL predicates expression description
Browse files Browse the repository at this point in the history
## What changes were proposed in this pull request?

SQL predicates don't have complete expression description. This patch goes to complement the description by adding arguments, examples.

This change also adds related test cases for the SQL predicate expressions.

## How was this patch tested?

Existing tests. And added predicate test.

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes #18869 from viirya/SPARK-21654.
  • Loading branch information
viirya authored and gatorsmile committed Sep 4, 2017
1 parent 07fd68a commit 9f30d92
Show file tree
Hide file tree
Showing 4 changed files with 460 additions and 13 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -133,8 +133,25 @@ case class Not(child: Expression)
/**
* Evaluates to `true` if `list` contains `value`.
*/
// scalastyle:off line.size.limit
@ExpressionDescription(
usage = "expr1 _FUNC_(expr2, expr3, ...) - Returns true if `expr` equals to any valN.")
usage = "expr1 _FUNC_(expr2, expr3, ...) - Returns true if `expr` equals to any valN.",
arguments = """
Arguments:
* expr1, expr2, expr3, ... - the arguments must be same type.
""",
examples = """
Examples:
> SELECT 1 _FUNC_(1, 2, 3);
true
> SELECT 1 _FUNC_(2, 3, 4);
false
> SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 1), named_struct('a', 1, 'b', 3));
false
> SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3));
true
""")
// scalastyle:on line.size.limit
case class In(value: Expression, list: Seq[Expression]) extends Predicate {

require(list != null, "list should not be null")
Expand Down Expand Up @@ -491,7 +508,24 @@ object Equality {
// TODO: although map type is not orderable, technically map type should be able to be used
// in equality comparison
@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` equals `expr2`, or false otherwise.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` equals `expr2`, or false otherwise.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
and must be a type that can be used in equality comparison. Map type is not supported.
For complex types such array/struct, the data types of fields must be orderable.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 2;
true
> SELECT 1 _FUNC_ '1';
true
> SELECT true _FUNC_ NULL;
NULL
> SELECT NULL _FUNC_ NULL;
NULL
""")
case class EqualTo(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand All @@ -510,6 +544,23 @@ case class EqualTo(left: Expression, right: Expression)
usage = """
expr1 _FUNC_ expr2 - Returns same result as the EQUAL(=) operator for non-null operands,
but returns true if both are null, false if one of the them is null.
""",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
and must be a type that can be used in equality comparison. Map type is not supported.
For complex types such array/struct, the data types of fields must be orderable.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 2;
true
> SELECT 1 _FUNC_ '1';
true
> SELECT true _FUNC_ NULL;
false
> SELECT NULL _FUNC_ NULL;
true
""")
case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison {

Expand Down Expand Up @@ -540,7 +591,27 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp
}

@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
and must be a type that can be ordered. For example, map type is not orderable, so it
is not supported. For complex types such array/struct, the data types of fields must
be orderable.
""",
examples = """
Examples:
> SELECT 1 _FUNC_ 2;
true
> SELECT 1.1 _FUNC_ '1';
false
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
false
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
true
> SELECT 1 _FUNC_ NULL;
NULL
""")
case class LessThan(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand All @@ -550,7 +621,27 @@ case class LessThan(left: Expression, right: Expression)
}

@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
and must be a type that can be ordered. For example, map type is not orderable, so it
is not supported. For complex types such array/struct, the data types of fields must
be orderable.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 2;
true
> SELECT 1.0 _FUNC_ '1';
true
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
true
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
true
> SELECT 1 _FUNC_ NULL;
NULL
""")
case class LessThanOrEqual(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand All @@ -560,7 +651,27 @@ case class LessThanOrEqual(left: Expression, right: Expression)
}

@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
and must be a type that can be ordered. For example, map type is not orderable, so it
is not supported. For complex types such array/struct, the data types of fields must
be orderable.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 1;
true
> SELECT 2 _FUNC_ '1.1';
true
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
false
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
false
> SELECT 1 _FUNC_ NULL;
NULL
""")
case class GreaterThan(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand All @@ -570,7 +681,27 @@ case class GreaterThan(left: Expression, right: Expression)
}

@ExpressionDescription(
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than or equal to `expr2`.")
usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than or equal to `expr2`.",
arguments = """
Arguments:
* expr1, expr2 - the two expressions must be same type or can be casted to a common type,
and must be a type that can be ordered. For example, map type is not orderable, so it
is not supported. For complex types such array/struct, the data types of fields must
be orderable.
""",
examples = """
Examples:
> SELECT 2 _FUNC_ 1;
true
> SELECT 2.0 _FUNC_ '2.1';
false
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52');
true
> SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52');
false
> SELECT 1 _FUNC_ NULL;
NULL
""")
case class GreaterThanOrEqual(left: Expression, right: Expression)
extends BinaryComparison with NullIntolerant {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
(null, false, null) ::
(null, null, null) :: Nil)

test("IN") {
test("basic IN predicate test") {
checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), Seq(Literal(1),
Literal(2))), null)
checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType),
Expand Down Expand Up @@ -151,19 +151,32 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^Ba*n"))), true)
checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^n"))), false)

val primitiveTypes = Seq(IntegerType, FloatType, DoubleType, StringType, ByteType, ShortType,
LongType, BinaryType, BooleanType, DecimalType.USER_DEFAULT, TimestampType)
primitiveTypes.foreach { t =>
val dataGen = RandomDataGenerator.forType(t, nullable = true).get
}

test("IN with different types") {
def testWithRandomDataGeneration(dataType: DataType, nullable: Boolean): Unit = {
val maybeDataGen = RandomDataGenerator.forType(dataType, nullable = nullable)
// Actually we won't pass in unsupported data types, this is a safety check.
val dataGen = maybeDataGen.getOrElse(
fail(s"Failed to create data generator for type $dataType"))
val inputData = Seq.fill(10) {
val value = dataGen.apply()
value match {
def cleanData(value: Any) = value match {
case d: Double if d.isNaN => 0.0d
case f: Float if f.isNaN => 0.0f
case _ => value
}
value match {
case s: Seq[_] => s.map(cleanData(_))
case m: Map[_, _] =>
val pair = m.unzip
val newKeys = pair._1.map(cleanData(_))
val newValues = pair._2.map(cleanData(_))
newKeys.zip(newValues).toMap
case _ => cleanData(value)
}
}
val input = inputData.map(NonFoldableLiteral.create(_, t))
val input = inputData.map(NonFoldableLiteral.create(_, dataType))
val expected = if (inputData(0) == null) {
null
} else if (inputData.slice(1, 10).contains(inputData(0))) {
Expand All @@ -175,6 +188,55 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper {
}
checkEvaluation(In(input(0), input.slice(1, 10)), expected)
}

val atomicTypes = DataTypeTestUtils.atomicTypes.filter { t =>
RandomDataGenerator.forType(t).isDefined && !t.isInstanceOf[DecimalType]
} ++ Seq(DecimalType.USER_DEFAULT)

val atomicArrayTypes = atomicTypes.map(ArrayType(_, containsNull = true))

// Basic types:
for (
dataType <- atomicTypes;
nullable <- Seq(true, false)) {
testWithRandomDataGeneration(dataType, nullable)
}

// Array types:
for (
arrayType <- atomicArrayTypes;
nullable <- Seq(true, false)
if RandomDataGenerator.forType(arrayType.elementType, arrayType.containsNull).isDefined) {
testWithRandomDataGeneration(arrayType, nullable)
}

// Struct types:
for (
colOneType <- atomicTypes;
colTwoType <- atomicTypes;
nullable <- Seq(true, false)) {
val structType = StructType(
StructField("a", colOneType) :: StructField("b", colTwoType) :: Nil)
testWithRandomDataGeneration(structType, nullable)
}

// Map types: not supported
for (
keyType <- atomicTypes;
valueType <- atomicTypes;
nullable <- Seq(true, false)) {
val mapType = MapType(keyType, valueType)
val e = intercept[Exception] {
testWithRandomDataGeneration(mapType, nullable)
}
if (e.getMessage.contains("Code generation of")) {
// If the `value` expression is null, `eval` will be short-circuited.
// Codegen version evaluation will be run then.
assert(e.getMessage.contains("cannot generate equality code for un-comparable type"))
} else {
assert(e.getMessage.contains("Exception evaluating"))
}
}
}

test("INSET") {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
-- EqualTo
select 1 = 1;
select 1 = '1';
select 1.0 = '1';

-- GreaterThan
select 1 > '1';
select 2 > '1.0';
select 2 > '2.0';
select 2 > '2.2';
select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52');
select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52';

-- GreaterThanOrEqual
select 1 >= '1';
select 2 >= '1.0';
select 2 >= '2.0';
select 2.0 >= '2.2';
select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52');
select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52';

-- LessThan
select 1 < '1';
select 2 < '1.0';
select 2 < '2.0';
select 2.0 < '2.2';
select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52');
select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52';

-- LessThanOrEqual
select 1 <= '1';
select 2 <= '1.0';
select 2 <= '2.0';
select 2.0 <= '2.2';
select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52');
select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52';
Loading

0 comments on commit 9f30d92

Please sign in to comment.