From 989584369e913824354f4b6ed77bb83f7bf6f5e9 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Mon, 7 Aug 2017 08:41:46 +0000 Subject: [PATCH 1/7] Complement SQL predicates document. --- .../sql/catalyst/expressions/predicates.scala | 123 +++++++++++++++++- 1 file changed, 117 insertions(+), 6 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 7bf10f199f1c7..0d7eeba7e3517 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -133,8 +133,25 @@ case class Not(child: Expression) /** * Evaluates to `true` if `list` contains `value`. */ +// scalastyle:off line.size.limit @ExpressionDescription( - usage = "expr1 _FUNC_(expr2, expr3, ...) - Returns true if `expr` equals to any valN.") + usage = "expr1 _FUNC_(expr2, expr3, ...) - Returns true if `expr` equals to any valN.", + arguments = """ + Arguments: + * expr1, expr2, expr3, ... - the arguments must be same type. 
+ """, + examples = """ + Examples: + > SELECT 1 _FUNC_(1, 2, 3); + true + > SELECT 1 _FUNC_(2, 3, 4); + false + > SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 1), named_struct('a', 1, 'b', 3)); + false + > SELECT named_struct('a', 1, 'b', 2) _FUNC_(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3)); + true + """) +// scalastyle:on line.size.limit case class In(value: Expression, list: Seq[Expression]) extends Predicate { require(list != null, "list should not be null") @@ -484,7 +501,20 @@ object Equality { } @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` equals `expr2`, or false otherwise.") + usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` equals `expr2`, or false otherwise.", + arguments = """ + Arguments: + * expr1, expr2 - the two expressions must be same type. + """, + examples = """ + Examples: + > SELECT 2 _FUNC_ 2; + true + > SELECT true _FUNC_ NULL; + NULL + > SELECT NULL _FUNC_ NULL; + NULL + """) case class EqualTo(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -518,6 +548,19 @@ case class EqualTo(left: Expression, right: Expression) usage = """ expr1 _FUNC_ expr2 - Returns same result as the EQUAL(=) operator for non-null operands, but returns true if both are null, false if one of the them is null. + """, + arguments = """ + Arguments: + * expr1, expr2 - the two expressions must be same type. 
+ """, + examples = """ + Examples: + > SELECT 2 _FUNC_ 2; + true + > SELECT true _FUNC_ NULL; + false + > SELECT NULL _FUNC_ NULL; + true """) case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComparison { @@ -564,8 +607,25 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp } } +// scalastyle:off line.size.limit @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.") + usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.", + arguments = """ + Arguments: + * expr1, expr2 - the two expressions must be same type which must be a type that can be ordered/compared. + """, + examples = """ + Examples: + > SELECT 1 _FUNC_ 2; + true + > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52'); + false + > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52'); + true + > SELECT 1 _FUNC_ NULL; + NULL + """) +// scalastyle:on line.size.limit case class LessThan(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -576,8 +636,25 @@ case class LessThan(left: Expression, right: Expression) protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lt(input1, input2) } +// scalastyle:off line.size.limit @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.") + usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.", + arguments = """ + Arguments: + * expr1, expr2 - the two expressions must be same type which must be a type that can be ordered/compared. 
+ """, + examples = """ + Examples: + > SELECT 2 _FUNC_ 2; + true + > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52'); + true + > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52'); + true + > SELECT 1 _FUNC_ NULL; + NULL + """) +// scalastyle:on line.size.limit case class LessThanOrEqual(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -588,8 +665,25 @@ case class LessThanOrEqual(left: Expression, right: Expression) protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lteq(input1, input2) } +// scalastyle:off line.size.limit @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.") + usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.", + arguments = """ + Arguments: + * expr1, expr2 - the two expressions must be same type which must be a type that can be ordered/compared. + """, + examples = """ + Examples: + > SELECT 2 _FUNC_ 1; + true + > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52'); + false + > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52'); + false + > SELECT 1 _FUNC_ NULL; + NULL + """) +// scalastyle:on line.size.limit case class GreaterThan(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -600,8 +694,25 @@ case class GreaterThan(left: Expression, right: Expression) protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.gt(input1, input2) } +// scalastyle:off line.size.limit @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than or equal to `expr2`.") + usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.", + arguments = """ + Arguments: + * expr1, expr2 - the two expressions must be same type which must be a type that can be ordered/compared. 
+ """, + examples = """ + Examples: + > SELECT 2 _FUNC_ 1; + true + > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52'); + true + > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52'); + false + > SELECT 1 _FUNC_ NULL; + NULL + """) +// scalastyle:on line.size.limit case class GreaterThanOrEqual(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { From 1369fd5d9e6e78db90fa4a1af15ceeff9be38478 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 9 Aug 2017 03:31:29 +0000 Subject: [PATCH 2/7] More clear statement on binary comparison expressions. Add some test cases. --- .../sql/catalyst/expressions/predicates.scala | 26 +- .../sql-tests/inputs/predicate-functions.sql | 42 +++ .../results/predicate-functions.sql.out | 250 ++++++++++++++++++ 3 files changed, 311 insertions(+), 7 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 0d7eeba7e3517..89c0c5ad123bb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -504,12 +504,14 @@ object Equality { usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` equals `expr2`, or false otherwise.", arguments = """ Arguments: - * expr1, expr2 - the two expressions must be same type. + * expr1, expr2 - the two expressions must be same type or can be casted to a common type. 
""", examples = """ Examples: > SELECT 2 _FUNC_ 2; true + > SELECT 1 _FUNC_ '1'; + true > SELECT true _FUNC_ NULL; NULL > SELECT NULL _FUNC_ NULL; @@ -551,12 +553,14 @@ case class EqualTo(left: Expression, right: Expression) """, arguments = """ Arguments: - * expr1, expr2 - the two expressions must be same type. + * expr1, expr2 - the two expressions must be same type or can be casted to a common type. """, examples = """ Examples: > SELECT 2 _FUNC_ 2; true + > SELECT 1 _FUNC_ '1'; + true > SELECT true _FUNC_ NULL; false > SELECT NULL _FUNC_ NULL; @@ -612,12 +616,14 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.", arguments = """ Arguments: - * expr1, expr2 - the two expressions must be same type which must be a type that can be ordered/compared. + * expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared. """, examples = """ Examples: > SELECT 1 _FUNC_ 2; true + > SELECT 1.1 _FUNC_ '1'; + false > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52'); false > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52'); @@ -641,12 +647,14 @@ case class LessThan(left: Expression, right: Expression) usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.", arguments = """ Arguments: - * expr1, expr2 - the two expressions must be same type which must be a type that can be ordered/compared. + * expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared. 
""", examples = """ Examples: > SELECT 2 _FUNC_ 2; true + > SELECT 1.0 _FUNC_ '1'; + true > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52'); true > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52'); @@ -670,12 +678,14 @@ case class LessThanOrEqual(left: Expression, right: Expression) usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.", arguments = """ Arguments: - * expr1, expr2 - the two expressions must be same type which must be a type that can be ordered/compared. + * expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared. """, examples = """ Examples: > SELECT 2 _FUNC_ 1; true + > SELECT 2 _FUNC_ '1.1'; + true > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52'); false > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52'); @@ -696,15 +706,17 @@ case class GreaterThan(left: Expression, right: Expression) // scalastyle:off line.size.limit @ExpressionDescription( - usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.", + usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than or equal to `expr2`.", arguments = """ Arguments: - * expr1, expr2 - the two expressions must be same type which must be a type that can be ordered/compared. + * expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared. 
""", examples = """ Examples: > SELECT 2 _FUNC_ 1; true + > SELECT 2.0 _FUNC_ '2.1'; + false > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-07-30 04:17:52'); true > SELECT to_date('2009-07-30 04:17:52') _FUNC_ to_date('2009-08-01 04:17:52'); diff --git a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql new file mode 100644 index 0000000000000..efaa3f3eb77c4 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql @@ -0,0 +1,42 @@ +-- In +select 1 in(1, 2, 3); +select 1 in(2, 3, 4); +select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 1), named_struct('a', 1, 'b', 3)); +select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3)); + +-- EqualTo +select 1 = 1; +select 1 = '1'; +select 1.0 = '1'; + +-- GreaterThan +select 1 > '1'; +select 2 > '1.0'; +select 2 > '2.0'; +select 2 > '2.2'; +select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52'); +select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52'; + +-- GreaterThanOrEqual +select 1 >= '1'; +select 2 >= '1.0'; +select 2 >= '2.0'; +select 2.0 >= '2.2'; +select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52'); +select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52'; + +-- LessThan +select 1 < '1'; +select 2 < '1.0'; +select 2 < '2.0'; +select 2.0 < '2.2'; +select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52'); +select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52'; + +-- LessThanOrEqual +select 1 <= '1'; +select 2 <= '1.0'; +select 2 <= '2.0'; +select 2.0 <= '2.2'; +select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52'); +select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52'; diff --git a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out new 
file mode 100644 index 0000000000000..424cacc8b5279 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out @@ -0,0 +1,250 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 31 + + +-- !query 0 +select 1 in(1, 2, 3) +-- !query 0 schema +struct<(1 IN (1, 2, 3)):boolean> +-- !query 0 output +true + + +-- !query 1 +select 1 in(2, 3, 4) +-- !query 1 schema +struct<(1 IN (2, 3, 4)):boolean> +-- !query 1 output +false + + +-- !query 2 +select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 1), named_struct('a', 1, 'b', 3)) +-- !query 2 schema +struct<(named_struct(a, 1, b, 2) IN (named_struct(a, 1, b, 1), named_struct(a, 1, b, 3))):boolean> +-- !query 2 output +false + + +-- !query 3 +select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3)) +-- !query 3 schema +struct<(named_struct(a, 1, b, 2) IN (named_struct(a, 1, b, 2), named_struct(a, 1, b, 3))):boolean> +-- !query 3 output +true + + +-- !query 4 +select 1 = 1 +-- !query 4 schema +struct<(1 = 1):boolean> +-- !query 4 output +true + + +-- !query 5 +select 1 = '1' +-- !query 5 schema +struct<(1 = CAST(1 AS INT)):boolean> +-- !query 5 output +true + + +-- !query 6 +select 1.0 = '1' +-- !query 6 schema +struct<(1.0 = CAST(1 AS DECIMAL(2,1))):boolean> +-- !query 6 output +true + + +-- !query 7 +select 1 > '1' +-- !query 7 schema +struct<(1 > CAST(1 AS INT)):boolean> +-- !query 7 output +false + + +-- !query 8 +select 2 > '1.0' +-- !query 8 schema +struct<(2 > CAST(1.0 AS INT)):boolean> +-- !query 8 output +true + + +-- !query 9 +select 2 > '2.0' +-- !query 9 schema +struct<(2 > CAST(2.0 AS INT)):boolean> +-- !query 9 output +false + + +-- !query 10 +select 2 > '2.2' +-- !query 10 schema +struct<(2 > CAST(2.2 AS INT)):boolean> +-- !query 10 output +false + + +-- !query 11 +select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52') +-- !query 11 schema +struct<(to_date('2009-07-30 04:17:52') > 
to_date('2009-07-30 04:17:52')):boolean> +-- !query 11 output +false + + +-- !query 12 +select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52' +-- !query 12 schema +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean> +-- !query 12 output +false + + +-- !query 13 +select 1 >= '1' +-- !query 13 schema +struct<(1 >= CAST(1 AS INT)):boolean> +-- !query 13 output +true + + +-- !query 14 +select 2 >= '1.0' +-- !query 14 schema +struct<(2 >= CAST(1.0 AS INT)):boolean> +-- !query 14 output +true + + +-- !query 15 +select 2 >= '2.0' +-- !query 15 schema +struct<(2 >= CAST(2.0 AS INT)):boolean> +-- !query 15 output +true + + +-- !query 16 +select 2.0 >= '2.2' +-- !query 16 schema +struct<(2.0 >= CAST(2.2 AS DECIMAL(2,1))):boolean> +-- !query 16 output +false + + +-- !query 17 +select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52') +-- !query 17 schema +struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean> +-- !query 17 output +true + + +-- !query 18 +select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52' +-- !query 18 schema +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean> +-- !query 18 output +false + + +-- !query 19 +select 1 < '1' +-- !query 19 schema +struct<(1 < CAST(1 AS INT)):boolean> +-- !query 19 output +false + + +-- !query 20 +select 2 < '1.0' +-- !query 20 schema +struct<(2 < CAST(1.0 AS INT)):boolean> +-- !query 20 output +false + + +-- !query 21 +select 2 < '2.0' +-- !query 21 schema +struct<(2 < CAST(2.0 AS INT)):boolean> +-- !query 21 output +false + + +-- !query 22 +select 2.0 < '2.2' +-- !query 22 schema +struct<(2.0 < CAST(2.2 AS DECIMAL(2,1))):boolean> +-- !query 22 output +true + + +-- !query 23 +select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52') +-- !query 23 schema +struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean> +-- !query 23 output +false + + +-- !query 24 +select 
to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52' +-- !query 24 schema +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean> +-- !query 24 output +true + + +-- !query 25 +select 1 <= '1' +-- !query 25 schema +struct<(1 <= CAST(1 AS INT)):boolean> +-- !query 25 output +true + + +-- !query 26 +select 2 <= '1.0' +-- !query 26 schema +struct<(2 <= CAST(1.0 AS INT)):boolean> +-- !query 26 output +false + + +-- !query 27 +select 2 <= '2.0' +-- !query 27 schema +struct<(2 <= CAST(2.0 AS INT)):boolean> +-- !query 27 output +true + + +-- !query 28 +select 2.0 <= '2.2' +-- !query 28 schema +struct<(2.0 <= CAST(2.2 AS DECIMAL(2,1))):boolean> +-- !query 28 output +true + + +-- !query 29 +select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52') +-- !query 29 schema +struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean> +-- !query 29 output +true + + +-- !query 30 +select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52' +-- !query 30 schema +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) <= 2009-07-30 04:17:52):boolean> +-- !query 30 output +true From bca2b0bc71f5a7ea2d909dfad5e6e094e587d288 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 9 Aug 2017 04:36:44 +0000 Subject: [PATCH 3/7] Adjust line break. 
--- .../spark/sql/catalyst/expressions/predicates.scala | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 89c0c5ad123bb..eee2826e21836 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -616,7 +616,8 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.", arguments = """ Arguments: - * expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared. + * expr1, expr2 - the two expressions must be same type or can be casted to a common type, + and must be a type that can be ordered/compared. """, examples = """ Examples: @@ -647,7 +648,8 @@ case class LessThan(left: Expression, right: Expression) usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.", arguments = """ Arguments: - * expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared. + * expr1, expr2 - the two expressions must be same type or can be casted to a common type, + and must be a type that can be ordered/compared. """, examples = """ Examples: @@ -678,7 +680,8 @@ case class LessThanOrEqual(left: Expression, right: Expression) usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.", arguments = """ Arguments: - * expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared. 
+ * expr1, expr2 - the two expressions must be same type or can be casted to a common type, + and must be a type that can be ordered/compared. """, examples = """ Examples: @@ -709,7 +712,8 @@ case class GreaterThan(left: Expression, right: Expression) usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than or equal to `expr2`.", arguments = """ Arguments: - * expr1, expr2 - the two expressions must be same type or can be casted to a common type, and must be a type that can be ordered/compared. + * expr1, expr2 - the two expressions must be same type or can be casted to a common type, + and must be a type that can be ordered/compared. """, examples = """ Examples: From b64c9e67dfb34668ba93089d84f932778c005d7b Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Wed, 9 Aug 2017 04:45:06 +0000 Subject: [PATCH 4/7] Remove unnecessary scalastyle off comments. --- .../spark/sql/catalyst/expressions/predicates.scala | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index eee2826e21836..fbc684dc256b3 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -611,7 +611,6 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp } } -// scalastyle:off line.size.limit @ExpressionDescription( usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than `expr2`.", arguments = """ @@ -632,7 +631,6 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp > SELECT 1 _FUNC_ NULL; NULL """) -// scalastyle:on line.size.limit case class LessThan(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -643,7 +641,6 @@ case class LessThan(left: Expression, right: 
Expression) protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lt(input1, input2) } -// scalastyle:off line.size.limit @ExpressionDescription( usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is less than or equal to `expr2`.", arguments = """ @@ -664,7 +661,6 @@ case class LessThan(left: Expression, right: Expression) > SELECT 1 _FUNC_ NULL; NULL """) -// scalastyle:on line.size.limit case class LessThanOrEqual(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -675,7 +671,6 @@ case class LessThanOrEqual(left: Expression, right: Expression) protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.lteq(input1, input2) } -// scalastyle:off line.size.limit @ExpressionDescription( usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than `expr2`.", arguments = """ @@ -696,7 +691,6 @@ case class LessThanOrEqual(left: Expression, right: Expression) > SELECT 1 _FUNC_ NULL; NULL """) -// scalastyle:on line.size.limit case class GreaterThan(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { @@ -707,7 +701,6 @@ case class GreaterThan(left: Expression, right: Expression) protected override def nullSafeEval(input1: Any, input2: Any): Any = ordering.gt(input1, input2) } -// scalastyle:off line.size.limit @ExpressionDescription( usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` is greater than or equal to `expr2`.", arguments = """ @@ -728,7 +721,6 @@ case class GreaterThan(left: Expression, right: Expression) > SELECT 1 _FUNC_ NULL; NULL """) -// scalastyle:on line.size.limit case class GreaterThanOrEqual(left: Expression, right: Expression) extends BinaryComparison with NullIntolerant { From 099c6715377ba67274516417cee30b60f94f587a Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 1 Sep 2017 08:25:06 +0000 Subject: [PATCH 5/7] Note that Equal/EqualNullSafe only accept types that can be used in equality comparison. 
--- .../apache/spark/sql/catalyst/expressions/predicates.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index b324b2b5f63ca..6b99f0c207018 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -511,7 +511,8 @@ object Equality { usage = "expr1 _FUNC_ expr2 - Returns true if `expr1` equals `expr2`, or false otherwise.", arguments = """ Arguments: - * expr1, expr2 - the two expressions must be same type or can be casted to a common type. + * expr1, expr2 - the two expressions must be same type or can be casted to a common type, + and must be a type that can be used in equality comparison. """, examples = """ Examples: @@ -545,7 +546,8 @@ case class EqualTo(left: Expression, right: Expression) """, arguments = """ Arguments: - * expr1, expr2 - the two expressions must be same type or can be casted to a common type. + * expr1, expr2 - the two expressions must be same type or can be casted to a common type, + and must be a type that can be used in equality comparison. """, examples = """ Examples: From 444c64d3137126c1b6da84b33825961d3d691821 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 3 Sep 2017 02:40:32 +0000 Subject: [PATCH 6/7] Improve test case for `In`. 
--- .../sql/catalyst/expressions/predicates.scala | 22 ++- .../catalyst/expressions/PredicateSuite.scala | 78 +++++--- .../sql-tests/inputs/predicate-functions.sql | 6 - .../results/predicate-functions.sql.out | 176 +++++++----------- 4 files changed, 144 insertions(+), 138 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 6b99f0c207018..efcd45fad779c 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -512,7 +512,8 @@ object Equality { arguments = """ Arguments: * expr1, expr2 - the two expressions must be same type or can be casted to a common type, - and must be a type that can be used in equality comparison. + and must be a type that can be used in equality comparison. Map type is not supported. + For complex types such as array/struct, the data types of fields must be orderable. """, examples = """ Examples: @@ -547,7 +548,8 @@ case class EqualTo(left: Expression, right: Expression) arguments = """ Arguments: * expr1, expr2 - the two expressions must be same type or can be casted to a common type, - and must be a type that can be used in equality comparison. + and must be a type that can be used in equality comparison. Map type is not supported. + For complex types such as array/struct, the data types of fields must be orderable. """, examples = """ Examples: @@ -593,7 +595,9 @@ case class EqualNullSafe(left: Expression, right: Expression) extends BinaryComp arguments = """ Arguments: * expr1, expr2 - the two expressions must be same type or can be casted to a common type, - and must be a type that can be ordered/compared. + and must be a type that can be ordered. For example, map type is not orderable, so it + is not supported. 
For complex types such as array/struct, the data types of fields must + be orderable. """, examples = """ Examples: @@ -621,7 +625,9 @@ case class LessThan(left: Expression, right: Expression) arguments = """ Arguments: * expr1, expr2 - the two expressions must be same type or can be casted to a common type, - and must be a type that can be ordered/compared. + and must be a type that can be ordered. For example, map type is not orderable, so it + is not supported. For complex types such as array/struct, the data types of fields must + be orderable. """, examples = """ Examples: @@ -649,7 +655,9 @@ case class LessThanOrEqual(left: Expression, right: Expression) arguments = """ Arguments: * expr1, expr2 - the two expressions must be same type or can be casted to a common type, - and must be a type that can be ordered/compared. + and must be a type that can be ordered. For example, map type is not orderable, so it + is not supported. For complex types such as array/struct, the data types of fields must + be orderable. """, examples = """ Examples: @@ -677,7 +685,9 @@ case class GreaterThan(left: Expression, right: Expression) arguments = """ Arguments: * expr1, expr2 - the two expressions must be same type or can be casted to a common type, - and must be a type that can be ordered/compared. + and must be a type that can be ordered. For example, map type is not orderable, so it + is not supported. For complex types such as array/struct, the data types of fields must + be orderable. 
""", examples = """ Examples: diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index 055c31c2b3018..999261d87630b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -123,7 +123,7 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { (null, false, null) :: (null, null, null) :: Nil) - test("IN") { + test("basic IN predicate test") { checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), Seq(Literal(1), Literal(2))), null) checkEvaluation(In(NonFoldableLiteral.create(null, IntegerType), @@ -151,29 +151,63 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^Ba*n"))), true) checkEvaluation(In(Literal("^Ba*n"), Seq(Literal("aa"), Literal("^n"))), false) - val primitiveTypes = Seq(IntegerType, FloatType, DoubleType, StringType, ByteType, ShortType, - LongType, BinaryType, BooleanType, DecimalType.USER_DEFAULT, TimestampType) - primitiveTypes.foreach { t => - val dataGen = RandomDataGenerator.forType(t, nullable = true).get - val inputData = Seq.fill(10) { - val value = dataGen.apply() - value match { - case d: Double if d.isNaN => 0.0d - case f: Float if f.isNaN => 0.0f - case _ => value + } + + test("IN with different types") { + def testWithRandomDataGeneration(dataType: DataType, nullable: Boolean): Unit = { + val dataGen = RandomDataGenerator.forType(dataType, nullable = nullable) + if (dataGen.isDefined) { + val inputData = Seq.fill(10) { + val value = dataGen.get.apply() + value match { + case d: Double if d.isNaN => 0.0d + case f: Float if f.isNaN => 0.0f + case _ => value + } } + val input = inputData.map(NonFoldableLiteral.create(_, dataType)) + val 
expected = if (inputData(0) == null) { + null + } else if (inputData.slice(1, 10).contains(inputData(0))) { + true + } else if (inputData.slice(1, 10).contains(null)) { + null + } else { + false + } + checkEvaluation(In(input(0), input.slice(1, 10)), expected) } - val input = inputData.map(NonFoldableLiteral.create(_, t)) - val expected = if (inputData(0) == null) { - null - } else if (inputData.slice(1, 10).contains(inputData(0))) { - true - } else if (inputData.slice(1, 10).contains(null)) { - null - } else { - false - } - checkEvaluation(In(input(0), input.slice(1, 10)), expected) + } + + val atomicTypes = DataTypeTestUtils.atomicTypes.filter { t => + RandomDataGenerator.forType(t).isDefined && !t.isInstanceOf[DecimalType] + } ++ Seq(DecimalType.USER_DEFAULT) + + val atomicArrayTypes = atomicTypes.map(ArrayType(_, containsNull = true)) + + // Basic types: + for ( + dataType <- atomicTypes; + nullable <- Seq(true, false)) { + testWithRandomDataGeneration(dataType, nullable) + } + + // Array types: + for ( + arrayType <- atomicArrayTypes; + nullable <- Seq(true, false) + if RandomDataGenerator.forType(arrayType.elementType, arrayType.containsNull).isDefined) { + testWithRandomDataGeneration(arrayType, nullable) + } + + // Struct types: + for ( + colOneType <- atomicTypes; + colTwoType <- atomicTypes; + nullable <- Seq(true, false)) { + val structType = StructType( + StructField("a", colOneType) :: StructField("b", colTwoType) :: Nil) + testWithRandomDataGeneration(structType, nullable) } } diff --git a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql index efaa3f3eb77c4..3b3d4ad64b3ec 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/predicate-functions.sql @@ -1,9 +1,3 @@ --- In -select 1 in(1, 2, 3); -select 1 in(2, 3, 4); -select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 1), 
named_struct('a', 1, 'b', 3)); -select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3)); - -- EqualTo select 1 = 1; select 1 = '1'; diff --git a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out index 424cacc8b5279..8e7e04c8e1c4f 100644 --- a/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/predicate-functions.sql.out @@ -1,250 +1,218 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 31 +-- Number of queries: 27 -- !query 0 -select 1 in(1, 2, 3) +select 1 = 1 -- !query 0 schema -struct<(1 IN (1, 2, 3)):boolean> +struct<(1 = 1):boolean> -- !query 0 output true -- !query 1 -select 1 in(2, 3, 4) +select 1 = '1' -- !query 1 schema -struct<(1 IN (2, 3, 4)):boolean> +struct<(1 = CAST(1 AS INT)):boolean> -- !query 1 output -false +true -- !query 2 -select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 1), named_struct('a', 1, 'b', 3)) +select 1.0 = '1' -- !query 2 schema -struct<(named_struct(a, 1, b, 2) IN (named_struct(a, 1, b, 1), named_struct(a, 1, b, 3))):boolean> +struct<(1.0 = CAST(1 AS DECIMAL(2,1))):boolean> -- !query 2 output -false +true -- !query 3 -select named_struct('a', 1, 'b', 2) in(named_struct('a', 1, 'b', 2), named_struct('a', 1, 'b', 3)) +select 1 > '1' -- !query 3 schema -struct<(named_struct(a, 1, b, 2) IN (named_struct(a, 1, b, 2), named_struct(a, 1, b, 3))):boolean> +struct<(1 > CAST(1 AS INT)):boolean> -- !query 3 output -true +false -- !query 4 -select 1 = 1 +select 2 > '1.0' -- !query 4 schema -struct<(1 = 1):boolean> +struct<(2 > CAST(1.0 AS INT)):boolean> -- !query 4 output true -- !query 5 -select 1 = '1' +select 2 > '2.0' -- !query 5 schema -struct<(1 = CAST(1 AS INT)):boolean> +struct<(2 > CAST(2.0 AS INT)):boolean> -- !query 5 output -true +false -- !query 6 -select 1.0 = '1' +select 2 > 
'2.2' -- !query 6 schema -struct<(1.0 = CAST(1 AS DECIMAL(2,1))):boolean> +struct<(2 > CAST(2.2 AS INT)):boolean> -- !query 6 output -true +false -- !query 7 -select 1 > '1' +select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52') -- !query 7 schema -struct<(1 > CAST(1 AS INT)):boolean> +struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean> -- !query 7 output false -- !query 8 -select 2 > '1.0' +select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52' -- !query 8 schema -struct<(2 > CAST(1.0 AS INT)):boolean> +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean> -- !query 8 output -true +false -- !query 9 -select 2 > '2.0' +select 1 >= '1' -- !query 9 schema -struct<(2 > CAST(2.0 AS INT)):boolean> +struct<(1 >= CAST(1 AS INT)):boolean> -- !query 9 output -false +true -- !query 10 -select 2 > '2.2' +select 2 >= '1.0' -- !query 10 schema -struct<(2 > CAST(2.2 AS INT)):boolean> +struct<(2 >= CAST(1.0 AS INT)):boolean> -- !query 10 output -false +true -- !query 11 -select to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52') +select 2 >= '2.0' -- !query 11 schema -struct<(to_date('2009-07-30 04:17:52') > to_date('2009-07-30 04:17:52')):boolean> +struct<(2 >= CAST(2.0 AS INT)):boolean> -- !query 11 output -false +true -- !query 12 -select to_date('2009-07-30 04:17:52') > '2009-07-30 04:17:52' +select 2.0 >= '2.2' -- !query 12 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) > 2009-07-30 04:17:52):boolean> +struct<(2.0 >= CAST(2.2 AS DECIMAL(2,1))):boolean> -- !query 12 output false -- !query 13 -select 1 >= '1' +select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52') -- !query 13 schema -struct<(1 >= CAST(1 AS INT)):boolean> +struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean> -- !query 13 output true -- !query 14 -select 2 >= '1.0' +select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52' -- !query 14 schema 
-struct<(2 >= CAST(1.0 AS INT)):boolean> +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean> -- !query 14 output -true +false -- !query 15 -select 2 >= '2.0' +select 1 < '1' -- !query 15 schema -struct<(2 >= CAST(2.0 AS INT)):boolean> +struct<(1 < CAST(1 AS INT)):boolean> -- !query 15 output -true +false -- !query 16 -select 2.0 >= '2.2' +select 2 < '1.0' -- !query 16 schema -struct<(2.0 >= CAST(2.2 AS DECIMAL(2,1))):boolean> +struct<(2 < CAST(1.0 AS INT)):boolean> -- !query 16 output false -- !query 17 -select to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52') +select 2 < '2.0' -- !query 17 schema -struct<(to_date('2009-07-30 04:17:52') >= to_date('2009-07-30 04:17:52')):boolean> +struct<(2 < CAST(2.0 AS INT)):boolean> -- !query 17 output -true +false -- !query 18 -select to_date('2009-07-30 04:17:52') >= '2009-07-30 04:17:52' +select 2.0 < '2.2' -- !query 18 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) >= 2009-07-30 04:17:52):boolean> +struct<(2.0 < CAST(2.2 AS DECIMAL(2,1))):boolean> -- !query 18 output -false +true -- !query 19 -select 1 < '1' +select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52') -- !query 19 schema -struct<(1 < CAST(1 AS INT)):boolean> +struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean> -- !query 19 output false -- !query 20 -select 2 < '1.0' +select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52' -- !query 20 schema -struct<(2 < CAST(1.0 AS INT)):boolean> +struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean> -- !query 20 output -false +true -- !query 21 -select 2 < '2.0' +select 1 <= '1' -- !query 21 schema -struct<(2 < CAST(2.0 AS INT)):boolean> +struct<(1 <= CAST(1 AS INT)):boolean> -- !query 21 output -false +true -- !query 22 -select 2.0 < '2.2' +select 2 <= '1.0' -- !query 22 schema -struct<(2.0 < CAST(2.2 AS DECIMAL(2,1))):boolean> +struct<(2 <= CAST(1.0 AS INT)):boolean> -- 
!query 22 output -true +false -- !query 23 -select to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52') +select 2 <= '2.0' -- !query 23 schema -struct<(to_date('2009-07-30 04:17:52') < to_date('2009-07-30 04:17:52')):boolean> +struct<(2 <= CAST(2.0 AS INT)):boolean> -- !query 23 output -false +true -- !query 24 -select to_date('2009-07-30 04:17:52') < '2009-07-30 04:17:52' +select 2.0 <= '2.2' -- !query 24 schema -struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) < 2009-07-30 04:17:52):boolean> +struct<(2.0 <= CAST(2.2 AS DECIMAL(2,1))):boolean> -- !query 24 output true -- !query 25 -select 1 <= '1' +select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52') -- !query 25 schema -struct<(1 <= CAST(1 AS INT)):boolean> +struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean> -- !query 25 output true -- !query 26 -select 2 <= '1.0' --- !query 26 schema -struct<(2 <= CAST(1.0 AS INT)):boolean> --- !query 26 output -false - - --- !query 27 -select 2 <= '2.0' --- !query 27 schema -struct<(2 <= CAST(2.0 AS INT)):boolean> --- !query 27 output -true - - --- !query 28 -select 2.0 <= '2.2' --- !query 28 schema -struct<(2.0 <= CAST(2.2 AS DECIMAL(2,1))):boolean> --- !query 28 output -true - - --- !query 29 -select to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52') --- !query 29 schema -struct<(to_date('2009-07-30 04:17:52') <= to_date('2009-07-30 04:17:52')):boolean> --- !query 29 output -true - - --- !query 30 select to_date('2009-07-30 04:17:52') <= '2009-07-30 04:17:52' --- !query 30 schema +-- !query 26 schema struct<(CAST(to_date('2009-07-30 04:17:52') AS STRING) <= 2009-07-30 04:17:52):boolean> --- !query 30 output +-- !query 26 output true From ec9199abf23fd254e76467ed6e6b252129d808e5 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Sun, 3 Sep 2017 10:02:54 +0000 Subject: [PATCH 7/7] Add negative test for map type. 
--- .../catalyst/expressions/PredicateSuite.scala | 66 +++++++++++++------ 1 file changed, 47 insertions(+), 19 deletions(-) diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index 999261d87630b..1438a88c19e0b 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -155,28 +155,38 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { test("IN with different types") { def testWithRandomDataGeneration(dataType: DataType, nullable: Boolean): Unit = { - val dataGen = RandomDataGenerator.forType(dataType, nullable = nullable) - if (dataGen.isDefined) { - val inputData = Seq.fill(10) { - val value = dataGen.get.apply() - value match { - case d: Double if d.isNaN => 0.0d - case f: Float if f.isNaN => 0.0f - case _ => value - } + val maybeDataGen = RandomDataGenerator.forType(dataType, nullable = nullable) + // Actually we won't pass in unsupported data types, this is a safety check. 
+ val dataGen = maybeDataGen.getOrElse( + fail(s"Failed to create data generator for type $dataType")) + val inputData = Seq.fill(10) { + val value = dataGen.apply() + def cleanData(value: Any) = value match { + case d: Double if d.isNaN => 0.0d + case f: Float if f.isNaN => 0.0f + case _ => value } - val input = inputData.map(NonFoldableLiteral.create(_, dataType)) - val expected = if (inputData(0) == null) { - null - } else if (inputData.slice(1, 10).contains(inputData(0))) { - true - } else if (inputData.slice(1, 10).contains(null)) { - null - } else { - false + value match { + case s: Seq[_] => s.map(cleanData(_)) + case m: Map[_, _] => + val pair = m.unzip + val newKeys = pair._1.map(cleanData(_)) + val newValues = pair._2.map(cleanData(_)) + newKeys.zip(newValues).toMap + case _ => cleanData(value) } - checkEvaluation(In(input(0), input.slice(1, 10)), expected) } + val input = inputData.map(NonFoldableLiteral.create(_, dataType)) + val expected = if (inputData(0) == null) { + null + } else if (inputData.slice(1, 10).contains(inputData(0))) { + true + } else if (inputData.slice(1, 10).contains(null)) { + null + } else { + false + } + checkEvaluation(In(input(0), input.slice(1, 10)), expected) } val atomicTypes = DataTypeTestUtils.atomicTypes.filter { t => @@ -209,6 +219,24 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { StructField("a", colOneType) :: StructField("b", colTwoType) :: Nil) testWithRandomDataGeneration(structType, nullable) } + + // Map types: not supported + for ( + keyType <- atomicTypes; + valueType <- atomicTypes; + nullable <- Seq(true, false)) { + val mapType = MapType(keyType, valueType) + val e = intercept[Exception] { + testWithRandomDataGeneration(mapType, nullable) + } + if (e.getMessage.contains("Code generation of")) { + // If the `value` expression is null, `eval` will be short-circuited. + // Codegen version evaluation will be run then. 
+ assert(e.getMessage.contains("cannot generate equality code for un-comparable type")) + } else { + assert(e.getMessage.contains("Exception evaluating")) + } + } } test("INSET") {