From 60f2829fc418a5aa7d265ecc0dca38d1f41e1ba6 Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Fri, 3 Nov 2017 15:17:42 +0100 Subject: [PATCH 1/2] [SPARK-22418][SQL][TEST] Add test cases for NULL Handling --- .../apache/spark/sql/NullHandlingSuite.scala | 130 ++++++++++++++++++ 1 file changed, 130 insertions(+) create mode 100644 sql/core/src/test/scala/org/apache/spark/sql/NullHandlingSuite.scala diff --git a/sql/core/src/test/scala/org/apache/spark/sql/NullHandlingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/NullHandlingSuite.scala new file mode 100644 index 0000000000000..7d8e735c5b684 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/NullHandlingSuite.scala @@ -0,0 +1,130 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.sql + +import org.apache.spark.sql.functions._ +import org.apache.spark.sql.test.SharedSQLContext + +case class T1(a: Int, b: Option[Int], c: Option[Int]) + +/** + * This test suite takes https://sqlite.org/nulls.html as a reference. 
+ */ +class NullHandlingSuite extends QueryTest with SharedSQLContext { + import testImplicits._ + + lazy val t1: DataFrame = Seq(T1(1, Some(0), Some(0)), T1(2, Some(0), Some(1)), + T1(3, Some(1), Some(0)), T1(4, Some(1), Some(1)), T1(5, None, Some(0)), + T1(6, None, Some(1)), T1(7, None, None)).toDF() + lazy val zeros = Seq(0, 0.0f, 0.0d, BigDecimal(0)) + + test("Adding anything to null gives null") { + val actualResult = t1.select($"b" + $"c") + val expectedResult = Seq(Row(0), Row(1), Row(1), Row(2), Row(null), Row(null), Row(null)) + checkAnswer(actualResult, expectedResult) + } + + test("Multiplying null by zero gives null") { + zeros.foreach { zero => + val actualResult = t1.select($"b" * zero) + val expectedResult = Seq(Row(zero), Row(zero), Row(zero), Row(zero), + Row(null), Row(null), Row(null)) + checkAnswer(actualResult, expectedResult) + } + } + + test("nulls are NOT distinct in SELECT DISTINCT") { + val actualResult = t1.select($"b").distinct() + val expectedResult = Seq(Row(0), Row(1), Row(null)) + checkAnswer(actualResult, expectedResult) + } + + test("nulls are NOT distinct in UNION") { + val actualResult = t1.select($"b").union(t1.select($"b")).distinct() + val expectedResult = Seq(Row(0), Row(1), Row(null)) + checkAnswer(actualResult, expectedResult) + } + + test("CASE WHEN null THEN 1 ELSE 0 END is 0") { + zeros.foreach { zero => + // case when b<>0 then 1 else 0 end + val actualResult = t1.select(when($"b" =!= zero, lit(1)).otherwise(lit(0))) + val expectedResult = Seq(Row(0), Row(0), Row(1), Row(1), Row(0), Row(0), Row(0)) + checkAnswer(actualResult, expectedResult) + + // case when not b<>0 then 1 else 0 end + val actualResult1 = t1.select(when(not($"b" =!= zero), lit(1)).otherwise(lit(0))) + val expectedResult1 = Seq(Row(1), Row(1), Row(0), Row(0), Row(0), Row(0), Row(0)) + checkAnswer(actualResult1, expectedResult1) + + // case when b<>0 and c<>0 then 1 else 0 end + val actualResult2 = t1.select(when($"b" =!= zero and $"c" =!= zero, 
lit(1)).otherwise(lit(0))) + val expectedResult2 = Seq(Row(0), Row(0), Row(0), Row(1), Row(0), Row(0), Row(0)) + checkAnswer(actualResult2, expectedResult2) + + // case when not (b<>0 and c<>0) then 1 else 0 end + val actualResult3 = t1.select(when(not($"b" =!= zero and $"c" =!= zero), lit(1)) + .otherwise(lit(0))) + val expectedResult3 = Seq(Row(1), Row(1), Row(1), Row(0), Row(1), Row(0), Row(0)) + checkAnswer(actualResult3, expectedResult3) + + // case when b<>0 or c<>0 then 1 else 0 end + val actualResult4 = t1.select(when($"b" =!= zero or $"c" =!= zero, lit(1)).otherwise(lit(0))) + val expectedResult4 = Seq(Row(0), Row(1), Row(1), Row(1), Row(0), Row(1), Row(0)) + checkAnswer(actualResult4, expectedResult4) + + // case when not (b<>0 or c<>0) then 1 else 0 end + val actualResult5 = t1.select(when(not($"b" =!= zero or $"c" =!= zero), lit(1)) + .otherwise(lit(0))) + val expectedResult5 = Seq(Row(1), Row(0), Row(0), Row(0), Row(0), Row(0), Row(0)) + checkAnswer(actualResult5, expectedResult5) + } + } + + test("null with aggregate operators") { + val actualResult = t1.select(count($"*"), count($"b"), sum($"b"), avg($"b"), + min($"b"), max($"b")) + val expectedResult = Seq(Row(7, 4, 2, 0.5, 0, 1)) + checkAnswer(actualResult, expectedResult) + } + + test("Check the behavior of NULLs in WHERE clauses") { + val actualResult = t1.where($"b" < 10).select($"a") + val expectedResult = Seq(Row(1), Row(2), Row(3), Row(4)) + checkAnswer(actualResult, expectedResult) + + val actualResult1 = t1.where(not($"b" < 10)).select($"a") + val expectedResult1 = Seq() + checkAnswer(actualResult1, expectedResult1) + + val actualResult2 = t1.where($"b" < 10 or $"c" === 1).select($"a") + val expectedResult2 = Seq(Row(1), Row(2), Row(3), Row(4), Row(6)) + checkAnswer(actualResult2, expectedResult2) + + val actualResult3 = t1.where(not($"b" < 10 or $"c" === 1)).select($"a") + val expectedResult3 = Seq() + checkAnswer(actualResult3, expectedResult3) + + val actualResult4 = t1.where($"b" < 10 
and $"c" === 1).select($"a") + val expectedResult4 = Seq(Row(2), Row(4)) + checkAnswer(actualResult4, expectedResult4) + + val actualResult5 = t1.where(not($"b" < 10 and $"c" === 1)).select($"a") + val expectedResult5 = Seq(Row(1), Row(3), Row(5)) + checkAnswer(actualResult5, expectedResult5) + } +} From 92308a4341849258caf549d1bcbeabd9002d3ead Mon Sep 17 00:00:00 2001 From: Marco Gaido Date: Fri, 3 Nov 2017 21:43:56 +0100 Subject: [PATCH 2/2] moving null handling tests to sql --- .../sql-tests/inputs/null-handling.sql | 48 +++ .../sql-tests/results/null-handling.sql.out | 305 ++++++++++++++++++ .../apache/spark/sql/NullHandlingSuite.scala | 130 -------- 3 files changed, 353 insertions(+), 130 deletions(-) create mode 100644 sql/core/src/test/resources/sql-tests/inputs/null-handling.sql create mode 100644 sql/core/src/test/resources/sql-tests/results/null-handling.sql.out delete mode 100644 sql/core/src/test/scala/org/apache/spark/sql/NullHandlingSuite.scala diff --git a/sql/core/src/test/resources/sql-tests/inputs/null-handling.sql b/sql/core/src/test/resources/sql-tests/inputs/null-handling.sql new file mode 100644 index 0000000000000..b90b0a6ac7500 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/null-handling.sql @@ -0,0 +1,48 @@ +-- Create a test table with data +create table t1(a int, b int, c int) using parquet; +insert into t1 values(1,0,0); +insert into t1 values(2,0,1); +insert into t1 values(3,1,0); +insert into t1 values(4,1,1); +insert into t1 values(5,null,0); +insert into t1 values(6,null,1); +insert into t1 values(7,null,null); + +-- Adding anything to null gives null +select a, b+c from t1; + +-- Multiplying null by zero gives null +select a+10, b*0 from t1; + +-- nulls are NOT distinct in SELECT DISTINCT +select distinct b from t1; + +-- nulls are NOT distinct in UNION +select b from t1 union select b from t1; + +-- CASE WHEN null THEN 1 ELSE 0 END is 0 +select a+20, case b when c then 1 else 0 end from t1; +select a+30, case c 
when b then 1 else 0 end from t1; +select a+40, case when b<>0 then 1 else 0 end from t1; +select a+50, case when not b<>0 then 1 else 0 end from t1; +select a+60, case when b<>0 and c<>0 then 1 else 0 end from t1; + +-- "not (null AND false)" is true +select a+70, case when not (b<>0 and c<>0) then 1 else 0 end from t1; + +-- "null OR true" is true +select a+80, case when b<>0 or c<>0 then 1 else 0 end from t1; +select a+90, case when not (b<>0 or c<>0) then 1 else 0 end from t1; + +-- null with aggregate operators +select count(*), count(b), sum(b), avg(b), min(b), max(b) from t1; + +-- Check the behavior of NULLs in WHERE clauses +select a+100 from t1 where b<10; +select a+110 from t1 where not b>10; +select a+120 from t1 where b<10 OR c=1; +select a+130 from t1 where b<10 AND c=1; +select a+140 from t1 where not (b<10 AND c=1); +select a+150 from t1 where not (c=1 AND b<10); + +drop table t1; diff --git a/sql/core/src/test/resources/sql-tests/results/null-handling.sql.out b/sql/core/src/test/resources/sql-tests/results/null-handling.sql.out new file mode 100644 index 0000000000000..5005dfeb6cd14 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/null-handling.sql.out @@ -0,0 +1,305 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of queries: 28 + + +-- !query 0 +create table t1(a int, b int, c int) using parquet +-- !query 0 schema +struct<> +-- !query 0 output + + + +-- !query 1 +insert into t1 values(1,0,0) +-- !query 1 schema +struct<> +-- !query 1 output + + + +-- !query 2 +insert into t1 values(2,0,1) +-- !query 2 schema +struct<> +-- !query 2 output + + + +-- !query 3 +insert into t1 values(3,1,0) +-- !query 3 schema +struct<> +-- !query 3 output + + + +-- !query 4 +insert into t1 values(4,1,1) +-- !query 4 schema +struct<> +-- !query 4 output + + + +-- !query 5 +insert into t1 values(5,null,0) +-- !query 5 schema +struct<> +-- !query 5 output + + + +-- !query 6 +insert into t1 values(6,null,1) +-- !query 6 schema +struct<> 
+-- !query 6 output + + + +-- !query 7 +insert into t1 values(7,null,null) +-- !query 7 schema +struct<> +-- !query 7 output + + + +-- !query 8 +select a, b+c from t1 +-- !query 8 schema +struct<a:int,(b + c):int> +-- !query 8 output +1 0 +2 1 +3 1 +4 2 +5 NULL +6 NULL +7 NULL + + +-- !query 9 +select a+10, b*0 from t1 +-- !query 9 schema +struct<(a + 10):int,(b * 0):int> +-- !query 9 output +11 0 +12 0 +13 0 +14 0 +15 NULL +16 NULL +17 NULL + + +-- !query 10 +select distinct b from t1 +-- !query 10 schema +struct<b:int> +-- !query 10 output +0 +1 +NULL + + +-- !query 11 +select b from t1 union select b from t1 +-- !query 11 schema +struct<b:int> +-- !query 11 output +0 +1 +NULL + + +-- !query 12 +select a+20, case b when c then 1 else 0 end from t1 +-- !query 12 schema +struct<(a + 20):int,CASE WHEN (b = c) THEN 1 ELSE 0 END:int> +-- !query 12 output +21 1 +22 0 +23 0 +24 1 +25 0 +26 0 +27 0 + + +-- !query 13 +select a+30, case c when b then 1 else 0 end from t1 +-- !query 13 schema +struct<(a + 30):int,CASE WHEN (c = b) THEN 1 ELSE 0 END:int> +-- !query 13 output +31 1 +32 0 +33 0 +34 1 +35 0 +36 0 +37 0 + + +-- !query 14 +select a+40, case when b<>0 then 1 else 0 end from t1 +-- !query 14 schema +struct<(a + 40):int,CASE WHEN (NOT (b = 0)) THEN 1 ELSE 0 END:int> +-- !query 14 output +41 0 +42 0 +43 1 +44 1 +45 0 +46 0 +47 0 + + +-- !query 15 +select a+50, case when not b<>0 then 1 else 0 end from t1 +-- !query 15 schema +struct<(a + 50):int,CASE WHEN (NOT (NOT (b = 0))) THEN 1 ELSE 0 END:int> +-- !query 15 output +51 1 +52 1 +53 0 +54 0 +55 0 +56 0 +57 0 + + +-- !query 16 +select a+60, case when b<>0 and c<>0 then 1 else 0 end from t1 +-- !query 16 schema +struct<(a + 60):int,CASE WHEN ((NOT (b = 0)) AND (NOT (c = 0))) THEN 1 ELSE 0 END:int> +-- !query 16 output +61 0 +62 0 +63 0 +64 1 +65 0 +66 0 +67 0 + + +-- !query 17 +select a+70, case when not (b<>0 and c<>0) then 1 else 0 end from t1 +-- !query 17 schema +struct<(a + 70):int,CASE WHEN (NOT ((NOT (b = 0)) AND (NOT (c = 0)))) THEN 1 ELSE 0 
END:int> +-- !query 17 output +71 1 +72 1 +73 1 +74 0 +75 1 +76 0 +77 0 + + +-- !query 18 +select a+80, case when b<>0 or c<>0 then 1 else 0 end from t1 +-- !query 18 schema +struct<(a + 80):int,CASE WHEN ((NOT (b = 0)) OR (NOT (c = 0))) THEN 1 ELSE 0 END:int> +-- !query 18 output +81 0 +82 1 +83 1 +84 1 +85 0 +86 1 +87 0 + + +-- !query 19 +select a+90, case when not (b<>0 or c<>0) then 1 else 0 end from t1 +-- !query 19 schema +struct<(a + 90):int,CASE WHEN (NOT ((NOT (b = 0)) OR (NOT (c = 0)))) THEN 1 ELSE 0 END:int> +-- !query 19 output +91 1 +92 0 +93 0 +94 0 +95 0 +96 0 +97 0 + + +-- !query 20 +select count(*), count(b), sum(b), avg(b), min(b), max(b) from t1 +-- !query 20 schema +struct<count(1):bigint,count(b):bigint,sum(b):bigint,avg(b):double,min(b):int,max(b):int> +-- !query 20 output +7 4 2 0.5 0 1 + + +-- !query 21 +select a+100 from t1 where b<10 +-- !query 21 schema +struct<(a + 100):int> +-- !query 21 output +101 +102 +103 +104 + + +-- !query 22 +select a+110 from t1 where not b>10 +-- !query 22 schema +struct<(a + 110):int> +-- !query 22 output +111 +112 +113 +114 + + +-- !query 23 +select a+120 from t1 where b<10 OR c=1 +-- !query 23 schema +struct<(a + 120):int> +-- !query 23 output +121 +122 +123 +124 +126 + + +-- !query 24 +select a+130 from t1 where b<10 AND c=1 +-- !query 24 schema +struct<(a + 130):int> +-- !query 24 output +132 +134 + + +-- !query 25 +select a+140 from t1 where not (b<10 AND c=1) +-- !query 25 schema +struct<(a + 140):int> +-- !query 25 output +141 +143 +145 + + +-- !query 26 +select a+150 from t1 where not (c=1 AND b<10) +-- !query 26 schema +struct<(a + 150):int> +-- !query 26 output +151 +153 +155 + + +-- !query 27 +drop table t1 +-- !query 27 schema +struct<> +-- !query 27 output + diff --git a/sql/core/src/test/scala/org/apache/spark/sql/NullHandlingSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/NullHandlingSuite.scala deleted file mode 100644 index 7d8e735c5b684..0000000000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/NullHandlingSuite.scala +++ /dev/null @@ -1,130 +0,0 @@ -/* - 
* Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.sql - -import org.apache.spark.sql.functions._ -import org.apache.spark.sql.test.SharedSQLContext - -case class T1(a: Int, b: Option[Int], c: Option[Int]) - -/** - * This test suite takes https://sqlite.org/nulls.html as a reference. 
- */ -class NullHandlingSuite extends QueryTest with SharedSQLContext { - import testImplicits._ - - lazy val t1: DataFrame = Seq(T1(1, Some(0), Some(0)), T1(2, Some(0), Some(1)), - T1(3, Some(1), Some(0)), T1(4, Some(1), Some(1)), T1(5, None, Some(0)), - T1(6, None, Some(1)), T1(7, None, None)).toDF() - lazy val zeros = Seq(0, 0.0f, 0.0d, BigDecimal(0)) - - test("Adding anything to null gives null") { - val actualResult = t1.select($"b" + $"c") - val expectedResult = Seq(Row(0), Row(1), Row(1), Row(2), Row(null), Row(null), Row(null)) - checkAnswer(actualResult, expectedResult) - } - - test("Multiplying null by zero gives null") { - zeros.foreach { zero => - val actualResult = t1.select($"b" * zero) - val expectedResult = Seq(Row(zero), Row(zero), Row(zero), Row(zero), - Row(null), Row(null), Row(null)) - checkAnswer(actualResult, expectedResult) - } - } - - test("nulls are NOT distinct in SELECT DISTINCT") { - val actualResult = t1.select($"b").distinct() - val expectedResult = Seq(Row(0), Row(1), Row(null)) - checkAnswer(actualResult, expectedResult) - } - - test("nulls are NOT distinct in UNION") { - val actualResult = t1.select($"b").union(t1.select($"b")).distinct() - val expectedResult = Seq(Row(0), Row(1), Row(null)) - checkAnswer(actualResult, expectedResult) - } - - test("CASE WHEN null THEN 1 ELSE 0 END is 0") { - zeros.foreach { zero => - // case when b<>0 then 1 else 0 end - val actualResult = t1.select(when($"b" =!= zero, lit(1)).otherwise(lit(0))) - val expectedResult = Seq(Row(0), Row(0), Row(1), Row(1), Row(0), Row(0), Row(0)) - checkAnswer(actualResult, expectedResult) - - // case when not b<>0 then 1 else 0 end - val actualResult1 = t1.select(when(not($"b" =!= zero), lit(1)).otherwise(lit(0))) - val expectedResult1 = Seq(Row(1), Row(1), Row(0), Row(0), Row(0), Row(0), Row(0)) - checkAnswer(actualResult1, expectedResult1) - - // case when b<>0 and c<>0 then 1 else 0 end - val actualResult2 = t1.select(when($"b" =!= zero and $"c" =!= zero, 
lit(1)).otherwise(lit(0))) - val expectedResult2 = Seq(Row(0), Row(0), Row(0), Row(1), Row(0), Row(0), Row(0)) - checkAnswer(actualResult2, expectedResult2) - - // case when not (b<>0 and c<>0) then 1 else 0 end - val actualResult3 = t1.select(when(not($"b" =!= zero and $"c" =!= zero), lit(1)) - .otherwise(lit(0))) - val expectedResult3 = Seq(Row(1), Row(1), Row(1), Row(0), Row(1), Row(0), Row(0)) - checkAnswer(actualResult3, expectedResult3) - - // case when b<>0 or c<>0 then 1 else 0 end - val actualResult4 = t1.select(when($"b" =!= zero or $"c" =!= zero, lit(1)).otherwise(lit(0))) - val expectedResult4 = Seq(Row(0), Row(1), Row(1), Row(1), Row(0), Row(1), Row(0)) - checkAnswer(actualResult4, expectedResult4) - - // case when not (b<>0 or c<>0) then 1 else 0 end - val actualResult5 = t1.select(when(not($"b" =!= zero or $"c" =!= zero), lit(1)) - .otherwise(lit(0))) - val expectedResult5 = Seq(Row(1), Row(0), Row(0), Row(0), Row(0), Row(0), Row(0)) - checkAnswer(actualResult5, expectedResult5) - } - } - - test("null with aggregate operators") { - val actualResult = t1.select(count($"*"), count($"b"), sum($"b"), avg($"b"), - min($"b"), max($"b")) - val expectedResult = Seq(Row(7, 4, 2, 0.5, 0, 1)) - checkAnswer(actualResult, expectedResult) - } - - test("Check the behavior of NULLs in WHERE clauses") { - val actualResult = t1.where($"b" < 10).select($"a") - val expectedResult = Seq(Row(1), Row(2), Row(3), Row(4)) - checkAnswer(actualResult, expectedResult) - - val actualResult1 = t1.where(not($"b" < 10)).select($"a") - val expectedResult1 = Seq() - checkAnswer(actualResult1, expectedResult1) - - val actualResult2 = t1.where($"b" < 10 or $"c" === 1).select($"a") - val expectedResult2 = Seq(Row(1), Row(2), Row(3), Row(4), Row(6)) - checkAnswer(actualResult2, expectedResult2) - - val actualResult3 = t1.where(not($"b" < 10 or $"c" === 1)).select($"a") - val expectedResult3 = Seq() - checkAnswer(actualResult3, expectedResult3) - - val actualResult4 = t1.where($"b" < 10 
and $"c" === 1).select($"a") - val expectedResult4 = Seq(Row(2), Row(4)) - checkAnswer(actualResult4, expectedResult4) - - val actualResult5 = t1.where(not($"b" < 10 and $"c" === 1)).select($"a") - val expectedResult5 = Seq(Row(1), Row(3), Row(5)) - checkAnswer(actualResult5, expectedResult5) - } -}