[SPARK-16866][SQL] Basic infrastructure for file-based SQL end-to-end tests
apache · Aug 3, 2016 · ba9b678 · ba9b678
1 parent b73a570
commit ba9b678
Show file tree

Hide file tree

Showing 5 changed files with 252 additions and 36 deletions.
diff --git a/sql/core/src/test/resources/sql-tests/inputs/blacklist.sql b/sql/core/src/test/resources/sql-tests/inputs/blacklist.sql
@@ -0,0 +1,4 @@
+-- This is a query file that has been blacklisted.
+-- It includes a query that should crash Spark.
+-- If the test case is run, the whole suite would fail.
+some random not working query that should crash Spark.
diff --git a/sql/core/src/test/resources/sql-tests/inputs/number-format.sql b/sql/core/src/test/resources/sql-tests/inputs/number-format.sql
@@ -0,0 +1,13 @@
+-- Verifies how we parse numbers
+
+-- parse as ints
+select 1, -1;
+
+-- parse as longs
+select 2147483648, -2147483649;
+
+-- parse as decimals
+select 9223372036854775808, -9223372036854775809;
+
+-- various floating point formats
+select 0.3, -0.8, .5, -.18;
diff --git a/sql/core/src/test/resources/sql-tests/results/number-format.sql.xml b/sql/core/src/test/resources/sql-tests/results/number-format.sql.xml
@@ -0,0 +1,49 @@
+<testcase>
+<!-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite -->
+<query>
+  <sql><![CDATA[-- Verifies how we parse numbers
+
+-- parse as ints
+select 1, -1]]></sql>
+  <schema><![CDATA[int, int]]></schema>
+  <output><![CDATA[
++---+----+
+|  1|(-1)|
++---+----+
+|  1|  -1|
++---+----+]]></output>
+</query>
+<query>
+  <sql><![CDATA[-- parse as longs
+select 2147483648, -2147483649]]></sql>
+  <schema><![CDATA[bigint, bigint]]></schema>
+  <output><![CDATA[
++----------+-------------+
+|2147483648|(-2147483649)|
++----------+-------------+
+|2147483648|  -2147483649|
++----------+-------------+]]></output>
+</query>
+<query>
+  <sql><![CDATA[-- parse as decimals
+select 9223372036854775808, -9223372036854775809]]></sql>
+  <schema><![CDATA[decimal(19,0), decimal(19,0)]]></schema>
+  <output><![CDATA[
++-------------------+----------------------+
+|9223372036854775808|(-9223372036854775809)|
++-------------------+----------------------+
+|9223372036854775808|  -9223372036854775809|
++-------------------+----------------------+]]></output>
+</query>
+<query>
+  <sql><![CDATA[-- various floating point formats
+select 0.3, -0.8, .5, -.18]]></sql>
+  <schema><![CDATA[decimal(1,1), decimal(1,1), decimal(1,1), decimal(2,2)]]></schema>
+  <output><![CDATA[
++---+------+---+-------+
+|0.3|(-0.8)|0.5|(-0.18)|
++---+------+---+-------+
+|0.3|  -0.8|0.5|  -0.18|
++---+------+---+-------+]]></output>
+</query>
+</testcase>
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -1358,42 +1358,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
     )
   }
 
-  test("Floating point number format") {
-    checkAnswer(
-      sql("SELECT 0.3"), Row(BigDecimal(0.3))
-    )
-
-    checkAnswer(
-      sql("SELECT -0.8"), Row(BigDecimal(-0.8))
-    )
-
-    checkAnswer(
-      sql("SELECT .5"), Row(BigDecimal(0.5))
-    )
-
-    checkAnswer(
-      sql("SELECT -.18"), Row(BigDecimal(-0.18))
-    )
-  }
-
-  test("Auto cast integer type") {
-    checkAnswer(
-      sql(s"SELECT ${Int.MaxValue + 1L}"), Row(Int.MaxValue + 1L)
-    )
-
-    checkAnswer(
-      sql(s"SELECT ${Int.MinValue - 1L}"), Row(Int.MinValue - 1L)
-    )
-
-    checkAnswer(
-      sql("SELECT 9223372036854775808"), Row(new java.math.BigDecimal("9223372036854775808"))
-    )
-
-    checkAnswer(
-      sql("SELECT -9223372036854775809"), Row(new java.math.BigDecimal("-9223372036854775809"))
-    )
-  }
-
   test("Test to check we can apply sign to expression") {
 
     checkAnswer(

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -0,0 +1,186 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql
+
+import java.io.File
+import java.util.{Locale, TimeZone}
+
+import org.apache.spark.sql.catalyst.rules.RuleExecutor
+import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile}
+import org.apache.spark.sql.test.SharedSQLContext
+
+/**
+ * End-to-end test cases for SQL queries.
+ *
+ * Each case is loaded from a file in "spark/sql/core/src/test/resources/sql-tests/inputs".
+ * Each case has a golden result file in "spark/sql/core/src/test/resources/sql-tests/results".
+ *
+ * To re-generate golden files, run:
+ * {{{
+ *   SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite"
+ * }}}
+ *
+ * The format for input files is simple:
+ *  1. A list of SQL queries separated by semicolon. A semicolon preceded by a backslash is not
+ *     treated as a separator.
+ *  2. Lines starting with -- are comments. Comment lines containing an unescaped semicolon are
+ *     dropped before the input is split; all other comment lines remain in the text of the
+ *     query they precede and are therefore recorded in the golden file.
+ *
+ * For example:
+ * {{{
+ *   -- this is a comment
+ *   select 1 + 2;
+ *   select current_date;
+ * }}}
+ *
+ * Result files are encoded as XMLs: a `testcase` root element holding one `query` element
+ * (with `sql`, `schema` and `output` children) per query.
+ */
+class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
+
+  /** True when the SPARK_GENERATE_GOLDEN_FILES environment variable is set to "1". */
+  private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1"
+
+  // Both locations are relative paths, i.e. resolved against the working directory of the run.
+  private val inputFilePath = "src/test/resources/sql-tests/inputs/"
+  private val goldenFilePath = "src/test/resources/sql-tests/results/"
+
+  /** List of test cases to ignore, in lower cases. */
+  private val blackList = Set(
+    "blacklist.sql"  // Do NOT remove this one. It is here to test the blacklist functionality.
+  )
+
+  // Create all the test cases. This runs as part of the constructor, so it must stay below the
+  // vals it reads (inputFilePath, goldenFilePath and blackList).
+  listTestCases().foreach(createScalaTestCase)
+
+  /**
+   * A test case.
+   *
+   * @param name name of the input file, used as the test name
+   * @param inputFile absolute path of the file holding the SQL queries
+   * @param resultFile absolute path of the golden result file
+   */
+  private case class TestCase(name: String, inputFile: String, resultFile: String)
+
+  /**
+   * A single SQL query's output.
+   *
+   * @param sql text of the query, including any comment lines kept in front of it
+   * @param schema comma separated simple type names of the result columns
+   * @param output result of the query rendered as a text table
+   */
+  private case class QueryOutput(sql: String, schema: String, output: String) {
+    /**
+     * Renders this output as a `query` XML element. The three values are embedded verbatim in
+     * CDATA sections, so they must not contain the CDATA terminator `]]>`.
+     */
+    def toXML: String = {
+      // We are explicitly not using multi-line string due to stripMargin removing |s,
+      // and not using XML interpolation because there is no simple way to indent outputs nicely
+      // (scala.xml.PrettyPrinter has issue with tabs).
+      "<query>\n" +
+      s"  <sql><![CDATA[$sql]]></sql>\n" +
+      s"  <schema><![CDATA[$schema]]></schema>\n" +
+      s"  <output><![CDATA[\n$output]]></output>\n" +
+      s"</query>"
+    }
+  }
+
+  /** Registers `testCase` with ScalaTest: ignored when it is blacklisted, runnable otherwise. */
+  private def createScalaTestCase(testCase: TestCase): Unit = {
+    // Locale.ROOT keeps the lookup independent of the JVM default locale, which is only pinned
+    // to Locale.US in beforeAll(), whereas test cases are registered from the constructor.
+    if (blackList.contains(testCase.name.toLowerCase(Locale.ROOT))) {
+      // Create a test case to ignore this case.
+      ignore(testCase.name) { /* Do nothing */ }
+    } else {
+      // Create a test case to run this case.
+      test(testCase.name) { runTest(testCase) }
+    }
+  }
+
+  /**
+   * Run a test case: execute every query of the input file, optionally rewrite the golden
+   * file, then compare the SQL text, schema and output of each query with the golden file.
+   */
+  private def runTest(testCase: TestCase): Unit = {
+    val input = fileToString(new File(testCase.inputFile))
+
+    // List of SQL queries to run
+    val queries: Seq[String] = {
+      // A semicolon that is not escaped by a preceding backslash.
+      val unescapedSemicolon = "(?<=[^\\\\]);"
+      // Drop every comment line that contains an unescaped semicolon (anywhere in the line, not
+      // only at its end); otherwise the split below would cut the comment into a bogus query.
+      val commentWithSemicolon = "--.*" + unescapedSemicolon + ".*"
+      val cleaned = input.split("\n").filterNot(_.matches(commentWithSemicolon)).mkString("\n")
+      cleaned.split(unescapedSemicolon).map(_.trim).filterNot(q => q == "").toSeq
+    }
+
+    // Run the SQL queries preparing them for comparison.
+    val outputs: Seq[QueryOutput] = queries.map { sql =>
+      val df = spark.sql(sql)
+      // We might need to do some query canonicalization in the future.
+      QueryOutput(
+        sql = sql,
+        schema = df.schema.map(_.dataType.simpleString).mkString(", "),
+        output = df.showString(_numRows = 10000, truncate = 10000).trim)
+    }
+
+    if (regenerateGoldenFiles) {
+      // If generate golden file flag is on, create the golden file.
+      // Again, we are explicitly not using multi-line string due to stripMargin removing |s,
+      // and not using XML interpolation because there is no simple way to indent outputs nicely
+      // (scala.xml.PrettyPrinter has issue with tabs).
+      val xmlOutput = {
+        "<testcase>\n" +
+        // The s interpolator is required here so that the suite's class name is written out.
+        s"<!-- Automatically generated by ${getClass.getName} -->\n" +
+        outputs.map(_.toXML).mkString("\n") +
+        "\n</testcase>\n"
+      }
+      val resultFile = new File(testCase.resultFile)
+      // A brand new input (sub)directory has no matching results directory yet: create it.
+      Option(resultFile.getParentFile).filterNot(_.exists()).foreach { dir =>
+        assert(dir.mkdirs(), s"Could not create directory: $dir")
+      }
+      stringToFile(resultFile, xmlOutput)
+    }
+
+    // Read back the golden file.
+    val expectedOutputs: Seq[QueryOutput] = {
+      val xml = scala.xml.XML.loadString(fileToString(new File(testCase.resultFile)))
+      (xml \ "query").map { q =>
+        QueryOutput(
+          sql = (q \ "sql").text,
+          schema = (q \ "schema").text,
+          output = (q \ "output").text.trim)
+      }
+    }
+
+    // Compare results.
+    assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") {
+      outputs.size
+    }
+
+    outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) =>
+      assertResult(expected.sql, s"SQL query should match for query #$i") { output.sql }
+      assertResult(expected.schema, s"Schema should match for query #$i") { output.schema }
+      assertResult(expected.output, s"Result should match for query #$i") { output.output }
+    }
+  }
+
+  /**
+   * Builds one test case per file found under the input directory. The golden file path is
+   * the input path with the input directory replaced by the results directory, plus ".xml".
+   */
+  private def listTestCases(): Seq[TestCase] = {
+    listFilesRecursively(new File(inputFilePath)).map { file =>
+      val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".xml"
+      TestCase(file.getName, file.getAbsolutePath, resultFile)
+    }
+  }
+
+  /** Returns all the files (not directories) in a directory, recursively. */
+  private def listFilesRecursively(path: File): Seq[File] = {
+    // File.listFiles() returns null when `path` is not a directory or cannot be read; fail
+    // with a message naming the path instead of a bare NullPointerException.
+    val children = Option(path.listFiles()).getOrElse {
+      throw new IllegalArgumentException(s"Cannot list files under ${path.getAbsolutePath}")
+    }
+    val (dirs, files) = children.partition(_.isDirectory)
+    files ++ dirs.flatMap(listFilesRecursively)
+  }
+
+  // JVM-wide defaults captured at construction time so that afterAll() can restore them.
+  private val originalTimeZone = TimeZone.getDefault
+  private val originalLocale = Locale.getDefault
+
+  /** Pins the default time zone and locale for the suite and resets the rule timing stats. */
+  override def beforeAll(): Unit = {
+    super.beforeAll()
+    // Timezone is fixed to America/Los_Angeles for those timezone sensitive tests (timestamp_*)
+    TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
+    // Add Locale setting
+    Locale.setDefault(Locale.US)
+    RuleExecutor.resetTime()
+  }
+
+  /** Restores the original time zone and locale, then logs the time spent in rules. */
+  override def afterAll(): Unit = {
+    try {
+      TimeZone.setDefault(originalTimeZone)
+      Locale.setDefault(originalLocale)
+
+      // For debugging dump some statistics about how much time was spent in various optimizer rules
+      logWarning(RuleExecutor.dumpTimeSpent())
+    } finally {
+      super.afterAll()
+    }
+  }
+}