[SPARK-16866][SQL] Basic infrastructure for file-based SQL end-to-end tests
petermaxlee committed Aug 3, 2016
1 parent b73a570 commit ba9b678
Showing 5 changed files with 252 additions and 36 deletions.
4 changes: 4 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/blacklist.sql
@@ -0,0 +1,4 @@
-- This is a query file that has been blacklisted.
-- It includes a query that should crash Spark.
-- If the test case were run, the whole suite would fail.
some random not working query that should crash Spark.
13 changes: 13 additions & 0 deletions sql/core/src/test/resources/sql-tests/inputs/number-format.sql
@@ -0,0 +1,13 @@
-- Verifies how we parse numbers

-- parse as ints
select 1, -1;

-- parse as longs
select 2147483648, -2147483649;

-- parse as decimals
select 9223372036854775808, -9223372036854775809;

-- various floating point formats
select 0.3, -0.8, .5, -.18;
49 changes: 49 additions & 0 deletions sql/core/src/test/resources/sql-tests/results/number-format.sql.xml
@@ -0,0 +1,49 @@
<testcase>
<!-- Automatically generated by org.apache.spark.sql.SQLQueryTestSuite -->
<query>
<sql><![CDATA[-- Verifies how we parse numbers
-- parse as ints
select 1, -1]]></sql>
<schema><![CDATA[int, int]]></schema>
<output><![CDATA[
+---+----+
|  1|(-1)|
+---+----+
|  1|  -1|
+---+----+]]></output>
</query>
<query>
<sql><![CDATA[-- parse as longs
select 2147483648, -2147483649]]></sql>
<schema><![CDATA[bigint, bigint]]></schema>
<output><![CDATA[
+----------+-------------+
|2147483648|(-2147483649)|
+----------+-------------+
|2147483648|  -2147483649|
+----------+-------------+]]></output>
</query>
<query>
<sql><![CDATA[-- parse as decimals
select 9223372036854775808, -9223372036854775809]]></sql>
<schema><![CDATA[decimal(19,0), decimal(19,0)]]></schema>
<output><![CDATA[
+-------------------+----------------------+
|9223372036854775808|(-9223372036854775809)|
+-------------------+----------------------+
|9223372036854775808|  -9223372036854775809|
+-------------------+----------------------+]]></output>
</query>
<query>
<sql><![CDATA[-- various floating point formats
select 0.3, -0.8, .5, -.18]]></sql>
<schema><![CDATA[decimal(1,1), decimal(1,1), decimal(1,1), decimal(2,2)]]></schema>
<output><![CDATA[
+---+------+---+-------+
|0.3|(-0.8)|0.5|(-0.18)|
+---+------+---+-------+
|0.3|  -0.8|0.5|  -0.18|
+---+------+---+-------+]]></output>
</query>
</testcase>
36 changes: 0 additions & 36 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -1358,42 +1358,6 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext {
)
}

test("Floating point number format") {
checkAnswer(
sql("SELECT 0.3"), Row(BigDecimal(0.3))
)

checkAnswer(
sql("SELECT -0.8"), Row(BigDecimal(-0.8))
)

checkAnswer(
sql("SELECT .5"), Row(BigDecimal(0.5))
)

checkAnswer(
sql("SELECT -.18"), Row(BigDecimal(-0.18))
)
}

test("Auto cast integer type") {
checkAnswer(
sql(s"SELECT ${Int.MaxValue + 1L}"), Row(Int.MaxValue + 1L)
)

checkAnswer(
sql(s"SELECT ${Int.MinValue - 1L}"), Row(Int.MinValue - 1L)
)

checkAnswer(
sql("SELECT 9223372036854775808"), Row(new java.math.BigDecimal("9223372036854775808"))
)

checkAnswer(
sql("SELECT -9223372036854775809"), Row(new java.math.BigDecimal("-9223372036854775809"))
)
}

test("Test to check we can apply sign to expression") {

checkAnswer(
186 changes: 186 additions & 0 deletions sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -0,0 +1,186 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.spark.sql

import java.io.File
import java.util.{Locale, TimeZone}

import org.apache.spark.sql.catalyst.rules.RuleExecutor
import org.apache.spark.sql.catalyst.util.{fileToString, stringToFile}
import org.apache.spark.sql.test.SharedSQLContext

/**
* End-to-end test cases for SQL queries.
*
* Each case is loaded from a file in "spark/sql/core/src/test/resources/sql-tests/inputs".
* Each case has a golden result file in "spark/sql/core/src/test/resources/sql-tests/results".
*
* To re-generate golden files, run:
* {{{
* SPARK_GENERATE_GOLDEN_FILES=1 build/sbt "sql/test-only *SQLQueryTestSuite"
* }}}
*
* The format for input files is simple:
* 1. A list of SQL queries separated by semicolons.
* 2. Lines starting with -- are treated as comments, not as queries.
*
* For example:
* {{{
* -- this is a comment
* select 1 + 2;
* select current_date;
* }}}
*
* Result files are encoded as XML.
*/
class SQLQueryTestSuite extends QueryTest with SharedSQLContext {

private val regenerateGoldenFiles: Boolean = System.getenv("SPARK_GENERATE_GOLDEN_FILES") == "1"

private val inputFilePath = "src/test/resources/sql-tests/inputs/"
private val goldenFilePath = "src/test/resources/sql-tests/results/"

/** List of test cases to ignore, in lower case. */
private val blackList = Set(
"blacklist.sql" // Do NOT remove this one. It is here to test the blacklist functionality.
)

// Create all the test cases.
listTestCases().foreach(createScalaTestCase)

/** A test case. */
private case class TestCase(name: String, inputFile: String, resultFile: String)

/** A single SQL query's output. */
private case class QueryOutput(sql: String, schema: String, output: String) {
def toXML: String = {
// We are explicitly not using multi-line strings because stripMargin removes "|"s,
// and not using XML interpolation because there is no simple way to indent outputs
// nicely (scala.xml.PrettyPrinter has issues with tabs).
"<query>\n" +
s" <sql><![CDATA[$sql]]></sql>\n" +
s" <schema><![CDATA[$schema]]></schema>\n" +
s" <output><![CDATA[\n$output]]></output>\n" +
s"</query>"
}
}

private def createScalaTestCase(testCase: TestCase): Unit = {
if (blackList.contains(testCase.name.toLowerCase)) {
// Create a test case to ignore this case.
ignore(testCase.name) { /* Do nothing */ }
} else {
// Create a test case to run this case.
test(testCase.name) { runTest(testCase) }
}
}

/** Run a test case. */
private def runTest(testCase: TestCase): Unit = {
val input = fileToString(new File(testCase.inputFile))

// List of SQL queries to run
val queries: Seq[String] = {
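// Comment lines that end in an unescaped semicolon are dropped below, since they
// would otherwise confuse the semicolon split; remaining comment lines stay attached
// to the query that follows them and show up in the golden file's <sql> block.
// The split regex then breaks on semicolons not preceded by a backslash.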
val cleaned = input.split("\n").filterNot(_.matches("--.*(?<=[^\\\\]);")).mkString("\n")
cleaned.split("(?<=[^\\\\]);").map(_.trim).filterNot(q => q == "").toSeq
}

// Run the SQL queries, preparing them for comparison.
val outputs: Seq[QueryOutput] = queries.map { sql =>
val df = spark.sql(sql)
// We might need to do some query canonicalization in the future.
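// The schema string is a comma-separated list of simple type names (e.g. "int, bigint");
// the large _numRows/truncate values are presumably chosen so that showString never
// elides rows or truncates cell contents.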
QueryOutput(
sql = sql,
schema = df.schema.map(_.dataType.simpleString).mkString(", "),
output = df.showString(_numRows = 10000, truncate = 10000).trim)
}

if (regenerateGoldenFiles) {
// When the golden file flag is on, create (or overwrite) the golden file.
// Again, we are explicitly not using multi-line strings because stripMargin removes
// "|"s, and not using XML interpolation because there is no simple way to indent
// outputs nicely (scala.xml.PrettyPrinter has issues with tabs).
val xmlOutput = {
"<testcase>\n" +
"<!-- Automatically generated by ${getClass.getName} -->\n" +
outputs.map(_.toXML).mkString("\n") +
"\n</testcase>\n"
}
stringToFile(new File(testCase.resultFile), xmlOutput)
}

// Read back the golden file.
val expectedOutputs: Seq[QueryOutput] = {
val xml = scala.xml.XML.loadString(fileToString(new File(testCase.resultFile)))
(xml \ "query").map { q =>
QueryOutput(
sql = (q \ "sql").text,
schema = (q \ "schema").text,
output = (q \ "output").text.trim)
}
}

// Compare results.
assertResult(expectedOutputs.size, s"Number of queries should be ${expectedOutputs.size}") {
outputs.size
}

outputs.zip(expectedOutputs).zipWithIndex.foreach { case ((output, expected), i) =>
assertResult(expected.sql, s"SQL query should match for query #$i") { output.sql }
assertResult(expected.schema, s"Schema should match for query #$i") { output.schema }
assertResult(expected.output, s"Result should match for query #$i") { output.output }
}
}

private def listTestCases(): Seq[TestCase] = {
listFilesRecursively(new File(inputFilePath)).map { file =>
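// A golden file mirrors its input path, e.g. inputs/number-format.sql maps to
// results/number-format.sql.xml.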
val resultFile = file.getAbsolutePath.replace(inputFilePath, goldenFilePath) + ".xml"
TestCase(file.getName, file.getAbsolutePath, resultFile)
}
}

/** Returns all the files (not directories) in a directory, recursively. */
private def listFilesRecursively(path: File): Seq[File] = {
val (dirs, files) = path.listFiles().partition(_.isDirectory)
files ++ dirs.flatMap(listFilesRecursively)
}

private val originalTimeZone = TimeZone.getDefault
private val originalLocale = Locale.getDefault

override def beforeAll(): Unit = {
super.beforeAll()
// Timezone is fixed to America/Los_Angeles for timezone-sensitive tests (timestamp_*)
TimeZone.setDefault(TimeZone.getTimeZone("America/Los_Angeles"))
// Fix the default locale as well, since it can affect how results are formatted
Locale.setDefault(Locale.US)
RuleExecutor.resetTime()
}

override def afterAll(): Unit = {
try {
TimeZone.setDefault(originalTimeZone)
Locale.setDefault(originalLocale)

// For debugging, dump statistics about how much time was spent in various optimizer rules
logWarning(RuleExecutor.dumpTimeSpent())
} finally {
super.afterAll()
}
}
}
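
For intuition, here is a minimal, self-contained sketch. It is not part of the commit: GoldenFileSketch is a hypothetical object whose QueryOutput mirrors the private case class in SQLQueryTestSuite, with example values copied from number-format.sql and its golden file.

object GoldenFileSketch {
  // Mirrors the private QueryOutput case class in SQLQueryTestSuite.
  case class QueryOutput(sql: String, schema: String, output: String) {
    def toXML: String =
      "<query>\n" +
      s"  <sql><![CDATA[$sql]]></sql>\n" +
      s"  <schema><![CDATA[$schema]]></schema>\n" +
      s"  <output><![CDATA[\n$output]]></output>\n" +
      "</query>"
  }

  def main(args: Array[String]): Unit = {
    // Values taken from the first query in number-format.sql and its golden file.
    val q = QueryOutput(
      sql = "-- parse as ints\nselect 1, -1",
      schema = "int, int",
      output = "+---+----+\n|  1|(-1)|\n+---+----+\n|  1|  -1|\n+---+----+")
    // Prints the same <query> element that appears in the golden XML above.
    println(q.toXML)
  }
}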
