From 9bab05ff9e5f3c6fb1ae36d6af757353576b2b91 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Wed, 12 Aug 2015 17:50:00 -0700 Subject: [PATCH 01/13] Set SHUFFLE_PARTITIONS correctly. --- .../apache/spark/sql/test/TestSQLContext.scala | 9 ++------- .../org/apache/spark/sql/hive/test/TestHive.scala | 15 ++++----------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/test/TestSQLContext.scala index b3a4231da91c2..fca2680b72bf1 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -27,6 +27,8 @@ import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan class LocalSQLContext extends SQLContext( new SparkContext("local[2]", "TestSQLContext", new SparkConf() + // Fewer partitions to speed up testing + .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") .set("spark.sql.testkey", "true") // SPARK-8910 .set("spark.ui.enabled", "false"))) { @@ -35,13 +37,6 @@ class LocalSQLContext new this.SQLSession() } - protected[sql] class SQLSession extends super.SQLSession { - protected[sql] override lazy val conf: SQLConf = new SQLConf { - /** Fewer partitions to speed up testing. */ - override def numShufflePartitions: Int = this.getConf(SQLConf.SHUFFLE_PARTITIONS, 5) - } - } - /** * Turn a logical plan into a [[DataFrame]]. This should be removed once we have an easier way to * construct [[DataFrame]] directly out of local data without relying on implicits. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 4eae699ac3b51..45e23fef88d2e 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -50,6 +50,10 @@ object TestHive "TestSQLContext", new SparkConf() .set("spark.sql.test", "") + // Fewer partitions to speed up testing + .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") + .set(SQLConf.DIALECT.key, "hiveql") + .set(SQLConf.CASE_SENSITIVE.key, "false") .set("spark.sql.hive.metastore.barrierPrefixes", "org.apache.spark.sql.hive.execution.PairSerDe") // SPARK-8910 @@ -119,17 +123,6 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { new this.SQLSession() } - protected[hive] class SQLSession extends super.SQLSession { - /** Fewer partitions to speed up testing. */ - protected[sql] override lazy val conf: SQLConf = new SQLConf { - override def numShufflePartitions: Int = getConf(SQLConf.SHUFFLE_PARTITIONS, 5) - // TODO as in unit test, conf.clear() probably be called, all of the value will be cleared. - // The super.getConf(SQLConf.DIALECT) is "sql" by default, we need to set it as "hiveql" - override def dialect: String = super.getConf(SQLConf.DIALECT, "hiveql") - override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false) - } - } - /** * Returns the value of specified environmental variable as a [[java.io.File]] after checking * to ensure it exists From d754db4d2ad6315da526782d314a55ddada07e4c Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Wed, 12 Aug 2015 18:03:01 -0700 Subject: [PATCH 02/13] Cover more cases. 
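HiveSparkSubmitSuite's standalone entry points (SparkSubmitClassLoaderTest, SparkSQLConfTest) and ConcurrentHiveSuite each build their own SparkContext instead of going through the shared TestHive singleton, so the defaults from patch 01 never reach them. They now apply the same test settings by hand. A compact sketch of the pattern (the master URL and app name are illustrative; the exact per-suite wiring is in the diff below):

    import org.apache.spark.{SparkConf, SparkContext}
    import org.apache.spark.sql.SQLConf
    import org.apache.spark.sql.hive.test.TestHiveContext

    val conf = new SparkConf()
      .set(SQLConf.SHUFFLE_PARTITIONS.key, "5")  // fewer shuffle partitions to speed up tests
      .set(SQLConf.DIALECT.key, "hiveql")        // TestHiveContext expects the HiveQL dialect
      .set(SQLConf.CASE_SENSITIVE.key, "false")  // HiveQL resolution is case-insensitive
      .set("spark.ui.enabled", "false")          // no web UI needed while testing
    val sc = new SparkContext("local", "TestSQLContext", conf)
    val hiveContext = new TestHiveContext(sc)
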
--- .../apache/spark/sql/hive/HiveSparkSubmitSuite.scala | 12 ++++++++++++ .../sql/hive/execution/ConcurrentHiveSuite.scala | 9 ++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 0c29646114465..b031fe40af2cf 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -19,6 +19,8 @@ package org.apache.spark.sql.hive import java.io.File +import org.apache.spark.sql.SQLConf + import scala.collection.mutable.ArrayBuffer import scala.sys.process.{Process, ProcessLogger} @@ -153,6 +155,11 @@ object SparkSubmitClassLoaderTest extends Logging { def main(args: Array[String]) { Utils.configTestLog4j("INFO") val conf = new SparkConf() + conf + .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") + .set(SQLConf.DIALECT.key, "hiveql") + .set(SQLConf.CASE_SENSITIVE.key, "false") + .set("spark.ui.enabled", "false") val sc = new SparkContext(conf) val hiveContext = new TestHiveContext(sc) val df = hiveContext.createDataFrame((1 to 100).map(i => (i, i))).toDF("i", "j") @@ -244,6 +251,11 @@ object SparkSQLConfTest extends Logging { // For this simple test, we do not really clone this object. override def clone: SparkConf = this } + conf + .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") + .set(SQLConf.DIALECT.key, "hiveql") + .set(SQLConf.CASE_SENSITIVE.key, "false") + .set("spark.ui.enabled", "false") val sc = new SparkContext(conf) val hiveContext = new TestHiveContext(sc) // Run a simple command to make sure all lazy vals in hiveContext get instantiated. diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala index b0d3dd44daedc..c635ca3a1cfc3 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala @@ -17,6 +17,7 @@ package org.apache.spark.sql.hive.execution +import org.apache.spark.sql.SQLConf import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.sql.hive.test.TestHiveContext import org.scalatest.BeforeAndAfterAll @@ -25,8 +26,14 @@ class ConcurrentHiveSuite extends SparkFunSuite with BeforeAndAfterAll { ignore("multiple instances not supported") { test("Multiple Hive Instances") { (1 to 10).map { i => + val conf = new SparkConf() + conf + .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") + .set(SQLConf.DIALECT.key, "hiveql") + .set(SQLConf.CASE_SENSITIVE.key, "false") + .set("spark.ui.enabled", "false") val ts = - new TestHiveContext(new SparkContext("local", s"TestSQLContext$i", new SparkConf())) + new TestHiveContext(new SparkContext("local", s"TestSQLContext$i", conf)) ts.executeSql("SHOW TABLES").toRdd.collect() ts.executeSql("SELECT * FROM src").toRdd.collect() ts.executeSql("SHOW TABLES").toRdd.collect() From 876f33df7245f402e74ca06dd097cfcf8fac7a92 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Wed, 12 Aug 2015 18:21:15 -0700 Subject: [PATCH 03/13] mima. 
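Patch 01 removed the test-only LocalSQLContext.SQLSession inner class, which the MiMa binary-compatibility check reports as a MissingClassProblem, so it is excluded explicitly. The entry follows the same shape as the surrounding exclusions in MimaExcludes.scala (the exact rule is in the diff below):

    ProblemFilters.exclude[MissingClassProblem](
      "org.apache.spark.sql.test.LocalSQLContext$SQLSession")
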
--- project/MimaExcludes.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/project/MimaExcludes.scala b/project/MimaExcludes.scala index 784f83c10e023..708b1f2601e14 100644 --- a/project/MimaExcludes.scala +++ b/project/MimaExcludes.scala @@ -62,6 +62,8 @@ object MimaExcludes { "org.apache.spark.ml.classification.LogisticCostFun.this"), // SQL execution is considered private. excludePackage("org.apache.spark.sql.execution"), + // Always exclude LocalSQLContext, which is only used in unit tests. + ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.test.LocalSQLContext$SQLSession"), // The old JSON RDD is removed in favor of streaming Jackson ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.json.JsonRDD$"), ProblemFilters.exclude[MissingClassProblem]("org.apache.spark.sql.json.JsonRDD"), From df2f79067392fe015d1d2f2910a16e14810651af Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Thu, 13 Aug 2015 08:47:59 -0700 Subject: [PATCH 04/13] One more. --- sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala index 75791e9d53c20..d385e1765a4ec 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala @@ -74,8 +74,10 @@ class SQLConfSuite extends QueryTest { test("deprecated property") { ctx.conf.clear() + val original = ctx.conf.numShufflePartitions ctx.sql(s"set ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS}=10") assert(ctx.conf.numShufflePartitions === 10) + ctx.sql(s"set ${SQLConf.SHUFFLE_PARTITIONS}=$original") } test("invalid conf value") { From 4e548f596247283ca7dc25bf1356dd23229f4c5f Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Thu, 13 Aug 2015 15:01:46 -0700 Subject: [PATCH 05/13] One more. 
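SPARK_9757 in HiveSparkSubmitSuite was missed by patch 02: it also builds its own SparkContext (against a Maven-resolved 0.13.1 metastore), so it needs the same test settings and runs without the web UI. Roughly, the resulting construction after this patch (the exact code is in the diff below):

    val sparkContext = new SparkContext(
      new SparkConf()
        .set("spark.sql.hive.metastore.version", "0.13.1")
        .set("spark.sql.hive.metastore.jars", "maven")
        .set(SQLConf.SHUFFLE_PARTITIONS.key, "5")
        .set(SQLConf.DIALECT.key, "hiveql")
        .set(SQLConf.CASE_SENSITIVE.key, "false")
        .set("spark.ui.enabled", "false"))
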
--- .../org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index b031fe40af2cf..a209464b26ff0 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -271,7 +271,11 @@ object SPARK_9757 extends QueryTest with Logging { val sparkContext = new SparkContext( new SparkConf() .set("spark.sql.hive.metastore.version", "0.13.1") - .set("spark.sql.hive.metastore.jars", "maven")) + .set("spark.sql.hive.metastore.jars", "maven") + .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") + .set(SQLConf.DIALECT.key, "hiveql") + .set(SQLConf.CASE_SENSITIVE.key, "false") + .set("spark.ui.enabled", "false")) val hiveContext = new TestHiveContext(sparkContext) import hiveContext.implicits._ From 4d6fcf8ca18d4aa6776d6f16219d349e628d81bc Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Thu, 13 Aug 2015 18:57:14 -0700 Subject: [PATCH 06/13] style --- .../scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index a209464b26ff0..0190571dff3b8 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -19,8 +19,6 @@ package org.apache.spark.sql.hive import java.io.File -import org.apache.spark.sql.SQLConf - import scala.collection.mutable.ArrayBuffer import scala.sys.process.{Process, ProcessLogger} @@ -32,6 +30,7 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark._ import org.apache.spark.sql.QueryTest import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext} +import org.apache.spark.sql.SQLConf import org.apache.spark.sql.types.DecimalType import org.apache.spark.util.{ResetSystemProperties, Utils} From 1db871b85d56698d61b3a3d9145643a1924031f7 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Mon, 17 Aug 2015 09:37:05 -0700 Subject: [PATCH 07/13] A better way to override confs in unit tests. 
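Rather than pushing the test settings into every SparkConf, the test contexts now own them: TestSQLContext and TestHiveContext each expose an overrideConfs map and build their SQLSession around an SQLConf that applies the map when it is constructed and re-applies it after clear(). This keeps the overrides in place even when a test such as SQLConfSuite calls conf.clear(), which previously wiped the lowered shuffle-partition setting (and, on the Hive side, the hiveql dialect). Tests that used to assert an empty conf after clear() now expect exactly the override entries. A trimmed sketch of the mechanism (the full version, including the TestHiveContext variant, is in the diff below):

    private[sql] object TestSQLContext {
      // Confs that every sql/core test should run with.
      val overrideConfs: Map[String, String] =
        Map(SQLConf.SHUFFLE_PARTITIONS.key -> "5")
    }

    // Inside TestSQLContext's SQLSession:
    protected[sql] override lazy val conf: SQLConf = new SQLConf {
      TestSQLContext.overrideConfs.foreach { case (k, v) => setConfString(k, v) }

      override def clear(): Unit = {
        super.clear()
        // Restore the test defaults after a clear().
        TestSQLContext.overrideConfs.foreach { case (k, v) => setConfString(k, v) }
      }
    }
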
--- .../org/apache/spark/sql/SQLConfSuite.scala | 7 ++-- .../spark/sql/test/TestSQLContext.scala | 32 +++++++++++++++++-- .../apache/spark/sql/hive/test/TestHive.scala | 31 ++++++++++++++++++ .../spark/sql/hive/HiveSparkSubmitSuite.scala | 16 ++-------- .../hive/execution/ConcurrentHiveSuite.scala | 7 +--- 5 files changed, 68 insertions(+), 25 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala index ad25a8d6b224d..48cddac461090 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala @@ -17,7 +17,7 @@ package org.apache.spark.sql -import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.test.{TestSQLContext, SharedSQLContext} class SQLConfSuite extends QueryTest with SharedSQLContext { @@ -33,7 +33,10 @@ class SQLConfSuite extends QueryTest with SharedSQLContext { test("programmatic ways of basic setting and getting") { ctx.conf.clear() - assert(ctx.getAllConfs.size === 0) + // The number of confs set after we call clear equals to the number of + // confs in TestSQLContext.overrideConfs (we always override these confs + // when we use TestSQLContext). + assert(ctx.getAllConfs.size === TestSQLContext.overrideConfs.size) ctx.setConf(testKey, testVal) assert(ctx.getConf(testKey) === testVal) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index eb9f47b09c9bc..f6aafbecffb7d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -29,9 +29,28 @@ private[sql] class TestSQLContext(sc: SparkContext) extends SQLContext(sc) { sel def this() { this(new SparkContext("local[2]", "test-sql-context", new SparkConf() - .set("spark.sql.testkey", "true") - // Fewer partitions to speed up testing - .set(SQLConf.SHUFFLE_PARTITIONS.key, "5"))) + .set("spark.sql.testkey", "true"))) + } + + // At here, we make sure we set those test specific confs correctly when we create + // the SQLConf as well as when we call clear. + protected[sql] override def createSession(): SQLSession = new this.SQLSession() + + protected[sql] class SQLSession extends super.SQLSession { + protected[sql] override lazy val conf: SQLConf = new SQLConf { + + TestSQLContext.overrideConfs.map { + case (key, value) => setConfString(key, value) + } + + override def clear(): Unit = { + super.clear() + + TestSQLContext.overrideConfs.map { + case (key, value) => setConfString(key, value) + } + } + } } // Needed for Java tests @@ -43,3 +62,10 @@ private[sql] class TestSQLContext(sc: SparkContext) extends SQLContext(sc) { sel protected override def _sqlContext: SQLContext = self } } + +private[sql] object TestSQLContext { + val overrideConfs: Map[String, String] = + Map( + // Fewer shuffle partitions to speed up testing. 
+ SQLConf.SHUFFLE_PARTITIONS.key -> "5") +} diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 45e23fef88d2e..b461c908333d3 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -119,10 +119,29 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { override def executePlan(plan: LogicalPlan): this.QueryExecution = new this.QueryExecution(plan) + // At here, we make sure we set those test specific confs correctly when we create + // the SQLConf as well as when we call clear. override protected[sql] def createSession(): SQLSession = { new this.SQLSession() } + protected[hive] class SQLSession extends super.SQLSession { + protected[sql] override lazy val conf: SQLConf = new SQLConf { + + TestHiveContext.overrideConfs.map { + case (key, value) => setConfString(key, value) + } + + override def clear(): Unit = { + super.clear() + + TestHiveContext.overrideConfs.map { + case (key, value) => setConfString(key, value) + } + } + } + } + /** * Returns the value of specified environmental variable as a [[java.io.File]] after checking * to ensure it exists @@ -454,3 +473,15 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { } } } + +private[hive] object TestHiveContext { + val overrideConfs: Map[String, String] = + Map( + // Fewer shuffle partitions to speed up testing. + SQLConf.SHUFFLE_PARTITIONS.key -> "5", + // In unit test, conf.clear() is called in SQLConfSuite, which clear all the conf values. + // The super.getConf(SQLConf.DIALECT) is "sql" by default, we need to set it as "hiveql" + SQLConf.DIALECT.key -> "hiveql", + // HiveQl uses case-insensitive resolution. + SQLConf.CASE_SENSITIVE.key -> "false") +} diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala index 0190571dff3b8..c9d044fb98a8b 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/HiveSparkSubmitSuite.scala @@ -30,7 +30,6 @@ import org.scalatest.time.SpanSugar._ import org.apache.spark._ import org.apache.spark.sql.QueryTest import org.apache.spark.sql.hive.test.{TestHive, TestHiveContext} -import org.apache.spark.sql.SQLConf import org.apache.spark.sql.types.DecimalType import org.apache.spark.util.{ResetSystemProperties, Utils} @@ -154,11 +153,7 @@ object SparkSubmitClassLoaderTest extends Logging { def main(args: Array[String]) { Utils.configTestLog4j("INFO") val conf = new SparkConf() - conf - .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") - .set(SQLConf.DIALECT.key, "hiveql") - .set(SQLConf.CASE_SENSITIVE.key, "false") - .set("spark.ui.enabled", "false") + conf.set("spark.ui.enabled", "false") val sc = new SparkContext(conf) val hiveContext = new TestHiveContext(sc) val df = hiveContext.createDataFrame((1 to 100).map(i => (i, i))).toDF("i", "j") @@ -250,11 +245,7 @@ object SparkSQLConfTest extends Logging { // For this simple test, we do not really clone this object. 
override def clone: SparkConf = this } - conf - .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") - .set(SQLConf.DIALECT.key, "hiveql") - .set(SQLConf.CASE_SENSITIVE.key, "false") - .set("spark.ui.enabled", "false") + conf.set("spark.ui.enabled", "false") val sc = new SparkContext(conf) val hiveContext = new TestHiveContext(sc) // Run a simple command to make sure all lazy vals in hiveContext get instantiated. @@ -271,9 +262,6 @@ object SPARK_9757 extends QueryTest with Logging { new SparkConf() .set("spark.sql.hive.metastore.version", "0.13.1") .set("spark.sql.hive.metastore.jars", "maven") - .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") - .set(SQLConf.DIALECT.key, "hiveql") - .set(SQLConf.CASE_SENSITIVE.key, "false") .set("spark.ui.enabled", "false")) val hiveContext = new TestHiveContext(sparkContext) diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala index c635ca3a1cfc3..e38d1eb5779fe 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/ConcurrentHiveSuite.scala @@ -17,7 +17,6 @@ package org.apache.spark.sql.hive.execution -import org.apache.spark.sql.SQLConf import org.apache.spark.{SparkConf, SparkContext, SparkFunSuite} import org.apache.spark.sql.hive.test.TestHiveContext import org.scalatest.BeforeAndAfterAll @@ -27,11 +26,7 @@ class ConcurrentHiveSuite extends SparkFunSuite with BeforeAndAfterAll { test("Multiple Hive Instances") { (1 to 10).map { i => val conf = new SparkConf() - conf - .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") - .set(SQLConf.DIALECT.key, "hiveql") - .set(SQLConf.CASE_SENSITIVE.key, "false") - .set("spark.ui.enabled", "false") + conf.set("spark.ui.enabled", "false") val ts = new TestHiveContext(new SparkContext("local", s"TestSQLContext$i", conf)) ts.executeSql("SHOW TABLES").toRdd.collect() From 75aeab782c82c543882166954cf53e1aa56b79b5 Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Mon, 17 Aug 2015 09:44:06 -0700 Subject: [PATCH 08/13] Remove unnecessary changes. 
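With the overrides now seeded inside the test contexts' SQLConf (patch 07), setting them again on the SparkConf passed to TestHive is redundant, so those lines are reverted and TestSQLContext's SparkConf construction is collapsed back onto one line. A quick way to confirm from test code that the defaults still hold (sketch only; the app name is illustrative):

    val ctx = new TestHiveContext(
      new SparkContext("local", "override-check", new SparkConf()))
    assert(ctx.getConf(SQLConf.SHUFFLE_PARTITIONS.key) == "5")
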
--- .../test/scala/org/apache/spark/sql/test/TestSQLContext.scala | 3 +-- .../main/scala/org/apache/spark/sql/hive/test/TestHive.scala | 4 ---- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index f6aafbecffb7d..c82096d59c7ea 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -28,8 +28,7 @@ private[sql] class TestSQLContext(sc: SparkContext) extends SQLContext(sc) { sel def this() { this(new SparkContext("local[2]", "test-sql-context", - new SparkConf() - .set("spark.sql.testkey", "true"))) + new SparkConf().set("spark.sql.testkey", "true"))) } // At here, we make sure we set those test specific confs correctly when we create diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index b461c908333d3..4f721d89a0357 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -50,10 +50,6 @@ object TestHive "TestSQLContext", new SparkConf() .set("spark.sql.test", "") - // Fewer partitions to speed up testing - .set(SQLConf.SHUFFLE_PARTITIONS.key, "5") - .set(SQLConf.DIALECT.key, "hiveql") - .set(SQLConf.CASE_SENSITIVE.key, "false") .set("spark.sql.hive.metastore.barrierPrefixes", "org.apache.spark.sql.hive.execution.PairSerDe") // SPARK-8910 From bc4b8e0910ec07507bd3231a9115cba6d9187e8f Mon Sep 17 00:00:00 2001 From: Yin Huai Date: Mon, 17 Aug 2015 11:44:37 -0700 Subject: [PATCH 09/13] Update tests and add comments. --- .../scala/org/apache/spark/sql/SQLConfSuite.scala | 9 +++++---- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 14 ++++++++++++-- .../org/apache/spark/sql/test/TestSQLContext.scala | 4 ++++ .../org/apache/spark/sql/hive/test/TestHive.scala | 4 ++++ 4 files changed, 25 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala index 48cddac461090..f20ea581a0c97 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala @@ -32,11 +32,12 @@ class SQLConfSuite extends QueryTest with SharedSQLContext { } test("programmatic ways of basic setting and getting") { + // Set a conf first. + ctx.setConf(testKey, testVal) + // Clear the conf. ctx.conf.clear() - // The number of confs set after we call clear equals to the number of - // confs in TestSQLContext.overrideConfs (we always override these confs - // when we use TestSQLContext). - assert(ctx.getAllConfs.size === TestSQLContext.overrideConfs.size) + // After clear, only overrideConfs used by unit test should be in the SQLConf. 
+ assert(ctx.getAllConfs === TestSQLContext.overrideConfs) ctx.setConf(testKey, testVal) assert(ctx.getConf(testKey) === testVal) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index c329fdb2a6bb1..d9b662e78319c 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -25,7 +25,7 @@ import org.apache.spark.sql.catalyst.DefaultParserDialect import org.apache.spark.sql.catalyst.errors.DialectException import org.apache.spark.sql.execution.aggregate import org.apache.spark.sql.functions._ -import org.apache.spark.sql.test.SharedSQLContext +import org.apache.spark.sql.test.{TestSQLContext, SharedSQLContext} import org.apache.spark.sql.test.SQLTestData._ import org.apache.spark.sql.types._ @@ -990,7 +990,17 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { val nonexistentKey = "nonexistent" // "set" itself returns all config variables currently specified in SQLConf. - assert(sql("SET").collect().size == 0) + assert(sql("SET").collect().size == TestSQLContext.overrideConfs.size) + sql("SET").collect().foreach { row => + val key = row.getString(0) + val value = row.getString(1) + assert( + TestSQLContext.overrideConfs.contains(key), + s"$key should not exist in SQLConf.") + assert( + TestSQLContext.overrideConfs(key) === value, + s"The value of $key should be ${TestSQLContext.overrideConfs(key)} instead of $value.") + } // "set key=val" sql(s"SET $testKey=$testVal") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index c82096d59c7ea..8e622ec8c8d3a 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -63,6 +63,10 @@ private[sql] class TestSQLContext(sc: SparkContext) extends SQLContext(sc) { sel } private[sql] object TestSQLContext { + + /** + * A map used to store all confs that need to be overridden in sql/core unit tests. + */ val overrideConfs: Map[String, String] = Map( // Fewer shuffle partitions to speed up testing. diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 4f721d89a0357..11f5999acff81 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -471,6 +471,10 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { } private[hive] object TestHiveContext { + + /** + * A map used to store all confs that need to be overridden in sql/hive unit tests. + */ val overrideConfs: Map[String, String] = Map( // Fewer shuffle partitions to speed up testing. 
From c45cca64087ed024a2504377d0c73edc9ce116e9 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Fri, 4 Sep 2015 10:57:35 -0700 Subject: [PATCH 10/13] fix test --- .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index d9b662e78319c..b5199fb8777a5 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -1001,20 +1001,19 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { TestSQLContext.overrideConfs(key) === value, s"The value of $key should be ${TestSQLContext.overrideConfs(key)} instead of $value.") } + val overrideConfs = sql("SET").collect() // "set key=val" sql(s"SET $testKey=$testVal") checkAnswer( sql("SET"), - Row(testKey, testVal) + overrideConfs ++ Seq(Row(testKey, testVal)) ) sql(s"SET ${testKey + testKey}=${testVal + testVal}") checkAnswer( sql("set"), - Seq( - Row(testKey, testVal), - Row(testKey + testKey, testVal + testVal)) + overrideConfs ++ Seq(Row(testKey, testVal), Row(testKey + testKey, testVal + testVal)) ) // "set key" From 841c942ac895c242ca4617369780bb3c570c45fc Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Fri, 4 Sep 2015 11:49:46 -0700 Subject: [PATCH 11/13] address comments --- .../scala/org/apache/spark/sql/SQLConfSuite.scala | 11 +++++++---- .../scala/org/apache/spark/sql/SQLQuerySuite.scala | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala index f20ea581a0c97..55ea594cafcf1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLConfSuite.scala @@ -46,7 +46,7 @@ class SQLConfSuite extends QueryTest with SharedSQLContext { // Tests SQLConf as accessed from a SQLContext is mutable after // the latter is initialized, unlike SparkConf inside a SparkContext. - assert(ctx.getConf(testKey) == testVal) + assert(ctx.getConf(testKey) === testVal) assert(ctx.getConf(testKey, testVal + "_") === testVal) assert(ctx.getAllConfs.contains(testKey)) @@ -78,9 +78,12 @@ class SQLConfSuite extends QueryTest with SharedSQLContext { test("deprecated property") { ctx.conf.clear() val original = ctx.conf.numShufflePartitions - sql(s"set ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS}=10") - assert(ctx.conf.numShufflePartitions === 10) - sql(s"set ${SQLConf.SHUFFLE_PARTITIONS}=$original") + try{ + sql(s"set ${SQLConf.Deprecated.MAPRED_REDUCE_TASKS}=10") + assert(ctx.conf.numShufflePartitions === 10) + } finally { + sql(s"set ${SQLConf.SHUFFLE_PARTITIONS}=$original") + } } test("invalid conf value") { diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index ec1f5c01e6b44..8423862751090 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -991,7 +991,7 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { val nonexistentKey = "nonexistent" // "set" itself returns all config variables currently specified in SQLConf. 
- assert(sql("SET").collect().size == TestSQLContext.overrideConfs.size) + assert(sql("SET").collect().size === TestSQLContext.overrideConfs.size) sql("SET").collect().foreach { row => val key = row.getString(0) val value = row.getString(1) From 6fa951cfc0ad4fdf2994a40b1c8c9f24feec1c33 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Fri, 4 Sep 2015 12:06:14 -0700 Subject: [PATCH 12/13] address comments --- .../test/scala/org/apache/spark/sql/SQLQuerySuite.scala | 2 +- .../scala/org/apache/spark/sql/test/TestSQLContext.scala | 8 ++++---- .../scala/org/apache/spark/sql/hive/test/TestHive.scala | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala index 8423862751090..5906e2a5566db 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala @@ -997,7 +997,7 @@ class SQLQuerySuite extends QueryTest with SharedSQLContext { val value = row.getString(1) assert( TestSQLContext.overrideConfs.contains(key), - s"$key should not exist in SQLConf.") + s"$key should exist in SQLConf.") assert( TestSQLContext.overrideConfs(key) === value, s"The value of $key should be ${TestSQLContext.overrideConfs(key)} instead of $value.") diff --git a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala index 8e622ec8c8d3a..43a46fb0648a9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/test/TestSQLContext.scala @@ -31,20 +31,20 @@ private[sql] class TestSQLContext(sc: SparkContext) extends SQLContext(sc) { sel new SparkConf().set("spark.sql.testkey", "true"))) } - // At here, we make sure we set those test specific confs correctly when we create + // Make sure we set those test specific confs correctly when we create // the SQLConf as well as when we call clear. protected[sql] override def createSession(): SQLSession = new this.SQLSession() + /** A special [[SQLSession]] that uses fewer shuffle partitions than normal. */ protected[sql] class SQLSession extends super.SQLSession { protected[sql] override lazy val conf: SQLConf = new SQLConf { - TestSQLContext.overrideConfs.map { - case (key, value) => setConfString(key, value) - } + clear() override def clear(): Unit = { super.clear() + // Make sure we start with the default test configs even after clear TestSQLContext.overrideConfs.map { case (key, value) => setConfString(key, value) } diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index adde3c8479ffe..2caaa42c7af7b 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -111,7 +111,7 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { override def executePlan(plan: LogicalPlan): this.QueryExecution = new this.QueryExecution(plan) - // At here, we make sure we set those test specific confs correctly when we create + // Make sure we set those test specific confs correctly when we create // the SQLConf as well as when we call clear. 
override protected[sql] def createSession(): SQLSession = { new this.SQLSession() From 5e8d4a35b9b81eff80f5e77aa60d35897c6e1b93 Mon Sep 17 00:00:00 2001 From: Davies Liu Date: Fri, 4 Sep 2015 14:54:22 -0700 Subject: [PATCH 13/13] fix test --- .../apache/spark/sql/hive/test/TestHive.scala | 16 +++++++--------- .../sql/hive/execution/HiveQuerySuite.scala | 8 +++++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala index 2caaa42c7af7b..658f11564707f 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/test/TestHive.scala @@ -119,10 +119,12 @@ class TestHiveContext(sc: SparkContext) extends HiveContext(sc) { protected[hive] class SQLSession extends super.SQLSession { protected[sql] override lazy val conf: SQLConf = new SQLConf { + // TODO as in unit test, conf.clear() probably be called, all of the value will be cleared. + // The super.getConf(SQLConf.DIALECT) is "sql" by default, we need to set it as "hiveql" + override def dialect: String = super.getConf(SQLConf.DIALECT, "hiveql") + override def caseSensitiveAnalysis: Boolean = getConf(SQLConf.CASE_SENSITIVE, false) - TestHiveContext.overrideConfs.map { - case (key, value) => setConfString(key, value) - } + clear() override def clear(): Unit = { super.clear() @@ -467,10 +469,6 @@ private[hive] object TestHiveContext { val overrideConfs: Map[String, String] = Map( // Fewer shuffle partitions to speed up testing. - SQLConf.SHUFFLE_PARTITIONS.key -> "5", - // In unit test, conf.clear() is called in SQLConfSuite, which clear all the conf values. - // The super.getConf(SQLConf.DIALECT) is "sql" by default, we need to set it as "hiveql" - SQLConf.DIALECT.key -> "hiveql", - // HiveQl uses case-insensitive resolution. - SQLConf.CASE_SENSITIVE.key -> "false") + SQLConf.SHUFFLE_PARTITIONS.key -> "5" + ) } diff --git a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala index 83f9f3eaa3a5e..767428cf0a8d4 100644 --- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala +++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/execution/HiveQuerySuite.scala @@ -31,6 +31,7 @@ import org.apache.spark.sql.{AnalysisException, DataFrame, Row} import org.apache.spark.sql.catalyst.expressions.Cast import org.apache.spark.sql.catalyst.plans.logical.Project import org.apache.spark.sql.hive._ +import org.apache.spark.sql.hive.test.TestHiveContext import org.apache.spark.sql.hive.test.TestHive import org.apache.spark.sql.hive.test.TestHive._ @@ -1104,18 +1105,19 @@ class HiveQuerySuite extends HiveComparisonTest with BeforeAndAfter { // "SET" itself returns all config variables currently specified in SQLConf. // TODO: Should we be listing the default here always? probably... 
- assert(sql("SET").collect().size == 0) + assert(sql("SET").collect().size == TestHiveContext.overrideConfs.size) + val defaults = collectResults(sql("SET")) assertResult(Set(testKey -> testVal)) { collectResults(sql(s"SET $testKey=$testVal")) } assert(hiveconf.get(testKey, "") == testVal) - assertResult(Set(testKey -> testVal))(collectResults(sql("SET"))) + assertResult(defaults ++ Set(testKey -> testVal))(collectResults(sql("SET"))) sql(s"SET ${testKey + testKey}=${testVal + testVal}") assert(hiveconf.get(testKey + testKey, "") == testVal + testVal) - assertResult(Set(testKey -> testVal, (testKey + testKey) -> (testVal + testVal))) { + assertResult(defaults ++ Set(testKey -> testVal, (testKey + testKey) -> (testVal + testVal))) { collectResults(sql("SET")) }