From 83954118c6151ec2ae83867d44fa482bac30afd4 Mon Sep 17 00:00:00 2001
From: Xiu Guo
Date: Sun, 8 Nov 2015 12:33:07 -0800
Subject: [PATCH 1/2] [SPARK-11562][SQL] Provide option to switch
 SQLContext/HiveContext for Spark shell

---
 docs/configuration.md                       |  7 +++++++
 .../org/apache/spark/repl/SparkILoop.scala  | 22 ++++++++++++++++++----
 .../scala/org/apache/spark/repl/Main.scala  | 24 +++++++++++++++++++-----
 3 files changed, 44 insertions(+), 9 deletions(-)

diff --git a/docs/configuration.md b/docs/configuration.md
index 85e7d1202d2ab..563dcda48932a 100644
--- a/docs/configuration.md
+++ b/docs/configuration.md
@@ -201,6 +201,13 @@ of the most common options to set are:
     or remotely ("cluster") on one of the nodes inside the cluster.
   </td>
 </tr>
+<tr>
+  <td><code>spark.sql.useHiveContext</code></td>
+  <td>true</td>
+  <td>
+    A flag to toggle HiveContext/SQLContext as the default type of sqlContext when spark-shell launches.
+  </td>
+</tr>
 </table>

 Apart from these, the following properties are also available, and may be useful in some situations:

diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index 22749c4609345..4ed66e32cb9f8 100644
--- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -132,6 +132,7 @@ class SparkILoop(
   @DeveloperApi
   var sparkContext: SparkContext = _
   var sqlContext: SQLContext = _
+  var useHiveContext: Boolean = _

   override def echoCommandMessage(msg: String) {
     intp.reporter printMessage msg
@@ -1026,17 +1027,30 @@ class SparkILoop(
   @DeveloperApi
   def createSQLContext(): SQLContext = {
-    val name = "org.apache.spark.sql.hive.HiveContext"
+    useHiveContext = sparkContext.getConf.getBoolean("spark.sql.useHiveContext", true)
+    val name = {
+      if (useHiveContext) "org.apache.spark.sql.hive.HiveContext"
+      else "org.apache.spark.sql.SQLContext"
+    }
+
     val loader = Utils.getContextOrSparkClassLoader
     try {
       sqlContext = loader.loadClass(name).getConstructor(classOf[SparkContext])
         .newInstance(sparkContext).asInstanceOf[SQLContext]
-      logInfo("Created sql context (with Hive support)..")
+      if (useHiveContext) {
+        logInfo("Created sql context (with Hive support). To use sqlContext (without Hive), " +
+          "set spark.sql.useHiveContext to false before launching spark-shell.")
+      }
+      else {
+        logInfo("Created sql context.")
+      }
     } catch {
-      case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError =>
+      case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError
+        if useHiveContext =>
         sqlContext = new SQLContext(sparkContext)
-        logInfo("Created sql context..")
+        logInfo("Created sql context without Hive support, " +
+          "build Spark with -Phive to enable Hive support.")
     }
     sqlContext
   }

diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
index 44650f25f7a18..b545488bb3040 100644
--- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
+++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala
@@ -37,6 +37,7 @@ object Main extends Logging {
   // the creation of SecurityManager has to be lazy so SPARK_YARN_MODE is set if needed
   var sparkContext: SparkContext = _
   var sqlContext: SQLContext = _
+  var useHiveContext: Boolean = conf.getBoolean("spark.sql.useHiveContext", true)
   var interp = new SparkILoop // this is a public var because tests reset it.
   private var hasErrors = false

@@ -66,7 +67,8 @@ object Main extends Logging {
   def getAddedJars: Array[String] = {
     val envJars = sys.env.get("ADD_JARS")
     if (envJars.isDefined) {
-      logWarning("ADD_JARS environment variable is deprecated, use --jar spark submit argument instead")
+      logWarning("ADD_JARS environment variable is deprecated, " +
+        "use the --jars argument of spark-submit instead")
     }
     val propJars = sys.props.get("spark.jars").flatMap { p => if (p == "") None else Some(p) }
     val jars = propJars.orElse(envJars).getOrElse("")
@@ -98,16 +100,28 @@ object Main extends Logging {
   }

   def createSQLContext(): SQLContext = {
-    val name = "org.apache.spark.sql.hive.HiveContext"
+    val name = {
+      if (useHiveContext) "org.apache.spark.sql.hive.HiveContext"
+      else "org.apache.spark.sql.SQLContext"
+    }
+
     val loader = Utils.getContextOrSparkClassLoader
     try {
       sqlContext = loader.loadClass(name).getConstructor(classOf[SparkContext])
         .newInstance(sparkContext).asInstanceOf[SQLContext]
-      logInfo("Created sql context (with Hive support)..")
+      if (useHiveContext) {
+        logInfo("Created sql context (with Hive support). To use sqlContext (without Hive), " +
+          "set spark.sql.useHiveContext to false before launching spark-shell.")
+      }
+      else {
+        logInfo("Created sql context.")
+      }
     } catch {
-      case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError =>
+      case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError
+        if useHiveContext =>
         sqlContext = new SQLContext(sparkContext)
-        logInfo("Created sql context..")
+        logInfo("Created sql context without Hive support, " +
+          "build Spark with -Phive to enable Hive support.")
     }
     sqlContext
   }
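With the first patch applied, the new flag travels through SparkConf like any
other configuration, so it can be supplied when the shell is launched. A
minimal usage sketch (assuming a Hive-enabled build; the REPL output shown is
illustrative):

    $ ./bin/spark-shell --conf spark.sql.useHiveContext=false

    scala> sqlContext.getClass.getName
    res0: String = org.apache.spark.sql.SQLContext

Conversely, leaving the flag at its default of true on a build compiled
without -Phive no longer fails: the guarded catch clause above falls back to a
plain SQLContext and logs how to enable Hive support.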
From 969008ef85723eb35d4ab038c9158eb3ea12b031 Mon Sep 17 00:00:00 2001
From: Xiu Guo
Date: Sat, 19 Dec 2015 11:15:16 -0800
Subject: [PATCH 2/2] SPARK-11562 log message improvement

---
 .../src/main/scala/org/apache/spark/repl/SparkILoop.scala | 7 +++----
 .../src/main/scala/org/apache/spark/repl/Main.scala       | 6 +++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index 4ed66e32cb9f8..f785305983fbd 100644
--- a/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/scala-2.10/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -132,7 +132,6 @@ class SparkILoop(
   @DeveloperApi
   var sparkContext: SparkContext = _
   var sqlContext: SQLContext = _
-  var useHiveContext: Boolean = _

   override def echoCommandMessage(msg: String) {
     intp.reporter printMessage msg
@@ -1027,7 +1026,7 @@ class SparkILoop(
   @DeveloperApi
   def createSQLContext(): SQLContext = {
-    useHiveContext = sparkContext.getConf.getBoolean("spark.sql.useHiveContext", true)
+    val useHiveContext = sparkContext.getConf.getBoolean("spark.sql.useHiveContext", true)
     val name = {
       if (useHiveContext) "org.apache.spark.sql.hive.HiveContext"
       else "org.apache.spark.sql.SQLContext"
     }
@@ -1049,8 +1048,8 @@ class SparkILoop(
       case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError
         if useHiveContext =>
         sqlContext = new SQLContext(sparkContext)
-        logInfo("Created sql context without Hive support, " +
-          "build Spark with -Phive to enable Hive support.")
+        logInfo("Created sql context without Hive support. " +
+          "To enable Hive support, build Spark with -Phive profile.")
     }
     sqlContext
   }
" + + "To enable Hive support, build Spark with -Phive profile.") } sqlContext } diff --git a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala index b545488bb3040..9c62862567df4 100644 --- a/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala +++ b/repl/scala-2.11/src/main/scala/org/apache/spark/repl/Main.scala @@ -37,7 +37,6 @@ object Main extends Logging { // the creation of SecurityManager has to be lazy so SPARK_YARN_MODE is set if needed var sparkContext: SparkContext = _ var sqlContext: SQLContext = _ - var useHiveContext: Boolean = conf.getBoolean("spark.sql.useHiveContext", true) var interp = new SparkILoop // this is a public var because tests reset it. private var hasErrors = false @@ -100,6 +99,7 @@ object Main extends Logging { } def createSQLContext(): SQLContext = { + val useHiveContext = conf.getBoolean("spark.sql.useHiveContext", true) val name = { if (useHiveContext) "org.apache.spark.sql.hive.HiveContext" else "org.apache.spark.sql.SQLContext" @@ -120,8 +120,8 @@ object Main extends Logging { case _: java.lang.ClassNotFoundException | _: java.lang.NoClassDefFoundError if useHiveContext => sqlContext = new SQLContext(sparkContext) - logInfo("Created sql context without Hive support, " + - "build Spark with -Phive to enable Hive support.") + logInfo("Created sql context without Hive support. " + + "To enable Hive support, build Spark with -Phive profile.") } sqlContext }