-
Notifications
You must be signed in to change notification settings - Fork 28.2k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-4180] [Core] Prevent creation of multiple active SparkContexts #3121
Changes from 5 commits
afaa7e3
918e878
c4d35a2
d0437eb
06c5c54
1c66070
ed17e14
4629d5c
7ba6db8
a1cba65
79a7e6f
d809cb4
f5bb78c
372d0d3
85a424a
c0987d3
d38251b
23c7123
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -57,6 +57,9 @@ import org.apache.spark.util.{CallSite, ClosureCleaner, MetadataCleaner, Metadat | |
* Main entry point for Spark functionality. A SparkContext represents the connection to a Spark | ||
* cluster, and can be used to create RDDs, accumulators and broadcast variables on that cluster. | ||
* | ||
* Only one SparkContext may be active per JVM. You must `stop()` the active SparkContext before | ||
* creating a new one. This limitation will eventually be removed; see SPARK-2243 for more details. | ||
* | ||
* @param config a Spark Config object describing the application configuration. Any settings in | ||
* this config overrides the default configs as well as system properties. | ||
*/ | ||
|
@@ -179,6 +182,30 @@ class SparkContext(config: SparkConf) extends SparkStatusAPI with Logging { | |
conf.setIfMissing("spark.driver.host", Utils.localHostName()) | ||
conf.setIfMissing("spark.driver.port", "0") | ||
|
||
// This is placed after the configuration validation so that common configuration errors, like | ||
// forgetting to pass a master url or app name, don't prevent subsequent SparkContexts from being | ||
// constructed. | ||
SparkContext.SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { | ||
SparkContext.activeSparkContextCreationSite.foreach { creationSite => | ||
val errMsg = "Only one SparkContext may be active in this JVM (see SPARK-2243)." | ||
val errDetails = if (SparkContext.activeSparkContextIsFullyConstructed) { | ||
s"The currently active SparkContext was created at:\n${creationSite.longForm}" | ||
} else { | ||
s"Another SparkContext is either being constructed or threw an exception from its" + ||
" constructor; please restart your JVM in order to create a new SparkContext. " + ||
s"The current SparkContext was created at:\n${creationSite.longForm}" ||
} | ||
val exception = new SparkException(s"$errMsg $errDetails") | ||
if (conf.getBoolean("spark.driver.disableMultipleSparkContextsErrorChecking", false)) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Here I prefer something more concise There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, I picked a kinda verbose name because I wanted to reserve the better names in case we added a public configuration that does something similar. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'll switch it to the nicer one, though. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Actually, I think I'll call it |
||
logWarning("Multiple SparkContext error detection is disabled!", exception) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. More like "Multiple SparkContexts detected in the same JVM!". We're not trying to warn people that they disabled the config. |
||
} else { | ||
throw exception | ||
} | ||
} | ||
SparkContext.activeSparkContextCreationSite = Some(Utils.getCallSite()) | ||
SparkContext.activeSparkContextIsFullyConstructed = false | ||
} | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can you put this chunk of code in |
||
val jars: Seq[String] = | ||
conf.getOption("spark.jars").map(_.split(",")).map(_.filter(_.size != 0)).toSeq.flatten | ||
|
||
|
@@ -1071,27 +1098,31 @@ class SparkContext(config: SparkConf) extends SparkStatusAPI with Logging { | |
|
||
/** Shut down the SparkContext. */ | ||
def stop() { | ||
postApplicationEnd() | ||
ui.foreach(_.stop()) | ||
// Do this only if not stopped already - best case effort. | ||
// prevent NPE if stopped more than once. | ||
val dagSchedulerCopy = dagScheduler | ||
dagScheduler = null | ||
if (dagSchedulerCopy != null) { | ||
env.metricsSystem.report() | ||
metadataCleaner.cancel() | ||
env.actorSystem.stop(heartbeatReceiver) | ||
cleaner.foreach(_.stop()) | ||
dagSchedulerCopy.stop() | ||
taskScheduler = null | ||
// TODO: Cache.stop()? | ||
env.stop() | ||
SparkEnv.set(null) | ||
listenerBus.stop() | ||
eventLogger.foreach(_.stop()) | ||
logInfo("Successfully stopped SparkContext") | ||
} else { | ||
logInfo("SparkContext already stopped") | ||
SparkContext.SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { | ||
SparkContext.activeSparkContextCreationSite = None | ||
SparkContext.activeSparkContextIsFullyConstructed = false | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Should we set these at the end of |
||
postApplicationEnd() | ||
ui.foreach(_.stop()) | ||
// Do this only if not stopped already - best case effort. | ||
// prevent NPE if stopped more than once. | ||
val dagSchedulerCopy = dagScheduler | ||
dagScheduler = null | ||
if (dagSchedulerCopy != null) { | ||
env.metricsSystem.report() | ||
metadataCleaner.cancel() | ||
env.actorSystem.stop(heartbeatReceiver) | ||
cleaner.foreach(_.stop()) | ||
dagSchedulerCopy.stop() | ||
taskScheduler = null | ||
// TODO: Cache.stop()? | ||
env.stop() | ||
SparkEnv.set(null) | ||
listenerBus.stop() | ||
eventLogger.foreach(_.stop()) | ||
logInfo("Successfully stopped SparkContext") | ||
} else { | ||
logInfo("SparkContext already stopped") | ||
} | ||
} | ||
} | ||
|
||
|
@@ -1157,7 +1188,7 @@ class SparkContext(config: SparkConf) extends SparkStatusAPI with Logging { | |
if (dagScheduler == null) { | ||
throw new SparkException("SparkContext has been shutdown") | ||
} | ||
val callSite = getCallSite | ||
val callSite = Utils.getCallSite() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why this change? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Doh, good catch! This was a mistake. |
||
val cleanedFunc = clean(func) | ||
logInfo("Starting job: " + callSite.shortForm) | ||
dagScheduler.runJob(rdd, cleanedFunc, partitions, callSite, allowLocal, | ||
|
@@ -1380,6 +1411,10 @@ class SparkContext(config: SparkConf) extends SparkStatusAPI with Logging { | |
private[spark] def cleanup(cleanupTime: Long) { | ||
persistentRdds.clearOldValues(cleanupTime) | ||
} | ||
|
||
SparkContext.SPARK_CONTEXT_CONSTRUCTOR_LOCK.synchronized { | ||
SparkContext.activeSparkContextIsFullyConstructed = true | ||
} | ||
} | ||
|
||
/** | ||
|
@@ -1388,6 +1423,30 @@ class SparkContext(config: SparkConf) extends SparkStatusAPI with Logging { | |
*/ | ||
object SparkContext extends Logging { | ||
|
||
/** | ||
* Lock that prevents multiple threads from being in the SparkContext constructor at the same | ||
* time. | ||
*/ | ||
private[spark] val SPARK_CONTEXT_CONSTRUCTOR_LOCK = new Object() | ||
|
||
/** | ||
* Records the creation site of the last SparkContext to successfully enter the constructor. | ||
* This may be an active SparkContext, or a SparkContext that is currently under construction. | ||
* | ||
* Access to this field should be guarded by SPARK_CONTEXT_CONSTRUCTOR_LOCK | ||
*/ | ||
private[spark] var activeSparkContextCreationSite: Option[CallSite] = None | ||
|
||
/** | ||
* Tracks whether `activeSparkContextCreationSite` refers to a fully-constructed SparkContext | ||
* or a partially-constructed one that is either still executing its constructor or threw | ||
* an exception from its constructor. This is used to enable better error-reporting when | ||
* SparkContext construction fails due to existing contexts. | ||
* | ||
* Access to this field should be guarded by SPARK_CONTEXT_CONSTRUCTOR_LOCK | ||
*/ | ||
private[spark] var activeSparkContextIsFullyConstructed: Boolean = false | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Personal preference, but the name is a little long. I'd prefer |
||
|
||
private[spark] val SPARK_JOB_DESCRIPTION = "spark.job.description" | ||
|
||
private[spark] val SPARK_JOB_GROUP_ID = "spark.jobGroup.id" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -487,20 +487,20 @@ class StreamingContext private[streaming] ( | |
* received data to be completed | ||
*/ | ||
def stop(stopSparkContext: Boolean, stopGracefully: Boolean): Unit = synchronized { | ||
// Warn (but not fail) if context is stopped twice, | ||
// or context is stopped before starting | ||
if (state == Initialized) { | ||
logWarning("StreamingContext has not been started yet") | ||
return | ||
} | ||
if (state == Stopped) { | ||
logWarning("StreamingContext has already been stopped") | ||
return | ||
} // no need to throw an exception as it's okay to stop twice | ||
scheduler.stop(stopGracefully) | ||
logInfo("StreamingContext stopped successfully") | ||
waiter.notifyStop() | ||
if (stopSparkContext) sc.stop() | ||
} else { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I like this change but I would like this as part of a separate JIRA + PR tied to streaming so that its easier to track. Could issue a new PR (with unit test), and I can merge that quickly and unblock this. |
||
// Even if the streaming context has not been started, we still need to stop the SparkContext: | ||
if (stopSparkContext) sc.stop() | ||
if (state == Initialized) { | ||
logWarning("StreamingContext has not been started yet") | ||
} else { | ||
scheduler.stop(stopGracefully) | ||
logInfo("StreamingContext stopped successfully") | ||
waiter.notifyStop() | ||
} | ||
} | ||
// The state should always be Stopped after calling `stop()`, even if we haven't started yet: | ||
state = Stopped | ||
} | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd say "may eventually be removed".