apache · uncleGen · Feb 15, 2017 · Feb 15, 2017 · uncleGen · Feb 15, 2017
diff --git a/core/src/main/scala/org/apache/spark/SparkContext.scala b/core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -2762,7 +2762,7 @@ object SparkContext extends Logging {
 /**
  * A collection of regexes for extracting information from the master string.
  */
-private object SparkMasterRegex {
+private[spark] object SparkMasterRegex {
   // Regular expression used for local[N] and local[*] master formats
   val LOCAL_N_REGEX = """local\[([0-9]+|\*)\]""".r
   // Regular expression for local[N, maxRetries], used in tests with failing tasks

diff --git a/core/src/main/scala/org/apache/spark/internal/config/package.scala b/core/src/main/scala/org/apache/spark/internal/config/package.scala
@@ -87,6 +87,14 @@ package object config {
     .intConf
     .createOptional
 
+  private[spark] val EXECUTOR_CORES = ConfigBuilder("spark.executor.cores")
+    .intConf
+    .createWithDefault(1) // yields 1 when the key is unset
+
+  private[spark] val CORES_MAX = ConfigBuilder("spark.cores.max")
+    .intConf
+    .createOptional // no default, so reads return an Option
+
   private[spark] val PY_FILES = ConfigBuilder("spark.submit.pyFiles")
     .internal()
     .stringConf

diff --git a/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala b/resource-managers/yarn/src/main/scala/org/apache/spark/deploy/yarn/config.scala
@@ -215,10 +215,6 @@ package object config {
 
   /* Executor configuration. */
 
-  private[spark] val EXECUTOR_CORES = ConfigBuilder("spark.executor.cores")
-    .intConf
-    .createWithDefault(1)
-
   private[spark] val EXECUTOR_MEMORY_OVERHEAD = ConfigBuilder("spark.yarn.executor.memoryOverhead")
     .bytesConf(ByteUnit.MiB)
     .createOptional

diff --git a/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala b/streaming/src/main/scala/org/apache/spark/streaming/scheduler/ReceiverTracker.scala
@@ -26,6 +26,7 @@ import scala.util.{Failure, Success}
 
 import org.apache.spark._
 import org.apache.spark.internal.Logging
+import org.apache.spark.internal.config._
 import org.apache.spark.rdd.RDD
 import org.apache.spark.rpc._
 import org.apache.spark.scheduler.{ExecutorCacheTaskLocation, TaskLocation}
@@ -260,7 +261,7 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false
   }
 
   def numReceivers(): Int = {
-    receiverInputStreams.size
+    receiverInputStreams.length
   }
 
   /** Register a receiver */
@@ -436,6 +437,74 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false
     assert(getExecutors.nonEmpty)
   }
 
+  /**
+   * Fail fast when the configured cores cannot run every receiver at the same time.
+   *
+   * Capacity is derived from `spark.master` and the related core/executor settings (the local
+   * CPU count for `local[*]`). Nothing is checked when it cannot be derived: an unrecognised
+   * master, or a standalone/Mesos master without `spark.cores.max`.
+   *
+   * @throws SparkException if the configured cores give fewer task slots than receivers, or
+   *                        if the master is plain `local`
+   */
+  private def checkResourceValid(): Unit = {
+    val coresPerTask = ssc.conf.get(CPUS_PER_TASK)
+
+    def localCpuCount: Int = Runtime.getRuntime.availableProcessors()
+
+    // Throws if `totalCores` gives fewer task slots than receivers. `totalCores` is a Long so
+    // that a huge executor count multiplied by cores per executor cannot wrap around.
+    def requireSlots(totalCores: Long, detail: String): Unit = {
+      if (totalCores / coresPerTask < numReceivers) {
+        throw new SparkException("There are no enough resource to run Spark Streaming job: " +
+          "existing resource can only be used to scheduler some of receivers. " +
+          s"$detail and $numReceivers receivers.")
+      }
+    }
+
+    // Shared by `local[N]` and `local[N, maxFailures]`; `*` means one thread per local CPU.
+    def requireLocalThreads(threads: String): Unit = {
+      val threadCount = if (threads == "*") localCpuCount else threads.toInt
+      requireSlots(threadCount, s"$threadCount threads, $coresPerTask threads per task")
+    }
+
+    ssc.conf.get("spark.master") match {
+      case m if m.startsWith("yarn") =>
+        val numCoresPerExecutor = ssc.conf.get(EXECUTOR_CORES)
+        val numExecutors = getTargetExecutorNumber()
+        requireSlots(numExecutors.toLong * numCoresPerExecutor, s"$numExecutors executors, " +
+          s"$numCoresPerExecutor cores per executor, $coresPerTask cores per task")
+      case m if m.startsWith("spark") || m.startsWith("mesos") =>
+        // No `spark.cores.max` means the cluster manager's default applies, not zero cores,
+        // so there is no configured limit to validate against.
+        ssc.conf.get(CORES_MAX).foreach { coresMax =>
+          requireSlots(coresMax, s"$coresMax cores totally, $coresPerTask cores per task")
+        }
+      case "local" =>
+        throw new SparkException("There are no enough resource to run Spark Streaming job.")
+      case SparkMasterRegex.LOCAL_N_REGEX(threads) =>
+        requireLocalThreads(threads)
+      case SparkMasterRegex.LOCAL_N_FAILURES_REGEX(threads, _) =>
+        requireLocalThreads(threads)
+      case SparkMasterRegex.LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, _) =>
+        requireSlots(numSlaves.toLong * coresPerSlave.toInt, s"$numSlaves slaves, " +
+          s"$coresPerSlave cores per slave, $coresPerTask cores per task")
+      case _ =>
+        // Capacity cannot be derived for other masters, so skip the check.
+    }
+  }
+
+  /** Executor count to plan for: dynamic-allocation maximum, else configured instances. */
+  private def getTargetExecutorNumber(): Int = {
+    if (!Utils.isDynamicAllocationEnabled(ssc.conf)) {
+      // The environment only supplies the fallback (2 when unset); the conf entry wins.
+      val fromEnv = sys.env.get("SPARK_EXECUTOR_INSTANCES").fold(2)(_.toInt)
+      ssc.conf.get(EXECUTOR_INSTANCES).getOrElse(fromEnv)
+    } else {
+      ssc.conf.get(DYN_ALLOCATION_MAX_EXECUTORS)
+    }
+  }
+
   /**
    * Get the receivers from the ReceiverInputDStreams, distributes them to the
    * worker nodes as a parallel collection, and runs them.
@@ -447,6 +516,8 @@ class ReceiverTracker(ssc: StreamingContext, skipReceiverLaunch: Boolean = false
       rcvr
     }
 
+    checkResourceValid()
+
     runDummySparkJob()
 
     logInfo("Starting " + receivers.length + " receivers")

diff --git a/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala b/streaming/src/test/scala/org/apache/spark/streaming/StreamingContextSuite.scala
@@ -837,6 +837,26 @@ class StreamingContextSuite extends SparkFunSuite with BeforeAndAfter with Timeo
     assert(latch.await(60, TimeUnit.SECONDS))
   }
 
+  // Each master below offers a single core while the job registers two receivers, so
+  // starting the context is expected to be rejected with a SparkException.
+  for (master <- Array("local", "local[1]", "local[1, 2]", "local-cluster[1, 1, 1024]")) {
+    test(s"check resource enough for $master") {
+      val conf = new SparkConf().setMaster(master).setAppName(appName)
+      ssc = new StreamingContext(conf, Milliseconds(100))
+      val input1 = ssc.receiverStream(new TestReceiver)
+      val input2 = ssc.receiverStream(new TestReceiver)
+      input1.union(input2).foreachRDD { rdd => rdd.count() }
+
+      // `intercept` fails the test by itself if nothing, or a different exception, is thrown,
+      // and reports what actually happened.
+      intercept[SparkException] {
+        ssc.start()
+        // Bounded wait in case the failure surfaces only after start() has returned.
+        ssc.awaitTerminationOrTimeout(60000)
+      }
+    }
+  }
+
   def addInputStream(s: StreamingContext): DStream[Int] = {
     val input = (1 to 100).map(i => 1 to i)
     val inputStream = new TestInputStream(s, input, 1)
@@ -938,7 +958,7 @@ object SlowTestReceiver {
 /** Streaming application for testing DStream and RDD creation sites */
 package object testPackage extends Assertions {
   def test() {
-    val conf = new SparkConf().setMaster("local").setAppName("CreationSite test")
+    val conf = new SparkConf().setMaster("local[2]").setAppName("CreationSite test")
     val ssc = new StreamingContext(conf, Milliseconds(100))
     try {
       val inputStream = ssc.receiverStream(new TestReceiver)