apache · JoshRosen · Jul 14, 2015 · Jul 14, 2015 · Jul 14, 2015 · Jul 14, 2015
diff --git a/core/src/main/scala/org/apache/spark/FutureAction.scala b/core/src/main/scala/org/apache/spark/FutureAction.scala
@@ -22,7 +22,7 @@ import java.util.concurrent.TimeUnit
 
 import org.apache.spark.api.java.JavaFutureAction
 import org.apache.spark.rdd.RDD
-import org.apache.spark.scheduler.{JobFailed, JobSucceeded, JobWaiter}
+import org.apache.spark.scheduler.JobWaiter
 
 import scala.concurrent._
 import scala.concurrent.duration.Duration
@@ -108,65 +108,45 @@ trait FutureAction[T] extends Future[T] {
 class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc: => T)
   extends FutureAction[T] {
 
+  // Note: `resultFunc` is a closure which may contain references to state that's updated by the
+  // JobWaiter's result handler function. It should only be evaluated once the job has succeeded.
+
   @volatile private var _cancelled: Boolean = false
+  // Null until an onComplete callback has run; then holds a Try representing success or failure.
+  @volatile private var _value: Try[T] = null
 
   override def cancel() {
     _cancelled = true
     jobWaiter.cancel()
   }
 
   override def ready(atMost: Duration)(implicit permit: CanAwait): SimpleFutureAction.this.type = {
-    if (!atMost.isFinite()) {
-      awaitResult()
-    } else jobWaiter.synchronized {
-      val finishTime = System.currentTimeMillis() + atMost.toMillis
-      while (!isCompleted) {
-        val time = System.currentTimeMillis()
-        if (time >= finishTime) {
-          throw new TimeoutException
-        } else {
-          jobWaiter.wait(finishTime - time)
-        }
-      }
-    }
+    // Blocks until the JobWaiter's future completes; a job failure is not rethrown here.
+    jobWaiter.toFuture.ready(atMost)(permit)
     this
   }
 
   @throws(classOf[Exception])
   override def result(atMost: Duration)(implicit permit: CanAwait): T = {
-    ready(atMost)(permit)
-    awaitResult() match {
-      case scala.util.Success(res) => res
-      case scala.util.Failure(e) => throw e
-    }
+    // This call to the JobWaiter's future will throw an exception if the job failed.
+    jobWaiter.toFuture.result(atMost)(permit)
+    // At this point, we know that the job succeeded so it's safe to evaluate this function:
+    resultFunc
   }
 
-  override def onComplete[U](func: (Try[T]) => U)(implicit executor: ExecutionContext) {
-    executor.execute(new Runnable {
-      override def run() {
-        func(awaitResult())
-      }
-    })
+  override def onComplete[U](func: (Try[T]) => U)(implicit executor: ExecutionContext): Unit = {
+    jobWaiter.toFuture.onComplete { (jobWaiterResult: Try[Unit]) =>
+      // If the job succeeded, then evaluate the result function; otherwise, preserve the exception.
+      _value = jobWaiterResult.map(_ => resultFunc)
+      func(_value)
+    }
   }
 
   override def isCompleted: Boolean = jobWaiter.jobFinished
 
   override def isCancelled: Boolean = _cancelled
 
-  override def value: Option[Try[T]] = {
-    if (jobWaiter.jobFinished) {
-      Some(awaitResult())
-    } else {
-      None
-    }
-  }
-
-  private def awaitResult(): Try[T] = {
-    jobWaiter.awaitResult() match {
-      case JobSucceeded => scala.util.Success(resultFunc)
-      case JobFailed(e: Exception) => scala.util.Failure(e)
-    }
-  }
+  override def value: Option[Try[T]] = Option(_value)
 
   def jobIds: Seq[Int] = Seq(jobWaiter.jobId)
 }

diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala
@@ -17,6 +17,9 @@
 
 package org.apache.spark.scheduler
 
+import scala.concurrent.{Future, Promise}
+import scala.util.Success
+
 /**
  * An object that waits for a DAGScheduler job to complete. As tasks finish, it passes their
  * results to the given handler function.
@@ -28,12 +31,18 @@ private[spark] class JobWaiter[T](
     resultHandler: (Int, T) => Unit)
   extends JobListener {
 
+  private val promise = Promise[Unit]
+
   private var finishedTasks = 0
 
   // Is the job as a whole finished (succeeded or failed)?
   @volatile
   private var _jobFinished = totalTasks == 0
 
+  if (_jobFinished) {
+    promise.complete(Success(Unit))
+  }
+
   def jobFinished: Boolean = _jobFinished
 
   // If the job is finished, this will be its result. In the case of 0 task jobs (e.g. zero
@@ -58,13 +67,15 @@ private[spark] class JobWaiter[T](
     if (finishedTasks == totalTasks) {
       _jobFinished = true
       jobResult = JobSucceeded
+      promise.trySuccess()
       this.notifyAll()
     }
   }
 
   override def jobFailed(exception: Exception): Unit = synchronized {
     _jobFinished = true
     jobResult = JobFailed(exception)
+    promise.tryFailure(exception)
     this.notifyAll()
   }
 
@@ -74,4 +85,10 @@ private[spark] class JobWaiter[T](
     }
     return jobResult
   }
+
+  /**
+   * Return a Future for monitoring the job's success or failure event. You can use this method
+   * to avoid blocking your thread.
+   */
+  def toFuture: Future[Unit] = promise.future
 }
diff --git a/core/src/test/scala/org/apache/spark/FutureActionSuite.scala b/core/src/test/scala/org/apache/spark/FutureActionSuite.scala
@@ -17,11 +17,12 @@
 
 package org.apache.spark
 
-import scala.concurrent.Await
+import scala.concurrent.{ExecutionContext, Await}
 import scala.concurrent.duration.Duration
 
 import org.scalatest.{BeforeAndAfter, Matchers}
 
+import org.apache.spark.util.ThreadUtils
 
 class FutureActionSuite
   extends SparkFunSuite
@@ -49,4 +50,20 @@ class FutureActionSuite
     job.jobIds.size should be (2)
   }
 
+  test("simple async action callbacks should not tie up execution context threads (SPARK-9026)") {
+    val rdd = sc.parallelize(1 to 10, 2).map(_ => Thread.sleep(1000 * 1000))
+    val pool = ThreadUtils.newDaemonCachedThreadPool("SimpleFutureActionTest")
+    val executionContext = ExecutionContext.fromExecutorService(pool)
+    val job = rdd.countAsync()
+    try {
+      for (_ <- 1 to 10) {
+        job.onComplete(_ => ())(executionContext)
+        assert(pool.getLargestPoolSize < 10)
+      }
+    } finally {
+      job.cancel()
+      executionContext.shutdownNow()
+    }
+  }
+
 }