apache · reggert · Oct 24, 2015 · Oct 24, 2015 · Oct 24, 2015 · Nov 7, 2015
diff --git a/core/src/main/scala/org/apache/spark/FutureAction.scala b/core/src/main/scala/org/apache/spark/FutureAction.scala
@@ -22,11 +22,11 @@ import java.util.concurrent.TimeUnit
 
 import org.apache.spark.api.java.JavaFutureAction
 import org.apache.spark.rdd.RDD
-import org.apache.spark.scheduler.{JobFailed, JobSucceeded, JobWaiter}
+import org.apache.spark.scheduler.JobWaiter
 
 import scala.concurrent._
 import scala.concurrent.duration.Duration
-import scala.util.{Failure, Try}
+import scala.util.Try
 
 /**
  * A future for the result of an action to support cancellation. This is an extension of the
@@ -116,57 +116,26 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc:
   }
 
   override def ready(atMost: Duration)(implicit permit: CanAwait): SimpleFutureAction.this.type = {
-    if (!atMost.isFinite()) {
-      awaitResult()
-    } else jobWaiter.synchronized {
-      val finishTime = System.currentTimeMillis() + atMost.toMillis
-      while (!isCompleted) {
-        val time = System.currentTimeMillis()
-        if (time >= finishTime) {
-          throw new TimeoutException
-        } else {
-          jobWaiter.wait(finishTime - time)
-        }
-      }
-    }
+    jobWaiter.completionFuture.ready(atMost)
     this
   }
 
   @throws(classOf[Exception])
   override def result(atMost: Duration)(implicit permit: CanAwait): T = {
-    ready(atMost)(permit)
-    awaitResult() match {
-      case scala.util.Success(res) => res
-      case scala.util.Failure(e) => throw e
-    }
+    jobWaiter.completionFuture.ready(atMost)
+    value.get.get
   }
 
   override def onComplete[U](func: (Try[T]) => U)(implicit executor: ExecutionContext) {
-    executor.execute(new Runnable {
-      override def run() {
-        func(awaitResult())
-      }
-    })
+    jobWaiter.completionFuture onComplete {_ => func(value.get)}
   }
 
   override def isCompleted: Boolean = jobWaiter.jobFinished
 
   override def isCancelled: Boolean = _cancelled
 
-  override def value: Option[Try[T]] = {
-    if (jobWaiter.jobFinished) {
-      Some(awaitResult())
-    } else {
-      None
-    }
-  }
-
-  private def awaitResult(): Try[T] = {
-    jobWaiter.awaitResult() match {
-      case JobSucceeded => scala.util.Success(resultFunc)
-      case JobFailed(e: Exception) => scala.util.Failure(e)
-    }
-  }
+  // None until the job completes; then resultFunc's value, or the failure the job ended with.
+  override def value: Option[Try[T]] = jobWaiter.completionFuture.value.map(_.map(_ => resultFunc))
 
   def jobIds: Seq[Int] = Seq(jobWaiter.jobId)
 }
@@ -179,44 +148,25 @@ class SimpleFutureAction[T] private[spark](jobWaiter: JobWaiter[_], resultFunc:
  */
 class ComplexFutureAction[T] extends FutureAction[T] {
 
-  // Pointer to the thread that is executing the action. It is set when the action is run.
-  @volatile private var thread: Thread = _
-
-  // A flag indicating whether the future has been cancelled. This is used in case the future
-  // is cancelled before the action was even run (and thus we have no thread to interrupt).
-  @volatile private var _cancelled: Boolean = false
+  @volatile private var _cancelled = false
 
-  @volatile private var jobs: Seq[Int] = Nil
+  @volatile private var subActions: List[FutureAction[_]] = Nil
 
   // A promise used to signal the future.
-  private val p = promise[T]()
+  private val p = Promise[T]()
 
-  override def cancel(): Unit = this.synchronized {
+  override def cancel(): Unit = synchronized {
     _cancelled = true
-    if (thread != null) {
-      thread.interrupt()
-    }
+    p.tryFailure(new SparkException("Action has been cancelled"))
+    subActions foreach {_.cancel()}
   }
 
   /**
    * Executes some action enclosed in the closure. To properly enable cancellation, the closure
    * should use runJob implementation in this promise. See takeAsync for example.
    */
-  def run(func: => T)(implicit executor: ExecutionContext): this.type = {
-    scala.concurrent.future {
-      thread = Thread.currentThread
-      try {
-        p.success(func)
-      } catch {
-        case e: Exception => p.failure(e)
-      } finally {
-        // This lock guarantees when calling `thread.interrupt()` in `cancel`,
-        // thread won't be set to null.
-        ComplexFutureAction.this.synchronized {
-          thread = null
-        }
-      }
-    }
+  def run(func: => Future[T])(implicit executor: ExecutionContext): this.type = {
+    p tryCompleteWith func
     this
   }
 
@@ -229,28 +179,15 @@ class ComplexFutureAction[T] extends FutureAction[T] {
       processPartition: Iterator[T] => U,
       partitions: Seq[Int],
       resultHandler: (Int, U) => Unit,
-      resultFunc: => R) {
+      resultFunc: => R)(implicit executor: ExecutionContext) : FutureAction[R] = synchronized {
     // If the action hasn't been cancelled yet, submit the job. The check and the submitJob
     // command need to be in an atomic block.
-    val job = this.synchronized {
-      if (!isCancelled) {
-        rdd.context.submitJob(rdd, processPartition, partitions, resultHandler, resultFunc)
-      } else {
-        throw new SparkException("Action has been cancelled")
-      }
-    }
-
-    this.jobs = jobs ++ job.jobIds
-
-    // Wait for the job to complete. If the action is cancelled (with an interrupt),
-    // cancel the job and stop the execution. This is not in a synchronized block because
-    // Await.ready eventually waits on the monitor in FutureJob.jobWaiter.
-    try {
-      Await.ready(job, Duration.Inf)
-    } catch {
-      case e: InterruptedException =>
-        job.cancel()
-        throw new SparkException("Action has been cancelled")
+    if (!isCancelled) {
+      val job = rdd.context.submitJob(rdd, processPartition, partitions, resultHandler, resultFunc)
+      subActions = job::subActions
+      job
+    } else {
+      throw new SparkException("Action has been cancelled")
     }
   }
 
@@ -276,7 +213,7 @@ class ComplexFutureAction[T] extends FutureAction[T] {
 
   override def value: Option[Try[T]] = p.future.value
 
-  def jobIds: Seq[Int] = jobs
+  def jobIds: Seq[Int] = subActions flatMap {_.jobIds}
 
 }
 
@@ -303,7 +240,7 @@ class JavaFutureActionWrapper[S, T](futureAction: FutureAction[S], converter: S
     Await.ready(futureAction, timeout)
     futureAction.value.get match {
       case scala.util.Success(value) => converter(value)
-      case Failure(exception) =>
+      case scala.util.Failure(exception) =>
         if (isCancelled) {
           throw new CancellationException("Job cancelled").initCause(exception)
         } else {

diff --git a/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala b/core/src/main/scala/org/apache/spark/rdd/AsyncRDDActions.scala
@@ -22,10 +22,10 @@ import java.util.concurrent.atomic.AtomicLong
 import org.apache.spark.util.ThreadUtils
 
 import scala.collection.mutable.ArrayBuffer
-import scala.concurrent.ExecutionContext
+import scala.concurrent.{Future, ExecutionContext}
 import scala.reflect.ClassTag
 
-import org.apache.spark.{ComplexFutureAction, FutureAction, Logging}
+import org.apache.spark.{SimpleFutureAction, ComplexFutureAction, FutureAction, Logging}
 
 /**
  * A set of asynchronous RDD actions available through an implicit conversion.
@@ -66,14 +66,22 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi
    */
   def takeAsync(num: Int): FutureAction[Seq[T]] = self.withScope {
     val f = new ComplexFutureAction[Seq[T]]
-
-    f.run {
-      // This is a blocking action so we should use "AsyncRDDActions.futureExecutionContext" which
-      // is a cached thread pool.
-      val results = new ArrayBuffer[T](num)
-      val totalParts = self.partitions.length
-      var partsScanned = 0
-      while (results.size < num && partsScanned < totalParts) {
+    // Cached thread pool to handle aggregation of subtasks.
+    implicit val executionContext = AsyncRDDActions.futureExecutionContext
+    val results = new ArrayBuffer[T](num)
+    val totalParts = self.partitions.length
+
+    /*
+      Recursively triggers jobs to scan partitions until either the requested
+      number of elements are retrieved, or the partitions to scan are exhausted.
+      This implementation is non-blocking, asynchronously handling the
+      results of each job and triggering the next job using callbacks on futures.
+     */
+    def continue(partsScanned : Int) : Future[Seq[T]] =
+      if (results.size >= num || partsScanned >= totalParts) {
+        Future.successful(results.toSeq)
+      }
+      else {
         // The number of partitions to try in this iteration. It is ok for this number to be
         // greater than totalParts because we actually cap it at totalParts in runJob.
         var numPartsToTry = 1
@@ -95,19 +103,18 @@ class AsyncRDDActions[T: ClassTag](self: RDD[T]) extends Serializable with Loggi
         val p = partsScanned until math.min(partsScanned + numPartsToTry, totalParts)
 
         val buf = new Array[Array[T]](p.size)
-        f.runJob(self,
+        val job = f.runJob(self,
           (it: Iterator[T]) => it.take(left).toArray,
           p,
           (index: Int, data: Array[T]) => buf(index) = data,
           Unit)
-
-        buf.foreach(results ++= _.take(num - results.size))
-        partsScanned += numPartsToTry
+        job flatMap {case _ =>
+          buf.foreach(results ++= _.take(num - results.size))
+          continue(partsScanned + numPartsToTry)
+        }
       }
-      results.toSeq
-    }(AsyncRDDActions.futureExecutionContext)
 
-    f
+    f.run {continue(0)}
   }
 
   /**

diff --git a/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala b/core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -22,7 +22,10 @@ import java.util.Properties
 import java.util.concurrent.TimeUnit
 import java.util.concurrent.atomic.AtomicInteger
 
+import org.apache.spark.Success
+
 import scala.collection.Map
+import scala.concurrent.Await
 import scala.collection.mutable.{HashMap, HashSet, Stack}
 import scala.concurrent.duration._
 import scala.language.existentials
@@ -609,11 +612,12 @@ class DAGScheduler(
       properties: Properties): Unit = {
     val start = System.nanoTime
     val waiter = submitJob(rdd, func, partitions, callSite, resultHandler, properties)
-    waiter.awaitResult() match {
-      case JobSucceeded =>
+    Await.ready(waiter.completionFuture, atMost = Duration.Inf)
+    waiter.completionFuture.value.get match {
+      case scala.util.Success(_) =>
         logInfo("Job %d finished: %s, took %f s".format
           (waiter.jobId, callSite.shortForm, (System.nanoTime - start) / 1e9))
-      case JobFailed(exception: Exception) =>
+      case scala.util.Failure(exception) =>
         logInfo("Job %d failed: %s, took %f s".format
           (waiter.jobId, callSite.shortForm, (System.nanoTime - start) / 1e9))
         // SPARK-8644: Include user stack trace in exceptions coming from DAGScheduler.

diff --git a/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala b/core/src/main/scala/org/apache/spark/scheduler/JobWaiter.scala
@@ -17,6 +17,10 @@
 
 package org.apache.spark.scheduler
 
+import java.util.concurrent.atomic.AtomicInteger
+
+import scala.concurrent.{Future, Promise}
+
 /**
  * An object that waits for a DAGScheduler job to complete. As tasks finish, it passes their
  * results to the given handler function.
@@ -28,17 +32,15 @@ private[spark] class JobWaiter[T](
     resultHandler: (Int, T) => Unit)
   extends JobListener {
 
-  private var finishedTasks = 0
-
-  // Is the job as a whole finished (succeeded or failed)?
-  @volatile
-  private var _jobFinished = totalTasks == 0
-
-  def jobFinished: Boolean = _jobFinished
-
+  private val finishedTasks = new AtomicInteger(0)
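-  // If the job is finished, this will be its result. In the case of 0 task jobs (e.g. zero
-  // partition RDDs), we set the jobResult directly to JobSucceeded.
+  // Completed when the job finishes (success or failure). In the case of 0 task jobs (e.g. zero
+  // partition RDDs), the promise is created already completed with success.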
-  private var jobResult: JobResult = if (jobFinished) JobSucceeded else null
+  private val jobPromise : Promise[Unit] =
+    if (totalTasks == 0) Promise.successful(()) else Promise()
+
+  def jobFinished: Boolean = jobPromise.isCompleted
+
+  def completionFuture : Future[Unit] = jobPromise.future
 
   /**
    * Sends a signal to the DAGScheduler to cancel the job. The cancellation itself is handled
@@ -49,29 +51,14 @@ private[spark] class JobWaiter[T](
     dagScheduler.cancelJob(jobId)
   }
 
-  override def taskSucceeded(index: Int, result: Any): Unit = synchronized {
-    if (_jobFinished) {
-      throw new UnsupportedOperationException("taskSucceeded() called on a finished JobWaiter")
-    }
+  override def taskSucceeded(index: Int, result: Any): Unit = {
     resultHandler(index, result.asInstanceOf[T])
-    finishedTasks += 1
-    if (finishedTasks == totalTasks) {
-      _jobFinished = true
-      jobResult = JobSucceeded
-      this.notifyAll()
+    if (finishedTasks.incrementAndGet() == totalTasks) {
+      jobPromise.success(())
     }
   }
 
-  override def jobFailed(exception: Exception): Unit = synchronized {
-    _jobFinished = true
-    jobResult = JobFailed(exception)
-    this.notifyAll()
-  }
+  // tryFailure ignores a failure reported after completion; Promise.failure would throw.
+  override def jobFailed(exception: Exception): Unit = jobPromise.tryFailure(exception)
 
-  def awaitResult(): JobResult = synchronized {
-    while (!_jobFinished) {
-      this.wait()
-    }
-    return jobResult
-  }
 }
diff --git a/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala b/core/src/test/scala/org/apache/spark/rdd/AsyncRDDActionsSuite.scala
@@ -19,7 +19,7 @@ package org.apache.spark.rdd
 
 import java.util.concurrent.Semaphore
 
-import scala.concurrent.{Await, TimeoutException}
+import scala.concurrent._
 import scala.concurrent.duration.Duration
 import scala.concurrent.ExecutionContext.Implicits.global
 
@@ -197,4 +197,30 @@ class AsyncRDDActionsSuite extends SparkFunSuite with BeforeAndAfterAll with Tim
       Await.result(f, Duration(20, "milliseconds"))
     }
   }
+
+  test("SimpleFutureAction callback must not consume a thread while waiting") {
+    val executorInvoked = Promise[Unit]()
+    // Records any attempt to schedule work; the submitted runnable is never run.
+    val fakeExecutionContext = new ExecutionContext {
+      override def execute(runnable: Runnable): Unit = executorInvoked.success(())
+      override def reportFailure(t: Throwable): Unit = ???
+    }
+    val rdd = sc.parallelize(1 to 100, 4).mapPartitions(itr => {Thread.sleep(1000L); itr})
+    val f = rdd.countAsync()
+    f.onComplete(_ => ())(fakeExecutionContext)
+    assert(!executorInvoked.isCompleted)
+  }
+
+  test("ComplexFutureAction callback must not consume a thread while waiting") {
+    val executorInvoked = Promise[Unit]()
+    // Records any attempt to schedule work; the submitted runnable is never run.
+    val fakeExecutionContext = new ExecutionContext {
+      override def execute(runnable: Runnable): Unit = executorInvoked.success(())
+      override def reportFailure(t: Throwable): Unit = ???
+    }
+    val rdd = sc.parallelize(1 to 100, 4).mapPartitions(itr => {Thread.sleep(1000L); itr})
+    val f = rdd.takeAsync(100)
+    f.onComplete(_ => ())(fakeExecutionContext)
+    assert(!executorInvoked.isCompleted)
+  }
 }