-
Notifications
You must be signed in to change notification settings - Fork 28.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[SPARK-9026] Refactor SimpleFutureAction.onComplete to not launch separate thread for every callback #7385
[SPARK-9026] Refactor SimpleFutureAction.onComplete to not launch separate thread for every callback #7385
Changes from 4 commits
df20ed5
55c41d3
1deed38
d779af8
1e2db7f
1346313
e08623a
b504384
12ddad6
dae8805
c6fdc21
7b22514
1a19268
c9ef8d4
692b3a4
17edbcd
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,9 @@ | |
|
||
package org.apache.spark.scheduler | ||
|
||
import scala.concurrent.{Future, Promise} | ||
import scala.util.Success | ||
|
||
/** | ||
* An object that waits for a DAGScheduler job to complete. As tasks finish, it passes their | ||
* results to the given handler function. | ||
|
@@ -28,12 +31,18 @@ private[spark] class JobWaiter[T]( | |
resultHandler: (Int, T) => Unit) | ||
extends JobListener { | ||
|
||
private val promise = Promise[Unit] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Stepping back: if we are using a promise anyway, why do we need a separate variable called "jobFinished"? The promise is sufficient to track whether the job has finished or not.
The rest of the code needs to use |
||
|
||
private var finishedTasks = 0 | ||
|
||
// Is the job as a whole finished (succeeded or failed)? | ||
@volatile | ||
private var _jobFinished = totalTasks == 0 | ||
|
||
if (_jobFinished) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @zsxwing, this |
||
promise.complete(Success(Unit)) | ||
} | ||
|
||
def jobFinished: Boolean = _jobFinished | ||
|
||
// If the job is finished, this will be its result. In the case of 0 task jobs (e.g. zero | ||
|
@@ -58,13 +67,15 @@ private[spark] class JobWaiter[T]( | |
if (finishedTasks == totalTasks) { | ||
_jobFinished = true | ||
jobResult = JobSucceeded | ||
promise.trySuccess() | ||
this.notifyAll() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This line can be removed. Right? |
||
} | ||
} | ||
|
||
override def jobFailed(exception: Exception): Unit = synchronized { | ||
_jobFinished = true | ||
jobResult = JobFailed(exception) | ||
promise.tryFailure(exception) | ||
this.notifyAll() | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This line can be removed too |
||
} | ||
|
||
|
@@ -74,4 +85,10 @@ private[spark] class JobWaiter[T]( | |
} | ||
return jobResult | ||
} | ||
|
||
/** | ||
* Return a Future for monitoring the job's success or failure. You can use this method to | ||
* avoid blocking your thread. | ||
*/ | ||
def toFuture: Future[Unit] = promise.future | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,11 +17,12 @@ | |
|
||
package org.apache.spark | ||
|
||
import scala.concurrent.Await | ||
import scala.concurrent.{ExecutionContext, Await} | ||
import scala.concurrent.duration.Duration | ||
|
||
import org.scalatest.{BeforeAndAfter, Matchers} | ||
|
||
import org.apache.spark.util.ThreadUtils | ||
|
||
class FutureActionSuite | ||
extends SparkFunSuite | ||
|
@@ -49,4 +50,20 @@ class FutureActionSuite | |
job.jobIds.size should be (2) | ||
} | ||
|
||
test("simple async action callbacks should not tie up execution context threads (SPARK-9026)") { | ||
val rdd = sc.parallelize(1 to 10, 2).map(_ => Thread.sleep(1000 * 1000)) | ||
val pool = ThreadUtils.newDaemonCachedThreadPool("SimpleFutureActionTest") | ||
val executionContext = ExecutionContext.fromExecutorService(pool) | ||
val job = rdd.countAsync() | ||
try { | ||
for (_ <- 1 to 10) { | ||
job.onComplete(_ => ())(executionContext) | ||
assert(pool.getLargestPoolSize < 10) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks flaky. Even though they are non-blocking, there is NO guarantee that one of the 10 scheduled functions has run. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think that my intention when writing this test was to have a test that demonstrated the eagerly-create-a-thread-per-callback problem with the old implementation of SimpleFutureAction. I don't think that this is flaky, but I also don't think that this test adds much value since we're unlikely to ever switch back to the old inefficient implementation. I'll just drop this test, since I don't think it's adding any real value right now. |
||
} | ||
} finally { | ||
job.cancel() | ||
executionContext.shutdownNow() | ||
} | ||
} | ||
|
||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@tdas, I think there's a bug here because we'll re-assign to `_value` if there are multiple `onComplete` callbacks. There's also a race in allowing `_value` to be assigned here, since there's a lag between when the `jobWaiter` future completes and when this callback runs. Fixing this now...