
Commit

Merge branch 'master' into SPARK-10117
Lewuathe committed Sep 3, 2015
2 parents a97ee97 + 67580f1 commit 62010af
Showing 35 changed files with 778 additions and 121 deletions.
core/src/main/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleExternalSorter.java
@@ -122,6 +122,10 @@ public UnsafeShuffleExternalSorter(
     this.maxRecordSizeBytes = pageSizeBytes - 4;
     this.writeMetrics = writeMetrics;
     initializeForWriting();
+
+    // preserve first page to ensure that we have at least one page to work with. Otherwise,
+    // other operators in the same task may starve this sorter (SPARK-9709).
+    acquireNewPageIfNecessary(pageSizeBytes);
   }

/**
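The new constructor lines eagerly acquire one memory page so that other operators sharing the task's memory pool cannot drain it before the sorter's first insert (SPARK-9709). A toy Scala sketch of the idea, with hypothetical PagePool/Sorter names rather than Spark's actual memory-manager API:

object PageReservation {
  // Toy stand-in for a task's shared pool of fixed-size memory pages.
  class PagePool(var freePages: Int) {
    def tryAcquire(): Boolean =
      if (freePages > 0) { freePages -= 1; true } else false
  }

  // Mirrors the constructor change above: reserve the first page eagerly so a
  // sibling operator cannot starve this sorter before its first insert.
  class Sorter(pool: PagePool) {
    require(pool.tryAcquire(), "could not reserve an initial page")
  }

  def main(args: Array[String]): Unit = {
    val pool = new PagePool(freePages = 2)
    val sorter = new Sorter(pool)   // holds one page from construction onward
    val sibling = new Sorter(pool)  // takes the last free page
    println(pool.tryAcquire())      // false: pool is drained, but both sorters can make progress
  }
}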
6 changes: 5 additions & 1 deletion core/src/main/scala/org/apache/spark/SparkContext.scala
@@ -1516,8 +1516,12 @@ class SparkContext(config: SparkConf) extends Logging with ExecutorAllocationCli
    */
   @DeveloperApi
   def getRDDStorageInfo: Array[RDDInfo] = {
+    getRDDStorageInfo(_ => true)
+  }
+
+  private[spark] def getRDDStorageInfo(filter: RDD[_] => Boolean): Array[RDDInfo] = {
     assertNotStopped()
-    val rddInfos = persistentRdds.values.map(RDDInfo.fromRdd).toArray
+    val rddInfos = persistentRdds.values.filter(filter).map(RDDInfo.fromRdd).toArray
     StorageUtils.updateRddInfo(rddInfos, getExecutorStorageStatus)
     rddInfos.filter(_.isCached)
   }
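The new private[spark] overload pushes the predicate ahead of the per-RDD work (RDDInfo.fromRdd plus StorageUtils.updateRddInfo), so a caller that wants one RDD's storage info (see the RDD.scala hunk below) no longer pays for every persisted RDD. A minimal sketch of the filter-before-map pattern, using hypothetical stand-in types:

object FilterBeforeMap {
  final case class RddLike(id: Int)  // stand-in for RDD[_]
  final case class Info(id: Int)     // stand-in for RDDInfo

  // Stand-in for the expensive RDDInfo.fromRdd + StorageUtils.updateRddInfo work.
  def expensiveInfo(r: RddLike): Info = { println(s"computing info for RDD ${r.id}"); Info(r.id) }

  // Apply the predicate first, then pay the per-element cost.
  def storageInfo(persisted: Iterable[RddLike], keep: RddLike => Boolean): Array[Info] =
    persisted.filter(keep).map(expensiveInfo).toArray

  def main(args: Array[String]): Unit = {
    val persisted = Seq(RddLike(1), RddLike(2), RddLike(3))
    // Only RDD 2's info is computed; the old code computed all three and filtered afterwards.
    println(storageInfo(persisted, _.id == 2).toSeq)
  }
}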
core/src/main/scala/org/apache/spark/deploy/history/FsHistoryProvider.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.deploy.history
 
 import java.io.{BufferedInputStream, FileNotFoundException, InputStream, IOException, OutputStream}
+import java.util.UUID
 import java.util.concurrent.{ExecutorService, Executors, TimeUnit}
 import java.util.zip.{ZipEntry, ZipOutputStream}

@@ -73,7 +74,7 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
   // The modification time of the newest log detected during the last scan. This is used
   // to ignore logs that are older during subsequent scans, to avoid processing data that
   // is already known.
-  private var lastModifiedTime = -1L
+  private var lastScanTime = -1L
 
   // Mapping of application IDs to their metadata, in descending end time order. Apps are inserted
   // into the map in order, so the LinkedHashMap maintains the correct ordering.
@@ -179,15 +180,14 @@ private[history] class FsHistoryProvider(conf: SparkConf, clock: Clock)
    */
   private[history] def checkForLogs(): Unit = {
     try {
+      val newLastScanTime = getNewLastScanTime()
       val statusList = Option(fs.listStatus(new Path(logDir))).map(_.toSeq)
         .getOrElse(Seq[FileStatus]())
-      var newLastModifiedTime = lastModifiedTime
       val logInfos: Seq[FileStatus] = statusList
         .filter { entry =>
           try {
             getModificationTime(entry).map { time =>
-              newLastModifiedTime = math.max(newLastModifiedTime, time)
-              time >= lastModifiedTime
+              time >= lastScanTime
             }.getOrElse(false)
           } catch {
             case e: AccessControlException =>
@@ -224,12 +224,29 @@
         }
       }
 
-      lastModifiedTime = newLastModifiedTime
+      lastScanTime = newLastScanTime
     } catch {
       case e: Exception => logError("Exception in checking for event log updates", e)
     }
   }
 
+  private def getNewLastScanTime(): Long = {
+    val fileName = "." + UUID.randomUUID().toString
+    val path = new Path(logDir, fileName)
+    val fos = fs.create(path)
+
+    try {
+      fos.close()
+      fs.getFileStatus(path).getModificationTime
+    } catch {
+      case e: Exception =>
+        logError("Exception encountered when attempting to update last scan time", e)
+        lastScanTime
+    } finally {
+      fs.delete(path)
+    }
+  }
+
   override def writeEventLogs(
       appId: String,
       attemptId: Option[String],
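getNewLastScanTime reads the scan timestamp from the log filesystem itself, by touching a hidden throwaway file and taking its modification time, so comparisons against event-log mtimes are not skewed by clock drift between the history server and the machines writing logs. A minimal java.nio analogue for a local filesystem, assuming the same touch-and-stat trick:

import java.nio.file.{Files, Path}
import java.util.UUID

object FsClock {
  // Touch a hidden scratch file, read the filesystem's modification timestamp
  // for it, then clean up. On failure, fall back to the previous scan time,
  // mirroring the error handling in getNewLastScanTime above.
  def newLastScanTime(logDir: Path, previousScanTime: Long): Long = {
    val scratch = logDir.resolve("." + UUID.randomUUID().toString)
    try {
      Files.createFile(scratch)
      Files.getLastModifiedTime(scratch).toMillis
    } catch {
      case e: Exception =>
        Console.err.println(s"Could not read filesystem time: $e")
        previousScanTime
    } finally {
      Files.deleteIfExists(scratch)
    }
  }

  def main(args: Array[String]): Unit = {
    val dir = Files.createTempDirectory("logdir")
    println(newLastScanTime(dir, previousScanTime = -1L))
  }
}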
core/src/main/scala/org/apache/spark/network/netty/NettyBlockRpcServer.scala
@@ -38,6 +38,7 @@ import org.apache.spark.storage.{BlockId, StorageLevel}
  * is equivalent to one Spark-level shuffle block.
  */
 class NettyBlockRpcServer(
+    appId: String,
     serializer: Serializer,
     blockManager: BlockDataManager)
   extends RpcHandler with Logging {
@@ -55,7 +56,7 @@ class NettyBlockRpcServer(
       case openBlocks: OpenBlocks =>
         val blocks: Seq[ManagedBuffer] =
           openBlocks.blockIds.map(BlockId.apply).map(blockManager.getBlockData)
-        val streamId = streamManager.registerStream(blocks.iterator.asJava)
+        val streamId = streamManager.registerStream(appId, blocks.iterator.asJava)
         logTrace(s"Registered streamId $streamId with ${blocks.size} buffers")
         responseContext.onSuccess(new StreamHandle(streamId, blocks.size).toByteArray)
core/src/main/scala/org/apache/spark/network/netty/NettyBlockTransferService.scala
@@ -49,7 +49,7 @@ class NettyBlockTransferService(conf: SparkConf, securityManager: SecurityManage
   private[this] var appId: String = _
 
   override def init(blockDataManager: BlockDataManager): Unit = {
-    val rpcHandler = new NettyBlockRpcServer(serializer, blockDataManager)
+    val rpcHandler = new NettyBlockRpcServer(conf.getAppId, serializer, blockDataManager)
     var serverBootstrap: Option[TransportServerBootstrap] = None
     var clientBootstrap: Option[TransportClientBootstrap] = None
     if (authEnabled) {
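Threading the application ID from NettyBlockTransferService (conf.getAppId) into NettyBlockRpcServer means each registered stream is tagged with its owning application. A plausible reading, not confirmed by this diff alone, is that this lets the server validate later fetches against the requester's app; a toy sketch of app-scoped stream registration with hypothetical names:

import scala.collection.mutable

object AppScopedStreams {
  private var nextStreamId = 0L
  private val streams = mutable.Map.empty[Long, (String, Iterator[Array[Byte]])]

  // Analogous in shape to streamManager.registerStream(appId, blocks): the
  // stream is recorded together with the application that owns it.
  def registerStream(appId: String, blocks: Iterator[Array[Byte]]): Long = {
    nextStreamId += 1
    streams(nextStreamId) = (appId, blocks)
    nextStreamId
  }

  // A later fetch is checked against the owning application.
  def openStream(requesterAppId: String, streamId: Long): Iterator[Array[Byte]] = {
    val (owner, blocks) = streams(streamId)
    require(owner == requesterAppId, s"stream $streamId belongs to app $owner")
    blocks
  }

  def main(args: Array[String]): Unit = {
    val id = registerStream("app-1", Iterator(Array[Byte](1, 2, 3)))
    println(openStream("app-1", id).size)  // 1 buffer
  }
}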
core/src/main/scala/org/apache/spark/rdd/MapPartitionsWithPreparationRDD.scala
@@ -41,7 +41,7 @@ private[spark] class MapPartitionsWithPreparationRDD[U: ClassTag, T: ClassTag, M
 
   // In certain join operations, prepare can be called on the same partition multiple times.
   // In this case, we need to ensure that each call to compute gets a separate prepare argument.
-  private[this] var preparedArguments: ArrayBuffer[M] = new ArrayBuffer[M]
+  private[this] val preparedArguments: ArrayBuffer[M] = new ArrayBuffer[M]
 
   /**
    * Prepare a partition for a single call to compute.
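The var-to-val change is purely about the reference: the ArrayBuffer is mutated in place and never reassigned, so val is the idiomatic binding and makes accidental reassignment a compile error. For illustration:

import scala.collection.mutable.ArrayBuffer

val prepared = new ArrayBuffer[Int]   // val: the binding is fixed...
prepared += 1                         // ...but the buffer's contents are still mutable
// prepared = new ArrayBuffer[Int]    // would not compile: reassignment to val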
2 changes: 1 addition & 1 deletion core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -1666,7 +1666,7 @@ abstract class RDD[T: ClassTag](
     import Utils.bytesToString
 
     val persistence = if (storageLevel != StorageLevel.NONE) storageLevel.description else ""
-    val storageInfo = rdd.context.getRDDStorageInfo.filter(_.id == rdd.id).map(info =>
+    val storageInfo = rdd.context.getRDDStorageInfo(_.id == rdd.id).map(info =>
       " CachedPartitions: %d; MemorySize: %s; ExternalBlockStoreSize: %s; DiskSize: %s".format(
         info.numCachedPartitions, bytesToString(info.memSize),
         bytesToString(info.externalBlockStoreSize), bytesToString(info.diskSize)))
core/src/main/scala/org/apache/spark/scheduler/DAGScheduler.scala
@@ -1101,7 +1101,6 @@ class DAGScheduler(
             s" ${task.stageAttemptId} and there is a more recent attempt for that stage " +
             s"(attempt ID ${failedStage.latestInfo.attemptId}) running")
         } else {
-
           // It is likely that we receive multiple FetchFailed for a single stage (because we have
           // multiple tasks running concurrently on different executors). In that case, it is
           // possible the fetch failure has already been handled by the scheduler.
@@ -1117,6 +1116,11 @@ class DAGScheduler(
           if (disallowStageRetryForTest) {
             abortStage(failedStage, "Fetch failure will not retry stage due to testing config",
               None)
+          } else if (failedStage.failedOnFetchAndShouldAbort(task.stageAttemptId)) {
+            abortStage(failedStage, s"$failedStage (${failedStage.name}) " +
+              s"has failed the maximum allowable number of " +
+              s"times: ${Stage.MAX_CONSECUTIVE_FETCH_FAILURES}. " +
+              s"Most recent failure reason: ${failureMessage}", None)
           } else if (failedStages.isEmpty) {
             // Don't schedule an event to resubmit failed stages if failed isn't empty, because
             // in that case the event will already have been scheduled.
@@ -1240,10 +1244,17 @@ class DAGScheduler(
     if (errorMessage.isEmpty) {
       logInfo("%s (%s) finished in %s s".format(stage, stage.name, serviceTime))
       stage.latestInfo.completionTime = Some(clock.getTimeMillis())
+
+      // Clear failure count for this stage, now that it's succeeded.
+      // We only limit consecutive failures of stage attempts, so that if a stage is
+      // re-used many times in a long-running job, unrelated failures don't eventually cause the
+      // stage to be aborted.
+      stage.clearFailures()
     } else {
       stage.latestInfo.stageFailed(errorMessage.get)
       logInfo("%s (%s) failed in %s s".format(stage, stage.name, serviceTime))
     }
+
     outputCommitCoordinator.stageEnd(stage.id)
     listenerBus.post(SparkListenerStageCompleted(stage.latestInfo))
     runningStages -= stage
30 changes: 29 additions & 1 deletion core/src/main/scala/org/apache/spark/scheduler/Stage.scala
@@ -46,7 +46,7 @@ import org.apache.spark.util.CallSite
  * be updated for each attempt.
  *
  */
-private[spark] abstract class Stage(
+private[scheduler] abstract class Stage(
     val id: Int,
     val rdd: RDD[_],
     val numTasks: Int,
@@ -92,6 +92,29 @@ private[spark] abstract class Stage(
    */
   private var _latestInfo: StageInfo = StageInfo.fromStage(this, nextAttemptId)
 
+  /**
+   * Set of stage attempt IDs that have failed with a FetchFailure. We keep track of these
+   * failures in order to avoid endless retries if a stage keeps failing with a FetchFailure.
+   * We keep track of each attempt ID that has failed to avoid recording duplicate failures if
+   * multiple tasks from the same stage attempt fail (SPARK-5945).
+   */
+  private val fetchFailedAttemptIds = new HashSet[Int]
+
+  private[scheduler] def clearFailures() : Unit = {
+    fetchFailedAttemptIds.clear()
+  }
+
+  /**
+   * Check whether we should abort the failedStage due to multiple consecutive fetch failures.
+   *
+   * This method updates the running set of failed stage attempts and returns
+   * true if the number of failures exceeds the allowable number of failures.
+   */
+  private[scheduler] def failedOnFetchAndShouldAbort(stageAttemptId: Int): Boolean = {
+    fetchFailedAttemptIds.add(stageAttemptId)
+    fetchFailedAttemptIds.size >= Stage.MAX_CONSECUTIVE_FETCH_FAILURES
+  }
+
   /** Creates a new attempt for this stage by creating a new StageInfo with a new attempt ID. */
   def makeNewStageAttempt(
       numPartitionsToCompute: Int,
@@ -110,3 +133,8 @@ private[spark] abstract class Stage(
     case _ => false
   }
 }
+
+private[scheduler] object Stage {
+  // The number of consecutive failures allowed before a stage is aborted
+  val MAX_CONSECUTIVE_FETCH_FAILURES = 4
+}
core/src/main/scala/org/apache/spark/storage/ShuffleBlockFetcherIterator.scala
@@ -260,10 +260,7 @@ final class ShuffleBlockFetcherIterator(
     fetchRequests ++= Utils.randomize(remoteRequests)
 
     // Send out initial requests for blocks, up to our maxBytesInFlight
-    while (fetchRequests.nonEmpty &&
-      (bytesInFlight == 0 || bytesInFlight + fetchRequests.front.size <= maxBytesInFlight)) {
-      sendRequest(fetchRequests.dequeue())
-    }
+    fetchUpToMaxBytes()
 
     val numFetches = remoteRequests.size - fetchRequests.size
     logInfo("Started " + numFetches + " remote fetches in" + Utils.getUsedTimeMs(startTime))
@@ -296,10 +293,7 @@
       case _ =>
     }
     // Send fetch requests up to maxBytesInFlight
-    while (fetchRequests.nonEmpty &&
-      (bytesInFlight == 0 || bytesInFlight + fetchRequests.front.size <= maxBytesInFlight)) {
-      sendRequest(fetchRequests.dequeue())
-    }
+    fetchUpToMaxBytes()
 
     result match {
       case FailureFetchResult(blockId, address, e) =>
@@ -315,6 +309,14 @@
       }
   }
 
+  private def fetchUpToMaxBytes(): Unit = {
+    // Send fetch requests up to maxBytesInFlight
+    while (fetchRequests.nonEmpty &&
+      (bytesInFlight == 0 || bytesInFlight + fetchRequests.front.size <= maxBytesInFlight)) {
+      sendRequest(fetchRequests.dequeue())
+    }
+  }
+
   private def throwFetchFailedException(blockId: BlockId, address: BlockManagerId, e: Throwable) = {
     blockId match {
       case ShuffleBlockId(shufId, mapId, reduceId) =>
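The extracted fetchUpToMaxBytes keeps the throttling invariant both call sites relied on: dispatch queued requests while in-flight bytes stay under maxBytesInFlight, and always let one request through when nothing is in flight so a single oversized block cannot stall the iterator. A self-contained sketch of that loop, with FetchRequest simplified to just a size:

import scala.collection.mutable.Queue

object FetchThrottle {
  final case class FetchRequest(size: Long)  // simplified: just the payload size

  var bytesInFlight = 0L
  val maxBytesInFlight = 64L
  val fetchRequests = Queue(FetchRequest(10), FetchRequest(50), FetchRequest(60))

  def sendRequest(req: FetchRequest): Unit = {
    bytesInFlight += req.size
    println(s"sent ${req.size}B, now in flight: ${bytesInFlight}B")
  }

  // Same loop as the method above: stop before the cap is exceeded, but let a
  // lone request through when nothing is in flight.
  def fetchUpToMaxBytes(): Unit = {
    while (fetchRequests.nonEmpty &&
      (bytesInFlight == 0 || bytesInFlight + fetchRequests.front.size <= maxBytesInFlight)) {
      sendRequest(fetchRequests.dequeue())
    }
  }

  def main(args: Array[String]): Unit = {
    fetchUpToMaxBytes()  // sends 10B and 50B; the 60B request waits until bytes are released
  }
}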
core/src/test/java/org/apache/spark/shuffle/unsafe/UnsafeShuffleWriterSuite.java
@@ -530,8 +530,9 @@ public void testPeakMemoryUsed() throws Exception {
       for (int i = 0; i < numRecordsPerPage * 10; i++) {
         writer.insertRecordIntoSorter(new Tuple2<Object, Object>(1, 1));
         newPeakMemory = writer.getPeakMemoryUsedBytes();
-        if (i % numRecordsPerPage == 0) {
-          // We allocated a new page for this record, so peak memory should change
+        if (i % numRecordsPerPage == 0 && i != 0) {
+          // The first page is allocated in the constructor; another page will be allocated
+          // after every numRecordsPerPage records (peak memory should change).
           assertEquals(previousPeakMemory + pageSizeBytes, newPeakMemory);
         } else {
           assertEquals(previousPeakMemory, newPeakMemory);