Skip to content
30 changes: 25 additions & 5 deletions core/src/main/scala/org/apache/spark/rdd/RDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -1280,6 +1280,19 @@ abstract class RDD[T: ClassTag](

/** A description of this RDD and its recursive dependencies for debugging. */
def toDebugString: String = {
// Get a debug description of an rdd without its children
def debugSelf (rdd: RDD[_]): Seq[String] = {
import Utils.bytesToString

val persistence = storageLevel.description
val storageInfo = rdd.context.getRDDStorageInfo.filter(_.id == rdd.id).map(info =>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hey on this one, this is actually an extremely operation... I wonder if maybe for now it's better to not put this in there and only put the storage level.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what you mean - do you mean "an extremely costly operation"?

Assuming that to be the case, two comments::

  • I though about attaching flags to the function so one could specify the type of debug information desired; I think that makes the function too complex, but I'm hardly firm in that idea.
  • This whole function is specifically to help a developer with debugging. I don't think having it be costly is all that bad.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah sorry, yeah I mean this very costly. I'd rather not do this in a debug function - because people will do things like print debug statements inside of loops. In that case the debugging will significantly alter the performance of their application. There is a separate JIRA to make this function faster (it's a function also used in the UI), but until that's fixed I'd rather not call it here:

https://issues.apache.org/jira/browse/SPARK-2316

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW - we can create a JIRA to add this back once SPARK-2316 is fixed if you'd like.

" CachedPartitions: %d; MemorySize: %s; TachyonSize: %s; DiskSize: %s".format(
info.numCachedPartitions, bytesToString(info.memSize),
bytesToString(info.tachyonSize), bytesToString(info.diskSize)))

s"$rdd [$persistence]" +: storageInfo
}

// Apply a different rule to the last child
def debugChildren(rdd: RDD[_], prefix: String): Seq[String] = {
val len = rdd.dependencies.length
Expand All @@ -1305,7 +1318,11 @@ abstract class RDD[T: ClassTag](
val partitionStr = "(" + rdd.partitions.size + ")"
val leftOffset = (partitionStr.length - 1) / 2
val nextPrefix = (" " * leftOffset) + "|" + (" " * (partitionStr.length - leftOffset))
Seq(partitionStr + " " + rdd) ++ debugChildren(rdd, nextPrefix)

debugSelf(rdd).zipWithIndex.map{
case (desc: String, 0) => s"$partitionStr $desc"
case (desc: String, _) => s"$nextPrefix $desc"
} ++ debugChildren(rdd, nextPrefix)
}
def shuffleDebugString(rdd: RDD[_], prefix: String = "", isLastChild: Boolean): Seq[String] = {
val partitionStr = "(" + rdd.partitions.size + ")"
Expand All @@ -1315,17 +1332,20 @@ abstract class RDD[T: ClassTag](
thisPrefix
+ (if (isLastChild) " " else "| ")
+ (" " * leftOffset) + "|" + (" " * (partitionStr.length - leftOffset)))
Seq(thisPrefix + "+-" + partitionStr + " " + rdd) ++ debugChildren(rdd, nextPrefix)

debugSelf(rdd).zipWithIndex.map{
case (desc: String, 0) => s"$thisPrefix+-$partitionStr $desc"
case (desc: String, _) => s"$nextPrefix$desc"
} ++ debugChildren(rdd, nextPrefix)
}
def debugString(rdd: RDD[_],
prefix: String = "",
isShuffle: Boolean = true,
isLastChild: Boolean = false): Seq[String] = {
if (isShuffle) {
shuffleDebugString(rdd, prefix, isLastChild)
}
else {
Seq(prefix + rdd) ++ debugChildren(rdd, prefix)
} else {
debugSelf(rdd).map(prefix + _) ++ debugChildren(rdd, prefix)
}
}
firstDebugString(this).mkString("\n")
Expand Down