Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,7 @@ function connectRDDs(fromRDDId, toRDDId, edgesContainer, svgContainer) {
* Replace `\n` with `<br/>`
*/
function replaceLineBreak(str) {
return str.replace("\\n", "<br/>");
return str.replace(/\\n/g, "<br/>");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is the reason

}

/* (Job page only) Helper function to add tooltips for RDDs. */
Expand Down
9 changes: 9 additions & 0 deletions core/src/main/scala/org/apache/spark/rdd/RDD.scala
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,15 @@ abstract class RDD[T: ClassTag](
this
}

/** Extra information about the RDD */
@transient var extraInfo: Option[String] = None

/**
 * Assign extraInfo to this RDD.
 *
 * The argument is wrapped with `Option(...)`, so passing `null` stores `None`
 * rather than `Some(null)`.
 *
 * @param _extraInfo the text to store in `extraInfo`
 * @return this RDD, so the call can be chained
 */
def setExtraInfo(_extraInfo: String): this.type = {
extraInfo = Option(_extraInfo)
this
}

/**
* Mark this RDD for persisting using the specified level.
*
Expand Down
5 changes: 3 additions & 2 deletions core/src/main/scala/org/apache/spark/storage/RDDInfo.scala
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ class RDDInfo(
var storageLevel: StorageLevel,
val parentIds: Seq[Int],
val callSite: String = "",
val scope: Option[RDDOperationScope] = None)
val scope: Option[RDDOperationScope] = None,
val extraInfo: Option[String] = None)
extends Ordered[RDDInfo] {

var numCachedPartitions = 0
Expand Down Expand Up @@ -68,6 +69,6 @@ private[spark] object RDDInfo {
rdd.creationSite.shortForm
}
new RDDInfo(rdd.id, rddName, rdd.partitions.length,
rdd.getStorageLevel, parentIds, callSite, rdd.scope)
rdd.getStorageLevel, parentIds, callSite, rdd.scope, rdd.extraInfo)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ private[spark] case class RDDOperationGraph(
rootCluster: RDDOperationCluster)

/** A node in an RDDOperationGraph. This represents an RDD. */
private[spark] case class RDDOperationNode(id: Int, name: String, cached: Boolean, callsite: String)
private[spark] case class RDDOperationNode(id: Int, name: String, cached: Boolean, callsite: String,
extraInfo: Option[String] = None)

/**
* A directed edge connecting two nodes in an RDDOperationGraph.
Expand Down Expand Up @@ -143,7 +144,7 @@ private[spark] object RDDOperationGraph extends Logging {

// TODO: differentiate between the intention to cache an RDD and whether it's actually cached
val node = nodes.getOrElseUpdate(rdd.id, RDDOperationNode(
rdd.id, rdd.name, rdd.storageLevel != StorageLevel.NONE, rdd.callSite))
rdd.id, rdd.name, rdd.storageLevel != StorageLevel.NONE, rdd.callSite, rdd.extraInfo))
if (rdd.scope.isEmpty) {
// This RDD has no encompassing scope, so we put it directly in the root cluster
// This should happen only if an RDD is instantiated outside of a public RDD API
Expand Down Expand Up @@ -227,7 +228,10 @@ private[spark] object RDDOperationGraph extends Logging {
} else {
""
}
val label = s"${node.name} [${node.id}]$isCached\n${node.callsite}"
val extraInfo = node.extraInfo.getOrElse("")

val label = s"${node.name} [${node.id}]$isCached\n${node.callsite}\n${extraInfo}"

s"""${node.id} [label="${StringEscapeUtils.escapeJava(label)}"]"""
}

Expand Down
5 changes: 4 additions & 1 deletion core/src/main/scala/org/apache/spark/util/JsonProtocol.scala
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,7 @@ private[spark] object JsonProtocol {
("Name" -> rddInfo.name) ~
("Scope" -> rddInfo.scope.map(_.toJson)) ~
("Callsite" -> rddInfo.callSite) ~
("Extra Info" -> rddInfo.extraInfo) ~
("Parent IDs" -> parentIds) ~
("Storage Level" -> storageLevel) ~
("Number of Partitions" -> rddInfo.numPartitions) ~
Expand Down Expand Up @@ -1030,6 +1031,7 @@ private[spark] object JsonProtocol {
.map(_.extract[String])
.map(RDDOperationScope.fromJson)
val callsite = jsonOption(json \ "Callsite").map(_.extract[String]).getOrElse("")
val extrainfo = jsonOption(json \ "Extra Info").map(_.extract[String])
val parentIds = jsonOption(json \ "Parent IDs")
.map { l => l.extract[List[JValue]].map(_.extract[Int]) }
.getOrElse(Seq.empty)
Expand All @@ -1039,7 +1041,8 @@ private[spark] object JsonProtocol {
val memSize = (json \ "Memory Size").extract[Long]
val diskSize = (json \ "Disk Size").extract[Long]

val rddInfo = new RDDInfo(rddId, name, numPartitions, storageLevel, parentIds, callsite, scope)
val rddInfo =
new RDDInfo(rddId, name, numPartitions, storageLevel, parentIds, callsite, scope, extrainfo)
rddInfo.numCachedPartitions = numCachedPartitions
rddInfo.memSize = memSize
rddInfo.diskSize = diskSize
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ class JsonProtocolSuite extends SparkFunSuite {
val logUrlMap = Map("stderr" -> "mystderr", "stdout" -> "mystdout").toMap
val attributes = Map("ContainerId" -> "ct1", "User" -> "spark").toMap
testRDDInfo(makeRddInfo(2, 3, 4, 5L, 6L))
testRDDInfo(makeRddInfo(2, 3, 4, 5L, 6L, Some("Extra Info")))
testStageInfo(makeStageInfo(10, 20, 30, 40L, 50L))
testTaskInfo(makeTaskInfo(999L, 888, 55, 777L, false))
testTaskMetrics(makeTaskMetrics(
Expand Down Expand Up @@ -840,8 +841,10 @@ private[spark] object JsonProtocolSuite extends Assertions {
)
}

private def makeRddInfo(a: Int, b: Int, c: Int, d: Long, e: Long) = {
val r = new RDDInfo(a, "mayor", b, StorageLevel.MEMORY_AND_DISK, Seq(1, 4, 7), a.toString)
private def makeRddInfo(a: Int, b: Int, c: Int, d: Long, e: Long,
extraInfo: Option[String] = None) = {
val r = new RDDInfo(a, "mayor", b, StorageLevel.MEMORY_AND_DISK, Seq(1, 4, 7),
a.toString, None, extraInfo)
r.numCachedPartitions = c
r.memSize = d
r.diskSize = e
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -616,8 +616,13 @@ case class UnionExec(children: Seq[SparkPlan]) extends SparkPlan {
}
}

protected override def doExecute(): RDD[InternalRow] =
def addExtraInfo: RDD[InternalRow] => RDD[InternalRow] = {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's your plan to add this info for all Exec nodes? Add the function into SparkPlan?

Copy link
Contributor Author

@planga82 planga82 Aug 2, 2019

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Different nodes must implement different versions of addExtraInfo, because the structure of the nodes and the interesting information to show are different.
Of course, we could move this function to SparkPlan with a default no-op behavior, and override it in the exec nodes for which we want to add extra info. Do you think that would be better?

_.setExtraInfo(s"Union(${output.mkString(", ")})")
}

// Executes every child plan, unions the resulting RDDs through the SparkContext,
// and passes the union through addExtraInfo so the returned RDD carries the
// "Union(...)" extra-info label built from this node's output.
protected override def doExecute(): RDD[InternalRow] = addExtraInfo {
sparkContext.union(children.map(_.execute()))
}
}

/**
Expand Down