apache · erenavsarogullari · Apr 13, 2020 · Apr 13, 2020 · Apr 18, 2020 · Apr 20, 2020
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/ui/SparkPlanGraph.scala
@@ -153,7 +153,7 @@ object SparkPlanGraph {
  * @param name the name of this SparkPlan node
  * @param metrics metrics that this SparkPlan node will track
  */
-private[ui] class SparkPlanGraphNode(
+class SparkPlanGraphNode(
     val id: Long,
     val name: String,
     val desc: String,
@@ -193,7 +193,7 @@ private[ui] class SparkPlanGraphNode(
 /**
  * Represent a tree of SparkPlan for WholeStageCodegen.
  */
-private[ui] class SparkPlanGraphCluster(
+class SparkPlanGraphCluster(
     id: Long,
     name: String,
     desc: String,
@@ -229,7 +229,7 @@ private[ui] class SparkPlanGraphCluster(
  * Represent an edge in the SparkPlan tree. `fromId` is the child node id, and `toId` is the parent
  * node id.
  */
-private[ui] case class SparkPlanGraphEdge(fromId: Long, toId: Long) {
+case class SparkPlanGraphEdge(fromId: Long, toId: Long) {
 
   def makeDotEdge: String = s"""  $fromId->$toId;\n"""
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala b/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/SqlResource.scala
@@ -21,46 +21,55 @@ import java.util.Date
 import javax.ws.rs._
 import javax.ws.rs.core.MediaType
 
+import scala.util.{Failure, Success, Try}
+
 import org.apache.spark.JobExecutionStatus
-import org.apache.spark.sql.execution.ui.{SQLAppStatusStore, SQLExecutionUIData, SQLPlanMetric}
+import org.apache.spark.sql.execution.ui.{SparkPlanGraph, SparkPlanGraphCluster, SparkPlanGraphNode, SQLAppStatusStore, SQLExecutionUIData}
 import org.apache.spark.status.api.v1.{BaseAppResource, NotFoundException}
 
 @Produces(Array(MediaType.APPLICATION_JSON))
 private[v1] class SqlResource extends BaseAppResource {
 
+  val WHOLE_STAGE_CODEGEN = "WholeStageCodegen" // name prefix of WholeStageCodegen plan nodes
+
   @GET
   def sqlList(
-      @DefaultValue("false") @QueryParam("details") details: Boolean,
+      @DefaultValue("true") @QueryParam("details") details: Boolean,
+      @DefaultValue("true") @QueryParam("planDescription") planDescription: Boolean,
       @DefaultValue("0") @QueryParam("offset") offset: Int,
       @DefaultValue("20") @QueryParam("length") length: Int): Seq[ExecutionData] = {
     withUI { ui =>
       val sqlStore = new SQLAppStatusStore(ui.store.store)
-      sqlStore.executionsList(offset, length).map(prepareExecutionData(_, details))
+      // Each listed execution is rendered together with its own plan graph.
+      sqlStore.executionsList(offset, length).map(execution =>
+        prepareExecutionData(
+          execution, sqlStore.planGraph(execution.executionId), details, planDescription))
     }
   }
 
   @GET
   @Path("{executionId:\\d+}")
   def sql(
       @PathParam("executionId") execId: Long,
-      @DefaultValue("false") @QueryParam("details") details: Boolean): ExecutionData = {
+      @DefaultValue("true") @QueryParam("details") details: Boolean,
+      @DefaultValue("true") @QueryParam("planDescription")
+      planDescription: Boolean): ExecutionData = {
     withUI { ui =>
       val sqlStore = new SQLAppStatusStore(ui.store.store)
+      // Read the plan graph only for an existing execution; unknown ids get NotFoundException.
       sqlStore
         .execution(execId)
-        .map(prepareExecutionData(_, details))
-        .getOrElse(throw new NotFoundException("unknown id: " + execId))
+        .map(e => prepareExecutionData(e, sqlStore.planGraph(execId), details, planDescription))
+        .getOrElse(throw new NotFoundException("unknown query execution id: " + execId))
     }
   }
 
-  private def printableMetrics(
-      metrics: Seq[SQLPlanMetric],
-      metricValues: Map[Long, String]): Seq[Metrics] = {
-    metrics.map(metric =>
-      Metrics(metric.name, metricValues.get(metric.accumulatorId).getOrElse("")))
-  }
+  private def prepareExecutionData(
+    exec: SQLExecutionUIData,
+    graph: SparkPlanGraph,
+    details: Boolean,
+    planDescription: Boolean): ExecutionData = {
 
-  private def prepareExecutionData(exec: SQLExecutionUIData, details: Boolean): ExecutionData = {
     var running = Seq[Int]()
     var completed = Seq[Int]()
     var failed = Seq[Int]()
@@ -84,18 +93,65 @@ private[v1] class SqlResource extends BaseAppResource {
     }
 
     val duration = exec.completionTime.getOrElse(new Date()).getTime - exec.submissionTime
-    val planDetails = if (details) exec.physicalPlanDescription else ""
-    val metrics = if (details) printableMetrics(exec.metrics, exec.metricValues) else Seq.empty
+    val planDetails = if (planDescription) exec.physicalPlanDescription else ""
+    val nodes = if (details) printableMetrics(graph.allNodes, exec.metricValues) else Seq.empty
+    val edges = if (details) graph.edges else Seq.empty
+
     new ExecutionData(
       exec.executionId,
       status,
       exec.description,
       planDetails,
-      metrics,
       new Date(exec.submissionTime),
       duration,
       running,
       completed,
-      failed)
+      failed,
+      nodes,
+      edges)
   }
+
+  /**
+   * Builds the REST view of every plan graph node: its trimmed name, the id of the
+   * WholeStageCodegen cluster that contains it (if any) and each of its metrics that has an
+   * entry in `metricValues`. The returned nodes are ordered by descending node id.
+   */
+  private def printableMetrics(
+      allNodes: Seq[SparkPlanGraphNode],
+      metricValues: Map[Long, String]): Seq[Node] = {
+    val wscgIdByNodeId = getNodeIdAndWSCGIdMap(allNodes)
+    val restNodes = allNodes.map { planNode =>
+      // Metrics without a recorded value are skipped; a single leading newline is dropped.
+      val reportedMetrics = planNode.metrics.flatMap { planMetric =>
+        val metricName = planMetric.name.trim
+        metricValues.get(planMetric.accumulatorId)
+          .map(rawValue => Metric(metricName, rawValue.stripPrefix("\n")))
+      }
+      val wscgId = wscgIdByNodeId.getOrElse(planNode.id, None)
+      Node(nodeId = planNode.id, nodeName = planNode.name.trim, wscgId, reportedMetrics)
+    }
+
+    restNodes.sortBy(_.nodeId).reverse
+  }
+
+  /**
+   * Maps the id of every node nested in a WholeStageCodegen cluster to the codegen id parsed
+   * from that cluster's name (None when the name carries no parseable id).
+   */
+  private def getNodeIdAndWSCGIdMap(allNodes: Seq[SparkPlanGraphNode]): Map[Long, Option[Long]] = {
+    allNodes.collect {
+      case cluster: SparkPlanGraphCluster if cluster.name.trim.startsWith(WHOLE_STAGE_CODEGEN) =>
+        val wscgId = getWholeStageCodegenId(cluster.name.trim)
+        cluster.nodes.map(_.id -> wscgId)
+    }.flatten.toMap
+  }
+
+  /**
+   * Parses the id out of a "WholeStageCodegen (<id>)" node name; None when it is not a number.
+   */
+  private def getWholeStageCodegenId(wscgNodeName: String): Option[Long] = {
+    val idStart = s"$WHOLE_STAGE_CODEGEN (".length
+    Try(wscgNodeName.substring(idStart, wscgNodeName.length - 1).toLong).toOption
+  }
+
 }
diff --git a/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/api.scala b/sql/core/src/main/scala/org/apache/spark/status/api/v1/sql/api.scala
@@ -18,16 +18,25 @@ package org.apache.spark.status.api.v1.sql
 
 import java.util.Date
 
+import org.apache.spark.sql.execution.ui.SparkPlanGraphEdge
+
 class ExecutionData private[spark] (
     val id: Long,
     val status: String,
     val description: String,
     val planDescription: String,
-    val metrics: Seq[Metrics],
     val submissionTime: Date,
     val duration: Long,
     val runningJobIds: Seq[Int],
     val successJobIds: Seq[Int],
-    val failedJobIds: Seq[Int])
+    val failedJobIds: Seq[Int],
+    val nodes: Seq[Node], // plan graph nodes and their metrics; empty when details are off
+    val edges: Seq[SparkPlanGraphEdge]) // plan graph edges; empty when details are off
+
+case class Node private[spark](
+    nodeId: Long, // id of the plan graph node
+    nodeName: String, // trimmed name of the plan graph node
+    wholeStageCodegenId: Option[Long] = None, // enclosing WholeStageCodegen id, if any
+    metrics: Seq[Metric]) // metrics of this node that have a recorded value
 
-case class Metrics private[spark] (metricName: String, metricValue: String)
+case class Metric private[spark] (name: String, value: String) // trimmed metric name and its value